Home | History | Annotate | Line # | Download | only in gcc
tree-vect-generic.cc revision 1.1.1.1
      1 /* Lower vector operations to scalar operations.
      2    Copyright (C) 2004-2022 Free Software Foundation, Inc.
      3 
      4 This file is part of GCC.
      5 
      6 GCC is free software; you can redistribute it and/or modify it
      7 under the terms of the GNU General Public License as published by the
      8 Free Software Foundation; either version 3, or (at your option) any
      9 later version.
     10 
     11 GCC is distributed in the hope that it will be useful, but WITHOUT
     12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
     13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     14 for more details.
     15 
     16 You should have received a copy of the GNU General Public License
     17 along with GCC; see the file COPYING3.  If not see
     18 <http://www.gnu.org/licenses/>.  */
     19 
     20 #include "config.h"
     21 #include "system.h"
     22 #include "coretypes.h"
     23 #include "backend.h"
     24 #include "rtl.h"
     25 #include "tree.h"
     26 #include "gimple.h"
     27 #include "tree-pass.h"
     28 #include "ssa.h"
     29 #include "expmed.h"
     30 #include "optabs-tree.h"
     31 #include "diagnostic.h"
     32 #include "fold-const.h"
     33 #include "stor-layout.h"
     34 #include "langhooks.h"
     35 #include "tree-eh.h"
     36 #include "gimple-iterator.h"
     37 #include "gimplify-me.h"
     38 #include "gimplify.h"
     39 #include "tree-cfg.h"
     40 #include "tree-vector-builder.h"
     41 #include "vec-perm-indices.h"
     42 #include "insn-config.h"
     43 #include "tree-ssa-dce.h"
     44 #include "gimple-fold.h"
     45 #include "gimple-match.h"
     46 #include "recog.h"		/* FIXME: for insn_data */
     47 
     48 
     49 /* Build a ternary operation and gimplify it.  Emit code before GSI.
     50    Return the gimple_val holding the result.  */
     51 
     52 static tree
     53 gimplify_build3 (gimple_stmt_iterator *gsi, enum tree_code code,
     54 		 tree type, tree a, tree b, tree c)
     55 {
     56   location_t loc = gimple_location (gsi_stmt (*gsi));
     57   gimple_seq stmts = NULL;
     58   tree ret = gimple_build (&stmts, loc, code, type, a, b, c);
     59   gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
     60   return ret;
     61 }
     62 
     63 /* Build a binary operation and gimplify it.  Emit code before GSI.
     64    Return the gimple_val holding the result.  */
     65 
     66 static tree
     67 gimplify_build2 (gimple_stmt_iterator *gsi, enum tree_code code,
     68 		 tree type, tree a, tree b)
     69 {
     70   location_t loc = gimple_location (gsi_stmt (*gsi));
     71   gimple_seq stmts = NULL;
     72   tree ret = gimple_build (&stmts, loc, code, type, a, b);
     73   gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
     74   return ret;
     75 }
     76 
     77 /* Build a unary operation and gimplify it.  Emit code before GSI.
     78    Return the gimple_val holding the result.  */
     79 
     80 static tree
     81 gimplify_build1 (gimple_stmt_iterator *gsi, enum tree_code code, tree type,
     82 		 tree a)
     83 {
     84   location_t loc = gimple_location (gsi_stmt (*gsi));
     85   gimple_seq stmts = NULL;
     86   tree ret = gimple_build (&stmts, loc, code, type, a);
     87   gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
     88   return ret;
     89 }
     90 
     91 
     92 static void expand_vector_operations_1 (gimple_stmt_iterator *, bitmap);
     93 
     94 /* Return the number of elements in a vector type TYPE that we have
     95    already decided needs to be expanded piecewise.  We don't support
     96    this kind of expansion for variable-length vectors, since we should
     97    always check for target support before introducing uses of those.  */
     98 static unsigned int
     99 nunits_for_known_piecewise_op (const_tree type)
    100 {
    101   return TYPE_VECTOR_SUBPARTS (type).to_constant ();
    102 }
    103 
    104 /* Return true if TYPE1 has more elements than TYPE2, where either
    105    type may be a vector or a scalar.  */
    106 
    107 static inline bool
    108 subparts_gt (tree type1, tree type2)
    109 {
    110   poly_uint64 n1 = VECTOR_TYPE_P (type1) ? TYPE_VECTOR_SUBPARTS (type1) : 1;
    111   poly_uint64 n2 = VECTOR_TYPE_P (type2) ? TYPE_VECTOR_SUBPARTS (type2) : 1;
    112   return known_gt (n1, n2);
    113 }
    114 
/* Build a constant of type TYPE, made of VALUE's bits replicated
   every WIDTH bits to fit TYPE's precision.  Used by the word-parallel
   add/sub/negate expanders below to build masks such as 0x7f7f7f7f.  */
static tree
build_replicated_const (tree type, unsigned int width, HOST_WIDE_INT value)
{
  /* Number of HOST_WIDE_INT chunks needed to cover TYPE's precision,
     rounded up.  */
  int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
    / HOST_BITS_PER_WIDE_INT;
  unsigned HOST_WIDE_INT low, mask;
  HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
  int i;

  gcc_assert (n && n <= WIDE_INT_MAX_ELTS);

  if (width == HOST_BITS_PER_WIDE_INT)
    low = value;
  else
    {
      /* Replicate the low WIDTH bits of VALUE across one host word:
	 ~0 / mask is the pattern 0x…010101 with a 1 every WIDTH bits,
	 so multiplying by (value & mask) copies VALUE into each slot.  */
      mask = ((HOST_WIDE_INT)1 << width) - 1;
      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
    }

  /* All chunks carry the same replicated pattern.  */
  for (i = 0; i < n; i++)
    a[i] = low;

  gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
  return wide_int_to_tree
    (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
}
    143 
    144 static GTY(()) tree vector_inner_type;
    145 static GTY(()) tree vector_last_type;
    146 static GTY(()) int vector_last_nunits;
    147 
    148 /* Return a suitable vector types made of SUBPARTS units each of mode
    149    "word_mode" (the global variable).  */
    150 static tree
    151 build_word_mode_vector_type (int nunits)
    152 {
    153   if (!vector_inner_type)
    154     vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
    155   else if (vector_last_nunits == nunits)
    156     {
    157       gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
    158       return vector_last_type;
    159     }
    160 
    161   vector_last_nunits = nunits;
    162   vector_last_type = build_vector_type (vector_inner_type, nunits);
    163   return vector_last_type;
    164 }
    165 
    166 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
    167 			      tree, tree, tree, tree, tree, enum tree_code,
    168 			      tree);
    169 
/* Extract the vector element of type TYPE at BITPOS with BITSIZE from T
   and return it as an SSA name or constant, emitting any needed
   statements before GSI.  */

tree
tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
		  tree t, tree bitsize, tree bitpos)
{
  /* We're using the resimplify API and maybe_push_res_to_seq to
     simplify the BIT_FIELD_REF but restrict the simplification to
     a single stmt while at the same time following SSA edges for
     simplification with already emitted CTORs.  */
  gimple_match_op opr;
  opr.set_op (BIT_FIELD_REF, type, t, bitsize, bitpos);
  opr.resimplify (NULL, follow_all_ssa_edges);
  gimple_seq stmts = NULL;
  tree res = maybe_push_res_to_seq (&opr, &stmts);
  if (!res)
    {
      /* This can happen if SSA_NAME_OCCURS_IN_ABNORMAL_PHI are
	 used.  Build BIT_FIELD_REF manually otherwise.  */
      t = build3 (BIT_FIELD_REF, type, t, bitsize, bitpos);
      res = make_ssa_name (type);
      gimple *g = gimple_build_assign (res, t);
      gsi_insert_before (gsi, g, GSI_SAME_STMT);
      return res;
    }
  /* The simplified form may have produced a sequence of defining
     statements; insert them all before GSI.  */
  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
  return res;
}
    199 
    200 static tree
    201 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
    202 	 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
    203 	 enum tree_code code, tree type ATTRIBUTE_UNUSED)
    204 {
    205   tree rhs_type = inner_type;
    206 
    207   /* For ABSU_EXPR, use the signed type for the rhs if the rhs was signed. */
    208   if (code == ABSU_EXPR
    209       && ANY_INTEGRAL_TYPE_P (TREE_TYPE (a))
    210       && !TYPE_UNSIGNED (TREE_TYPE (a)))
    211     rhs_type = signed_type_for (rhs_type);
    212 
    213   a = tree_vec_extract (gsi, rhs_type, a, bitsize, bitpos);
    214   return gimplify_build1 (gsi, code, inner_type, a);
    215 }
    216 
    217 static tree
    218 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
    219 	  tree bitpos, tree bitsize, enum tree_code code,
    220 	  tree type ATTRIBUTE_UNUSED)
    221 {
    222   if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
    223     a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
    224   if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
    225     b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
    226   return gimplify_build2 (gsi, code, inner_type, a, b);
    227 }
    228 
    229 /* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0
    230 
    231    INNER_TYPE is the type of A and B elements
    232 
    233    returned expression is of signed integer type with the
    234    size equal to the size of INNER_TYPE.  */
    235 static tree
    236 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
    237 	    tree bitpos, tree bitsize, enum tree_code code, tree type)
    238 {
    239   tree stype = TREE_TYPE (type);
    240   tree cst_false = build_zero_cst (stype);
    241   tree cst_true = build_all_ones_cst (stype);
    242   tree cmp;
    243 
    244   a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
    245   b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
    246 
    247   cmp = build2 (code, boolean_type_node, a, b);
    248   return gimplify_build3 (gsi, COND_EXPR, stype, cmp, cst_true, cst_false);
    249 }
    250 
/* Expand vector addition to scalars.  This does bit twiddling
   in order to increase parallelism:

   a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
           (a ^ b) & 0x80808080

   a - b =  (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
            (a ^ ~b) & 0x80808080

   -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)

   This optimization should be done only if 4 vector items or more
   fit into a word.

   WORD_TYPE is the integer type covering one word-sized chunk; A and B
   are the vector operands; CODE is PLUS_EXPR or MINUS_EXPR.  BITPOS and
   BITSIZE select the word-sized chunk to process (despite the
   ATTRIBUTE_UNUSED markers they are used below).  */
static tree
do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
	       tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
	       enum tree_code code, tree type ATTRIBUTE_UNUSED)
{
  unsigned int width = vector_element_bits (TREE_TYPE (a));
  tree inner_type = TREE_TYPE (TREE_TYPE (a));
  unsigned HOST_WIDE_INT max;
  tree low_bits, high_bits, a_low, b_low, result_low, signs;

  /* low_bits replicates the element mask with the top (sign) bit clear
     (e.g. 0x7f7f7f7f for 8-bit elements); high_bits replicates just the
     sign bit of each element (e.g. 0x80808080).  */
  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, width, max >> 1);
  high_bits = build_replicated_const (word_type, width, max & ~(max >> 1));

  a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  /* The sign-bit correction term differs between + and -: for MINUS we
     use a ^ ~b and bias A's sign bits up instead of masking them off.  */
  signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  if (code == PLUS_EXPR)
    a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
  else
    {
      a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
      signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
    }

  /* Combine the carry-free low result with the recomputed sign bits.  */
  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}
    295 
    296 static tree
    297 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
    298 	   tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
    299 	   tree bitsize ATTRIBUTE_UNUSED,
    300 	   enum tree_code code ATTRIBUTE_UNUSED,
    301 	   tree type ATTRIBUTE_UNUSED)
    302 {
    303   unsigned int width = vector_element_bits (TREE_TYPE (b));
    304   tree inner_type = TREE_TYPE (TREE_TYPE (b));
    305   HOST_WIDE_INT max;
    306   tree low_bits, high_bits, b_low, result_low, signs;
    307 
    308   max = GET_MODE_MASK (TYPE_MODE (inner_type));
    309   low_bits = build_replicated_const (word_type, width, max >> 1);
    310   high_bits = build_replicated_const (word_type, width, max & ~(max >> 1));
    311 
    312   b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
    313 
    314   b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
    315   signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
    316   signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
    317   result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
    318   return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
    319 }
    320 
/* Expand a vector operation to scalars, by using many operations
   whose type is the vector type's inner type.

   F is the per-element worker; TYPE the vector type being decomposed;
   INNER_TYPE the type each piece is computed in; A and B the operands;
   CODE the operation.  PARALLEL_P distinguishes word-parallel expansion
   (for the diagnostic only).  RET_TYPE, if given, overrides the type of
   the returned constructor.  Returns either a VECTOR_CST (when all
   pieces folded to constants) or a CONSTRUCTOR expression.  */
static tree
expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
			 tree type, tree inner_type,
			 tree a, tree b, enum tree_code code,
			 bool parallel_p, tree ret_type = NULL_TREE)
{
  vec<constructor_elt, va_gc> *v;
  tree part_width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = nunits_for_known_piecewise_op (type);
  /* Number of vector elements covered by one piece of INNER_TYPE.  */
  int delta = tree_to_uhwi (part_width) / vector_element_bits (type);
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  if (nunits == 1
      || warning_suppressed_p (gsi_stmt (*gsi),
			       OPT_Wvector_operation_performance))
    /* Do not diagnose decomposing single element vectors or when
       decomposing vectorizer produced operations.  */
    ;
  else if (ret_type || !parallel_p)
    warning_at (loc, OPT_Wvector_operation_performance,
		"vector operation will be expanded piecewise");
  else
    warning_at (loc, OPT_Wvector_operation_performance,
		"vector operation will be expanded in parallel");

  if (!ret_type)
    ret_type = type;
  vec_alloc (v, (nunits + delta - 1) / delta);
  /* Track whether every piece is a constant, so we can build a
     VECTOR_CST instead of a CONSTRUCTOR below.  */
  bool constant_p = true;
  for (i = 0; i < nunits;
       i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
    {
      tree result = f (gsi, inner_type, a, b, index, part_width, code,
		       ret_type);
      if (!CONSTANT_CLASS_P (result))
	constant_p = false;
      constructor_elt ce = {NULL_TREE, result};
      v->quick_push (ce);
    }

  if (constant_p)
    return build_vector_from_ctor (ret_type, v);
  else
    return build_constructor (ret_type, v);
}
    370 
/* Expand a vector operation to scalars with the freedom to use
   a scalar integer type, or to use a different size for the items
   in the vector type.

   F is the per-chunk worker, TYPE the vector type, A and B the
   operands and CODE the operation; returns the expanded expression.  */
static tree
expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
			tree a, tree b, enum tree_code code)
{
  tree result, compute_type;
  int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  /* We have three strategies.  If the type is already correct, just do
     the operation an element at a time.  Else, if the vector is wider than
     one word, do it a word at a time; finally, if the vector is smaller
     than one word, do it as a scalar.  */
  if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
     return expand_vector_piecewise (gsi, f,
				     type, TREE_TYPE (type),
				     a, b, code, true);
  else if (n_words > 1)
    {
      /* Reinterpret the vector as N_WORDS word-mode elements and expand
	 one word at a time.  */
      tree word_type = build_word_mode_vector_type (n_words);
      result = expand_vector_piecewise (gsi, f,
				        word_type, TREE_TYPE (word_type),
					a, b, code, true);
      result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
                                         GSI_SAME_STMT);
    }
  else
    {
      /* Use a single scalar operation with a mode no wider than word_mode.  */
      if (!warning_suppressed_p (gsi_stmt (*gsi),
				 OPT_Wvector_operation_performance))
	warning_at (loc, OPT_Wvector_operation_performance,
		    "vector operation will be expanded with a "
		    "single scalar operation");
      scalar_int_mode mode
	= int_mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), 0).require ();
      compute_type = lang_hooks.types.type_for_mode (mode, 1);
      result = f (gsi, compute_type, a, b, bitsize_zero_node,
		  TYPE_SIZE (compute_type), code, type);
    }

  return result;
}
    416 
    417 /* Expand a vector operation to scalars; for integer types we can use
    418    special bit twiddling tricks to do the sums a word at a time, using
    419    function F_PARALLEL instead of F.  These tricks are done only if
    420    they can process at least four items, that is, only if the vector
    421    holds at least four items and if a word can hold four items.  */
    422 static tree
    423 expand_vector_addition (gimple_stmt_iterator *gsi,
    424 			elem_op_func f, elem_op_func f_parallel,
    425 			tree type, tree a, tree b, enum tree_code code)
    426 {
    427   int parts_per_word = BITS_PER_WORD / vector_element_bits (type);
    428 
    429   if (INTEGRAL_TYPE_P (TREE_TYPE (type))
    430       && parts_per_word >= 4
    431       && nunits_for_known_piecewise_op (type) >= 4)
    432     return expand_vector_parallel (gsi, f_parallel,
    433 				   type, a, b, code);
    434   else
    435     return expand_vector_piecewise (gsi, f,
    436 				    type, TREE_TYPE (type),
    437 				    a, b, code, false);
    438 }
    439 
    440 static bool
    441 expand_vector_condition (gimple_stmt_iterator *gsi, bitmap dce_ssa_names);
    442 
/* Try to expand vector comparison expression OP0 CODE OP1 by
   querying optab if the following expression:
	VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
   can be expanded.

   Returns NULL_TREE when the target can handle the comparison (or when
   every use is a VEC_COND_EXPR that was itself expanded), otherwise the
   piecewise-expanded replacement expression.  */
static tree
expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
			  tree op1, enum tree_code code,
			  bitmap dce_ssa_names)
{
  tree lhs = gimple_assign_lhs (gsi_stmt (*gsi));
  use_operand_p use_p;
  imm_use_iterator iterator;
  bool vec_cond_expr_only = true;

  /* As seen in PR95830, we should not expand comparisons that are only
     feeding a VEC_COND_EXPR statement.  */
  auto_vec<gimple *> uses;
  FOR_EACH_IMM_USE_FAST (use_p, iterator, lhs)
    {
      gimple *use = USE_STMT (use_p);
      if (is_gimple_debug (use))
	continue;
      /* Only count uses where LHS is exclusively the condition of the
	 VEC_COND_EXPR, not one of its value operands.  */
      if (is_gimple_assign (use)
	  && gimple_assign_rhs_code (use) == VEC_COND_EXPR
	  && gimple_assign_rhs1 (use) == lhs
	  && gimple_assign_rhs2 (use) != lhs
	  && gimple_assign_rhs3 (use) != lhs)
	uses.safe_push (use);
      else
	vec_cond_expr_only = false;
    }

  /* Expand the consuming VEC_COND_EXPRs first; if any of them cannot
     be expanded we must expand the comparison itself after all.  */
  if (vec_cond_expr_only)
    for (gimple *use : uses)
      {
	gimple_stmt_iterator it = gsi_for_stmt (use);
	if (!expand_vector_condition (&it, dce_ssa_names))
	  {
	    vec_cond_expr_only = false;
	    break;
	  }
      }

  if (!uses.is_empty () && vec_cond_expr_only)
    return NULL_TREE;

  tree t;
  if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code))
    {
      /* Special case for a scalar-mode boolean result vector (a bitmask
	 narrower than one bit per element's worth of bits): compare
	 element by element and insert each single-bit result into an
	 integer, then view-convert to TYPE.  */
      if (VECTOR_BOOLEAN_TYPE_P (type)
	  && SCALAR_INT_MODE_P (TYPE_MODE (type))
	  && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
		       TYPE_VECTOR_SUBPARTS (type)
		       * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
						(TREE_TYPE (type)))))
	{
	  tree inner_type = TREE_TYPE (TREE_TYPE (op0));
	  tree part_width = vector_element_bits_tree (TREE_TYPE (op0));
	  tree index = bitsize_int (0);
	  int nunits = nunits_for_known_piecewise_op (TREE_TYPE (op0));
	  int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (type));
	  tree ret_type = build_nonstandard_integer_type (prec, 1);
	  tree ret_inner_type = boolean_type_node;
	  int i;
	  location_t loc = gimple_location (gsi_stmt (*gsi));
	  t = build_zero_cst (ret_type);

	  if (TYPE_PRECISION (ret_inner_type) != 1)
	    ret_inner_type = build_nonstandard_integer_type (1, 1);
	  if (!warning_suppressed_p (gsi_stmt (*gsi),
				     OPT_Wvector_operation_performance))
	    warning_at (loc, OPT_Wvector_operation_performance,
			"vector operation will be expanded piecewise");
	  for (i = 0; i < nunits;
	       i++, index = int_const_binop (PLUS_EXPR, index, part_width))
	    {
	      tree a = tree_vec_extract (gsi, inner_type, op0, part_width,
					 index);
	      tree b = tree_vec_extract (gsi, inner_type, op1, part_width,
					 index);
	      tree result = gimplify_build2 (gsi, code, ret_inner_type, a, b);
	      t = gimplify_build3 (gsi, BIT_INSERT_EXPR, ret_type, t, result,
				   bitsize_int (i));
	    }
	  t = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
	}
      else
	t = expand_vector_piecewise (gsi, do_compare, type,
				     TREE_TYPE (TREE_TYPE (op0)), op0, op1,
				     code, false);
    }
  else
    t = NULL_TREE;

  return t;
}
    539 
    540 /* Helper function of expand_vector_divmod.  Gimplify a RSHIFT_EXPR in type
    541    of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
    542    the result if successful, otherwise return NULL_TREE.  */
    543 static tree
    544 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
    545 {
    546   optab op;
    547   unsigned int i, nunits = nunits_for_known_piecewise_op (type);
    548   bool scalar_shift = true;
    549 
    550   for (i = 1; i < nunits; i++)
    551     {
    552       if (shiftcnts[i] != shiftcnts[0])
    553 	scalar_shift = false;
    554     }
    555 
    556   if (scalar_shift && shiftcnts[0] == 0)
    557     return op0;
    558 
    559   if (scalar_shift)
    560     {
    561       op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
    562       if (op != unknown_optab
    563 	  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
    564 	return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
    565 				build_int_cst (NULL_TREE, shiftcnts[0]));
    566     }
    567 
    568   op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
    569   if (op != unknown_optab
    570       && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
    571     {
    572       tree_vector_builder vec (type, nunits, 1);
    573       for (i = 0; i < nunits; i++)
    574 	vec.quick_push (build_int_cst (TREE_TYPE (type), shiftcnts[i]));
    575       return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, vec.build ());
    576     }
    577 
    578   return NULL_TREE;
    579 }
    580 
    581 /* Try to expand integer vector division by constant using
    582    widening multiply, shifts and additions.  */
    583 static tree
    584 expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
    585 		      tree op1, enum tree_code code)
    586 {
    587   bool use_pow2 = true;
    588   bool has_vector_shift = true;
    589   bool use_abs_op1 = false;
    590   int mode = -1, this_mode;
    591   int pre_shift = -1, post_shift;
    592   unsigned int nunits = nunits_for_known_piecewise_op (type);
    593   int *shifts = XALLOCAVEC (int, nunits * 4);
    594   int *pre_shifts = shifts + nunits;
    595   int *post_shifts = pre_shifts + nunits;
    596   int *shift_temps = post_shifts + nunits;
    597   unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
    598   int prec = TYPE_PRECISION (TREE_TYPE (type));
    599   int dummy_int;
    600   unsigned int i;
    601   signop sign_p = TYPE_SIGN (TREE_TYPE (type));
    602   unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
    603   tree cur_op, mulcst, tem;
    604   optab op;
    605 
    606   if (prec > HOST_BITS_PER_WIDE_INT)
    607     return NULL_TREE;
    608 
    609   op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
    610   if (op == unknown_optab
    611       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    612     has_vector_shift = false;
    613 
    614   /* Analysis phase.  Determine if all op1 elements are either power
    615      of two and it is possible to expand it using shifts (or for remainder
    616      using masking).  Additionally compute the multiplicative constants
    617      and pre and post shifts if the division is to be expanded using
    618      widening or high part multiplication plus shifts.  */
    619   for (i = 0; i < nunits; i++)
    620     {
    621       tree cst = VECTOR_CST_ELT (op1, i);
    622       unsigned HOST_WIDE_INT ml;
    623 
    624       if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
    625 	return NULL_TREE;
    626       pre_shifts[i] = 0;
    627       post_shifts[i] = 0;
    628       mulc[i] = 0;
    629       if (use_pow2
    630 	  && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
    631 	use_pow2 = false;
    632       if (use_pow2)
    633 	{
    634 	  shifts[i] = tree_log2 (cst);
    635 	  if (shifts[i] != shifts[0]
    636 	      && code == TRUNC_DIV_EXPR
    637 	      && !has_vector_shift)
    638 	    use_pow2 = false;
    639 	}
    640       if (mode == -2)
    641 	continue;
    642       if (sign_p == UNSIGNED)
    643 	{
    644 	  unsigned HOST_WIDE_INT mh;
    645 	  unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;
    646 
    647 	  if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
    648 	    /* FIXME: Can transform this into op0 >= op1 ? 1 : 0.  */
    649 	    return NULL_TREE;
    650 
    651 	  if (d <= 1)
    652 	    {
    653 	      mode = -2;
    654 	      continue;
    655 	    }
    656 
    657 	  /* Find a suitable multiplier and right shift count
    658 	     instead of multiplying with D.  */
    659 	  mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
    660 
    661 	  /* If the suggested multiplier is more than SIZE bits, we can
    662 	     do better for even divisors, using an initial right shift.  */
    663 	  if ((mh != 0 && (d & 1) == 0)
    664 	      || (!has_vector_shift && pre_shift != -1))
    665 	    {
    666 	      if (has_vector_shift)
    667 		pre_shift = ctz_or_zero (d);
    668 	      else if (pre_shift == -1)
    669 		{
    670 		  unsigned int j;
    671 		  for (j = 0; j < nunits; j++)
    672 		    {
    673 		      tree cst2 = VECTOR_CST_ELT (op1, j);
    674 		      unsigned HOST_WIDE_INT d2;
    675 		      int this_pre_shift;
    676 
    677 		      if (!tree_fits_uhwi_p (cst2))
    678 			return NULL_TREE;
    679 		      d2 = tree_to_uhwi (cst2) & mask;
    680 		      if (d2 == 0)
    681 			return NULL_TREE;
    682 		      this_pre_shift = floor_log2 (d2 & -d2);
    683 		      if (pre_shift == -1 || this_pre_shift < pre_shift)
    684 			pre_shift = this_pre_shift;
    685 		    }
    686 		  if (i != 0 && pre_shift != 0)
    687 		    {
    688 		      /* Restart.  */
    689 		      i = -1U;
    690 		      mode = -1;
    691 		      continue;
    692 		    }
    693 		}
    694 	      if (pre_shift != 0)
    695 		{
    696 		  if ((d >> pre_shift) <= 1)
    697 		    {
    698 		      mode = -2;
    699 		      continue;
    700 		    }
    701 		  mh = choose_multiplier (d >> pre_shift, prec,
    702 					  prec - pre_shift,
    703 					  &ml, &post_shift, &dummy_int);
    704 		  gcc_assert (!mh);
    705 		  pre_shifts[i] = pre_shift;
    706 		}
    707 	    }
    708 	  if (!mh)
    709 	    this_mode = 0;
    710 	  else
    711 	    this_mode = 1;
    712 	}
    713       else
    714 	{
    715 	  HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
    716 	  unsigned HOST_WIDE_INT abs_d;
    717 
    718 	  if (d == -1)
    719 	    return NULL_TREE;
    720 
    721 	  /* Since d might be INT_MIN, we have to cast to
    722 	     unsigned HOST_WIDE_INT before negating to avoid
    723 	     undefined signed overflow.  */
    724 	  abs_d = (d >= 0
    725 		  ? (unsigned HOST_WIDE_INT) d
    726 		  : - (unsigned HOST_WIDE_INT) d);
    727 
    728 	  /* n rem d = n rem -d */
    729 	  if (code == TRUNC_MOD_EXPR && d < 0)
    730 	    {
    731 	      d = abs_d;
    732 	      use_abs_op1 = true;
    733 	    }
    734 	  if (abs_d == HOST_WIDE_INT_1U << (prec - 1))
    735 	    {
    736 	      /* This case is not handled correctly below.  */
    737 	      mode = -2;
    738 	      continue;
    739 	    }
    740 	  if (abs_d <= 1)
    741 	    {
    742 	      mode = -2;
    743 	      continue;
    744 	    }
    745 
    746 	  choose_multiplier (abs_d, prec, prec - 1, &ml,
    747 			     &post_shift, &dummy_int);
    748 	  if (ml >= HOST_WIDE_INT_1U << (prec - 1))
    749 	    {
    750 	      this_mode = 4 + (d < 0);
    751 	      ml |= HOST_WIDE_INT_M1U << (prec - 1);
    752 	    }
    753 	  else
    754 	    this_mode = 2 + (d < 0);
    755 	}
    756       mulc[i] = ml;
    757       post_shifts[i] = post_shift;
    758       if ((i && !has_vector_shift && post_shifts[0] != post_shift)
    759 	  || post_shift >= prec
    760 	  || pre_shifts[i] >= prec)
    761 	this_mode = -2;
    762 
    763       if (i == 0)
    764 	mode = this_mode;
    765       else if (mode != this_mode)
    766 	mode = -2;
    767     }
    768 
    769   if (use_pow2)
    770     {
    771       tree addend = NULL_TREE;
    772       if (sign_p == SIGNED)
    773 	{
    774 	  tree uns_type;
    775 
    776 	  /* Both division and remainder sequences need
    777 	     op0 < 0 ? mask : 0 computed.  It can be either computed as
    778 	     (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
    779 	     if none of the shifts is 0, or as the conditional.  */
    780 	  for (i = 0; i < nunits; i++)
    781 	    if (shifts[i] == 0)
    782 	      break;
    783 	  uns_type
    784 	    = build_vector_type (build_nonstandard_integer_type (prec, 1),
    785 				 nunits);
    786 	  if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
    787 	    {
    788 	      for (i = 0; i < nunits; i++)
    789 		shift_temps[i] = prec - 1;
    790 	      cur_op = add_rshift (gsi, type, op0, shift_temps);
    791 	      if (cur_op != NULL_TREE)
    792 		{
    793 		  cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
    794 					    uns_type, cur_op);
    795 		  for (i = 0; i < nunits; i++)
    796 		    shift_temps[i] = prec - shifts[i];
    797 		  cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
    798 		  if (cur_op != NULL_TREE)
    799 		    addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
    800 					      type, cur_op);
    801 		}
    802 	    }
    803 	  if (addend == NULL_TREE
    804 	      && expand_vec_cond_expr_p (type, type, LT_EXPR))
    805 	    {
    806 	      tree zero, cst, mask_type, mask;
    807 	      gimple *stmt, *cond;
    808 
    809 	      mask_type = truth_type_for (type);
    810 	      zero = build_zero_cst (type);
    811 	      mask = make_ssa_name (mask_type);
    812 	      cond = gimple_build_assign (mask, LT_EXPR, op0, zero);
    813 	      gsi_insert_before (gsi, cond, GSI_SAME_STMT);
    814 	      tree_vector_builder vec (type, nunits, 1);
    815 	      for (i = 0; i < nunits; i++)
    816 		vec.quick_push (build_int_cst (TREE_TYPE (type),
    817 					       (HOST_WIDE_INT_1U
    818 						<< shifts[i]) - 1));
    819 	      cst = vec.build ();
    820 	      addend = make_ssa_name (type);
    821 	      stmt
    822 		= gimple_build_assign (addend, VEC_COND_EXPR, mask, cst, zero);
    823 	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
    824 	    }
    825 	}
    826       if (code == TRUNC_DIV_EXPR)
    827 	{
    828 	  if (sign_p == UNSIGNED)
    829 	    {
    830 	      /* q = op0 >> shift;  */
    831 	      cur_op = add_rshift (gsi, type, op0, shifts);
    832 	      if (cur_op != NULL_TREE)
    833 		return cur_op;
    834 	    }
    835 	  else if (addend != NULL_TREE)
    836 	    {
    837 	      /* t1 = op0 + addend;
    838 		 q = t1 >> shift;  */
    839 	      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
    840 	      if (op != unknown_optab
    841 		  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
    842 		{
    843 		  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
    844 		  cur_op = add_rshift (gsi, type, cur_op, shifts);
    845 		  if (cur_op != NULL_TREE)
    846 		    return cur_op;
    847 		}
    848 	    }
    849 	}
    850       else
    851 	{
    852 	  tree mask;
    853 	  tree_vector_builder vec (type, nunits, 1);
    854 	  for (i = 0; i < nunits; i++)
    855 	    vec.quick_push (build_int_cst (TREE_TYPE (type),
    856 					   (HOST_WIDE_INT_1U
    857 					    << shifts[i]) - 1));
    858 	  mask = vec.build ();
    859 	  op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
    860 	  if (op != unknown_optab
    861 	      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
    862 	    {
    863 	      if (sign_p == UNSIGNED)
    864 		/* r = op0 & mask;  */
    865 		return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
    866 	      else if (addend != NULL_TREE)
    867 		{
    868 		  /* t1 = op0 + addend;
    869 		     t2 = t1 & mask;
    870 		     r = t2 - addend;  */
    871 		  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
    872 		  if (op != unknown_optab
    873 		      && optab_handler (op, TYPE_MODE (type))
    874 			 != CODE_FOR_nothing)
    875 		    {
    876 		      cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
    877 						addend);
    878 		      cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
    879 						cur_op, mask);
    880 		      op = optab_for_tree_code (MINUS_EXPR, type,
    881 						optab_default);
    882 		      if (op != unknown_optab
    883 			  && optab_handler (op, TYPE_MODE (type))
    884 			     != CODE_FOR_nothing)
    885 			return gimplify_build2 (gsi, MINUS_EXPR, type,
    886 						cur_op, addend);
    887 		    }
    888 		}
    889 	    }
    890 	}
    891     }
    892 
    893   if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    894     return NULL_TREE;
    895 
    896   if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
    897     return NULL_TREE;
    898 
    899   cur_op = op0;
    900 
    901   switch (mode)
    902     {
    903     case 0:
    904       gcc_assert (sign_p == UNSIGNED);
    905       /* t1 = oprnd0 >> pre_shift;
    906 	 t2 = t1 h* ml;
    907 	 q = t2 >> post_shift;  */
    908       cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
    909       if (cur_op == NULL_TREE)
    910 	return NULL_TREE;
    911       break;
    912     case 1:
    913       gcc_assert (sign_p == UNSIGNED);
    914       for (i = 0; i < nunits; i++)
    915 	{
    916 	  shift_temps[i] = 1;
    917 	  post_shifts[i]--;
    918 	}
    919       break;
    920     case 2:
    921     case 3:
    922     case 4:
    923     case 5:
    924       gcc_assert (sign_p == SIGNED);
    925       for (i = 0; i < nunits; i++)
    926 	shift_temps[i] = prec - 1;
    927       break;
    928     default:
    929       return NULL_TREE;
    930     }
    931 
    932   tree_vector_builder vec (type, nunits, 1);
    933   for (i = 0; i < nunits; i++)
    934     vec.quick_push (build_int_cst (TREE_TYPE (type), mulc[i]));
    935   mulcst = vec.build ();
    936 
    937   cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
    938 
    939   switch (mode)
    940     {
    941     case 0:
    942       /* t1 = oprnd0 >> pre_shift;
    943 	 t2 = t1 h* ml;
    944 	 q = t2 >> post_shift;  */
    945       cur_op = add_rshift (gsi, type, cur_op, post_shifts);
    946       break;
    947     case 1:
    948       /* t1 = oprnd0 h* ml;
    949 	 t2 = oprnd0 - t1;
    950 	 t3 = t2 >> 1;
    951 	 t4 = t1 + t3;
    952 	 q = t4 >> (post_shift - 1);  */
    953       op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
    954       if (op == unknown_optab
    955 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    956 	return NULL_TREE;
    957       tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
    958       tem = add_rshift (gsi, type, tem, shift_temps);
    959       op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
    960       if (op == unknown_optab
    961 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    962 	return NULL_TREE;
    963       tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
    964       cur_op = add_rshift (gsi, type, tem, post_shifts);
    965       if (cur_op == NULL_TREE)
    966 	return NULL_TREE;
    967       break;
    968     case 2:
    969     case 3:
    970     case 4:
    971     case 5:
    972       /* t1 = oprnd0 h* ml;
    973 	 t2 = t1; [ iff (mode & 2) != 0 ]
    974 	 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
    975 	 t3 = t2 >> post_shift;
    976 	 t4 = oprnd0 >> (prec - 1);
    977 	 q = t3 - t4; [ iff (mode & 1) == 0 ]
    978 	 q = t4 - t3; [ iff (mode & 1) != 0 ]  */
    979       if ((mode & 2) == 0)
    980 	{
    981 	  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
    982 	  if (op == unknown_optab
    983 	      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    984 	    return NULL_TREE;
    985 	  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
    986 	}
    987       cur_op = add_rshift (gsi, type, cur_op, post_shifts);
    988       if (cur_op == NULL_TREE)
    989 	return NULL_TREE;
    990       tem = add_rshift (gsi, type, op0, shift_temps);
    991       if (tem == NULL_TREE)
    992 	return NULL_TREE;
    993       op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
    994       if (op == unknown_optab
    995 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    996 	return NULL_TREE;
    997       if ((mode & 1) == 0)
    998 	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
    999       else
   1000 	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
   1001       break;
   1002     default:
   1003       gcc_unreachable ();
   1004     }
   1005 
   1006   if (code == TRUNC_DIV_EXPR)
   1007     return cur_op;
   1008 
   1009   /* We divided.  Now finish by:
   1010      t1 = q * oprnd1;
   1011      r = oprnd0 - t1;  */
   1012   op = optab_for_tree_code (MULT_EXPR, type, optab_default);
   1013   if (op == unknown_optab
   1014       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
   1015     return NULL_TREE;
   1016   if (use_abs_op1)
   1017     {
   1018       tree_vector_builder elts;
   1019       if (!elts.new_unary_operation (type, op1, false))
   1020 	return NULL_TREE;
   1021       unsigned int count = elts.encoded_nelts ();
   1022       for (unsigned int i = 0; i < count; ++i)
   1023 	{
   1024 	  tree elem1 = VECTOR_CST_ELT (op1, i);
   1025 
   1026 	  tree elt = const_unop (ABS_EXPR, TREE_TYPE (elem1), elem1);
   1027 	  if (elt == NULL_TREE)
   1028 	    return NULL_TREE;
   1029 	  elts.quick_push (elt);
   1030 	}
   1031       op1 = elts.build ();
   1032     }
   1033   tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
   1034   op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
   1035   if (op == unknown_optab
   1036       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
   1037     return NULL_TREE;
   1038   return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
   1039 }
   1040 
/* Expand a vector condition to scalars, by using many conditions
   on the vector's elements.

   Return true when no piecewise expansion was needed -- either the
   target handles the VEC_COND_EXPR (or bare comparison) directly, or
   the statement was rewritten into whole-vector bitwise operations.
   Return false after replacing the statement with an elementwise
   CONSTRUCTOR.  SSA names made dead by the rewrite are recorded in
   DCE_SSA_NAMES.  */

static bool
expand_vector_condition (gimple_stmt_iterator *gsi, bitmap dce_ssa_names)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree type = TREE_TYPE (gimple_assign_lhs (stmt));
  tree a = gimple_assign_rhs1 (stmt);
  tree a1 = a;
  tree a2 = NULL_TREE;
  bool a_is_comparison = false;
  bool a_is_scalar_bitmask = false;
  tree b = gimple_assign_rhs2 (stmt);
  tree c = gimple_assign_rhs3 (stmt);
  vec<constructor_elt, va_gc> *v;
  tree constr;
  tree inner_type = TREE_TYPE (type);
  tree width = vector_element_bits_tree (type);
  tree cond_type = TREE_TYPE (TREE_TYPE (a));
  tree comp_inner_type = cond_type;
  /* Bit offsets used when extracting elements piecewise; COMP_* track
     the (possibly differently sized) elements of the condition.  */
  tree index = bitsize_int (0);
  tree comp_width = width;
  tree comp_index = index;
  location_t loc = gimple_location (gsi_stmt (*gsi));
  tree_code code = TREE_CODE (a);
  gassign *assign = NULL;

  /* If the condition is an SSA name defined by a vector comparison,
     remember its operands and code so the comparison can be folded into
     each scalar COND_EXPR below instead of materializing the mask.  */
  if (code == SSA_NAME)
    {
      assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (a));
      if (assign != NULL
	  && TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison)
	{
	  a_is_comparison = true;
	  a1 = gimple_assign_rhs1 (assign);
	  a2 = gimple_assign_rhs2 (assign);
	  code = gimple_assign_rhs_code (assign);
	  comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
	  comp_width = vector_element_bits_tree (TREE_TYPE (a1));
	}
    }

  /* Nothing to do if the target supports the VEC_COND_EXPR directly, or
     supports the comparison alone when the arms are all-ones and zero
     (i.e. the result *is* the comparison mask).  */
  if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), code)
      || (integer_all_onesp (b) && integer_zerop (c)
	  && expand_vec_cmp_expr_p (type, TREE_TYPE (a1), code)))
    {
      gcc_assert (TREE_CODE (a) == SSA_NAME || TREE_CODE (a) == VECTOR_CST);
      return true;
    }

  /* If a has vector boolean type and is a comparison, above
     expand_vec_cond_expr_p might fail, even if both the comparison and
     VEC_COND_EXPR could be supported individually.  See PR109176.  */
  if (a_is_comparison
      && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a))
      && expand_vec_cond_expr_p (type, TREE_TYPE (a), SSA_NAME)
      && expand_vec_cmp_expr_p (TREE_TYPE (a1), TREE_TYPE (a), code))
    return true;

  /* Handle vector boolean types with bitmasks.  If there is a comparison
     and we can expand the comparison into the vector boolean bitmask,
     or otherwise if it is compatible with type, we can transform
      vbfld_1 = x_2 < y_3 ? vbfld_4 : vbfld_5;
     into
      tmp_6 = x_2 < y_3;
      tmp_7 = tmp_6 & vbfld_4;
      tmp_8 = ~tmp_6;
      tmp_9 = tmp_8 & vbfld_5;
      vbfld_1 = tmp_7 | tmp_9;
     Similarly for vbfld_10 instead of x_2 < y_3.  */
  if (VECTOR_BOOLEAN_TYPE_P (type)
      && SCALAR_INT_MODE_P (TYPE_MODE (type))
      && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
		   TYPE_VECTOR_SUBPARTS (type)
		   * GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (type))))
      && (a_is_comparison
	  ? useless_type_conversion_p (type, TREE_TYPE (a))
	  : expand_vec_cmp_expr_p (TREE_TYPE (a1), type, TREE_CODE (a))))
    {
      if (a_is_comparison)
	a = gimplify_build2 (gsi, code, type, a1, a2);
      a1 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a, b);
      a2 = gimplify_build1 (gsi, BIT_NOT_EXPR, type, a);
      a2 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a2, c);
      a = gimplify_build2 (gsi, BIT_IOR_EXPR, type, a1, a2);
      gimple_assign_set_rhs_from_tree (gsi, a);
      update_stmt (gsi_stmt (*gsi));
      return true;
    }

  /* TODO: try and find a smaller vector type.  */

  if (!warning_suppressed_p (stmt, OPT_Wvector_operation_performance))
    warning_at (loc, OPT_Wvector_operation_performance,
		"vector condition will be expanded piecewise");

  /* A non-comparison boolean mask narrower than its element count is a
     scalar integer bitmask: test individual bits of it below.  */
  if (!a_is_comparison
      && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a))
      && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (a)))
      && known_lt (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (a))),
		   TYPE_VECTOR_SUBPARTS (TREE_TYPE (a))
		   * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
						(TREE_TYPE (TREE_TYPE (a))))))
    {
      a_is_scalar_bitmask = true;
      int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (a)));
      tree atype = build_nonstandard_integer_type (prec, 1);
      a = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, atype, a);
    }
  else if (!a_is_comparison
	   && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a)))
    comp_width = vector_element_bits_tree (TREE_TYPE (a));

  /* Expand elementwise: build a scalar COND_EXPR per element and collect
     the results into a CONSTRUCTOR.  */
  int nunits = nunits_for_known_piecewise_op (type);
  vec_alloc (v, nunits);
  bool constant_p = true;
  for (int i = 0; i < nunits; i++)
    {
      tree aa, result;
      tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
      tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
      if (a_is_comparison)
	{
	  /* Fold the comparison into the per-element condition.  */
	  tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1,
				       comp_width, comp_index);
	  tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2,
				       comp_width, comp_index);
	  aa = build2 (code, cond_type, aa1, aa2);
	}
      else if (a_is_scalar_bitmask)
	{
	  /* Condition is bit I of the scalar mask.  */
	  wide_int w = wi::set_bit_in_zero (i, TYPE_PRECISION (TREE_TYPE (a)));
	  result = gimplify_build2 (gsi, BIT_AND_EXPR, TREE_TYPE (a),
				    a, wide_int_to_tree (TREE_TYPE (a), w));
	  aa = build2 (NE_EXPR, boolean_type_node, result,
		       build_zero_cst (TREE_TYPE (a)));
	}
      else
	aa = tree_vec_extract (gsi, cond_type, a, comp_width, comp_index);
      result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
      if (!CONSTANT_CLASS_P (result))
	constant_p = false;
      constructor_elt ce = {NULL_TREE, result};
      v->quick_push (ce);
      /* Advance both bit offsets; they coincide when the condition's
	 elements have the same width as the result's.  */
      index = int_const_binop (PLUS_EXPR, index, width);
      if (width == comp_width)
	comp_index = index;
      else
	comp_index = int_const_binop (PLUS_EXPR, comp_index, comp_width);
    }

  if (constant_p)
    constr = build_vector_from_ctor (type, v);
  else
    constr = build_constructor (type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));

  /* The comparison's result is no longer used here; let DCE remove it.  */
  if (a_is_comparison)
    bitmap_set_bit (dce_ssa_names,
		    SSA_NAME_VERSION (gimple_assign_lhs (assign)));

  return false;
}
   1206 
/* Lower the vector operation CODE performed by statement ASSIGN, whose
   result has type TYPE, computing it piece by piece in COMPUTE_TYPE.
   When COMPUTE_TYPE is not a vector mode, try cheaper word-parallel
   expansions first; otherwise (and as a fallback) expand fully
   piecewise.  DCE_SSA_NAMES collects names made dead by comparison
   lowering.  Return the lowered value, to replace the RHS of ASSIGN.  */

static tree
expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
			 gassign *assign, enum tree_code code,
			 bitmap dce_ssa_names)
{
  machine_mode compute_mode = TYPE_MODE (compute_type);

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  Note: each `break' below falls
     through to the generic piecewise expansion at the end.  */
  if (!VECTOR_MODE_P (compute_mode))
    switch (code)
      {
      case PLUS_EXPR:
      case MINUS_EXPR:
	/* Word-parallel add/sub is only valid when overflow does not
	   trap.  */
        if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
	  return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
					 gimple_assign_rhs1 (assign),
					 gimple_assign_rhs2 (assign), code);
	break;

      case NEGATE_EXPR:
        if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
          return expand_vector_addition (gsi, do_unop, do_negate, type,
		      		         gimple_assign_rhs1 (assign),
					 NULL_TREE, code);
	break;

      /* Bitwise operations act independently on every bit, so they can
	 always be done a word at a time.  */
      case BIT_AND_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
        return expand_vector_parallel (gsi, do_binop, type,
		      		       gimple_assign_rhs1 (assign),
				       gimple_assign_rhs2 (assign), code);

      case BIT_NOT_EXPR:
        return expand_vector_parallel (gsi, do_unop, type,
		      		       gimple_assign_rhs1 (assign),
        			       NULL_TREE, code);
      case EQ_EXPR:
      case NE_EXPR:
      case GT_EXPR:
      case LT_EXPR:
      case GE_EXPR:
      case LE_EXPR:
      case UNEQ_EXPR:
      case UNGT_EXPR:
      case UNLT_EXPR:
      case UNGE_EXPR:
      case UNLE_EXPR:
      case LTGT_EXPR:
      case ORDERED_EXPR:
      case UNORDERED_EXPR:
	{
	  tree rhs1 = gimple_assign_rhs1 (assign);
	  tree rhs2 = gimple_assign_rhs2 (assign);

	  return expand_vector_comparison (gsi, type, rhs1, rhs2, code,
					   dce_ssa_names);
	}

      case TRUNC_DIV_EXPR:
      case TRUNC_MOD_EXPR:
	{
	  tree rhs1 = gimple_assign_rhs1 (assign);
	  tree rhs2 = gimple_assign_rhs2 (assign);
	  tree ret;

	  /* Only attempt the multiply/shift division trick when
	     optimizing and the divisor is a vector constant.  */
	  if (!optimize
	      || !VECTOR_INTEGER_TYPE_P (type)
	      || TREE_CODE (rhs2) != VECTOR_CST
	      || !VECTOR_MODE_P (TYPE_MODE (type)))
	    break;

	  ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
	  if (ret != NULL_TREE)
	    return ret;
	  break;
	}

      default:
	break;
      }

  /* Generic fallback: expand one element at a time.  */
  if (TREE_CODE_CLASS (code) == tcc_unary)
    return expand_vector_piecewise (gsi, do_unop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    NULL_TREE, code, false);
  else
    return expand_vector_piecewise (gsi, do_binop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    gimple_assign_rhs2 (assign), code, false);
}
   1300 
/* Try to optimize
   a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
   style stmts into:
   _9 = { b_7, b_7, b_7, b_7 };
   a_5 = _9 + { 0, 3, 6, 9 };
   because vector splat operation is usually more efficient
   than piecewise initialization of the vector.

   Requires every constructor element to be a scalar SSA name reachable
   from a common base through a chain of `x + constant' additions;
   otherwise the statement is left untouched.  */

static void
optimize_vector_constructor (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);
  tree type = TREE_TYPE (rhs);
  unsigned int i, j;
  unsigned HOST_WIDE_INT nelts;
  bool all_same = true;
  constructor_elt *elt;
  gimple *g;
  tree base = NULL_TREE;
  optab op;

  /* Need a constant element count > 2 and a fully populated
     constructor.  */
  if (!TYPE_VECTOR_SUBPARTS (type).is_constant (&nelts)
      || nelts <= 2
      || CONSTRUCTOR_NELTS (rhs) != nelts)
    return;
  /* The rewrite emits a vector addition; bail out if the target has
     none.  */
  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return;
  FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
    if (TREE_CODE (elt->value) != SSA_NAME
	|| TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
      return;
    else
      {
	tree this_base = elt->value;
	if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
	  all_same = false;
	/* Walk up through at most NELTS + 1 `x + cst' additions to find
	   this element's base SSA name.  */
	for (j = 0; j < nelts + 1; j++)
	  {
	    g = SSA_NAME_DEF_STMT (this_base);
	    if (is_gimple_assign (g)
		&& gimple_assign_rhs_code (g) == PLUS_EXPR
		&& TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
		&& TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
		&& !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
	      this_base = gimple_assign_rhs1 (g);
	    else
	      break;
	  }
	/* All elements must share the base found for element 0.  */
	if (i == 0)
	  base = this_base;
	else if (this_base != base)
	  return;
      }
  /* A uniform constructor is already a splat; nothing to gain.  */
  if (all_same)
    return;
  /* Re-accumulate each element's constant offset from BASE; give up on
     non-constant or overflowing folds.  */
  tree_vector_builder cst (type, nelts, 1);
  for (i = 0; i < nelts; i++)
    {
      tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;
      tree elt = build_zero_cst (TREE_TYPE (base));
      while (this_base != base)
	{
	  g = SSA_NAME_DEF_STMT (this_base);
	  elt = fold_binary (PLUS_EXPR, TREE_TYPE (base),
			     elt, gimple_assign_rhs2 (g));
	  if (elt == NULL_TREE
	      || TREE_CODE (elt) != INTEGER_CST
	      || TREE_OVERFLOW (elt))
	    return;
	  this_base = gimple_assign_rhs1 (g);
	}
      cst.quick_push (elt);
    }
  /* Turn the constructor into a splat of BASE plus the offset vector.  */
  for (i = 0; i < nelts; i++)
    CONSTRUCTOR_ELT (rhs, i)->value = base;
  g = gimple_build_assign (make_ssa_name (type), rhs);
  gsi_insert_before (gsi, g, GSI_SAME_STMT);
  g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
			   cst.build ());
  gsi_replace (gsi, g, false);
}
   1386 
   1387 /* Return a type for the widest vector mode with the same element type as
   1389    type ORIGINAL_VECTOR_TYPE, with at most the same number of elements as type
   1390    ORIGINAL_VECTOR_TYPE and that is supported by the target for an operation
   1391    with optab OP, or return NULL_TREE if none is found.  */
   1392 
   1393 static tree
   1394 type_for_widest_vector_mode (tree original_vector_type, optab op)
   1395 {
   1396   gcc_assert (VECTOR_TYPE_P (original_vector_type));
   1397   tree type = TREE_TYPE (original_vector_type);
   1398   machine_mode inner_mode = TYPE_MODE (type);
   1399   machine_mode best_mode = VOIDmode, mode;
   1400   poly_int64 best_nunits = 0;
   1401 
   1402   if (SCALAR_FLOAT_MODE_P (inner_mode))
   1403     mode = MIN_MODE_VECTOR_FLOAT;
   1404   else if (SCALAR_FRACT_MODE_P (inner_mode))
   1405     mode = MIN_MODE_VECTOR_FRACT;
   1406   else if (SCALAR_UFRACT_MODE_P (inner_mode))
   1407     mode = MIN_MODE_VECTOR_UFRACT;
   1408   else if (SCALAR_ACCUM_MODE_P (inner_mode))
   1409     mode = MIN_MODE_VECTOR_ACCUM;
   1410   else if (SCALAR_UACCUM_MODE_P (inner_mode))
   1411     mode = MIN_MODE_VECTOR_UACCUM;
   1412   else if (inner_mode == BImode)
   1413     mode = MIN_MODE_VECTOR_BOOL;
   1414   else
   1415     mode = MIN_MODE_VECTOR_INT;
   1416 
   1417   FOR_EACH_MODE_FROM (mode, mode)
   1418     if (GET_MODE_INNER (mode) == inner_mode
   1419 	&& maybe_gt (GET_MODE_NUNITS (mode), best_nunits)
   1420 	&& optab_handler (op, mode) != CODE_FOR_nothing
   1421 	&& known_le (GET_MODE_NUNITS (mode),
   1422 		     TYPE_VECTOR_SUBPARTS (original_vector_type)))
   1423       best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
   1424 
   1425   if (best_mode == VOIDmode)
   1426     return NULL_TREE;
   1427   else
   1428     return build_vector_type_for_mode (type, best_mode);
   1429 }
   1430 
   1431 
   1432 /* Build a reference to the element of the vector VECT.  Function
   1433    returns either the element itself, either BIT_FIELD_REF, or an
   1434    ARRAY_REF expression.
   1435 
   1436    GSI is required to insert temporary variables while building a
   1437    refernece to the element of the vector VECT.
   1438 
   1439    PTMPVEC is a pointer to the temporary variable for caching
   1440    purposes.  In case when PTMPVEC is NULL new temporary variable
   1441    will be created.  */
   1442 static tree
   1443 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
   1444 {
   1445   tree vect_type, vect_elt_type;
   1446   gimple *asgn;
   1447   tree tmpvec;
   1448   tree arraytype;
   1449   bool need_asgn = true;
   1450   unsigned int elements;
   1451 
   1452   vect_type = TREE_TYPE (vect);
   1453   vect_elt_type = TREE_TYPE (vect_type);
   1454   elements = nunits_for_known_piecewise_op (vect_type);
   1455 
   1456   if (TREE_CODE (idx) == INTEGER_CST)
   1457     {
   1458       unsigned HOST_WIDE_INT index;
   1459 
   1460       /* Given that we're about to compute a binary modulus,
   1461 	 we don't care about the high bits of the value.  */
   1462       index = TREE_INT_CST_LOW (idx);
   1463       if (!tree_fits_uhwi_p (idx) || index >= elements)
   1464 	{
   1465 	  index &= elements - 1;
   1466 	  idx = build_int_cst (TREE_TYPE (idx), index);
   1467 	}
   1468 
   1469       /* When lowering a vector statement sequence do some easy
   1470          simplification by looking through intermediate vector results.  */
   1471       if (TREE_CODE (vect) == SSA_NAME)
   1472 	{
   1473 	  gimple *def_stmt = SSA_NAME_DEF_STMT (vect);
   1474 	  if (is_gimple_assign (def_stmt)
   1475 	      && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
   1476 		  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
   1477 	    vect = gimple_assign_rhs1 (def_stmt);
   1478 	}
   1479 
   1480       if (TREE_CODE (vect) == VECTOR_CST)
   1481 	return VECTOR_CST_ELT (vect, index);
   1482       else if (TREE_CODE (vect) == CONSTRUCTOR
   1483 	       && (CONSTRUCTOR_NELTS (vect) == 0
   1484 		   || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
   1485 		      != VECTOR_TYPE))
   1486         {
   1487 	  if (index < CONSTRUCTOR_NELTS (vect))
   1488 	    return CONSTRUCTOR_ELT (vect, index)->value;
   1489           return build_zero_cst (vect_elt_type);
   1490         }
   1491       else
   1492         {
   1493 	  tree size = vector_element_bits_tree (vect_type);
   1494 	  tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
   1495 				  size);
   1496 	  return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
   1497         }
   1498     }
   1499 
   1500   if (!ptmpvec)
   1501     tmpvec = create_tmp_var (vect_type, "vectmp");
   1502   else if (!*ptmpvec)
   1503     tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
   1504   else
   1505     {
   1506       tmpvec = *ptmpvec;
   1507       need_asgn = false;
   1508     }
   1509 
   1510   if (need_asgn)
   1511     {
   1512       TREE_ADDRESSABLE (tmpvec) = 1;
   1513       asgn = gimple_build_assign (tmpvec, vect);
   1514       gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
   1515     }
   1516 
   1517   arraytype = build_array_type_nelts (vect_elt_type, elements);
   1518   return build4 (ARRAY_REF, vect_elt_type,
   1519                  build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
   1520                  idx, NULL_TREE, NULL_TREE);
   1521 }
   1522 
   1523 /* Check if VEC_PERM_EXPR within the given setting is supported
   1524    by hardware, or lower it piecewise.
   1525 
   1526    When VEC_PERM_EXPR has the same first and second operands:
   1527    VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
   1528    {v0[mask[0]], v0[mask[1]], ...}
   1529    MASK and V0 must have the same number of elements.
   1530 
   1531    Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
   1532    {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
   1533    V0 and V1 must have the same type.  MASK, V0, V1 must have the
   1534    same number of arguments.  */
   1535 
   1536 static void
   1537 lower_vec_perm (gimple_stmt_iterator *gsi)
   1538 {
   1539   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
   1540   tree mask = gimple_assign_rhs3 (stmt);
   1541   tree vec0 = gimple_assign_rhs1 (stmt);
   1542   tree vec1 = gimple_assign_rhs2 (stmt);
   1543   tree vect_type = TREE_TYPE (vec0);
   1544   tree mask_type = TREE_TYPE (mask);
   1545   tree vect_elt_type = TREE_TYPE (vect_type);
   1546   tree mask_elt_type = TREE_TYPE (mask_type);
   1547   unsigned HOST_WIDE_INT elements;
   1548   vec<constructor_elt, va_gc> *v;
   1549   tree constr, t, si, i_val;
   1550   tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
   1551   bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
   1552   location_t loc = gimple_location (gsi_stmt (*gsi));
   1553   unsigned i;
   1554 
   1555   if (!TYPE_VECTOR_SUBPARTS (vect_type).is_constant (&elements))
   1556     return;
   1557 
   1558   if (TREE_CODE (mask) == SSA_NAME)
   1559     {
   1560       gimple *def_stmt = SSA_NAME_DEF_STMT (mask);
   1561       if (is_gimple_assign (def_stmt)
   1562 	  && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
   1563 	mask = gimple_assign_rhs1 (def_stmt);
   1564     }
   1565 
   1566   vec_perm_builder sel_int;
   1567 
   1568   if (TREE_CODE (mask) == VECTOR_CST
   1569       && tree_to_vec_perm_builder (&sel_int, mask))
   1570     {
   1571       vec_perm_indices indices (sel_int, 2, elements);
   1572       if (can_vec_perm_const_p (TYPE_MODE (vect_type), indices))
   1573 	{
   1574 	  gimple_assign_set_rhs3 (stmt, mask);
   1575 	  update_stmt (stmt);
   1576 	  return;
   1577 	}
   1578       /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
   1579 	 vector as VEC1 and a right element shift MASK.  */
   1580       if (optab_handler (vec_shr_optab, TYPE_MODE (vect_type))
   1581 	  != CODE_FOR_nothing
   1582 	  && TREE_CODE (vec1) == VECTOR_CST
   1583 	  && initializer_zerop (vec1)
   1584 	  && maybe_ne (indices[0], 0)
   1585 	  && known_lt (poly_uint64 (indices[0]), elements))
   1586 	{
   1587 	  bool ok_p = indices.series_p (0, 1, indices[0], 1);
   1588 	  if (!ok_p)
   1589 	    {
   1590 	      for (i = 1; i < elements; ++i)
   1591 		{
   1592 		  poly_uint64 actual = indices[i];
   1593 		  poly_uint64 expected = i + indices[0];
   1594 		  /* Indices into the second vector are all equivalent.  */
   1595 		  if (maybe_lt (actual, elements)
   1596 		      ? maybe_ne (actual, expected)
   1597 		      : maybe_lt (expected, elements))
   1598 		    break;
   1599 		}
   1600 	      ok_p = i == elements;
   1601 	    }
   1602 	  if (ok_p)
   1603 	    {
   1604 	      gimple_assign_set_rhs3 (stmt, mask);
   1605 	      update_stmt (stmt);
   1606 	      return;
   1607 	    }
   1608 	}
   1609       /* And similarly vec_shl pattern.  */
   1610       if (optab_handler (vec_shl_optab, TYPE_MODE (vect_type))
   1611 	  != CODE_FOR_nothing
   1612 	  && TREE_CODE (vec0) == VECTOR_CST
   1613 	  && initializer_zerop (vec0))
   1614 	{
   1615 	  unsigned int first = 0;
   1616 	  for (i = 0; i < elements; ++i)
   1617 	    if (known_eq (poly_uint64 (indices[i]), elements))
   1618 	      {
   1619 		if (i == 0 || first)
   1620 		  break;
   1621 		first = i;
   1622 	      }
   1623 	    else if (first
   1624 		     ? maybe_ne (poly_uint64 (indices[i]),
   1625 					      elements + i - first)
   1626 		     : maybe_ge (poly_uint64 (indices[i]), elements))
   1627 	      break;
   1628 	  if (first && i == elements)
   1629 	    {
   1630 	      gimple_assign_set_rhs3 (stmt, mask);
   1631 	      update_stmt (stmt);
   1632 	      return;
   1633 	    }
   1634 	}
   1635     }
   1636   else if (can_vec_perm_var_p (TYPE_MODE (vect_type)))
   1637     return;
   1638 
   1639   if (!warning_suppressed_p (stmt, OPT_Wvector_operation_performance))
   1640     warning_at (loc, OPT_Wvector_operation_performance,
   1641 		"vector shuffling operation will be expanded piecewise");
   1642 
   1643   vec_alloc (v, elements);
   1644   bool constant_p = true;
   1645   for (i = 0; i < elements; i++)
   1646     {
   1647       si = size_int (i);
   1648       i_val = vector_element (gsi, mask, si, &masktmp);
   1649 
   1650       if (TREE_CODE (i_val) == INTEGER_CST)
   1651         {
   1652 	  unsigned HOST_WIDE_INT index;
   1653 
   1654 	  index = TREE_INT_CST_LOW (i_val);
   1655 	  if (!tree_fits_uhwi_p (i_val) || index >= elements)
   1656 	    i_val = build_int_cst (mask_elt_type, index & (elements - 1));
   1657 
   1658           if (two_operand_p && (index & elements) != 0)
   1659 	    t = vector_element (gsi, vec1, i_val, &vec1tmp);
   1660 	  else
   1661 	    t = vector_element (gsi, vec0, i_val, &vec0tmp);
   1662 
   1663           t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
   1664 					true, GSI_SAME_STMT);
   1665         }
   1666       else
   1667         {
   1668 	  tree cond = NULL_TREE, v0_val;
   1669 
   1670 	  if (two_operand_p)
   1671 	    {
   1672 	      cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
   1673 			          build_int_cst (mask_elt_type, elements));
   1674 	      cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
   1675 					       true, GSI_SAME_STMT);
   1676 	    }
   1677 
   1678 	  i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
   1679 			       build_int_cst (mask_elt_type, elements - 1));
   1680 	  i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
   1681 					    true, GSI_SAME_STMT);
   1682 
   1683 	  v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
   1684 	  v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
   1685 					     true, GSI_SAME_STMT);
   1686 
   1687 	  if (two_operand_p)
   1688 	    {
   1689 	      tree v1_val;
   1690 
   1691 	      v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
   1692 	      v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
   1693 						 true, GSI_SAME_STMT);
   1694 
   1695 	      cond = fold_build2 (EQ_EXPR, boolean_type_node,
   1696 				  cond, build_zero_cst (mask_elt_type));
   1697 	      cond = fold_build3 (COND_EXPR, vect_elt_type,
   1698 				  cond, v0_val, v1_val);
   1699               t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
   1700 					    true, GSI_SAME_STMT);
   1701             }
   1702 	  else
   1703 	    t = v0_val;
   1704         }
   1705 
   1706       if (!CONSTANT_CLASS_P (t))
   1707 	constant_p = false;
   1708       CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
   1709     }
   1710 
   1711   if (constant_p)
   1712     constr = build_vector_from_ctor (vect_type, v);
   1713   else
   1714     constr = build_constructor (vect_type, v);
   1715   gimple_assign_set_rhs_from_tree (gsi, constr);
   1716   update_stmt (gsi_stmt (*gsi));
   1717 }
   1718 
   1719 /* If OP is a uniform vector return the element it is a splat from.  */
   1720 
   1721 static tree
   1722 ssa_uniform_vector_p (tree op)
   1723 {
   1724   if (TREE_CODE (op) == VECTOR_CST
   1725       || TREE_CODE (op) == VEC_DUPLICATE_EXPR
   1726       || TREE_CODE (op) == CONSTRUCTOR)
   1727     return uniform_vector_p (op);
   1728   if (TREE_CODE (op) == SSA_NAME)
   1729     {
   1730       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
   1731       if (gimple_assign_single_p (def_stmt))
   1732 	return uniform_vector_p (gimple_assign_rhs1 (def_stmt));
   1733     }
   1734   return NULL_TREE;
   1735 }
   1736 
   1737 /* Return type in which CODE operation with optab OP can be
   1738    computed.  */
   1739 
   1740 static tree
   1741 get_compute_type (enum tree_code code, optab op, tree type)
   1742 {
   1743   /* For very wide vectors, try using a smaller vector mode.  */
   1744   tree compute_type = type;
   1745   if (op
   1746       && (!VECTOR_MODE_P (TYPE_MODE (type))
   1747 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
   1748     {
   1749       tree vector_compute_type
   1750 	= type_for_widest_vector_mode (type, op);
   1751       if (vector_compute_type != NULL_TREE
   1752 	  && maybe_ne (TYPE_VECTOR_SUBPARTS (vector_compute_type), 1U)
   1753 	  && (optab_handler (op, TYPE_MODE (vector_compute_type))
   1754 	      != CODE_FOR_nothing))
   1755 	compute_type = vector_compute_type;
   1756     }
   1757 
   1758   /* If we are breaking a BLKmode vector into smaller pieces,
   1759      type_for_widest_vector_mode has already looked into the optab,
   1760      so skip these checks.  */
   1761   if (compute_type == type)
   1762     {
   1763       machine_mode compute_mode = TYPE_MODE (compute_type);
   1764       if (VECTOR_MODE_P (compute_mode))
   1765 	{
   1766 	  if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
   1767 	    return compute_type;
   1768 	  if (code == MULT_HIGHPART_EXPR
   1769 	      && can_mult_highpart_p (compute_mode,
   1770 				      TYPE_UNSIGNED (compute_type)))
   1771 	    return compute_type;
   1772 	}
   1773       /* There is no operation in hardware, so fall back to scalars.  */
   1774       compute_type = TREE_TYPE (type);
   1775     }
   1776 
   1777   return compute_type;
   1778 }
   1779 
   1780 static tree
   1781 do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
   1782 	 tree bitpos, tree bitsize, enum tree_code code,
   1783 	 tree type ATTRIBUTE_UNUSED)
   1784 {
   1785   if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
   1786     a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
   1787   if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
   1788     b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
   1789   tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
   1790   return gimplify_build3 (gsi, code, inner_type, unshare_expr (cond), a, b);
   1791 }
   1792 
   1793 /* Expand a vector COND_EXPR to scalars, piecewise.  */
   1794 static void
   1795 expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
   1796 {
   1797   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
   1798   tree lhs = gimple_assign_lhs (stmt);
   1799   tree type = TREE_TYPE (lhs);
   1800   tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
   1801   machine_mode compute_mode = TYPE_MODE (compute_type);
   1802   gcc_assert (compute_mode != BLKmode);
   1803   tree rhs2 = gimple_assign_rhs2 (stmt);
   1804   tree rhs3 = gimple_assign_rhs3 (stmt);
   1805   tree new_rhs;
   1806 
   1807   /* If the compute mode is not a vector mode (hence we are not decomposing
   1808      a BLKmode vector to smaller, hardware-supported vectors), we may want
   1809      to expand the operations in parallel.  */
   1810   if (!VECTOR_MODE_P (compute_mode))
   1811     new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
   1812 				      COND_EXPR);
   1813   else
   1814     new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
   1815 				       rhs2, rhs3, COND_EXPR, false);
   1816   if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
   1817     new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
   1818 			       new_rhs);
   1819 
   1820   /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree. One
   1821      way to do it is change expand_vector_operation and its callees to
   1822      return a tree_code, RHS1 and RHS2 instead of a tree. */
   1823   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
   1824   update_stmt (gsi_stmt (*gsi));
   1825 }
   1826 
   1827 /* Callback for expand_vector_piecewise to do VEC_CONVERT ifn call
   1828    lowering.  If INNER_TYPE is not a vector type, this is a scalar
   1829    fallback.  */
   1830 
   1831 static tree
   1832 do_vec_conversion (gimple_stmt_iterator *gsi, tree inner_type, tree a,
   1833 		   tree decl, tree bitpos, tree bitsize,
   1834 		   enum tree_code code, tree type)
   1835 {
   1836   a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
   1837   if (!VECTOR_TYPE_P (inner_type))
   1838     return gimplify_build1 (gsi, code, TREE_TYPE (type), a);
   1839   if (code == CALL_EXPR)
   1840     {
   1841       gimple *g = gimple_build_call (decl, 1, a);
   1842       tree lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (decl)));
   1843       gimple_call_set_lhs (g, lhs);
   1844       gsi_insert_before (gsi, g, GSI_SAME_STMT);
   1845       return lhs;
   1846     }
   1847   else
   1848     {
   1849       tree outer_type = build_vector_type (TREE_TYPE (type),
   1850 					   TYPE_VECTOR_SUBPARTS (inner_type));
   1851       return gimplify_build1 (gsi, code, outer_type, a);
   1852     }
   1853 }
   1854 
   1855 /* Similarly, but for narrowing conversion.  */
   1856 
   1857 static tree
   1858 do_vec_narrow_conversion (gimple_stmt_iterator *gsi, tree inner_type, tree a,
   1859 			  tree, tree bitpos, tree, enum tree_code code,
   1860 			  tree type)
   1861 {
   1862   tree itype = build_vector_type (TREE_TYPE (inner_type),
   1863 				  exact_div (TYPE_VECTOR_SUBPARTS (inner_type),
   1864 					     2));
   1865   tree b = tree_vec_extract (gsi, itype, a, TYPE_SIZE (itype), bitpos);
   1866   tree c = tree_vec_extract (gsi, itype, a, TYPE_SIZE (itype),
   1867 			     int_const_binop (PLUS_EXPR, bitpos,
   1868 					      TYPE_SIZE (itype)));
   1869   tree outer_type = build_vector_type (TREE_TYPE (type),
   1870 				       TYPE_VECTOR_SUBPARTS (inner_type));
   1871   return gimplify_build2 (gsi, code, outer_type, b, c);
   1872 }
   1873 
/* Expand VEC_CONVERT ifn call.  The call converts vector ARG to the
   vector type of its lhs; depending on relative element widths this is
   lowered to a direct conversion, VEC_PACK_* (narrowing), VEC_UNPACK_*
   LO/HI pairs (widening), or an element-by-element fallback.  */

static void
expand_vector_conversion (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);
  gimple *g;
  tree lhs = gimple_call_lhs (stmt);
  /* A call without a lhs computes nothing useful; replace it with a
     nop.  */
  if (lhs == NULL_TREE)
    {
      g = gimple_build_nop ();
      gsi_replace (gsi, g, false);
      return;
    }
  tree arg = gimple_call_arg (stmt, 0);
  tree ret_type = TREE_TYPE (lhs);
  tree arg_type = TREE_TYPE (arg);
  tree new_rhs, compute_type = TREE_TYPE (arg_type);
  enum tree_code code = NOP_EXPR;
  enum tree_code code1 = ERROR_MARK;
  enum { NARROW, NONE, WIDEN } modifier = NONE;
  optab optab1 = unknown_optab;

  gcc_checking_assert (VECTOR_TYPE_P (ret_type) && VECTOR_TYPE_P (arg_type));
  /* Pick the scalar conversion code from the element types: int<->float
     conversions use FIX_TRUNC_EXPR/FLOAT_EXPR, anything else is a plain
     NOP_EXPR conversion.  */
  if (INTEGRAL_TYPE_P (TREE_TYPE (ret_type))
      && SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg_type)))
    code = FIX_TRUNC_EXPR;
  else if (INTEGRAL_TYPE_P (TREE_TYPE (arg_type))
	   && SCALAR_FLOAT_TYPE_P (TREE_TYPE (ret_type)))
    code = FLOAT_EXPR;
  /* Classify the conversion by comparing element bit-widths.  */
  unsigned int ret_elt_bits = vector_element_bits (ret_type);
  unsigned int arg_elt_bits = vector_element_bits (arg_type);
  if (ret_elt_bits < arg_elt_bits)
    modifier = NARROW;
  else if (ret_elt_bits > arg_elt_bits)
    modifier = WIDEN;

  /* Same-width int<->float conversion: try a direct vector conversion,
     then progressively smaller sub-vector pieces.  */
  if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR))
    {
      if (supportable_convert_operation (code, ret_type, arg_type, &code1))
	{
	  g = gimple_build_assign (lhs, code1, arg);
	  gsi_replace (gsi, g, false);
	  return;
	}
      /* Can't use get_compute_type here, as supportable_convert_operation
	 doesn't necessarily use an optab and needs two arguments.  */
      tree vec_compute_type
	= type_for_widest_vector_mode (arg_type, mov_optab);
      if (vec_compute_type
	  && VECTOR_MODE_P (TYPE_MODE (vec_compute_type)))
	{
	  unsigned HOST_WIDE_INT nelts
	    = constant_lower_bound (TYPE_VECTOR_SUBPARTS (vec_compute_type));
	  /* Halve the piece size until a supported conversion is found.  */
	  while (nelts > 1)
	    {
	      tree ret1_type = build_vector_type (TREE_TYPE (ret_type), nelts);
	      tree arg1_type = build_vector_type (TREE_TYPE (arg_type), nelts);
	      if (supportable_convert_operation (code, ret1_type, arg1_type,
						 &code1))
		{
		  new_rhs = expand_vector_piecewise (gsi, do_vec_conversion,
						     ret_type, arg1_type, arg,
						     NULL_TREE, code1, false);
		  g = gimple_build_assign (lhs, new_rhs);
		  gsi_replace (gsi, g, false);
		  return;
		}
	      nelts = nelts / 2;
	    }
	}
    }
  else if (modifier == NARROW)
    {
      /* Narrowing: use a VEC_PACK_* operation, which combines two input
	 vectors into one result vector of half-width elements.  */
      switch (code)
	{
	CASE_CONVERT:
	  code1 = VEC_PACK_TRUNC_EXPR;
	  optab1 = optab_for_tree_code (code1, arg_type, optab_default);
	  break;
	case FIX_TRUNC_EXPR:
	  code1 = VEC_PACK_FIX_TRUNC_EXPR;
	  /* The signedness is determined from output operand.  */
	  optab1 = optab_for_tree_code (code1, ret_type, optab_default);
	  break;
	case FLOAT_EXPR:
	  code1 = VEC_PACK_FLOAT_EXPR;
	  optab1 = optab_for_tree_code (code1, arg_type, optab_default);
	  break;
	default:
	  gcc_unreachable ();
	}

      if (optab1)
	compute_type = get_compute_type (code1, optab1, arg_type);
      enum insn_code icode1;
      /* The pack insn must exist and produce a vector result.  */
      if (VECTOR_TYPE_P (compute_type)
	  && ((icode1 = optab_handler (optab1, TYPE_MODE (compute_type)))
	      != CODE_FOR_nothing)
	  && VECTOR_MODE_P (insn_data[icode1].operand[0].mode))
	{
	  /* The pack result has twice the element count of each input.  */
	  tree cretd_type
	    = build_vector_type (TREE_TYPE (ret_type),
				 TYPE_VECTOR_SUBPARTS (compute_type) * 2);
	  if (insn_data[icode1].operand[0].mode == TYPE_MODE (cretd_type))
	    {
	      if (compute_type == arg_type)
		{
		  /* Pack ARG together with a zero vector and keep the
		     RET_TYPE-sized part at bit offset zero.  */
		  new_rhs = gimplify_build2 (gsi, code1, cretd_type,
					     arg, build_zero_cst (arg_type));
		  new_rhs = tree_vec_extract (gsi, ret_type, new_rhs,
					      TYPE_SIZE (ret_type),
					      bitsize_int (0));
		  g = gimple_build_assign (lhs, new_rhs);
		  gsi_replace (gsi, g, false);
		  return;
		}
	      /* Otherwise feed each pack from a double-width piece of
		 ARG, split into two halves by do_vec_narrow_conversion.  */
	      tree dcompute_type
		= build_vector_type (TREE_TYPE (compute_type),
				     TYPE_VECTOR_SUBPARTS (compute_type) * 2);
	      if (TYPE_MAIN_VARIANT (dcompute_type)
		  == TYPE_MAIN_VARIANT (arg_type))
		new_rhs = do_vec_narrow_conversion (gsi, dcompute_type, arg,
						    NULL_TREE, bitsize_int (0),
						    NULL_TREE, code1,
						    ret_type);
	      else
		new_rhs = expand_vector_piecewise (gsi,
						   do_vec_narrow_conversion,
						   arg_type, dcompute_type,
						   arg, NULL_TREE, code1,
						   false, ret_type);
	      g = gimple_build_assign (lhs, new_rhs);
	      gsi_replace (gsi, g, false);
	      return;
	    }
	}
    }
  else if (modifier == WIDEN)
    {
      /* Widening: use a VEC_UNPACK_*_LO/HI pair, each converting half
	 of an input vector into a full result vector.  */
      enum tree_code code2 = ERROR_MARK;
      optab optab2 = unknown_optab;
      switch (code)
	{
	CASE_CONVERT:
	  code1 = VEC_UNPACK_LO_EXPR;
          code2 = VEC_UNPACK_HI_EXPR;
	  break;
	case FIX_TRUNC_EXPR:
	  code1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
	  code2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
	  break;
	case FLOAT_EXPR:
	  code1 = VEC_UNPACK_FLOAT_LO_EXPR;
	  code2 = VEC_UNPACK_FLOAT_HI_EXPR;
	  break;
	default:
	  gcc_unreachable ();
	}
      /* On big-endian targets the roles of the LO and HI variants are
	 swapped.  */
      if (BYTES_BIG_ENDIAN)
	std::swap (code1, code2);

      if (code == FIX_TRUNC_EXPR)
	{
	  /* The signedness is determined from output operand.  */
	  optab1 = optab_for_tree_code (code1, ret_type, optab_default);
	  optab2 = optab_for_tree_code (code2, ret_type, optab_default);
	}
      else
	{
	  optab1 = optab_for_tree_code (code1, arg_type, optab_default);
	  optab2 = optab_for_tree_code (code2, arg_type, optab_default);
	}

      if (optab1 && optab2)
	compute_type = get_compute_type (code1, optab1, arg_type);

      enum insn_code icode1, icode2;
      /* Both unpack insns must exist and agree on the result mode.  */
      if (VECTOR_TYPE_P (compute_type)
	  && ((icode1 = optab_handler (optab1, TYPE_MODE (compute_type)))
	      != CODE_FOR_nothing)
	  && ((icode2 = optab_handler (optab2, TYPE_MODE (compute_type)))
	      != CODE_FOR_nothing)
	  && VECTOR_MODE_P (insn_data[icode1].operand[0].mode)
	  && (insn_data[icode1].operand[0].mode
	      == insn_data[icode2].operand[0].mode))
	{
	  /* Each unpack result covers half of the input's elements.  */
	  poly_uint64 nunits
	    = exact_div (TYPE_VECTOR_SUBPARTS (compute_type), 2);
	  tree cretd_type = build_vector_type (TREE_TYPE (ret_type), nunits);
	  if (insn_data[icode1].operand[0].mode == TYPE_MODE (cretd_type))
	    {
	      vec<constructor_elt, va_gc> *v;
	      tree part_width = TYPE_SIZE (compute_type);
	      tree index = bitsize_int (0);
	      /* Note: shadows the poly_uint64 NUNITS above; this is the
		 number of ARG elements walked piecewise below.  */
	      int nunits = nunits_for_known_piecewise_op (arg_type);
	      int delta = tree_to_uhwi (part_width) / arg_elt_bits;
	      int i;
	      location_t loc = gimple_location (gsi_stmt (*gsi));

	      if (compute_type != arg_type)
		{
		  if (!warning_suppressed_p (gsi_stmt (*gsi),
					     OPT_Wvector_operation_performance))
		    warning_at (loc, OPT_Wvector_operation_performance,
				"vector operation will be expanded piecewise");
		}
	      else
		{
		  /* ARG is handled whole: one unpack LO/HI pair does it.  */
		  nunits = 1;
		  delta = 1;
		}

	      /* Each piece contributes one LO and one HI result vector.  */
	      vec_alloc (v, (nunits + delta - 1) / delta * 2);
	      bool constant_p = true;
	      for (i = 0; i < nunits;
		   i += delta, index = int_const_binop (PLUS_EXPR, index,
							part_width))
		{
		  tree a = arg;
		  if (compute_type != arg_type)
		    a = tree_vec_extract (gsi, compute_type, a, part_width,
					  index);
		  tree result = gimplify_build1 (gsi, code1, cretd_type, a);
		  constructor_elt ce = { NULL_TREE, result };
		  if (!CONSTANT_CLASS_P (ce.value))
		    constant_p = false;
		  v->quick_push (ce);
		  ce.value = gimplify_build1 (gsi, code2, cretd_type, a);
		  if (!CONSTANT_CLASS_P (ce.value))
		    constant_p = false;
		  v->quick_push (ce);
		}

	      /* If every element folded to a constant, emit a VECTOR_CST
		 rather than a CONSTRUCTOR.  */
	      if (constant_p)
		new_rhs = build_vector_from_ctor (ret_type, v);
	      else
		new_rhs = build_constructor (ret_type, v);
	      g = gimple_build_assign (lhs, new_rhs);
	      gsi_replace (gsi, g, false);
	      return;
	    }
	}
    }

  /* Final fallback: convert scalar element by scalar element.  */
  new_rhs = expand_vector_piecewise (gsi, do_vec_conversion, arg_type,
				     TREE_TYPE (arg_type), arg,
				     NULL_TREE, code, false, ret_type);
  g = gimple_build_assign (lhs, new_rhs);
  gsi_replace (gsi, g, false);
}
   2125 
/* Process one statement.  If we identify a vector operation that the
   target cannot handle directly, expand it: dedicated lowerings for
   VEC_PERM_EXPR / VEC_COND_EXPR / COND_EXPR / CONSTRUCTOR and the
   VEC_CONVERT internal call, a splat shortcut for uniform operands,
   special optab selection for shifts and rotates, and a generic
   piecewise expansion for everything else.  */

static void
expand_vector_operations_1 (gimple_stmt_iterator *gsi,
			    bitmap dce_ssa_names)
{
  tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
  enum tree_code code;
  optab op = unknown_optab;
  enum gimple_rhs_class rhs_class;
  tree new_rhs;

  /* Only consider code == GIMPLE_ASSIGN. */
  gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
  if (!stmt)
    {
      /* The only call lowered here is the VEC_CONVERT internal fn.  */
      if (gimple_call_internal_p (gsi_stmt (*gsi), IFN_VEC_CONVERT))
	expand_vector_conversion (gsi);
      return;
    }

  code = gimple_assign_rhs_code (stmt);
  rhs_class = get_gimple_rhs_class (code);
  lhs = gimple_assign_lhs (stmt);

  /* Rhs codes with dedicated lowering routines.  */
  if (code == VEC_PERM_EXPR)
    {
      lower_vec_perm (gsi);
      return;
    }

  if (code == VEC_COND_EXPR)
    {
      expand_vector_condition (gsi, dce_ssa_names);
      return;
    }

  /* A COND_EXPR with scalar condition but BLKmode vector operands.  */
  if (code == COND_EXPR
      && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
      && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
    {
      expand_vector_scalar_condition (gsi);
      return;
    }

  if (code == CONSTRUCTOR
      && TREE_CODE (lhs) == SSA_NAME
      && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
      && !gimple_clobber_p (stmt)
      && optimize)
    {
      optimize_vector_constructor (gsi);
      return;
    }

  /* Everything below handles only unary/binary operations.  */
  if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
    return;

  rhs1 = gimple_assign_rhs1 (stmt);
  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  type = TREE_TYPE (lhs);
  if (!VECTOR_TYPE_P (type)
      || !VECTOR_TYPE_P (TREE_TYPE (rhs1)))
    return;

  /* A scalar operation pretending to be a vector one.  */
  if (VECTOR_BOOLEAN_TYPE_P (type)
      && !VECTOR_MODE_P (TYPE_MODE (type))
      && TYPE_MODE (type) != BLKmode
      && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) != tcc_comparison
	  || (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))
	      && !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs1)))
	      && TYPE_MODE (TREE_TYPE (rhs1)) != BLKmode)))
    return;

  /* If the vector operation is operating on all same vector elements
     implement it with a scalar operation and a splat if the target
     supports the scalar operation.  */
  tree srhs1, srhs2 = NULL_TREE;
  if ((srhs1 = ssa_uniform_vector_p (rhs1)) != NULL_TREE
      && (rhs2 == NULL_TREE
	  || (! VECTOR_TYPE_P (TREE_TYPE (rhs2))
	      && (srhs2 = rhs2))
	  || (srhs2 = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
      /* As we query direct optabs restrict to non-convert operations.  */
      && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1)))
    {
      op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar);
      if (op >= FIRST_NORM_OPTAB && op <= LAST_NORM_OPTAB
	  && optab_handler (op, TYPE_MODE (TREE_TYPE (type))) != CODE_FOR_nothing)
	{
	  tree stype = TREE_TYPE (TREE_TYPE (lhs));
	  tree slhs = (rhs2 != NULL_TREE)
		      ? gimplify_build2 (gsi, code, stype, srhs1, srhs2)
		      : gimplify_build1 (gsi, code, stype, srhs1);
	  gimple_assign_set_rhs_from_tree (gsi,
					   build_vector_from_val (type, slhs));
	  update_stmt (stmt);
	  return;
	}
    }

  /* Conversions are handled by expand_vector_conversion / left alone.  */
  if (CONVERT_EXPR_CODE_P (code)
      || code == FLOAT_EXPR
      || code == FIX_TRUNC_EXPR
      || code == VIEW_CONVERT_EXPR)
    return;

  /* The signedness is determined from input argument.  */
  if (code == VEC_UNPACK_FLOAT_HI_EXPR
      || code == VEC_UNPACK_FLOAT_LO_EXPR
      || code == VEC_PACK_FLOAT_EXPR)
    {
      /* We do not know how to scalarize those.  */
      return;
    }

  /* For widening/narrowing vector operations, the relevant type is of the
     arguments, not the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
     calculated in the same way above.  */
  if (code == WIDEN_SUM_EXPR
      || code == VEC_WIDEN_PLUS_HI_EXPR
      || code == VEC_WIDEN_PLUS_LO_EXPR
      || code == VEC_WIDEN_MINUS_HI_EXPR
      || code == VEC_WIDEN_MINUS_LO_EXPR
      || code == VEC_WIDEN_MULT_HI_EXPR
      || code == VEC_WIDEN_MULT_LO_EXPR
      || code == VEC_WIDEN_MULT_EVEN_EXPR
      || code == VEC_WIDEN_MULT_ODD_EXPR
      || code == VEC_UNPACK_HI_EXPR
      || code == VEC_UNPACK_LO_EXPR
      || code == VEC_UNPACK_FIX_TRUNC_HI_EXPR
      || code == VEC_UNPACK_FIX_TRUNC_LO_EXPR
      || code == VEC_PACK_TRUNC_EXPR
      || code == VEC_PACK_SAT_EXPR
      || code == VEC_PACK_FIX_TRUNC_EXPR
      || code == VEC_WIDEN_LSHIFT_HI_EXPR
      || code == VEC_WIDEN_LSHIFT_LO_EXPR)
    {
      /* We do not know how to scalarize those.  */
      return;
    }

  /* Choose between vector shift/rotate by vector and vector shift/rotate by
     scalar */
  if (code == LSHIFT_EXPR
      || code == RSHIFT_EXPR
      || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    {
      optab opv;

      /* Check whether we have vector <op> {x,x,x,x} where x
         could be a scalar variable or a constant.  Transform
         vector <op> {x,x,x,x} ==> vector <op> scalar.  */
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
        {
          tree first;

          if ((first = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
            {
              gimple_assign_set_rhs2 (stmt, first);
              update_stmt (stmt);
              rhs2 = first;
            }
        }

      opv = optab_for_tree_code (code, type, optab_vector);
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	op = opv;
      else
	{
          op = optab_for_tree_code (code, type, optab_scalar);

	  compute_type = get_compute_type (code, op, type);
	  if (compute_type == type)
	    return;
	  /* The rtl expander will expand vector/scalar as vector/vector
	     if necessary.  Pick one with wider vector type.  */
	  tree compute_vtype = get_compute_type (code, opv, type);
	  if (subparts_gt (compute_vtype, compute_type))
	    {
	      compute_type = compute_vtype;
	      op = opv;
	    }
	}

      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
	{
	  if (compute_type == NULL_TREE)
	    compute_type = get_compute_type (code, op, type);
	  if (compute_type == type)
	    return;
	  /* Before splitting vector rotates into scalar rotates,
	     see if we can't use vector shifts and BIT_IOR_EXPR
	     instead.  For vector by vector rotates we'd also
	     need to check BIT_AND_EXPR and NEGATE_EXPR, punt there
	     for now, fold doesn't seem to create such rotates anyway.  */
	  if (compute_type == TREE_TYPE (type)
	      && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	    {
	      optab oplv = vashl_optab, opl = ashl_optab;
	      optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
	      /* Compute usable vector types for both the by-vector and
		 by-scalar variants of each of the three operations.  */
	      tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
	      tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
	      tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
	      tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
	      tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
	      /* The rtl expander will expand vector/scalar as vector/vector
		 if necessary.  Pick one with wider vector type.  */
	      if (subparts_gt (compute_lvtype, compute_ltype))
		{
		  compute_ltype = compute_lvtype;
		  opl = oplv;
		}
	      if (subparts_gt (compute_rvtype, compute_rtype))
		{
		  compute_rtype = compute_rvtype;
		  opr = oprv;
		}
	      /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
		 BIT_IOR_EXPR.  */
	      compute_type = compute_ltype;
	      if (subparts_gt (compute_type, compute_rtype))
		compute_type = compute_rtype;
	      if (subparts_gt (compute_type, compute_otype))
		compute_type = compute_otype;
	      /* Verify all 3 operations can be performed in that type.  */
	      if (compute_type != TREE_TYPE (type))
		{
		  if (optab_handler (opl, TYPE_MODE (compute_type))
		      == CODE_FOR_nothing
		      || optab_handler (opr, TYPE_MODE (compute_type))
			 == CODE_FOR_nothing
		      || optab_handler (opo, TYPE_MODE (compute_type))
			 == CODE_FOR_nothing)
		    compute_type = TREE_TYPE (type);
		}
	    }
	}
    }
  else
    op = optab_for_tree_code (code, type, optab_default);

  /* Optabs will try converting a negation into a subtraction, so
     look for it as well.  TODO: negation of floating-point vectors
     might be turned into an exclusive OR toggling the sign bit.  */
  if (op == unknown_optab
      && code == NEGATE_EXPR
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
    op = optab_for_tree_code (MINUS_EXPR, type, optab_default);

  if (compute_type == NULL_TREE)
    compute_type = get_compute_type (code, op, type);
  /* COMPUTE_TYPE == TYPE means the target supports the operation as-is;
     leave the statement untouched.  */
  if (compute_type == type)
    return;

  new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code,
				     dce_ssa_names);

  /* Leave expression untouched for later expansion.  */
  if (new_rhs == NULL_TREE)
    return;

  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
                               new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree. One
     way to do it is change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree. */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}
   2402 
   2403 /* Use this to lower vector operations introduced by the vectorizer,
   2405    if it may need the bit-twiddling tricks implemented in this file.  */
   2406 
   2407 static unsigned int
   2408 expand_vector_operations (void)
   2409 {
   2410   gimple_stmt_iterator gsi;
   2411   basic_block bb;
   2412   bool cfg_changed = false;
   2413 
   2414   auto_bitmap dce_ssa_names;
   2415 
   2416   FOR_EACH_BB_FN (bb, cfun)
   2417     {
   2418       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
   2419 	{
   2420 	  expand_vector_operations_1 (&gsi, dce_ssa_names);
   2421 	  /* ???  If we do not cleanup EH then we will ICE in
   2422 	     verification.  But in reality we have created wrong-code
   2423 	     as we did not properly transition EH info and edges to
   2424 	     the piecewise computations.  */
   2425 	  if (maybe_clean_eh_stmt (gsi_stmt (gsi))
   2426 	      && gimple_purge_dead_eh_edges (bb))
   2427 	    cfg_changed = true;
   2428 	}
   2429     }
   2430 
   2431   simple_dce_from_worklist (dce_ssa_names);
   2432 
   2433   return cfg_changed ? TODO_cleanup_cfg : 0;
   2434 }
   2435 
   2436 namespace {
   2437 
   2438 const pass_data pass_data_lower_vector =
   2439 {
   2440   GIMPLE_PASS, /* type */
   2441   "veclower", /* name */
   2442   OPTGROUP_VEC, /* optinfo_flags */
   2443   TV_NONE, /* tv_id */
   2444   PROP_cfg, /* properties_required */
   2445   PROP_gimple_lvec, /* properties_provided */
   2446   0, /* properties_destroyed */
   2447   0, /* todo_flags_start */
   2448   TODO_update_ssa, /* todo_flags_finish */
   2449 };
   2450 
   2451 class pass_lower_vector : public gimple_opt_pass
   2452 {
   2453 public:
   2454   pass_lower_vector (gcc::context *ctxt)
   2455     : gimple_opt_pass (pass_data_lower_vector, ctxt)
   2456   {}
   2457 
   2458   /* opt_pass methods: */
   2459   virtual bool gate (function *fun)
   2460     {
   2461       return !(fun->curr_properties & PROP_gimple_lvec);
   2462     }
   2463 
   2464   virtual unsigned int execute (function *)
   2465     {
   2466       return expand_vector_operations ();
   2467     }
   2468 
   2469 }; // class pass_lower_vector
   2470 
   2471 } // anon namespace
   2472 
/* Factory used by the pass manager to instantiate the "veclower"
   pass.  Ownership of the returned pass transfers to the caller.  */
gimple_opt_pass *
make_pass_lower_vector (gcc::context *ctxt)
{
  return new pass_lower_vector (ctxt);
}
   2478 
   2479 namespace {
   2480 
   2481 const pass_data pass_data_lower_vector_ssa =
   2482 {
   2483   GIMPLE_PASS, /* type */
   2484   "veclower2", /* name */
   2485   OPTGROUP_VEC, /* optinfo_flags */
   2486   TV_NONE, /* tv_id */
   2487   PROP_cfg, /* properties_required */
   2488   PROP_gimple_lvec, /* properties_provided */
   2489   0, /* properties_destroyed */
   2490   0, /* todo_flags_start */
   2491   ( TODO_update_ssa
   2492     | TODO_cleanup_cfg ), /* todo_flags_finish */
   2493 };
   2494 
   2495 class pass_lower_vector_ssa : public gimple_opt_pass
   2496 {
   2497 public:
   2498   pass_lower_vector_ssa (gcc::context *ctxt)
   2499     : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
   2500   {}
   2501 
   2502   /* opt_pass methods: */
   2503   opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
   2504   virtual unsigned int execute (function *)
   2505     {
   2506       return expand_vector_operations ();
   2507     }
   2508 
   2509 }; // class pass_lower_vector_ssa
   2510 
   2511 } // anon namespace
   2512 
/* Factory used by the pass manager to instantiate the "veclower2"
   pass.  Ownership of the returned pass transfers to the caller.  */
gimple_opt_pass *
make_pass_lower_vector_ssa (gcc::context *ctxt)
{
  return new pass_lower_vector_ssa (ctxt);
}
   2518 
   2519 #include "gt-tree-vect-generic.h"
   2520