Home | History | Annotate | Line # | Download | only in gcc
tree-vect-patterns.cc revision 1.1.1.1
      1 /* Analysis Utilities for Loop Vectorization.
      2    Copyright (C) 2006-2022 Free Software Foundation, Inc.
      3    Contributed by Dorit Nuzman <dorit (at) il.ibm.com>
      4 
      5 This file is part of GCC.
      6 
      7 GCC is free software; you can redistribute it and/or modify it under
      8 the terms of the GNU General Public License as published by the Free
      9 Software Foundation; either version 3, or (at your option) any later
     10 version.
     11 
     12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
     13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
     14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     15 for more details.
     16 
     17 You should have received a copy of the GNU General Public License
     18 along with GCC; see the file COPYING3.  If not see
     19 <http://www.gnu.org/licenses/>.  */
     20 
     21 #include "config.h"
     22 #include "system.h"
     23 #include "coretypes.h"
     24 #include "backend.h"
     25 #include "rtl.h"
     26 #include "tree.h"
     27 #include "gimple.h"
     28 #include "ssa.h"
     29 #include "expmed.h"
     30 #include "optabs-tree.h"
     31 #include "insn-config.h"
     32 #include "recog.h"		/* FIXME: for insn_data */
     33 #include "fold-const.h"
     34 #include "stor-layout.h"
     35 #include "tree-eh.h"
     36 #include "gimplify.h"
     37 #include "gimple-iterator.h"
     38 #include "cfgloop.h"
     39 #include "tree-vectorizer.h"
     40 #include "dumpfile.h"
     41 #include "builtins.h"
     42 #include "internal-fn.h"
     43 #include "case-cfn-macros.h"
     44 #include "fold-const-call.h"
     45 #include "attribs.h"
     46 #include "cgraph.h"
     47 #include "omp-simd-clone.h"
     48 #include "predict.h"
     49 #include "tree-vector-builder.h"
     50 #include "vec-perm-indices.h"
     51 #include "gimple-range.h"
     52 
     53 /* Return true if we have a useful VR_RANGE range for VAR, storing it
     54    in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.  */
     55 
     56 static bool
     57 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
     58 {
     59   value_range vr;
     60   get_range_query (cfun)->range_of_expr (vr, var);
     61   if (vr.undefined_p ())
     62     vr.set_varying (TREE_TYPE (var));
     63   *min_value = wi::to_wide (vr.min ());
     64   *max_value = wi::to_wide (vr.max ());
     65   value_range_kind vr_type = vr.kind ();
     66   wide_int nonzero = get_nonzero_bits (var);
     67   signop sgn = TYPE_SIGN (TREE_TYPE (var));
     68   if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
     69 					 nonzero, sgn) == VR_RANGE)
     70     {
     71       if (dump_enabled_p ())
     72 	{
     73 	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
     74 	  dump_printf (MSG_NOTE, " has range [");
     75 	  dump_hex (MSG_NOTE, *min_value);
     76 	  dump_printf (MSG_NOTE, ", ");
     77 	  dump_hex (MSG_NOTE, *max_value);
     78 	  dump_printf (MSG_NOTE, "]\n");
     79 	}
     80       return true;
     81     }
     82   else
     83     {
     84       if (dump_enabled_p ())
     85 	{
     86 	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
     87 	  dump_printf (MSG_NOTE, " has no range info\n");
     88 	}
     89       return false;
     90     }
     91 }
     92 
     93 /* Report that we've found an instance of pattern PATTERN in
     94    statement STMT.  */
     95 
     96 static void
     97 vect_pattern_detected (const char *name, gimple *stmt)
     98 {
     99   if (dump_enabled_p ())
    100     dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
    101 }
    102 
    103 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
    104    return the pattern statement's stmt_vec_info.  Set its vector type to
    105    VECTYPE if it doesn't have one already.  */
    106 
static stmt_vec_info
vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
			stmt_vec_info orig_stmt_info, tree vectype)
{
  /* Reuse an existing stmt_vec_info for PATTERN_STMT if the statement
     was already registered, otherwise register it now.  */
  stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
  /* Pattern statements belong to the same block as the statement
     they are replacing.  */
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));

  pattern_stmt_info->pattern_stmt_p = true;
  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
  /* The pattern statement plays the same role in the vectorized loop
     as the original statement did.  */
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
    {
      /* A vector boolean type is only appropriate if the original
	 statement was recognized as operating on masks.  */
      gcc_assert (!vectype
		  || (VECTOR_BOOLEAN_TYPE_P (vectype)
		      == vect_use_mask_type_p (orig_stmt_info)));
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
      pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
    }
  return pattern_stmt_info;
}
    130 
    131 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
    132    Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
    133    have one already.  */
    134 
static void
vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
		       stmt_vec_info orig_stmt_info, tree vectype)
{
  /* Mark ORIG_STMT_INFO as being replaced by a pattern and link it to
     the freshly-initialized pattern statement.  */
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info)
    = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
}
    143 
    144 /* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
    145    is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
    146    be different from the vector type of the final pattern statement.
    147    If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
    148    from which it was derived.  */
    149 
static inline void
append_pattern_def_seq (vec_info *vinfo,
			stmt_vec_info stmt_info, gimple *new_stmt,
			tree vectype = NULL_TREE,
			tree scalar_type_for_mask = NULL_TREE)
{
  /* SCALAR_TYPE_FOR_MASK must be supplied exactly when VECTYPE is a
     mask (vector boolean) type.  */
  gcc_assert (!scalar_type_for_mask
	      == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
  if (vectype)
    {
      stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
      if (scalar_type_for_mask)
	/* Record the precision of the scalar type from which the
	   mask type was derived.  */
	new_stmt_info->mask_precision
	  = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
    }
  /* Queue NEW_STMT on STMT_INFO's pattern definition sequence without
     updating SSA operands.  */
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      new_stmt);
}
    169 
    170 /* The caller wants to perform new operations on vect_external variable
    171    VAR, so that the result of the operations would also be vect_external.
    172    Return the edge on which the operations can be performed, if one exists.
    173    Return null if the operations should instead be treated as part of
    174    the pattern that needs them.  */
    175 
    176 static edge
    177 vect_get_external_def_edge (vec_info *vinfo, tree var)
    178 {
    179   edge e = NULL;
    180   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    181     {
    182       e = loop_preheader_edge (loop_vinfo->loop);
    183       if (!SSA_NAME_IS_DEFAULT_DEF (var))
    184 	{
    185 	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
    186 	  if (bb == NULL
    187 	      || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
    188 	    e = NULL;
    189 	}
    190     }
    191   return e;
    192 }
    193 
    194 /* Return true if the target supports a vector version of CODE,
    195    where CODE is known to map to a direct optab with the given SUBTYPE.
    196    ITYPE specifies the type of (some of) the scalar inputs and OTYPE
    197    specifies the type of the scalar result.
    198 
    199    If CODE allows the inputs and outputs to have different type
    200    (such as for WIDEN_SUM_EXPR), it is the input mode rather
    201    than the output mode that determines the appropriate target pattern.
    202    Operand 0 of the target pattern then specifies the mode that the output
    203    must have.
    204 
    205    When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
    206    Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
    207    is nonnull.  */
    208 
static bool
vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
				 tree itype, tree *vecotype_out,
				 tree *vecitype_out = NULL,
				 enum optab_subtype subtype = optab_default)
{
  /* Both the input and the output scalar types must have vector
     counterparts for the current target.  */
  tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
  if (!vecitype)
    return false;

  tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
  if (!vecotype)
    return false;

  /* As per the function comment, the optab is keyed off the input
     vector type; the output mode is checked separately below.  */
  optab optab = optab_for_tree_code (code, vecitype, subtype);
  if (!optab)
    return false;

  /* The target pattern's operand 0 gives the required output mode.  */
  insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
  if (icode == CODE_FOR_nothing
      || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
    return false;

  *vecotype_out = vecotype;
  if (vecitype_out)
    *vecitype_out = vecitype;
  return true;
}
    237 
    238 /* Round bit precision PRECISION up to a full element.  */
    239 
    240 static unsigned int
    241 vect_element_precision (unsigned int precision)
    242 {
    243   precision = 1 << ceil_log2 (precision);
    244   return MAX (precision, BITS_PER_UNIT);
    245 }
    246 
    247 /* If OP is defined by a statement that's being considered for vectorization,
    248    return information about that statement, otherwise return NULL.  */
    249 
    250 static stmt_vec_info
    251 vect_get_internal_def (vec_info *vinfo, tree op)
    252 {
    253   stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
    254   if (def_stmt_info
    255       && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
    256     return def_stmt_info;
    257   return NULL;
    258 }
    259 
    260 /* Check whether NAME, an ssa-name used in STMT_VINFO,
    261    is a result of a type promotion, such that:
    262      DEF_STMT: NAME = NOP (name0)
    263    If CHECK_SIGN is TRUE, check that either both types are signed or both are
    264    unsigned.  */
    265 
static bool
type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
		   tree *orig_type, gimple **def_stmt, bool *promotion)
{
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;

  /* NAME must have a definition the vectorizer can handle.  */
  stmt_vec_info def_stmt_info;
  if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  /* The defining statement must be a plain conversion.  */
  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  /* Only integer-to-integer conversions are of interest, optionally
     with matching signedness.  */
  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  /* Classify the conversion as a promotion only when it at least
     doubles the precision.  */
  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  /* The conversion's input must itself be usable by the vectorizer.  */
  if (!vect_is_simple_use (oprnd0, vinfo, &dt))
    return false;

  return true;
}
    308 
    309 /* Holds information about an input operand after some sign changes
    310    and type promotions have been peeled away.  */
class vect_unpromoted_value {
public:
  vect_unpromoted_value ();

  /* Record an operand, its definition type and (optionally) the
     statement that casts it.  */
  void set_op (tree, vect_def_type, stmt_vec_info = NULL);

  /* The value obtained after peeling away zero or more casts.  */
  tree op;

  /* The type of OP.  */
  tree type;

  /* The definition type of OP.  */
  vect_def_type dt;

  /* If OP is the result of peeling at least one cast, and if the cast
     of OP itself is a vectorizable statement, CASTER identifies that
     statement, otherwise it is null.  */
  stmt_vec_info caster;
};
    331 
/* Start with an empty descriptor: no operand has been recorded yet.  */

inline vect_unpromoted_value::vect_unpromoted_value ()
  : op (NULL_TREE),
    type (NULL_TREE),
    dt (vect_uninitialized_def),
    caster (NULL)
{
}
    339 
    340 /* Set the operand to OP_IN, its definition type to DT_IN, and the
    341    statement that casts it to CASTER_IN.  */
    342 
inline void
vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
			       stmt_vec_info caster_in)
{
  op = op_in;
  /* Cache OP's type so callers don't have to re-derive it.  */
  type = TREE_TYPE (op);
  dt = dt_in;
  caster = caster_in;
}
    352 
    353 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
    354    to reach some vectorizable inner operand OP', continuing as long as it
    355    is possible to convert OP' back to OP using a possible sign change
    356    followed by a possible promotion P.  Return this OP', or null if OP is
    357    not a vectorizable SSA name.  If there is a promotion P, describe its
    358    input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
    359    is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
    360    have more than one user.
    361 
    362    A successful return means that it is possible to go from OP' to OP
    363    via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
    364    whereas the cast from UNPROM to OP might be a promotion, a sign
    365    change, or a nop.
    366 
    367    E.g. say we have:
    368 
    369        signed short *ptr = ...;
    370        signed short C = *ptr;
    371        unsigned short B = (unsigned short) C;    // sign change
    372        signed int A = (signed int) B;            // unsigned promotion
    373        ...possible other uses of A...
    374        unsigned int OP = (unsigned int) A;       // sign change
    375 
    376    In this case it's possible to go directly from C to OP using:
    377 
    378        OP = (unsigned int) (unsigned short) C;
    379 	    +------------+ +--------------+
    380 	       promotion      sign change
    381 
    382    so OP' would be C.  The input to the promotion is B, so UNPROM
    383    would describe B.  */
    384 
static tree
vect_look_through_possible_promotion (vec_info *vinfo, tree op,
				      vect_unpromoted_value *unprom,
				      bool *single_use_p = NULL)
{
  tree res = NULL_TREE;
  tree op_type = TREE_TYPE (op);
  unsigned int orig_precision = TYPE_PRECISION (op_type);
  unsigned int min_precision = orig_precision;
  stmt_vec_info caster = NULL;
  /* Walk backwards through the chain of integer casts that defines OP,
     stopping when we reach something that isn't a vectorizable cast.  */
  while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
    {
      /* See whether OP is simple enough to vectorize.  */
      stmt_vec_info def_stmt_info;
      gimple *def_stmt;
      vect_def_type dt;
      if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
	break;

      /* If OP is the input of a demotion, skip over it to see whether
	 OP is itself the result of a promotion.  If so, the combined
	 effect of the promotion and the demotion might fit the required
	 pattern, otherwise neither operation fits.

	 This copes with cases such as the result of an arithmetic
	 operation being truncated before being stored, and where that
	 arithmetic operation has been recognized as an over-widened one.  */
      if (TYPE_PRECISION (op_type) <= min_precision)
	{
	  /* Use OP as the UNPROM described above if we haven't yet
	     found a promotion, or if using the new input preserves the
	     sign of the previous promotion.  */
	  if (!res
	      || TYPE_PRECISION (unprom->type) == orig_precision
	      || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
	    {
	      unprom->set_op (op, dt, caster);
	      min_precision = TYPE_PRECISION (op_type);
	    }
	  /* Stop if we've already seen a promotion and if this
	     conversion does more than change the sign.  */
	  else if (TYPE_PRECISION (op_type)
		   != TYPE_PRECISION (unprom->type))
	    break;

	  /* The sequence now extends to OP.  */
	  res = op;
	}

      /* See whether OP is defined by a cast.  Record it as CASTER if
	 the cast is potentially vectorizable.  */
      if (!def_stmt)
	break;
      caster = def_stmt_info;

      /* Ignore pattern statements, since we don't link uses for them.  */
      if (caster
	  && single_use_p
	  && !STMT_VINFO_RELATED_STMT (caster)
	  && !has_single_use (res))
	*single_use_p = false;

      /* Stop at anything that isn't a NOP/CONVERT assignment.  */
      gassign *assign = dyn_cast <gassign *> (def_stmt);
      if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
	break;

      /* Continue with the input to the cast.  */
      op = gimple_assign_rhs1 (def_stmt);
      op_type = TREE_TYPE (op);
    }
  return res;
}
    457 
    458 /* OP is an integer operand to an operation that returns TYPE, and we
    459    want to treat the operation as a widening one.  So far we can treat
    460    it as widening from *COMMON_TYPE.
    461 
    462    Return true if OP is suitable for such a widening operation,
    463    either widening from *COMMON_TYPE or from some supertype of it.
    464    Update *COMMON_TYPE to the supertype in the latter case.
    465 
    466    SHIFT_P is true if OP is a shift amount.  */
    467 
static bool
vect_joust_widened_integer (tree type, bool shift_p, tree op,
			    tree *common_type)
{
  /* Calculate the minimum precision required by OP, without changing
     the sign of either operand.  */
  unsigned int precision;
  if (shift_p)
    {
      /* A shift amount is only acceptable if it is no more than half
	 the precision of the (widened) result.  */
      if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
	return false;
      precision = TREE_INT_CST_LOW (op);
    }
  else
    {
      /* For other operands, OP must fit in at most half of TYPE's
	 precision for the operation to count as widening.  */
      precision = wi::min_precision (wi::to_widest (op),
				     TYPE_SIGN (*common_type));
      if (precision * 2 > TYPE_PRECISION (type))
	return false;
    }

  /* If OP requires a wider type, switch to that type.  The checks
     above ensure that this is still narrower than the result.  */
  precision = vect_element_precision (precision);
  if (TYPE_PRECISION (*common_type) < precision)
    *common_type = build_nonstandard_integer_type
      (precision, TYPE_UNSIGNED (*common_type));
  return true;
}
    497 
    498 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
    499    is narrower than type, storing the supertype in *COMMON_TYPE if so.  */
    500 
static bool
vect_joust_widened_type (tree type, tree new_type, tree *common_type)
{
  /* Identical types trivially share a supertype.  */
  if (types_compatible_p (*common_type, new_type))
    return true;

  /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
  if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
      && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
    return true;

  /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
  if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
      && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
    {
      *common_type = new_type;
      return true;
    }

  /* We have mismatched signs, with the signed type being
     no wider than the unsigned type.  In this case we need
     a wider signed type.  */
  unsigned int precision = MAX (TYPE_PRECISION (*common_type),
				TYPE_PRECISION (new_type));
  precision *= 2;

  /* The widened supertype must still be at most half the precision
     of TYPE for the operation to remain a widening one.  */
  if (precision * 2 > TYPE_PRECISION (type))
    return false;

  *common_type = build_nonstandard_integer_type (precision, false);
  return true;
}
    533 
    534 /* Check whether STMT_INFO can be viewed as a tree of integer operations
    535    in which each node either performs CODE or WIDENED_CODE, and where
    536    each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
    537    specifies the maximum number of leaf operands.  SHIFT_P says whether
    538    CODE and WIDENED_CODE are some sort of shift.
    539 
    540    If STMT_INFO is such a tree, return the number of leaf operands
    541    and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
    542    to a type that (a) is narrower than the result of STMT_INFO and
    543    (b) can hold all leaf operand values.
    544 
    545    If SUBTYPE then allow that the signs of the operands
    546    may differ in signs but not in precision.  SUBTYPE is updated to reflect
    547    this.
    548 
    549    Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
    550    exists.  */
    551 
static unsigned int
vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
		      tree_code widened_code, bool shift_p,
		      unsigned int max_nops,
		      vect_unpromoted_value *unprom, tree *common_type,
		      enum optab_subtype *subtype = NULL)
{
  /* Check for an integer operation with the right code.  */
  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign)
    return 0;

  tree_code rhs_code = gimple_assign_rhs_code (assign);
  if (rhs_code != code && rhs_code != widened_code)
    return 0;

  tree type = TREE_TYPE (gimple_assign_lhs (assign));
  if (!INTEGRAL_TYPE_P (type))
    return 0;

  /* Assume that both operands will be leaf operands.  */
  max_nops -= 2;

  /* Check the operands.  */
  unsigned int next_op = 0;
  for (unsigned int i = 0; i < 2; ++i)
    {
      /* THIS_UNPROM is the next free slot in the caller's array;
	 NOPS counts how many slots this operand ends up using.  */
      vect_unpromoted_value *this_unprom = &unprom[next_op];
      unsigned int nops = 1;
      tree op = gimple_op (assign, i + 1);
      if (i == 1 && TREE_CODE (op) == INTEGER_CST)
	{
	  /* We already have a common type from earlier operands.
	     Update it to account for OP.  */
	  this_unprom->set_op (op, vect_constant_def);
	  if (!vect_joust_widened_integer (type, shift_p, op, common_type))
	    return 0;
	}
      else
	{
	  /* Only allow shifts by constants.  */
	  if (shift_p && i == 1)
	    return 0;

	  if (rhs_code != code)
	    {
	      /* If rhs_code is widened_code, don't look through further
		 possible promotions, there is a promotion already embedded
		 in the WIDEN_*_EXPR.  */
	      if (TREE_CODE (op) != SSA_NAME
		  || !INTEGRAL_TYPE_P (TREE_TYPE (op)))
		return 0;

	      stmt_vec_info def_stmt_info;
	      gimple *def_stmt;
	      vect_def_type dt;
	      if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info,
				       &def_stmt))
		return 0;
	      this_unprom->set_op (op, dt, NULL);
	    }
	  else if (!vect_look_through_possible_promotion (vinfo, op,
							  this_unprom))
	    return 0;

	  if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
	    {
	      /* The operand isn't widened.  If STMT_INFO has the code
		 for an unwidened operation, recursively check whether
		 this operand is a node of the tree.  */
	      if (rhs_code != code
		  || max_nops == 0
		  || this_unprom->dt != vect_internal_def)
		return 0;

	      /* Give back the leaf slot allocated above now that we're
		 not treating this as a leaf operand.  */
	      max_nops += 1;

	      /* Recursively process the definition of the operand.  */
	      stmt_vec_info def_stmt_info
		= vinfo->lookup_def (this_unprom->op);
	      nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
					   widened_code, shift_p, max_nops,
					   this_unprom, common_type,
					   subtype);
	      if (nops == 0)
		return 0;

	      max_nops -= nops;
	    }
	  else
	    {
	      /* Make sure that the operand is narrower than the result.  */
	      if (TYPE_PRECISION (this_unprom->type) * 2
		  > TYPE_PRECISION (type))
		return 0;

	      /* Update COMMON_TYPE for the new operand.  */
	      if (i == 0)
		*common_type = this_unprom->type;
	      else if (!vect_joust_widened_type (type, this_unprom->type,
						 common_type))
		{
		  if (subtype)
		    {
		      /* See if we can sign extend the smaller type.  */
		      if (TYPE_PRECISION (this_unprom->type)
			  > TYPE_PRECISION (*common_type))
			*common_type = this_unprom->type;
		      *subtype = optab_vector_mixed_sign;
		    }
		  else
		    return 0;
		}
	    }
	}
      next_op += nops;
    }
  /* NEXT_OP is now the total number of leaf operands recorded in
     UNPROM[0] onwards.  */
  return next_op;
}
    673 
    674 /* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
    675    is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
    676 
static tree
vect_recog_temp_ssa_var (tree type, gimple *stmt)
{
  /* The "patt" prefix makes pattern temporaries easy to spot in dumps.  */
  return make_temp_ssa_name (type, stmt, "patt");
}
    682 
    683 /* STMT2_INFO describes a type conversion that could be split into STMT1
    684    followed by a version of STMT2_INFO that takes NEW_RHS as its first
    685    input.  Try to do this using pattern statements, returning true on
    686    success.  */
    687 
static bool
vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
		      gimple *stmt1, tree vectype)
{
  if (is_pattern_stmt_p (stmt2_info))
    {
      /* STMT2_INFO is part of a pattern.  Get the statement to which
	 the pattern is attached.  */
      stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
      vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Splitting pattern statement: %G", stmt2_info->stmt);

      /* Since STMT2_INFO is a pattern statement, we can change it
	 in-situ without worrying about changing the code for the
	 containing block.  */
      gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
			   stmt2_info->stmt);
	}

      /* Insert STMT1 into the pattern definition sequence at the
	 appropriate position relative to STMT2_INFO.  */
      gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
      if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
	/* STMT2_INFO is the actual pattern statement.  Add STMT1
	   to the end of the definition sequence.  */
	gimple_seq_add_stmt_without_update (def_seq, stmt1);
      else
	{
	  /* STMT2_INFO belongs to the definition sequence.  Insert STMT1
	     before it.  */
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
	  gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
	}
      return true;
    }
  else
    {
      /* STMT2_INFO doesn't yet have a pattern.  Try to create a
	 two-statement pattern now.  */
      gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
      tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
      tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
      if (!lhs_vectype)
	return false;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Splitting statement: %G", stmt2_info->stmt);

      /* Add STMT1 as a singleton pattern definition sequence.  */
      gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
      vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
      gimple_seq_add_stmt_without_update (def_seq, stmt1);

      /* Build the second of the two pattern statements.  */
      tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
      gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
      vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "into pattern statements: %G", stmt1);
	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
	}

      return true;
    }
}
    763 
    764 /* Convert UNPROM to TYPE and return the result, adding new statements
    765    to STMT_INFO's pattern definition statements if no better way is
    766    available.  VECTYPE is the vector form of TYPE.
    767 
    768    If SUBTYPE then convert the type based on the subtype.  */
    769 
static tree
vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
		    vect_unpromoted_value *unprom, tree vectype,
		    enum optab_subtype subtype = optab_default)
{

  /* Update the type if the signs differ.  For mixed-sign operations the
     operand keeps its original sign, so convert to a same-precision type
     with UNPROM's signedness instead of TYPE's.  */
  if (subtype == optab_vector_mixed_sign
      && TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (unprom->op)))
    type = build_nonstandard_integer_type (TYPE_PRECISION (type),
					   TYPE_SIGN (unprom->type));

  /* Check for a no-op conversion.  */
  if (types_compatible_p (type, TREE_TYPE (unprom->op)))
    return unprom->op;

  /* Allow the caller to create constant vect_unpromoted_values.  Constants
     are converted directly, with no statement needed.  */
  if (TREE_CODE (unprom->op) == INTEGER_CST)
    return wide_int_to_tree (type, wi::to_widest (unprom->op));

  tree input = unprom->op;
  if (unprom->caster)
    {
      tree lhs = gimple_get_lhs (unprom->caster->stmt);
      tree lhs_type = TREE_TYPE (lhs);

      /* If the result of the existing cast is the right width, use it
	 instead of the source of the cast.  */
      if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
	input = lhs;
      /* If the precision we want is between the source and result
	 precisions of the existing cast, try splitting the cast into
	 two and tapping into a mid-way point.  */
      else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
	       && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
	{
	  /* In order to preserve the semantics of the original cast,
	     give the mid-way point the same signedness as the input value.

	     It would be possible to use a signed type here instead if
	     TYPE is signed and UNPROM->TYPE is unsigned, but that would
	     make the sign of the midtype sensitive to the order in
	     which we process the statements, since the signedness of
	     TYPE is the signedness required by just one of possibly
	     many users.  Also, unsigned promotions are usually as cheap
	     as or cheaper than signed ones, so it's better to keep an
	     unsigned promotion.  */
	  tree midtype = build_nonstandard_integer_type
	    (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
	  tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
	  if (vec_midtype)
	    {
	      input = vect_recog_temp_ssa_var (midtype, NULL);
	      gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
						       unprom->op);
	      /* Prefer rewriting the existing cast in place; if that
		 fails, add the first half as a pattern definition.  */
	      if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
					 vec_midtype))
		append_pattern_def_seq (vinfo, stmt_info,
					new_stmt, vec_midtype);
	    }
	}

      /* See if we can reuse an existing result.  */
      if (types_compatible_p (type, TREE_TYPE (input)))
	return input;
    }

  /* We need a new conversion statement.  */
  tree new_op = vect_recog_temp_ssa_var (type, NULL);
  gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);

  /* If OP is an external value, see if we can insert the new statement
     on an incoming edge.  */
  if (input == unprom->op && unprom->dt == vect_external_def)
    if (edge e = vect_get_external_def_edge (vinfo, input))
      {
	basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
	gcc_assert (!new_bb);
	return new_op;
      }

  /* As a (common) last resort, add the statement to the pattern itself.  */
  append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
  return new_op;
}
    855 
    856 /* Invoke vect_convert_input for N elements of UNPROM and store the
    857    result in the corresponding elements of RESULT.
    858 
    859    If SUBTYPE then convert the type based on the subtype.  */
    860 
    861 static void
    862 vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
    863 		     tree *result, tree type, vect_unpromoted_value *unprom,
    864 		     tree vectype, enum optab_subtype subtype = optab_default)
    865 {
    866   for (unsigned int i = 0; i < n; ++i)
    867     {
    868       unsigned int j;
    869       for (j = 0; j < i; ++j)
    870 	if (unprom[j].op == unprom[i].op)
    871 	  break;
    872 
    873       if (j < i)
    874 	result[i] = result[j];
    875       else
    876 	result[i] = vect_convert_input (vinfo, stmt_info,
    877 					type, &unprom[i], vectype, subtype);
    878     }
    879 }
    880 
    881 /* The caller has created a (possibly empty) sequence of pattern definition
    882    statements followed by a single statement PATTERN_STMT.  Cast the result
    883    of this final statement to TYPE.  If a new statement is needed, add
    884    PATTERN_STMT to the end of STMT_INFO's pattern definition statements
    885    and return the new statement, otherwise return PATTERN_STMT as-is.
    886    VECITYPE is the vector form of PATTERN_STMT's result type.  */
    887 
    888 static gimple *
    889 vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
    890 		     gimple *pattern_stmt, tree vecitype)
    891 {
    892   tree lhs = gimple_get_lhs (pattern_stmt);
    893   if (!types_compatible_p (type, TREE_TYPE (lhs)))
    894     {
    895       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
    896       tree cast_var = vect_recog_temp_ssa_var (type, NULL);
    897       pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
    898     }
    899   return pattern_stmt;
    900 }
    901 
    902 /* Return true if STMT_VINFO describes a reduction for which reassociation
    903    is allowed.  If STMT_INFO is part of a group, assume that it's part of
    904    a reduction chain and optimistically assume that all statements
    905    except the last allow reassociation.
    906    Also require it to have code CODE and to be a reduction
    907    in the outermost loop.  When returning true, store the operands in
    908    *OP0_OUT and *OP1_OUT.  */
    909 
    910 static bool
    911 vect_reassociating_reduction_p (vec_info *vinfo,
    912 				stmt_vec_info stmt_info, tree_code code,
    913 				tree *op0_out, tree *op1_out)
    914 {
    915   loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
    916   if (!loop_info)
    917     return false;
    918 
    919   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
    920   if (!assign || gimple_assign_rhs_code (assign) != code)
    921     return false;
    922 
    923   /* We don't allow changing the order of the computation in the inner-loop
    924      when doing outer-loop vectorization.  */
    925   class loop *loop = LOOP_VINFO_LOOP (loop_info);
    926   if (loop && nested_in_vect_loop_p (loop, stmt_info))
    927     return false;
    928 
    929   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
    930     {
    931       if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
    932 				       code))
    933 	return false;
    934     }
    935   else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
    936     return false;
    937 
    938   *op0_out = gimple_assign_rhs1 (assign);
    939   *op1_out = gimple_assign_rhs2 (assign);
    940   if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
    941     std::swap (*op0_out, *op1_out);
    942   return true;
    943 }
    944 
    945 /* match.pd function to match
    946    (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
    947    with conditions:
    948    1) @1, @2, c, d, a, b are all integral type.
    949    2) There's single_use for both @1 and @2.
    950    3) a, c have same precision.
    951    4) c and @1 have different precision.
    952    5) c, d are the same type or they can differ in sign when convert is
    953    truncation.
    954 
    955    record a and c and d and @3.  */
    956 
    957 extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
    958 
    959 /* Function vect_recog_cond_expr_convert
    960 
    961    Try to find the following pattern:
    962 
    963    TYPE_AB A,B;
    964    TYPE_CD C,D;
    965    TYPE_E E;
    966    TYPE_E op_true = (TYPE_E) A;
    967    TYPE_E op_false = (TYPE_E) B;
    968 
    969    E = C cmp D ? op_true : op_false;
    970 
    971    where
    972    TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
    973    TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
    974    single_use of op_true and op_false.
    975    TYPE_AB could differ in sign when (TYPE_E) A is a truncation.
    976 
    977    Input:
    978 
    979    * STMT_VINFO: The stmt from which the pattern search begins.
    980    here it starts with E = c cmp D ? op_true : op_false;
    981 
    982    Output:
    983 
    984    TYPE1 E' = C cmp D ? A : B;
    985    TYPE3 E = (TYPE3) E';
    986 
   There may be an extra nop_convert for A or B to handle different signedness.
    988 
    989    * TYPE_OUT: The vector type of the output of this pattern.
    990 
    991    * Return value: A new stmt that will be used to replace the sequence of
    992    stmts that constitute the pattern. In this case it will be:
    993    E = (TYPE3)E';
    994    E' = C cmp D ? A : B; is recorded in pattern definition statements;  */
    995 
static gimple *
vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
				      stmt_vec_info stmt_vinfo, tree *type_out)
{
  gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
  tree lhs, match[4], temp, type, new_lhs, op2;
  gimple *cond_stmt;
  gimple *pattern_stmt;

  if (!last_stmt)
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);

  /* Find E = C cmp D ? (TYPE3) A : (TYPE3) B;
     TYPE_PRECISION (A) == TYPE_PRECISION (C).
     On success match[0] is A, match[1]/match[2] the unconverted arms
     and match[3] the comparison.  */
  if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
    return NULL;

  vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);

  op2 = match[2];
  type = TREE_TYPE (match[1]);
  /* If the arms differ in sign, insert a nop conversion of the second
     arm so that both COND_EXPR arms have the same type.  */
  if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
    {
      op2 = vect_recog_temp_ssa_var (type, NULL);
      gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
      append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
			      get_vectype_for_scalar_type (vinfo, type));
    }

  /* Build E' = C cmp D ? A : B in the unconverted type as a pattern
     definition statement ...  */
  temp = vect_recog_temp_ssa_var (type, NULL);
  cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
						 match[1], op2));
  append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
			  get_vectype_for_scalar_type (vinfo, type));
  /* ... and make E = (TYPE3) E' the final pattern statement.  */
  new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
  pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
  *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created pattern stmt: %G", pattern_stmt);
  return pattern_stmt;
}
   1041 
   1042 /* Function vect_recog_dot_prod_pattern
   1043 
   1044    Try to find the following pattern:
   1045 
     type1a x_t;
   1047      type1b y_t;
   1048      TYPE1 prod;
   1049      TYPE2 sum = init;
   1050    loop:
   1051      sum_0 = phi <init, sum_1>
   1052      S1  x_t = ...
   1053      S2  y_t = ...
   1054      S3  x_T = (TYPE1) x_t;
   1055      S4  y_T = (TYPE1) y_t;
   1056      S5  prod = x_T * y_T;
   1057      [S6  prod = (TYPE2) prod;  #optional]
   1058      S7  sum_1 = prod + sum_0;
   1059 
   1060    where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
   1061    the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of
   1062    'type1a' and 'type1b' can differ.
   1063 
   1064    Input:
   1065 
   1066    * STMT_VINFO: The stmt from which the pattern search begins.  In the
   1067    example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   1068    will be detected.
   1069 
   1070    Output:
   1071 
   1072    * TYPE_OUT: The type of the output  of this pattern.
   1073 
   1074    * Return value: A new stmt that will be used to replace the sequence of
   1075    stmts that constitute the pattern. In this case it will be:
   1076         WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
   1077 
   1078    Note: The dot-prod idiom is a widening reduction pattern that is
   1079          vectorized without preserving all the intermediate results. It
   1080          produces only N/2 (widened) results (by summing up pairs of
   1081          intermediate results) rather than all N results.  Therefore, we
   1082          cannot allow this pattern when we want to get all the results and in
   1083          the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
   1085 
static gimple *
vect_recog_dot_prod_pattern (vec_info *vinfo,
			     stmt_vec_info stmt_vinfo, tree *type_out)
{
  tree oprnd0, oprnd1;
  gimple *last_stmt = stmt_vinfo->stmt;
  tree type, half_type;
  gimple *pattern_stmt;
  tree var;

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type but the sign
       between X, Y and DPROD can differ.
     - sum is the same size of DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
				       &oprnd0, &oprnd1))
    return NULL;

  type = TREE_TYPE (gimple_get_lhs (last_stmt));

  /* Strip the optional conversion (S6 above) between the multiplication
     and the addition, recording the unconverted value.  */
  vect_unpromoted_value unprom_mult;
  oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (!oprnd0)
    return NULL;

  stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
  if (!mult_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom0[2];
  enum optab_subtype subtype = optab_vector;
  if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
			     false, 2, unprom0, &half_type, &subtype))
    return NULL;

  /* If there are two widening operations, make sure they agree on the sign
     of the extension.  The result of an optab_vector_mixed_sign operation
     is signed; otherwise, the result has the same sign as the operands.  */
  if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
      && (subtype == optab_vector_mixed_sign
	? TYPE_UNSIGNED (unprom_mult.type)
	: TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
    return NULL;

  vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);

  /* Check that the target supports DOT_PROD_EXPR for these types.  */
  tree half_vectype;
  if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
					type_out, &half_vectype, subtype))
    return NULL;

  /* Get the inputs in the appropriate types.  */
  tree mult_oprnd[2];
  vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
		       unprom0, half_vectype, subtype);

  /* Build the replacement sum_1 = DOT_PROD_EXPR <x_t, y_t, sum_0>.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
				      mult_oprnd[0], mult_oprnd[1], oprnd1);

  return pattern_stmt;
}
   1176 
   1177 
   1178 /* Function vect_recog_sad_pattern
   1179 
   1180    Try to find the following Sum of Absolute Difference (SAD) pattern:
   1181 
   1182      type x_t, y_t;
   1183      signed TYPE1 diff, abs_diff;
   1184      TYPE2 sum = init;
   1185    loop:
   1186      sum_0 = phi <init, sum_1>
   1187      S1  x_t = ...
   1188      S2  y_t = ...
   1189      S3  x_T = (TYPE1) x_t;
   1190      S4  y_T = (TYPE1) y_t;
   1191      S5  diff = x_T - y_T;
   1192      S6  abs_diff = ABS_EXPR <diff>;
   1193      [S7  abs_diff = (TYPE2) abs_diff;  #optional]
   1194      S8  sum_1 = abs_diff + sum_0;
   1195 
   1196    where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
   1197    same size of 'TYPE1' or bigger. This is a special case of a reduction
   1198    computation.
   1199 
   1200    Input:
   1201 
   1202    * STMT_VINFO: The stmt from which the pattern search begins.  In the
   1203    example, when this function is called with S8, the pattern
   1204    {S3,S4,S5,S6,S7,S8} will be detected.
   1205 
   1206    Output:
   1207 
   1208    * TYPE_OUT: The type of the output of this pattern.
   1209 
   1210    * Return value: A new stmt that will be used to replace the sequence of
   1211    stmts that constitute the pattern. In this case it will be:
   1212         SAD_EXPR <x_t, y_t, sum_0>
   1213   */
   1214 
static gimple *
vect_recog_sad_pattern (vec_info *vinfo,
			stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree half_type;

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          DAD = (TYPE2) DAD;       #optional
          sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size of DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  tree plus_oprnd0, plus_oprnd1;
  if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
				       &plus_oprnd0, &plus_oprnd1))
    return NULL;

  tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));

  /* Any non-truncating sequence of conversions is OK here, since
     with a successful match, the result of the ABS(U) is known to fit
     within the nonnegative range of the result type.  (It cannot be the
     negative of the minimum signed value due to the range of the widening
     MINUS_EXPR.)  */
  vect_unpromoted_value unprom_abs;
  plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
						      &unprom_abs);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a loop-header
     phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
     Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (!plus_oprnd0)
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
  if (!abs_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
  if (!abs_stmt
      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  tree abs_type = TREE_TYPE (abs_oprnd);
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* Peel off conversions from the ABS input.  This can involve sign
     changes (e.g. from an unsigned subtraction to a signed ABS input)
     or signed promotion, but it can't include unsigned promotion.
     (Note that ABS of an unsigned promotion should have been folded
     away before now anyway.)  */
  vect_unpromoted_value unprom_diff;
  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
						    &unprom_diff);
  if (!abs_oprnd)
    return NULL;
  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
      && TYPE_UNSIGNED (unprom_diff.type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
  if (!diff_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom[2];
  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
			     false, 2, unprom, &half_type))
    return NULL;

  vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);

  /* Check that the target supports SAD_EXPR for these types.  */
  tree half_vectype;
  if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
					type_out, &half_vectype))
    return NULL;

  /* Get the inputs to the SAD_EXPR in the appropriate types.  */
  tree sad_oprnd[2];
  vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
		       unprom, half_vectype);

  /* Build the replacement sum_1 = SAD_EXPR <x_t, y_t, sum_0>.  */
  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
					      sad_oprnd[1], plus_oprnd1);

  return pattern_stmt;
}
   1334 
   1335 /* Recognize an operation that performs ORIG_CODE on widened inputs,
   1336    so that it can be treated as though it had the form:
   1337 
   1338       A_TYPE a;
   1339       B_TYPE b;
   1340       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
   1341       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
   1342     | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
   1343     | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
   1344     | RES_TYPE res = a_extend ORIG_CODE b_extend;
   1345 
   1346    Try to replace the pattern with:
   1347 
   1348       A_TYPE a;
   1349       B_TYPE b;
   1350       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
   1351       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
   1352     | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
   1353     | RES_TYPE res = (EXT_TYPE) ext;  // possible no-op
   1354 
   1355    where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
   1356 
   1357    SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
   1358    name of the pattern being matched, for dump purposes.  */
   1359 
static gimple *
vect_recog_widen_op_pattern (vec_info *vinfo,
			     stmt_vec_info last_stmt_info, tree *type_out,
			     tree_code orig_code, tree_code wide_code,
			     bool shift_p, const char *name)
{
  gimple *last_stmt = last_stmt_info->stmt;

  vect_unpromoted_value unprom[2];
  tree half_type;
  if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
			     shift_p, 2, unprom, &half_type))
    return NULL;

  /* Pattern detected.  */
  vect_pattern_detected (name, last_stmt);

  tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
  /* ITYPE is the double-width result type of the widening operation;
     it only differs from TYPE when TYPE is wider still or differs
     in sign from HALF_TYPE.  */
  tree itype = type;
  if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
      || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
    itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
					    TYPE_UNSIGNED (half_type));

  /* Check target support  */
  tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
  tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
  /* CTYPE is the type in which the final conversion to TYPE starts;
     see the MINUS_EXPR special case below.  */
  tree ctype = itype;
  tree vecctype = vecitype;
  if (orig_code == MINUS_EXPR
      && TYPE_UNSIGNED (itype)
      && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
    {
      /* Subtraction is special, even if half_type is unsigned and no matter
	 whether type is signed or unsigned, if type is wider than itype,
	 we need to sign-extend from the widening operation result to the
	 result type.
	 Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
	 itype unsigned short and type either int or unsigned int.
	 Widened (unsigned short) 0xfe - (unsigned short) 0xff is
	 (unsigned short) 0xffff, but for type int we want the result -1
	 and for type unsigned int 0xffffffff rather than 0xffff.  */
      ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
      vecctype = get_vectype_for_scalar_type (vinfo, ctype);
    }

  enum tree_code dummy_code;
  int dummy_int;
  auto_vec<tree> dummy_vec;
  if (!vectype
      || !vecitype
      || !vecctype
      || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_out = get_vectype_for_scalar_type (vinfo, type);
  if (!*type_out)
    return NULL;

  /* Convert both inputs to HALF_TYPE.  */
  tree oprnd[2];
  vect_convert_inputs (vinfo, last_stmt_info,
		       2, oprnd, half_type, unprom, vectype);

  /* Build the widening statement itself.  */
  tree var = vect_recog_temp_ssa_var (itype, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, wide_code,
					      oprnd[0], oprnd[1]);

  /* Sign-extend through the intermediate signed CTYPE first if needed
     (the MINUS_EXPR case above).  */
  if (vecctype != vecitype)
    pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
					pattern_stmt, vecitype);

  return vect_convert_output (vinfo, last_stmt_info,
			      type, pattern_stmt, vecctype);
}
   1437 
/* Try to detect multiplication on widened inputs, converting MULT_EXPR
   to WIDEN_MULT_EXPR, which computes the full double-width product of
   two narrower inputs.  See vect_recog_widen_op_pattern for details.  */

static gimple *
vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
			       tree *type_out)
{
  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
				      MULT_EXPR, WIDEN_MULT_EXPR, false,
				      "vect_recog_widen_mult_pattern");
}
   1449 
/* Try to detect addition on widened inputs, converting PLUS_EXPR
   to WIDEN_PLUS_EXPR, which computes the full double-width sum of
   two narrower inputs.  See vect_recog_widen_op_pattern for details.  */

static gimple *
vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
			       tree *type_out)
{
  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
				      PLUS_EXPR, WIDEN_PLUS_EXPR, false,
				      "vect_recog_widen_plus_pattern");
}
   1461 
/* Try to detect subtraction on widened inputs, converting MINUS_EXPR
   to WIDEN_MINUS_EXPR, which computes the full double-width difference
   of two narrower inputs.  See vect_recog_widen_op_pattern for details.  */

static gimple *
vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
			       tree *type_out)
{
  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
				      MINUS_EXPR, WIDEN_MINUS_EXPR, false,
				      "vect_recog_widen_minus_pattern");
}
   1472 
   1473 /* Function vect_recog_popcount_pattern
   1474 
   1475    Try to find the following pattern:
   1476 
   1477    UTYPE1 A;
   1478    TYPE1 B;
   1479    UTYPE2 temp_in;
   1480    TYPE3 temp_out;
   1481    temp_in = (UTYPE2)A;
   1482 
   1483    temp_out = __builtin_popcount{,l,ll} (temp_in);
   1484    B = (TYPE1) temp_out;
   1485 
   1486    TYPE2 may or may not be equal to TYPE3.
   1487    i.e. TYPE2 is equal to TYPE3 for __builtin_popcount
   1488    i.e. TYPE2 is not equal to TYPE3 for __builtin_popcountll
   1489 
   1490    Input:
   1491 
   1492    * STMT_VINFO: The stmt from which the pattern search begins.
   1493    here it starts with B = (TYPE1) temp_out;
   1494 
   1495    Output:
   1496 
   1497    * TYPE_OUT: The vector type of the output of this pattern.
   1498 
   1499    * Return value: A new stmt that will be used to replace the sequence of
   1500    stmts that constitute the pattern. In this case it will be:
   1501    B = .POPCOUNT (A);
   1502 */
   1503 
   1504 static gimple *
   1505 vect_recog_popcount_pattern (vec_info *vinfo,
   1506 			     stmt_vec_info stmt_vinfo, tree *type_out)
   1507 {
   1508   gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
   1509   gimple *popcount_stmt, *pattern_stmt;
   1510   tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
   1511   auto_vec<tree> vargs;
   1512 
   1513   /* Find B = (TYPE1) temp_out. */
   1514   if (!last_stmt)
   1515     return NULL;
   1516   tree_code code = gimple_assign_rhs_code (last_stmt);
   1517   if (!CONVERT_EXPR_CODE_P (code))
   1518     return NULL;
   1519 
   1520   lhs_oprnd = gimple_assign_lhs (last_stmt);
   1521   lhs_type = TREE_TYPE (lhs_oprnd);
   1522   if (!INTEGRAL_TYPE_P (lhs_type))
   1523     return NULL;
   1524 
   1525   rhs_oprnd = gimple_assign_rhs1 (last_stmt);
   1526   if (TREE_CODE (rhs_oprnd) != SSA_NAME
   1527       || !has_single_use (rhs_oprnd))
   1528     return NULL;
   1529   popcount_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
   1530 
   1531   /* Find temp_out = __builtin_popcount{,l,ll} (temp_in);  */
   1532   if (!is_gimple_call (popcount_stmt))
   1533     return NULL;
   1534   switch (gimple_call_combined_fn (popcount_stmt))
   1535     {
   1536     CASE_CFN_POPCOUNT:
   1537       break;
   1538     default:
   1539       return NULL;
   1540     }
   1541 
   1542   if (gimple_call_num_args (popcount_stmt) != 1)
   1543     return NULL;
   1544 
   1545   rhs_oprnd = gimple_call_arg (popcount_stmt, 0);
   1546   vect_unpromoted_value unprom_diff;
   1547   rhs_origin = vect_look_through_possible_promotion (vinfo, rhs_oprnd,
   1548 						    &unprom_diff);
   1549 
   1550   if (!rhs_origin)
   1551     return NULL;
   1552 
   1553   /* Input and output of .POPCOUNT should be same-precision integer.
   1554      Also A should be unsigned or same precision as temp_in,
   1555      otherwise there would be sign_extend from A to temp_in.  */
   1556   if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type)
   1557       || (!TYPE_UNSIGNED (unprom_diff.type)
   1558 	  && (TYPE_PRECISION (unprom_diff.type)
   1559 	      != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))))
   1560     return NULL;
   1561   vargs.safe_push (unprom_diff.op);
   1562 
   1563   vect_pattern_detected ("vec_regcog_popcount_pattern", popcount_stmt);
   1564   vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
   1565   /* Do it only if the backend has popcount<vector_mode>2 pattern.  */
   1566   if (!vec_type
   1567       || !direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
   1568 					  OPTIMIZE_FOR_SPEED))
   1569     return NULL;
   1570 
   1571   /* Create B = .POPCOUNT (A).  */
   1572   new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
   1573   pattern_stmt = gimple_build_call_internal_vec (IFN_POPCOUNT, vargs);
   1574   gimple_call_set_lhs (pattern_stmt, new_var);
   1575   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
   1576   *type_out = vec_type;
   1577 
   1578   if (dump_enabled_p ())
   1579     dump_printf_loc (MSG_NOTE, vect_location,
   1580 		     "created pattern stmt: %G", pattern_stmt);
   1581   return pattern_stmt;
   1582 }
   1583 
   1584 /* Function vect_recog_pow_pattern
   1585 
   1586    Try to find the following pattern:
   1587 
   1588      x = POW (y, N);
   1589 
   1590    with POW being one of pow, powf, powi, powif and N being
   1591    either 2 or 0.5.
   1592 
   1593    Input:
   1594 
   1595    * STMT_VINFO: The stmt from which the pattern search begins.
   1596 
   1597    Output:
   1598 
   1599    * TYPE_OUT: The type of the output of this pattern.
   1600 
   1601    * Return value: A new stmt that will be used to replace the sequence of
   1602    stmts that constitute the pattern. In this case it will be:
   1603         x = x * x
   1604    or
   1605 	x = sqrt (x)
   1606 */
   1607 
static gimple *
vect_recog_pow_pattern (vec_info *vinfo,
			stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree base, exp;
  gimple *stmt;
  tree var;

  /* The pattern root must be a call whose result is actually used.  */
  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  /* Only pow/powi variants are of interest.  */
  switch (gimple_call_combined_fn (last_stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      break;

    default:
      return NULL;
    }

  base = gimple_call_arg (last_stmt, 0);
  exp = gimple_call_arg (last_stmt, 1);
  if (TREE_CODE (exp) != REAL_CST
      && TREE_CODE (exp) != INTEGER_CST)
    {
      /* Non-constant exponent: the only case handled is pow (C, x)
	 with constant base C, rewritten as exp (log (C) * x) when a
	 SIMD clone of exp is available.  */
      if (flag_unsafe_math_optimizations
	  && TREE_CODE (base) == REAL_CST
	  && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
	{
	  /* Pick the log/exp pair matching the pow variant's precision.  */
	  combined_fn log_cfn;
	  built_in_function exp_bfn;
	  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
	    {
	    case BUILT_IN_POW:
	      log_cfn = CFN_BUILT_IN_LOG;
	      exp_bfn = BUILT_IN_EXP;
	      break;
	    case BUILT_IN_POWF:
	      log_cfn = CFN_BUILT_IN_LOGF;
	      exp_bfn = BUILT_IN_EXPF;
	      break;
	    case BUILT_IN_POWL:
	      log_cfn = CFN_BUILT_IN_LOGL;
	      exp_bfn = BUILT_IN_EXPL;
	      break;
	    default:
	      return NULL;
	    }
	  /* Constant-fold log (C); fails (returns non-REAL_CST) if C
	     is not foldable at compile time.  */
	  tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
	  tree exp_decl = builtin_decl_implicit (exp_bfn);
	  /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
	     does that, but if C is a power of 2, we want to use
	     exp2 (log2 (C) * x) in the non-vectorized version, but for
	     vectorization we don't have vectorized exp2.  */
	  if (logc
	      && TREE_CODE (logc) == REAL_CST
	      && exp_decl
	      && lookup_attribute ("omp declare simd",
				   DECL_ATTRIBUTES (exp_decl)))
	    {
	      /* Make sure SIMD clones of exp exist (or can be created)
		 so the resulting call is vectorizable.  */
	      cgraph_node *node = cgraph_node::get_create (exp_decl);
	      if (node->simd_clones == NULL)
		{
		  if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
		      || node->definition)
		    return NULL;
		  expand_simd_clones (node);
		  if (node->simd_clones == NULL)
		    return NULL;
		}
	      *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
	      if (!*type_out)
		return NULL;
	      /* Emit def = log (C) * x into the pattern def sequence and
		 return res = exp (def) as the pattern statement.  */
	      tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
	      gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
	      append_pattern_def_seq (vinfo, stmt_vinfo, g);
	      tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
	      g = gimple_build_call (exp_decl, 1, def);
	      gimple_call_set_lhs (g, res);
	      return g;
	    }
	}

      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    {
      /* Replace pow (x, 2) with x * x when vector multiply is supported.  */
      if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
					    TREE_TYPE (base), type_out))
	return NULL;

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign (var, MULT_EXPR, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    {
      /* Replace pow (x, 0.5) with .SQRT (x) when the target supports it.  */
      *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
      if (*type_out
	  && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
					     OPTIMIZE_FOR_SPEED))
	{
	  gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
	  var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	  gimple_call_set_lhs (stmt, var);
	  gimple_call_set_nothrow (stmt, true);
	  return stmt;
	}
    }

  return NULL;
}
   1733 
   1734 
   1735 /* Function vect_recog_widen_sum_pattern
   1736 
   1737    Try to find the following pattern:
   1738 
   1739      type x_t;
   1740      TYPE x_T, sum = init;
   1741    loop:
   1742      sum_0 = phi <init, sum_1>
   1743      S1  x_t = *p;
   1744      S2  x_T = (TYPE) x_t;
   1745      S3  sum_1 = x_T + sum_0;
   1746 
   1747    where type 'TYPE' is at least double the size of type 'type', i.e - we're
   1748    summing elements of type 'type' into an accumulator of type 'TYPE'. This is
   1749    a special case of a reduction computation.
   1750 
   1751    Input:
   1752 
   1753    * STMT_VINFO: The stmt from which the pattern search begins. In the example,
   1754    when this function is called with S3, the pattern {S2,S3} will be detected.
   1755 
   1756    Output:
   1757 
   1758    * TYPE_OUT: The type of the output of this pattern.
   1759 
   1760    * Return value: A new stmt that will be used to replace the sequence of
   1761    stmts that constitute the pattern. In this case it will be:
   1762         WIDEN_SUM <x_t, sum_0>
   1763 
   1764    Note: The widening-sum idiom is a widening reduction pattern that is
   1765 	 vectorized without preserving all the intermediate results. It
   1766          produces only N/2 (widened) results (by summing up pairs of
   1767 	 intermediate results) rather than all N results.  Therefore, we
   1768 	 cannot allow this pattern when we want to get all the results and in
   1769 	 the correct order (as is the case when this computation is in an
    1770 	 inner-loop nested in an outer-loop that is being vectorized).  */
   1771 
   1772 static gimple *
   1773 vect_recog_widen_sum_pattern (vec_info *vinfo,
   1774 			      stmt_vec_info stmt_vinfo, tree *type_out)
   1775 {
   1776   gimple *last_stmt = stmt_vinfo->stmt;
   1777   tree oprnd0, oprnd1;
   1778   tree type;
   1779   gimple *pattern_stmt;
   1780   tree var;
   1781 
   1782   /* Look for the following pattern
   1783           DX = (TYPE) X;
   1784           sum_1 = DX + sum_0;
   1785      In which DX is at least double the size of X, and sum_1 has been
   1786      recognized as a reduction variable.
   1787    */
   1788 
   1789   /* Starting from LAST_STMT, follow the defs of its uses in search
   1790      of the above pattern.  */
   1791 
   1792   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
   1793 				       &oprnd0, &oprnd1)
   1794       || TREE_CODE (oprnd0) != SSA_NAME
   1795       || !vinfo->lookup_def (oprnd0))
   1796     return NULL;
   1797 
   1798   type = TREE_TYPE (gimple_get_lhs (last_stmt));
   1799 
   1800   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
   1801      we know that oprnd1 is the reduction variable (defined by a loop-header
   1802      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
   1803      Left to check that oprnd0 is defined by a cast from type 'type' to type
   1804      'TYPE'.  */
   1805 
   1806   vect_unpromoted_value unprom0;
   1807   if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
   1808       || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
   1809     return NULL;
   1810 
   1811   vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
   1812 
   1813   if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
   1814 					unprom0.type, type_out))
   1815     return NULL;
   1816 
   1817   var = vect_recog_temp_ssa_var (type, NULL);
   1818   pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
   1819 
   1820   return pattern_stmt;
   1821 }
   1822 
   1823 /* Recognize cases in which an operation is performed in one type WTYPE
   1824    but could be done more efficiently in a narrower type NTYPE.  For example,
   1825    if we have:
   1826 
   1827      ATYPE a;  // narrower than NTYPE
   1828      BTYPE b;  // narrower than NTYPE
   1829      WTYPE aw = (WTYPE) a;
   1830      WTYPE bw = (WTYPE) b;
   1831      WTYPE res = aw + bw;  // only uses of aw and bw
   1832 
   1833    then it would be more efficient to do:
   1834 
   1835      NTYPE an = (NTYPE) a;
   1836      NTYPE bn = (NTYPE) b;
   1837      NTYPE resn = an + bn;
   1838      WTYPE res = (WTYPE) resn;
   1839 
   1840    Other situations include things like:
   1841 
   1842      ATYPE a;  // NTYPE or narrower
   1843      WTYPE aw = (WTYPE) a;
   1844      WTYPE res = aw + b;
   1845 
   1846    when only "(NTYPE) res" is significant.  In that case it's more efficient
   1847    to truncate "b" and do the operation on NTYPE instead:
   1848 
   1849      NTYPE an = (NTYPE) a;
   1850      NTYPE bn = (NTYPE) b;  // truncation
   1851      NTYPE resn = an + bn;
   1852      WTYPE res = (WTYPE) resn;
   1853 
   1854    All users of "res" should then use "resn" instead, making the final
   1855    statement dead (not marked as relevant).  The final statement is still
   1856    needed to maintain the type correctness of the IR.
   1857 
   1858    vect_determine_precisions has already determined the minimum
    1859    precision of the operation and the minimum precision required
   1860    by users of the result.  */
   1861 
static gimple *
vect_recog_over_widening_pattern (vec_info *vinfo,
				  stmt_vec_info last_stmt_info, tree *type_out)
{
  /* The pattern root must be a gimple assignment.  */
  gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
  if (!last_stmt)
    return NULL;

  /* See whether we have found that this operation can be done on a
     narrower type without changing its semantics.  */
  unsigned int new_precision = last_stmt_info->operation_precision;
  if (!new_precision)
    return NULL;

  tree lhs = gimple_assign_lhs (last_stmt);
  tree type = TREE_TYPE (lhs);
  tree_code code = gimple_assign_rhs_code (last_stmt);

  /* Punt for reductions where we don't handle the type conversions.  */
  if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
    return NULL;

  /* Keep the first operand of a COND_EXPR as-is: only the other two
     operands are interesting.  */
  unsigned int first_op = (code == COND_EXPR ? 2 : 1);

  /* Check the operands.  */
  unsigned int nops = gimple_num_ops (last_stmt) - first_op;
  auto_vec <vect_unpromoted_value, 3> unprom (nops);
  unprom.quick_grow (nops);
  unsigned int min_precision = 0;
  bool single_use_p = false;
  for (unsigned int i = 0; i < nops; ++i)
    {
      tree op = gimple_op (last_stmt, first_op + i);
      if (TREE_CODE (op) == INTEGER_CST)
	unprom[i].set_op (op, vect_constant_def);
      else if (TREE_CODE (op) == SSA_NAME)
	{
	  bool op_single_use_p = true;
	  if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
						     &op_single_use_p))
	    return NULL;
	  /* If:

	     (1) N bits of the result are needed;
	     (2) all inputs are widened from M<N bits; and
	     (3) one operand OP is a single-use SSA name

	     we can shift the M->N widening from OP to the output
	     without changing the number or type of extensions involved.
	     This then reduces the number of copies of STMT_INFO.

	     If instead of (3) more than one operand is a single-use SSA name,
	     shifting the extension to the output is even more of a win.

	     If instead:

	     (1) N bits of the result are needed;
	     (2) one operand OP2 is widened from M2<N bits;
	     (3) another operand OP1 is widened from M1<M2 bits; and
	     (4) both OP1 and OP2 are single-use

	     the choice is between:

	     (a) truncating OP2 to M1, doing the operation on M1,
		 and then widening the result to N

	     (b) widening OP1 to M2, doing the operation on M2, and then
		 widening the result to N

	     Both shift the M2->N widening of the inputs to the output.
	     (a) additionally shifts the M1->M2 widening to the output;
	     it requires fewer copies of STMT_INFO but requires an extra
	     M2->M1 truncation.

	     Which is better will depend on the complexity and cost of
	     STMT_INFO, which is hard to predict at this stage.  However,
	     a clear tie-breaker in favor of (b) is the fact that the
	     truncation in (a) increases the length of the operation chain.

	     If instead of (4) only one of OP1 or OP2 is single-use,
	     (b) is still a win over doing the operation in N bits:
	     it still shifts the M2->N widening on the single-use operand
	     to the output and reduces the number of STMT_INFO copies.

	     If neither operand is single-use then operating on fewer than
	     N bits might lead to more extensions overall.  Whether it does
	     or not depends on global information about the vectorization
	     region, and whether that's a good trade-off would again
	     depend on the complexity and cost of the statements involved,
	     as well as things like register pressure that are not normally
	     modelled at this stage.  We therefore ignore these cases
	     and just optimize the clear single-use wins above.

	     Thus we take the maximum precision of the unpromoted operands
	     and record whether any operand is single-use.  */
	  if (unprom[i].dt == vect_internal_def)
	    {
	      min_precision = MAX (min_precision,
				   TYPE_PRECISION (unprom[i].type));
	      single_use_p |= op_single_use_p;
	    }
	}
      else
	/* Neither a constant nor an SSA name: give up.  */
	return NULL;
    }

  /* Although the operation could be done in operation_precision, we have
     to balance that against introducing extra truncations or extensions.
     Calculate the minimum precision that can be handled efficiently.

     The loop above determined that the operation could be handled
     efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
     extension from the inputs to the output without introducing more
     instructions, and would reduce the number of instructions required
     for STMT_INFO itself.

     vect_determine_precisions has also determined that the result only
     needs min_output_precision bits.  Truncating by a factor of N times
     requires a tree of N - 1 instructions, so if TYPE is N times wider
     than min_output_precision, doing the operation in TYPE and truncating
     the result requires N + (N - 1) = 2N - 1 instructions per output vector.
     In contrast:

     - truncating the input to a unary operation and doing the operation
       in the new type requires at most N - 1 + 1 = N instructions per
       output vector

     - doing the same for a binary operation requires at most
       (N - 1) * 2 + 1 = 2N - 1 instructions per output vector

     Both unary and binary operations require fewer instructions than
     this if the operands were extended from a suitable truncated form.
     Thus there is usually nothing to lose by doing operations in
     min_output_precision bits, but there can be something to gain.  */
  if (!single_use_p)
    min_precision = last_stmt_info->min_output_precision;
  else
    min_precision = MIN (min_precision, last_stmt_info->min_output_precision);

  /* Apply the minimum efficient precision we just calculated.  */
  if (new_precision < min_precision)
    new_precision = min_precision;
  new_precision = vect_element_precision (new_precision);
  if (new_precision >= TYPE_PRECISION (type))
    /* No narrowing is possible: the operation already uses the
       narrowest viable type.  */
    return NULL;

  vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);

  *type_out = get_vectype_for_scalar_type (vinfo, type);
  if (!*type_out)
    return NULL;

  /* We've found a viable pattern.  Get the new type of the operation.  */
  bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
  tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);

  /* If we're truncating an operation, we need to make sure that we
     don't introduce new undefined overflow.  The codes tested here are
     a subset of those accepted by vect_truncatable_operation_p.  */
  tree op_type = new_type;
  if (TYPE_OVERFLOW_UNDEFINED (new_type)
      && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
    op_type = build_nonstandard_integer_type (new_precision, true);

  /* We specifically don't check here whether the target supports the
     new operation, since it might be something that a later pattern
     wants to rewrite anyway.  If targets have a minimum element size
     for some optabs, we should pattern-match smaller ops to larger ops
     where beneficial.  */
  tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
  tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
  if (!new_vectype || !op_vectype)
    return NULL;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
		     type, new_type);

  /* Calculate the rhs operands for an operation on OP_TYPE.  */
  tree ops[3] = {};
  for (unsigned int i = 1; i < first_op; ++i)
    /* Copy the COND_EXPR condition operand (if any) unchanged.  */
    ops[i - 1] = gimple_op (last_stmt, i);
  vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
		       op_type, &unprom[0], op_vectype);

  /* Use the operation to produce a result of type OP_TYPE.  */
  tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (new_var, code,
					      ops[0], ops[1], ops[2]);
  gimple_set_location (pattern_stmt, gimple_location (last_stmt));

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created pattern stmt: %G", pattern_stmt);

  /* Convert back to the original signedness, if OP_TYPE is different
     from NEW_TYPE.  */
  if (op_type != new_type)
    pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
					pattern_stmt, op_vectype);

  /* Promote the result to the original type.  */
  pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
				      pattern_stmt, new_vectype);

  return pattern_stmt;
}
   2071 
   2072 /* Recognize the following patterns:
   2073 
   2074      ATYPE a;  // narrower than TYPE
   2075      BTYPE b;  // narrower than TYPE
   2076 
   2077    1) Multiply high with scaling
   2078      TYPE res = ((TYPE) a * (TYPE) b) >> c;
   2079      Here, c is bitsize (TYPE) / 2 - 1.
   2080 
   2081    2) ... or also with rounding
   2082      TYPE res = (((TYPE) a * (TYPE) b) >> d + 1) >> 1;
   2083      Here, d is bitsize (TYPE) / 2 - 2.
   2084 
   2085    3) Normal multiply high
   2086      TYPE res = ((TYPE) a * (TYPE) b) >> e;
   2087      Here, e is bitsize (TYPE) / 2.
   2088 
   2089    where only the bottom half of res is used.  */
   2090 
static gimple *
vect_recog_mulhs_pattern (vec_info *vinfo,
			  stmt_vec_info last_stmt_info, tree *type_out)
{
  /* Check for a right shift.  */
  gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
  if (!last_stmt
      || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
    return NULL;

  /* Check that the shift result is wider than the users of the
     result need (i.e. that narrowing would be a natural choice).  */
  tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
  unsigned int target_precision
    = vect_element_precision (last_stmt_info->min_output_precision);
  if (!INTEGRAL_TYPE_P (lhs_type)
      || target_precision >= TYPE_PRECISION (lhs_type))
    return NULL;

  /* Look through any change in sign on the outer shift input.  */
  vect_unpromoted_value unprom_rshift_input;
  tree rshift_input = vect_look_through_possible_promotion
    (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
  if (!rshift_input
      || TYPE_PRECISION (TREE_TYPE (rshift_input))
	   != TYPE_PRECISION (lhs_type))
    return NULL;

  /* Get the definition of the shift input.  */
  stmt_vec_info rshift_input_stmt_info
    = vect_get_internal_def (vinfo, rshift_input);
  if (!rshift_input_stmt_info)
    return NULL;
  gassign *rshift_input_stmt
    = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
  if (!rshift_input_stmt)
    return NULL;

  /* MULH_STMT_INFO will be the statement producing the (widened)
     multiplication; SCALE_TERM the constant shift amount applied to it.  */
  stmt_vec_info mulh_stmt_info;
  tree scale_term;
  bool rounding_p = false;

  /* Check for the presence of the rounding term.  */
  if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
    {
      /* Check that the outer shift was by 1.  */
      if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
	return NULL;

      /* Check that the second operand of the PLUS_EXPR is 1.  */
      if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
	return NULL;

      /* Look through any change in sign on the addition input.  */
      vect_unpromoted_value unprom_plus_input;
      tree plus_input = vect_look_through_possible_promotion
	(vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
      if (!plus_input
	   || TYPE_PRECISION (TREE_TYPE (plus_input))
		!= TYPE_PRECISION (TREE_TYPE (rshift_input)))
	return NULL;

      /* Get the definition of the multiply-high-scale part.  */
      stmt_vec_info plus_input_stmt_info
	= vect_get_internal_def (vinfo, plus_input);
      if (!plus_input_stmt_info)
	return NULL;
      gassign *plus_input_stmt
	= dyn_cast <gassign *> (plus_input_stmt_info->stmt);
      if (!plus_input_stmt
	  || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
	return NULL;

      /* Look through any change in sign on the scaling input.  */
      vect_unpromoted_value unprom_scale_input;
      tree scale_input = vect_look_through_possible_promotion
	(vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
      if (!scale_input
	  || TYPE_PRECISION (TREE_TYPE (scale_input))
	       != TYPE_PRECISION (TREE_TYPE (plus_input)))
	return NULL;

      /* Get the definition of the multiply-high part.  */
      mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
      if (!mulh_stmt_info)
	return NULL;

      /* Get the scaling term.  */
      scale_term = gimple_assign_rhs2 (plus_input_stmt);
      rounding_p = true;
    }
  else
    {
      /* No rounding term: the shift input itself is the multiply-high
	 part and the outer shift amount is the scaling term.  */
      mulh_stmt_info = rshift_input_stmt_info;
      scale_term = gimple_assign_rhs2 (last_stmt);
    }

  /* Check that the scaling factor is constant.  */
  if (TREE_CODE (scale_term) != INTEGER_CST)
    return NULL;

  /* Check whether the scaling input term can be seen as two widened
     inputs multiplied together.  */
  vect_unpromoted_value unprom_mult[2];
  tree new_type;
  unsigned int nops
    = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
			    false, 2, unprom_mult, &new_type);
  if (nops != 2)
    return NULL;

  /* Adjust output precision.  */
  if (TYPE_PRECISION (new_type) < target_precision)
    new_type = build_nonstandard_integer_type
      (target_precision, TYPE_UNSIGNED (new_type));

  unsigned mult_precision = TYPE_PRECISION (new_type);
  internal_fn ifn;
  /* Check that the scaling factor is expected.  Instead of
     target_precision, we should use the one that we actually
     use for internal function.  */
  if (rounding_p)
    {
      /* Check pattern 2).  */
      if (wi::to_widest (scale_term) + mult_precision + 2
	  != TYPE_PRECISION (lhs_type))
	return NULL;

      ifn = IFN_MULHRS;
    }
  else
    {
      /* Check for pattern 1).  */
      if (wi::to_widest (scale_term) + mult_precision + 1
	  == TYPE_PRECISION (lhs_type))
	ifn = IFN_MULHS;
      /* Check for pattern 3).  */
      else if (wi::to_widest (scale_term) + mult_precision
	       == TYPE_PRECISION (lhs_type))
	ifn = IFN_MULH;
      else
	return NULL;
    }

  vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);

  /* Check for target support.  */
  tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
  if (!new_vectype
      || !direct_internal_fn_supported_p
	    (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
    return NULL;

  /* The IR requires a valid vector type for the cast result, even though
     it's likely to be discarded.  */
  *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
  if (!*type_out)
    return NULL;

  /* Generate the IFN_MULHRS call.  */
  tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
  tree new_ops[2];
  vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
		       unprom_mult, new_vectype);
  gcall *mulhrs_stmt
    = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
  gimple_call_set_lhs (mulhrs_stmt, new_var);
  gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created pattern stmt: %G", mulhrs_stmt);

  /* Cast the result back to the original LHS_TYPE.  */
  return vect_convert_output (vinfo, last_stmt_info, lhs_type,
			      mulhrs_stmt, new_vectype);
}
   2267 
   2268 /* Recognize the patterns:
   2269 
   2270 	    ATYPE a;  // narrower than TYPE
   2271 	    BTYPE b;  // narrower than TYPE
   2272 	(1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
   2273      or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
   2274 
   2275    where only the bottom half of avg is used.  Try to transform them into:
   2276 
   2277 	(1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
   2278      or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
   2279 
   2280   followed by:
   2281 
   2282 	    TYPE avg = (TYPE) avg';
   2283 
   2284   where NTYPE is no wider than half of TYPE.  Since only the bottom half
   2285   of avg is used, all or part of the cast of avg' should become redundant.
   2286 
   2287   If there is no target support available, generate code to distribute rshift
   2288   over plus and add a carry.  */
   2289 
static gimple *
vect_recog_average_pattern (vec_info *vinfo,
			    stmt_vec_info last_stmt_info, tree *type_out)
{
  /* Check for a shift right by one bit.  */
  gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
  if (!last_stmt
      || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
      || !integer_onep (gimple_assign_rhs2 (last_stmt)))
    return NULL;

  /* Check that the shift result is wider than the users of the
     result need (i.e. that narrowing would be a natural choice).  */
  tree lhs = gimple_assign_lhs (last_stmt);
  tree type = TREE_TYPE (lhs);
  unsigned int target_precision
    = vect_element_precision (last_stmt_info->min_output_precision);
  if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
    return NULL;

  /* Look through any change in sign on the shift input.  */
  tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
  vect_unpromoted_value unprom_plus;
  rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
						     &unprom_plus);
  if (!rshift_rhs
      || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
    return NULL;

  /* Get the definition of the shift input.  */
  stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
  if (!plus_stmt_info)
    return NULL;

  /* Check whether the shift input can be seen as a tree of additions on
     2 or 3 widened inputs.

     Note that the pattern should be a win even if the result of one or
     more additions is reused elsewhere: if the pattern matches, we'd be
     replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s.  */
  /* Start by assuming the truncating form; a "+ 1" operand found below
     upgrades this to the rounding form (IFN_AVG_CEIL).  */
  internal_fn ifn = IFN_AVG_FLOOR;
  vect_unpromoted_value unprom[3];
  tree new_type;
  unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
					    WIDEN_PLUS_EXPR, false, 3,
					    unprom, &new_type);
  if (nops == 0)
    return NULL;
  if (nops == 3)
    {
      /* Check that one operand is 1.  */
      unsigned int i;
      for (i = 0; i < 3; ++i)
	if (integer_onep (unprom[i].op))
	  break;
      if (i == 3)
	return NULL;
      /* Throw away the 1 operand and keep the other two.  */
      if (i < 2)
	unprom[i] = unprom[2];
      ifn = IFN_AVG_CEIL;
    }

  vect_pattern_detected ("vect_recog_average_pattern", last_stmt);

  /* We know that:

     (a) the operation can be viewed as:

	   TYPE widened0 = (TYPE) UNPROM[0];
	   TYPE widened1 = (TYPE) UNPROM[1];
	   TYPE tmp1 = widened0 + widened1 {+ 1};
	   TYPE tmp2 = tmp1 >> 1;   // LAST_STMT_INFO

     (b) the first two statements are equivalent to:

	   TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
	   TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];

     (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
	 where sensible;

     (d) all the operations can be performed correctly at twice the width of
	 NEW_TYPE, due to the nature of the average operation; and

     (e) users of the result of the right shift need only TARGET_PRECISION
	 bits, where TARGET_PRECISION is no more than half of TYPE's
	 precision.

     Under these circumstances, the only situation in which NEW_TYPE
     could be narrower than TARGET_PRECISION is if widened0, widened1
     and an addition result are all used more than once.  Thus we can
     treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
     as "free", whereas widening the result of the average instruction
     from NEW_TYPE to TARGET_PRECISION would be a new operation.  It's
     therefore better not to go narrower than TARGET_PRECISION.  */
  if (TYPE_PRECISION (new_type) < target_precision)
    new_type = build_nonstandard_integer_type (target_precision,
					       TYPE_UNSIGNED (new_type));

  /* Check for target support.  */
  tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
  if (!new_vectype)
    return NULL;

  /* FALLBACK_P is true when the target lacks a direct IFN_AVG_* but has
     the shift/add/and/or optabs needed to open-code the average.  */
  bool fallback_p = false;

  if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
    ;
  else if (TYPE_UNSIGNED (new_type)
	   && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
	   && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
	   && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
	   && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
    fallback_p = true;
  else
    return NULL;

  /* The IR requires a valid vector type for the cast result, even though
     it's likely to be discarded.  */
  *type_out = get_vectype_for_scalar_type (vinfo, type);
  if (!*type_out)
    return NULL;

  tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
  tree new_ops[2];
  vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
		       unprom, new_vectype);

  if (fallback_p)
    {
      /* As a fallback, generate code for following sequence:

	 shifted_op0 = new_ops[0] >> 1;
	 shifted_op1 = new_ops[1] >> 1;
	 sum_of_shifted = shifted_op0 + shifted_op1;
	 unmasked_carry = new_ops[0] and/or new_ops[1];
	 carry = unmasked_carry & 1;
	 new_var = sum_of_shifted + carry;
      */

      tree one_cst = build_one_cst (new_type);
      gassign *g;

      tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
      g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
      append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);

      tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
      g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
      append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);

      tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
      g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
			       shifted_op0, shifted_op1);
      append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);

      /* For AVG_CEIL the carry is set when either low bit is set (IOR);
	 for AVG_FLOOR only when both are (AND).  */
      tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
      tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
      g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
      append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);

      tree carry = vect_recog_temp_ssa_var (new_type, NULL);
      g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
      append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);

      g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
      return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
    }

  /* Generate the IFN_AVG* call.  */
  gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
						    new_ops[1]);
  gimple_call_set_lhs (average_stmt, new_var);
  gimple_set_location (average_stmt, gimple_location (last_stmt));

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created pattern stmt: %G", average_stmt);

  return vect_convert_output (vinfo, last_stmt_info,
			      type, average_stmt, new_vectype);
}
   2473 
   2474 /* Recognize cases in which the input to a cast is wider than its
   2475    output, and the input is fed by a widening operation.  Fold this
   2476    by removing the unnecessary intermediate widening.  E.g.:
   2477 
   2478      unsigned char a;
   2479      unsigned int b = (unsigned int) a;
   2480      unsigned short c = (unsigned short) b;
   2481 
   2482    -->
   2483 
   2484      unsigned short c = (unsigned short) a;
   2485 
   2486    Although this is rare in input IR, it is an expected side-effect
   2487    of the over-widening pattern above.
   2488 
   2489    This is beneficial also for integer-to-float conversions, if the
   2490    widened integer has more bits than the float, and if the unwidened
   2491    input doesn't.  */
   2492 
   2493 static gimple *
   2494 vect_recog_cast_forwprop_pattern (vec_info *vinfo,
   2495 				  stmt_vec_info last_stmt_info, tree *type_out)
   2496 {
   2497   /* Check for a cast, including an integer-to-float conversion.  */
   2498   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
   2499   if (!last_stmt)
   2500     return NULL;
   2501   tree_code code = gimple_assign_rhs_code (last_stmt);
   2502   if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
   2503     return NULL;
   2504 
   2505   /* Make sure that the rhs is a scalar with a natural bitsize.  */
   2506   tree lhs = gimple_assign_lhs (last_stmt);
   2507   if (!lhs)
   2508     return NULL;
   2509   tree lhs_type = TREE_TYPE (lhs);
   2510   scalar_mode lhs_mode;
   2511   if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
   2512       || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
   2513     return NULL;
   2514 
   2515   /* Check for a narrowing operation (from a vector point of view).  */
   2516   tree rhs = gimple_assign_rhs1 (last_stmt);
   2517   tree rhs_type = TREE_TYPE (rhs);
   2518   if (!INTEGRAL_TYPE_P (rhs_type)
   2519       || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
   2520       || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
   2521     return NULL;
   2522 
   2523   /* Try to find an unpromoted input.  */
   2524   vect_unpromoted_value unprom;
   2525   if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
   2526       || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
   2527     return NULL;
   2528 
   2529   /* If the bits above RHS_TYPE matter, make sure that they're the
   2530      same when extending from UNPROM as they are when extending from RHS.  */
   2531   if (!INTEGRAL_TYPE_P (lhs_type)
   2532       && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
   2533     return NULL;
   2534 
   2535   /* We can get the same result by casting UNPROM directly, to avoid
   2536      the unnecessary widening and narrowing.  */
   2537   vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
   2538 
   2539   *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
   2540   if (!*type_out)
   2541     return NULL;
   2542 
   2543   tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
   2544   gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
   2545   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
   2546 
   2547   return pattern_stmt;
   2548 }
   2549 
   2550 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
   2551    to WIDEN_LSHIFT_EXPR.  See vect_recog_widen_op_pattern for details.  */
   2552 
static gimple *
vect_recog_widen_shift_pattern (vec_info *vinfo,
				stmt_vec_info last_stmt_info, tree *type_out)
{
  /* Delegate to the generic widening-operation matcher: replace an
     LSHIFT_EXPR whose first operand is a widened input with
     WIDEN_LSHIFT_EXPR (the 'true' argument allows the shift amount to
     stay unwidened).  */
  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
				      LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
				      "vect_recog_widen_shift_pattern");
}
   2561 
   2562 /* Detect a rotate pattern wouldn't be otherwise vectorized:
   2563 
   2564    type a_t, b_t, c_t;
   2565 
   2566    S0 a_t = b_t r<< c_t;
   2567 
   2568   Input/Output:
   2569 
   2570   * STMT_VINFO: The stmt from which the pattern search begins,
   2571     i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
   2572     with a sequence:
   2573 
   2574    S1 d_t = -c_t;
   2575    S2 e_t = d_t & (B - 1);
   2576    S3 f_t = b_t << c_t;
   2577    S4 g_t = b_t >> e_t;
   2578    S0 a_t = f_t | g_t;
   2579 
   2580     where B is element bitsize of type.
   2581 
   2582   Output:
   2583 
   2584   * TYPE_OUT: The type of the output of this pattern.
   2585 
   2586   * Return value: A new stmt that will be used to replace the rotate
   2587     S0 stmt.  */
   2588 
static gimple *
vect_recog_rotate_pattern (vec_info *vinfo,
			   stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
  gimple *pattern_stmt, *def_stmt;
  enum tree_code rhs_code;
  enum vect_def_type dt;
  optab optab1, optab2;
  /* Set when the rotate amount is defined outside the vectorized region;
     negate/mask statements for it are then inserted on this edge.  */
  edge ext_def = NULL;
  bool bswap16_p = false;

  if (is_gimple_assign (last_stmt))
    {
      rhs_code = gimple_assign_rhs_code (last_stmt);
      switch (rhs_code)
	{
	case LROTATE_EXPR:
	case RROTATE_EXPR:
	  break;
	default:
	  return NULL;
	}

      lhs = gimple_assign_lhs (last_stmt);
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      type = TREE_TYPE (oprnd0);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
    }
  else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
    {
      /* __builtin_bswap16 (x) is another form of x r>> 8.
	 The vectorizer has bswap support, but only if the argument isn't
	 promoted.  */
      lhs = gimple_call_lhs (last_stmt);
      oprnd0 = gimple_call_arg (last_stmt, 0);
      type = TREE_TYPE (oprnd0);
      if (!lhs
	  || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
	  || TYPE_PRECISION (type) <= 16
	  || TREE_CODE (oprnd0) != SSA_NAME
	  || BITS_PER_UNIT != 8
	  || !TYPE_UNSIGNED (TREE_TYPE (lhs)))
	return NULL;

      stmt_vec_info def_stmt_info;
      if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
	return NULL;

      if (dt != vect_internal_def)
	return NULL;

      /* If the argument was widened from a 16-bit value, operate on the
	 unpromoted 16-bit input instead.  */
      if (gimple_assign_cast_p (def_stmt))
	{
	  def = gimple_assign_rhs1 (def_stmt);
	  if (INTEGRAL_TYPE_P (TREE_TYPE (def))
	      && TYPE_PRECISION (TREE_TYPE (def)) == 16)
	    oprnd0 = def;
	}

      type = TREE_TYPE (lhs);
      vectype = get_vectype_for_scalar_type (vinfo, type);
      if (vectype == NULL_TREE)
	return NULL;

      /* First try to express the bswap16 as a byte permutation; if the
	 target supports the required constant permute, keep the builtin
	 call and let vectorizable_bswap handle it.  */
      if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
	{
	  /* The encoding uses one stepped pattern for each byte in the
	     16-bit word.  */
	  vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
	  for (unsigned i = 0; i < 3; ++i)
	    for (unsigned j = 0; j < 2; ++j)
	      elts.quick_push ((i + 1) * 2 - j - 1);

	  vec_perm_indices indices (elts, 1,
				    TYPE_VECTOR_SUBPARTS (char_vectype));
	  if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
	    {
	      /* vectorizable_bswap can handle the __builtin_bswap16 if we
		 undo the argument promotion.  */
	      if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
		{
		  def = vect_recog_temp_ssa_var (type, NULL);
		  def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
		  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
		  oprnd0 = def;
		}

	      /* Pattern detected.  */
	      vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);

	      *type_out = vectype;

	      /* Pattern supported.  Create a stmt to be used to replace the
		 pattern, with the unpromoted argument.  */
	      var = vect_recog_temp_ssa_var (type, NULL);
	      pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
						1, oprnd0);
	      gimple_call_set_lhs (pattern_stmt, var);
	      gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
				      gimple_call_fntype (last_stmt));
	      return pattern_stmt;
	    }
	}

      /* Otherwise treat the bswap16 as a rotate left by 8 and fall
	 through to the generic rotate lowering below.  */
      oprnd1 = build_int_cst (integer_type_node, 8);
      rhs_code = LROTATE_EXPR;
      bswap16_p = true;
    }
  else
    return NULL;

  /* The rotated value must be an unsigned integer SSA name whose
     precision matches the result's.  */
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
      || !INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type))
    return NULL;

  stmt_vec_info def_stmt_info;
  if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
    return NULL;

  if (dt != vect_internal_def
      && dt != vect_constant_def
      && dt != vect_external_def)
    return NULL;

  vectype = get_vectype_for_scalar_type (vinfo, type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If vector/vector or vector/scalar rotate is supported by the target,
     don't do anything here.  */
  optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
  if (optab1
      && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    {
     use_rotate:
      if (bswap16_p)
	{
	  /* For bswap16 we synthesized the rotate ourselves, so emit it
	     explicitly (after undoing any argument promotion).  */
	  if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
	    {
	      def = vect_recog_temp_ssa_var (type, NULL);
	      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
	      oprnd0 = def;
	    }

	  /* Pattern detected.  */
	  vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);

	  *type_out = vectype;

	  /* Pattern supported.  Create a stmt to be used to replace the
	     pattern.  */
	  var = vect_recog_temp_ssa_var (type, NULL);
	  pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
					      oprnd1);
	  return pattern_stmt;
	}
      return NULL;
    }

  /* A vector/scalar rotate is only usable when the rotate amount is
     invariant (constant or external def), or for BB vectorization.  */
  if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
    {
      optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
      if (optab2
	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	goto use_rotate;
    }

  /* If vector/vector or vector/scalar shifts aren't supported by the target,
     don't do anything here either.  */
  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
  if (!optab1
      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
      || !optab2
      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
	return NULL;
      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
      if (!optab1
	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
	  || !optab2
	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
	return NULL;
    }

  *type_out = vectype;

  if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
    {
      def = vect_recog_temp_ssa_var (type, NULL);
      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
      oprnd0 = def;
    }

  if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
    ext_def = vect_get_external_def_edge (vinfo, oprnd1);

  /* Obtain the rotate count DEF in TYPE's mode, reusing OPRND1 or the
     input of an existing cast when possible.  */
  def = NULL_TREE;
  scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
  if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
    def = oprnd1;
  else if (def_stmt && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (type))
	def = rhs1;
    }

  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (type, NULL);
      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    }
  stype = TREE_TYPE (def);

  /* Compute DEF2, the complementary shift count (B - count, modulo B).
     For a constant count it is folded directly; otherwise emit
     negate-and-mask statements, on the incoming edge if the count is
     defined outside the vectorized region.  */
  if (TREE_CODE (def) == INTEGER_CST)
    {
      if (!tree_fits_uhwi_p (def)
	  || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
	  || integer_zerop (def))
	return NULL;
      def2 = build_int_cst (stype,
			    GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
    }
  else
    {
      tree vecstype = get_vectype_for_scalar_type (vinfo, stype);

      if (vecstype == NULL_TREE)
	return NULL;
      def2 = vect_recog_temp_ssa_var (stype, NULL);
      def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);

      def2 = vect_recog_temp_ssa_var (stype, NULL);
      tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
      def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
				      gimple_assign_lhs (def_stmt), mask);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
    }

  /* Emit the two opposing shifts whose bitwise OR reconstructs the
     rotate: x rot c == (x << c) | (x >> (B - c)) for a left rotate.  */
  var1 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
					? LSHIFT_EXPR : RSHIFT_EXPR,
				  oprnd0, def);
  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

  var2 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
					? RSHIFT_EXPR : LSHIFT_EXPR,
				  oprnd0, def2);
  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

  /* Pattern detected.  */
  vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);

  return pattern_stmt;
}
   2876 
   2877 /* Detect a vector by vector shift pattern that wouldn't be otherwise
   2878    vectorized:
   2879 
   2880    type a_t;
   2881    TYPE b_T, res_T;
   2882 
   2883    S1 a_t = ;
   2884    S2 b_T = ;
   2885    S3 res_T = b_T op a_t;
   2886 
   2887   where type 'TYPE' is a type with different size than 'type',
   2888   and op is <<, >> or rotate.
   2889 
   2890   Also detect cases:
   2891 
   2892    type a_t;
   2893    TYPE b_T, c_T, res_T;
   2894 
   2895    S0 c_T = ;
   2896    S1 a_t = (type) c_T;
   2897    S2 b_T = ;
   2898    S3 res_T = b_T op a_t;
   2899 
   2900   Input/Output:
   2901 
   2902   * STMT_VINFO: The stmt from which the pattern search begins,
   2903     i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
   2904     with a shift/rotate which has same type on both operands, in the
   2905     second case just b_T op c_T, in the first case with added cast
   2906     from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
   2907 
   2908   Output:
   2909 
   2910   * TYPE_OUT: The type of the output of this pattern.
   2911 
   2912   * Return value: A new stmt that will be used to replace the shift/rotate
   2913     S3 stmt.  */
   2914 
   2915 static gimple *
   2916 vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
   2917 					stmt_vec_info stmt_vinfo,
   2918 					tree *type_out)
   2919 {
   2920   gimple *last_stmt = stmt_vinfo->stmt;
   2921   tree oprnd0, oprnd1, lhs, var;
   2922   gimple *pattern_stmt;
   2923   enum tree_code rhs_code;
   2924 
   2925   if (!is_gimple_assign (last_stmt))
   2926     return NULL;
   2927 
   2928   rhs_code = gimple_assign_rhs_code (last_stmt);
   2929   switch (rhs_code)
   2930     {
   2931     case LSHIFT_EXPR:
   2932     case RSHIFT_EXPR:
   2933     case LROTATE_EXPR:
   2934     case RROTATE_EXPR:
   2935       break;
   2936     default:
   2937       return NULL;
   2938     }
   2939 
   2940   lhs = gimple_assign_lhs (last_stmt);
   2941   oprnd0 = gimple_assign_rhs1 (last_stmt);
   2942   oprnd1 = gimple_assign_rhs2 (last_stmt);
   2943   if (TREE_CODE (oprnd0) != SSA_NAME
   2944       || TREE_CODE (oprnd1) != SSA_NAME
   2945       || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
   2946       || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
   2947       || TYPE_PRECISION (TREE_TYPE (lhs))
   2948 	 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
   2949     return NULL;
   2950 
   2951   stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
   2952   if (!def_vinfo)
   2953     return NULL;
   2954 
   2955   *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
   2956   if (*type_out == NULL_TREE)
   2957     return NULL;
   2958 
   2959   tree def = NULL_TREE;
   2960   gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
   2961   if (def_stmt && gimple_assign_cast_p (def_stmt))
   2962     {
   2963       tree rhs1 = gimple_assign_rhs1 (def_stmt);
   2964       if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
   2965 	  && TYPE_PRECISION (TREE_TYPE (rhs1))
   2966 	     == TYPE_PRECISION (TREE_TYPE (oprnd0)))
   2967 	{
   2968 	  if (TYPE_PRECISION (TREE_TYPE (oprnd1))
   2969 	      >= TYPE_PRECISION (TREE_TYPE (rhs1)))
   2970 	    def = rhs1;
   2971 	  else
   2972 	    {
   2973 	      tree mask
   2974 		= build_low_bits_mask (TREE_TYPE (rhs1),
   2975 				       TYPE_PRECISION (TREE_TYPE (oprnd1)));
   2976 	      def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
   2977 	      def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
   2978 	      tree vecstype = get_vectype_for_scalar_type (vinfo,
   2979 							   TREE_TYPE (rhs1));
   2980 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
   2981 	    }
   2982 	}
   2983     }
   2984 
   2985   if (def == NULL_TREE)
   2986     {
   2987       def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
   2988       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
   2989       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
   2990     }
   2991 
   2992   /* Pattern detected.  */
   2993   vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
   2994 
   2995   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
   2996   var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
   2997   pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
   2998 
   2999   return pattern_stmt;
   3000 }
   3001 
   3002 /* Return true iff the target has a vector optab implementing the operation
   3003    CODE on type VECTYPE.  */
   3004 
   3005 static bool
   3006 target_has_vecop_for_code (tree_code code, tree vectype)
   3007 {
   3008   optab voptab = optab_for_tree_code (code, vectype, optab_vector);
   3009   return voptab
   3010 	 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
   3011 }
   3012 
   3013 /* Verify that the target has optabs of VECTYPE to perform all the steps
   3014    needed by the multiplication-by-immediate synthesis algorithm described by
   3015    ALG and VAR.  If SYNTH_SHIFT_P is true ensure that vector addition is
   3016    present.  Return true iff the target supports all the steps.  */
   3017 
   3018 static bool
   3019 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
   3020 				 tree vectype, bool synth_shift_p)
   3021 {
   3022   if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
   3023     return false;
   3024 
   3025   bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
   3026   bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
   3027 
   3028   if (var == negate_variant
   3029       && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
   3030     return false;
   3031 
   3032   /* If we must synthesize shifts with additions make sure that vector
   3033      addition is available.  */
   3034   if ((var == add_variant || synth_shift_p) && !supports_vplus)
   3035     return false;
   3036 
   3037   for (int i = 1; i < alg->ops; i++)
   3038     {
   3039       switch (alg->op[i])
   3040 	{
   3041 	case alg_shift:
   3042 	  break;
   3043 	case alg_add_t_m2:
   3044 	case alg_add_t2_m:
   3045 	case alg_add_factor:
   3046 	  if (!supports_vplus)
   3047 	    return false;
   3048 	  break;
   3049 	case alg_sub_t_m2:
   3050 	case alg_sub_t2_m:
   3051 	case alg_sub_factor:
   3052 	  if (!supports_vminus)
   3053 	    return false;
   3054 	  break;
   3055 	case alg_unknown:
   3056 	case alg_m:
   3057 	case alg_zero:
   3058 	case alg_impossible:
   3059 	  return false;
   3060 	default:
   3061 	  gcc_unreachable ();
   3062 	}
   3063     }
   3064 
   3065   return true;
   3066 }
   3067 
   3068 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
   3069    putting the final result in DEST.  Append all statements but the last into
   3070    VINFO.  Return the last statement.  */
   3071 
   3072 static gimple *
   3073 synth_lshift_by_additions (vec_info *vinfo,
   3074 			   tree dest, tree op, HOST_WIDE_INT amnt,
   3075 			   stmt_vec_info stmt_info)
   3076 {
   3077   HOST_WIDE_INT i;
   3078   tree itype = TREE_TYPE (op);
   3079   tree prev_res = op;
   3080   gcc_assert (amnt >= 0);
   3081   for (i = 0; i < amnt; i++)
   3082     {
   3083       tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
   3084 		      : dest;
   3085       gimple *stmt
   3086         = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
   3087       prev_res = tmp_var;
   3088       if (i < amnt - 1)
   3089 	append_pattern_def_seq (vinfo, stmt_info, stmt);
   3090       else
   3091 	return stmt;
   3092     }
   3093   gcc_unreachable ();
   3094   return NULL;
   3095 }
   3096 
   3097 /* Helper for vect_synth_mult_by_constant.  Apply a binary operation
   3098    CODE to operands OP1 and OP2, creating a new temporary SSA var in
   3099    the process if necessary.  Append the resulting assignment statements
   3100    to the sequence in STMT_VINFO.  Return the SSA variable that holds the
   3101    result of the binary operation.  If SYNTH_SHIFT_P is true synthesize
   3102    left shifts using additions.  */
   3103 
   3104 static tree
   3105 apply_binop_and_append_stmt (vec_info *vinfo,
   3106 			     tree_code code, tree op1, tree op2,
   3107 			     stmt_vec_info stmt_vinfo, bool synth_shift_p)
   3108 {
   3109   if (integer_zerop (op2)
   3110       && (code == LSHIFT_EXPR
   3111 	  || code == PLUS_EXPR))
   3112     {
   3113       gcc_assert (TREE_CODE (op1) == SSA_NAME);
   3114       return op1;
   3115     }
   3116 
   3117   gimple *stmt;
   3118   tree itype = TREE_TYPE (op1);
   3119   tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
   3120 
   3121   if (code == LSHIFT_EXPR
   3122       && synth_shift_p)
   3123     {
   3124       stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
   3125 					TREE_INT_CST_LOW (op2), stmt_vinfo);
   3126       append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
   3127       return tmp_var;
   3128     }
   3129 
   3130   stmt = gimple_build_assign (tmp_var, code, op1, op2);
   3131   append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
   3132   return tmp_var;
   3133 }
   3134 
   3135 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
   3136    and simple arithmetic operations to be vectorized.  Record the statements
   3137    produced in STMT_VINFO and return the last statement in the sequence or
   3138    NULL if it's not possible to synthesize such a multiplication.
   3139    This function mirrors the behavior of expand_mult_const in expmed.cc but
   3140    works on tree-ssa form.  */
   3141 
static gimple *
vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
			     stmt_vec_info stmt_vinfo)
{
  tree itype = TREE_TYPE (op);
  machine_mode mode = TYPE_MODE (itype);
  struct algorithm alg;
  mult_variant variant;
  /* The synthesis machinery below works on host-representable signed
     constants only.  */
  if (!tree_fits_shwi_p (val))
    return NULL;

  /* Multiplication synthesis by shifts, adds and subs can introduce
     signed overflow where the original operation didn't.  Perform the
     operations on an unsigned type and cast back to avoid this.
     In the future we may want to relax this for synthesis algorithms
     that we can prove do not cause unexpected overflow.  */
  bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);

  tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
  tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
  if (!vectype)
    return NULL;

  /* Targets that don't support vector shifts but support vector additions
     can synthesize shifts that way.  */
  bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);

  HOST_WIDE_INT hwval = tree_to_shwi (val);
  /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
     The vectorizer's benefit analysis will decide whether it's beneficial
     to do this.  */
  bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
				       ? TYPE_MODE (vectype) : mode,
				       hwval, &alg, &variant, MAX_COST);
  if (!possible)
    return NULL;

  /* choose_mult_variant doesn't know about vector support; reject any
     algorithm that uses an operation the target can't vectorize.  */
  if (!target_supports_mult_synth_alg (&alg, variant, vectype, synth_shift_p))
    return NULL;

  tree accumulator;

  /* STMT tracks the most recently built statement.  Every statement but
     the last one built is appended to STMT_VINFO's pattern def sequence;
     the last one is returned to the caller instead.  */
  gimple *stmt = NULL;

  /* Move the computation to an unsigned copy of OP when overflow must be
     avoided; the result is converted back at the end.  */
  if (cast_to_unsigned_p)
    {
      tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
      stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
      append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
      op = tmp_op;
    }

  /* alg.op[0] seeds the accumulator with either zero or OP itself.  */
  if (alg.op[0] == alg_zero)
    accumulator = build_int_cst (multtype, 0);
  else
    accumulator = op;

  /* negate_variant and add_variant require an extra statement after the
     main loop, so the loop's final statement must be appended as well.  */
  bool needs_fixup = (variant == negate_variant)
		      || (variant == add_variant);

  for (int i = 1; i < alg.ops; i++)
    {
      tree shft_log = build_int_cst (multtype, alg.log[i]);
      tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
      tree tmp_var = NULL_TREE;

      switch (alg.op[i])
	{
	case alg_shift:
	  /* accum <<= log[i].  */
	  if (synth_shift_p)
	    stmt
	      = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
					   alg.log[i], stmt_vinfo);
	  else
	    stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
					 shft_log);
	  break;
	case alg_add_t_m2:
	  /* accum += op << log[i].  */
	  tmp_var
	    = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
					   stmt_vinfo, synth_shift_p);
	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
				       tmp_var);
	  break;
	case alg_sub_t_m2:
	  /* accum -= op << log[i].  */
	  tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
						 shft_log, stmt_vinfo,
						 synth_shift_p);
	  /* In some algorithms the first step involves zeroing the
	     accumulator.  If subtracting from such an accumulator
	     just emit the negation directly.  */
	  if (integer_zerop (accumulator))
	    stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
	  else
	    stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
					tmp_var);
	  break;
	case alg_add_t2_m:
	  /* accum = (accum << log[i]) + op.  */
	  tmp_var
	    = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
					   shft_log, stmt_vinfo, synth_shift_p);
	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
	  break;
	case alg_sub_t2_m:
	  /* accum = (accum << log[i]) - op.  */
	  tmp_var
	    = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
					   shft_log, stmt_vinfo, synth_shift_p);
	  stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
	  break;
	case alg_add_factor:
	  /* accum += accum << log[i].  */
	  tmp_var
	    = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
					   shft_log, stmt_vinfo, synth_shift_p);
	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
				       tmp_var);
	  break;
	case alg_sub_factor:
	  /* accum = (accum << log[i]) - accum.  */
	  tmp_var
	    = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
					   shft_log, stmt_vinfo, synth_shift_p);
	  stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
				      accumulator);
	  break;
	default:
	  gcc_unreachable ();
	}
      /* We don't want to append the last stmt in the sequence to stmt_vinfo
	 but rather return it directly.  */

      if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
	append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
      accumulator = accum_tmp;
    }
  /* Apply the final negation or addition required by the variant.  */
  if (variant == negate_variant)
    {
      tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
      stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
      accumulator = accum_tmp;
      /* Only append if a final conversion still follows.  */
      if (cast_to_unsigned_p)
	append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    }
  else if (variant == add_variant)
    {
      tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
      stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
      accumulator = accum_tmp;
      if (cast_to_unsigned_p)
	append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    }
  /* Move back to a signed if needed.  */
  if (cast_to_unsigned_p)
    {
      tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
      stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
      /* ACCUMULATOR is deliberately not updated here: STMT, which is
	 returned below, is the last statement of the sequence.  */
    }

  return stmt;
}
   3301 
   3302 /* Detect multiplication by constant and convert it into a sequence of
   3303    shifts and additions, subtractions, negations.  We reuse the
   3304    choose_mult_variant algorithms from expmed.cc
   3305 
   3306    Input/Output:
   3307 
   3308    STMT_VINFO: The stmt from which the pattern search begins,
   3309    i.e. the mult stmt.
   3310 
   3311  Output:
   3312 
   3313   * TYPE_OUT: The type of the output of this pattern.
   3314 
   3315   * Return value: A new stmt that will be used to replace
   3316     the multiplication.  */
   3317 
   3318 static gimple *
   3319 vect_recog_mult_pattern (vec_info *vinfo,
   3320 			 stmt_vec_info stmt_vinfo, tree *type_out)
   3321 {
   3322   gimple *last_stmt = stmt_vinfo->stmt;
   3323   tree oprnd0, oprnd1, vectype, itype;
   3324   gimple *pattern_stmt;
   3325 
   3326   if (!is_gimple_assign (last_stmt))
   3327     return NULL;
   3328 
   3329   if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
   3330     return NULL;
   3331 
   3332   oprnd0 = gimple_assign_rhs1 (last_stmt);
   3333   oprnd1 = gimple_assign_rhs2 (last_stmt);
   3334   itype = TREE_TYPE (oprnd0);
   3335 
   3336   if (TREE_CODE (oprnd0) != SSA_NAME
   3337       || TREE_CODE (oprnd1) != INTEGER_CST
   3338       || !INTEGRAL_TYPE_P (itype)
   3339       || !type_has_mode_precision_p (itype))
   3340     return NULL;
   3341 
   3342   vectype = get_vectype_for_scalar_type (vinfo, itype);
   3343   if (vectype == NULL_TREE)
   3344     return NULL;
   3345 
   3346   /* If the target can handle vectorized multiplication natively,
   3347      don't attempt to optimize this.  */
   3348   optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
   3349   if (mul_optab != unknown_optab)
   3350     {
   3351       machine_mode vec_mode = TYPE_MODE (vectype);
   3352       int icode = (int) optab_handler (mul_optab, vec_mode);
   3353       if (icode != CODE_FOR_nothing)
   3354        return NULL;
   3355     }
   3356 
   3357   pattern_stmt = vect_synth_mult_by_constant (vinfo,
   3358 					      oprnd0, oprnd1, stmt_vinfo);
   3359   if (!pattern_stmt)
   3360     return NULL;
   3361 
   3362   /* Pattern detected.  */
   3363   vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
   3364 
   3365   *type_out = vectype;
   3366 
   3367   return pattern_stmt;
   3368 }
   3369 
   3370 /* Detect a signed division by a constant that wouldn't be
   3371    otherwise vectorized:
   3372 
   3373    type a_t, b_t;
   3374 
   3375    S1 a_t = b_t / N;
   3376 
   3377   where type 'type' is an integral type and N is a constant.
   3378 
   3379   Similarly handle modulo by a constant:
   3380 
   3381    S4 a_t = b_t % N;
   3382 
   3383   Input/Output:
   3384 
   3385   * STMT_VINFO: The stmt from which the pattern search begins,
   3386     i.e. the division stmt.  S1 is replaced by if N is a power
   3387     of two constant and type is signed:
   3388   S3  y_t = b_t < 0 ? N - 1 : 0;
   3389   S2  x_t = b_t + y_t;
   3390   S1' a_t = x_t >> log2 (N);
   3391 
   3392     S4 is replaced if N is a power of two constant and
   3393     type is signed by (where *_T temporaries have unsigned type):
   3394   S9  y_T = b_t < 0 ? -1U : 0U;
   3395   S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
   3396   S7  z_t = (type) z_T;
   3397   S6  w_t = b_t + z_t;
   3398   S5  x_t = w_t & (N - 1);
   3399   S4' a_t = x_t - z_t;
   3400 
   3401   Output:
   3402 
   3403   * TYPE_OUT: The type of the output of this pattern.
   3404 
   3405   * Return value: A new stmt that will be used to replace the division
   3406     S1 or modulo S4 stmt.  */
   3407 
static gimple *
vect_recog_divmod_pattern (vec_info *vinfo,
			   stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree oprnd0, oprnd1, vectype, itype, cond;
  gimple *pattern_stmt, *def_stmt;
  enum tree_code rhs_code;
  optab optab;
  tree q;
  int dummy_int, prec;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  /* Only division and modulo by a constant are handled.  */
  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case TRUNC_DIV_EXPR:
    case EXACT_DIV_EXPR:
    case TRUNC_MOD_EXPR:
      break;
    default:
      return NULL;
    }

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  itype = TREE_TYPE (oprnd0);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != INTEGER_CST
      || TREE_CODE (itype) != INTEGER_TYPE
      || !type_has_mode_precision_p (itype))
    return NULL;

  scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
  vectype = get_vectype_for_scalar_type (vinfo, itype);
  if (vectype == NULL_TREE)
    return NULL;

  if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
    {
      /* If the target can handle vectorized division or modulo natively,
	 don't attempt to optimize this, since native division is likely
	 to give smaller code.  */
      optab = optab_for_tree_code (rhs_code, vectype, optab_default);
      if (optab != unknown_optab)
	{
	  machine_mode vec_mode = TYPE_MODE (vectype);
	  int icode = (int) optab_handler (optab, vec_mode);
	  if (icode != CODE_FOR_nothing)
	    return NULL;
	}
    }

  prec = TYPE_PRECISION (itype);
  /* First the easy case: signed division/modulo by a positive power of
     two, which becomes a conditional adjustment plus shift/mask.  */
  if (integer_pow2p (oprnd1))
    {
      /* Only signed types with a positive divisor are handled here.  */
      if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
	return NULL;

      /* Pattern detected.  */
      vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);

      *type_out = vectype;

      /* Check if the target supports this internal function.  */
      internal_fn ifn = IFN_DIV_POW2;
      if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
	{
	  tree shift = build_int_cst (itype, tree_log2 (oprnd1));

	  tree var_div = vect_recog_temp_ssa_var (itype, NULL);
	  gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
	  gimple_call_set_lhs (div_stmt, var_div);

	  if (rhs_code == TRUNC_MOD_EXPR)
	    {
	      /* r = oprnd0 - (oprnd0 / N) << log2 (N).  */
	      append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
	      def_stmt
		= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
				       LSHIFT_EXPR, var_div, shift);
	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
	      pattern_stmt
		= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
				       MINUS_EXPR, oprnd0,
				       gimple_assign_lhs (def_stmt));
	    }
	  else
	    pattern_stmt = div_stmt;
	  gimple_set_location (pattern_stmt, gimple_location (last_stmt));

	  return pattern_stmt;
	}

      /* No IFN_DIV_POW2: open-code the rounding adjustment.  COND is
	 true when the dividend is negative.  */
      cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
		     build_int_cst (itype, 0));
      if (rhs_code == TRUNC_DIV_EXPR
	  || rhs_code == EXACT_DIV_EXPR)
	{
	  /* a / N  ->  (a + (a < 0 ? N - 1 : 0)) >> log2 (N).  */
	  tree var = vect_recog_temp_ssa_var (itype, NULL);
	  tree shift;
	  def_stmt
	    = gimple_build_assign (var, COND_EXPR, cond,
				   fold_build2 (MINUS_EXPR, itype, oprnd1,
						build_int_cst (itype, 1)),
				   build_int_cst (itype, 0));
	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
	  var = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign (var, PLUS_EXPR, oprnd0,
				   gimple_assign_lhs (def_stmt));
	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

	  shift = build_int_cst (itype, tree_log2 (oprnd1));
	  pattern_stmt
	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
				   RSHIFT_EXPR, var, shift);
	}
      else
	{
	  /* TRUNC_MOD_EXPR: compute SIGNMASK = (a < 0 ? N - 1 : 0) in
	     disguise, then a % N = ((a + signmask) & (N - 1)) - signmask.  */
	  tree signmask;
	  if (compare_tree_int (oprnd1, 2) == 0)
	    {
	      /* For N == 2, signmask is simply 0 or 1.  */
	      signmask = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
					      build_int_cst (itype, 1),
					      build_int_cst (itype, 0));
	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
	    }
	  else
	    {
	      /* Build (unsigned) -1 >> (prec - log2 (N)) == N - 1 when
		 a < 0, else 0; the shift is done in an unsigned type to
		 get a logical shift.  */
	      tree utype
		= build_nonstandard_integer_type (prec, 1);
	      tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
	      tree shift
		= build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
					- tree_log2 (oprnd1));
	      tree var = vect_recog_temp_ssa_var (utype, NULL);

	      def_stmt = gimple_build_assign (var, COND_EXPR, cond,
					      build_int_cst (utype, -1),
					      build_int_cst (utype, 0));
	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
	      var = vect_recog_temp_ssa_var (utype, NULL);
	      def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
					      gimple_assign_lhs (def_stmt),
					      shift);
	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
	      signmask = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt
		= gimple_build_assign (signmask, NOP_EXPR, var);
	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
	    }
	  def_stmt
	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
				   PLUS_EXPR, oprnd0, signmask);
	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
	  def_stmt
	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
				   BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
				   fold_build2 (MINUS_EXPR, itype, oprnd1,
						build_int_cst (itype, 1)));
	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

	  pattern_stmt
	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
				   MINUS_EXPR, gimple_assign_lhs (def_stmt),
				   signmask);
	}

      return pattern_stmt;
    }

  /* Non-power-of-two divisor: use a multiply-highpart based sequence.
     Only possible when the constant fits a HOST_WIDE_INT and the target
     has a vector highpart multiply.  */
  if (prec > HOST_BITS_PER_WIDE_INT
      || integer_zerop (oprnd1))
    return NULL;

  if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
    return NULL;

  if (TYPE_UNSIGNED (itype))
    {
      unsigned HOST_WIDE_INT mh, ml;
      int pre_shift, post_shift;
      unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
				  & GET_MODE_MASK (itype_mode));
      tree t1, t2, t3, t4;

      if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
	/* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
	return NULL;

      /* Find a suitable multiplier and right shift count
	 instead of multiplying with D.  */
      mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);

      /* If the suggested multiplier is more than SIZE bits, we can do better
	 for even divisors, using an initial right shift.  */
      if (mh != 0 && (d & 1) == 0)
	{
	  pre_shift = ctz_or_zero (d);
	  mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
				  &ml, &post_shift, &dummy_int);
	  gcc_assert (!mh);
	}
      else
	pre_shift = 0;

      if (mh != 0)
	{
	  /* MH set means the multiplier doesn't fit in PREC bits; use
	     the add-and-double-shift sequence.  */
	  if (post_shift - 1 >= prec)
	    return NULL;

	  /* t1 = oprnd0 h* ml;
	     t2 = oprnd0 - t1;
	     t3 = t2 >> 1;
	     t4 = t1 + t3;
	     q = t4 >> (post_shift - 1);  */
	  t1 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
					  build_int_cst (itype, ml));
	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

	  t2 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

	  t3 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

	  t4 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign (t4, PLUS_EXPR, t1, t3);

	  if (post_shift != 1)
	    {
	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

	      q = vect_recog_temp_ssa_var (itype, NULL);
	      pattern_stmt
		= gimple_build_assign (q, RSHIFT_EXPR, t4,
				       build_int_cst (itype, post_shift - 1));
	    }
	  else
	    {
	      q = t4;
	      pattern_stmt = def_stmt;
	    }
	}
      else
	{
	  if (pre_shift >= prec || post_shift >= prec)
	    return NULL;

	  /* t1 = oprnd0 >> pre_shift;
	     t2 = t1 h* ml;
	     q = t2 >> post_shift;  */
	  if (pre_shift)
	    {
	      t1 = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt
		= gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
				       build_int_cst (NULL, pre_shift));
	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
	    }
	  else
	    t1 = oprnd0;

	  t2 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
					  build_int_cst (itype, ml));

	  if (post_shift)
	    {
	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

	      q = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt
		= gimple_build_assign (q, RSHIFT_EXPR, t2,
				       build_int_cst (itype, post_shift));
	    }
	  else
	    q = t2;

	  pattern_stmt = def_stmt;
	}
    }
  else
    {
      /* Signed division by a non-power-of-two constant.  */
      unsigned HOST_WIDE_INT ml;
      int post_shift;
      HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
      unsigned HOST_WIDE_INT abs_d;
      bool add = false;
      tree t1, t2, t3, t4;

      /* Give up for -1.  */
      if (d == -1)
	return NULL;

      /* Since d might be INT_MIN, we have to cast to
	 unsigned HOST_WIDE_INT before negating to avoid
	 undefined signed overflow.  */
      abs_d = (d >= 0
	       ? (unsigned HOST_WIDE_INT) d
	       : - (unsigned HOST_WIDE_INT) d);

      /* n rem d = n rem -d */
      if (rhs_code == TRUNC_MOD_EXPR && d < 0)
	{
	  d = abs_d;
	  oprnd1 = build_int_cst (itype, abs_d);
	}
      if (HOST_BITS_PER_WIDE_INT >= prec
	  && abs_d == HOST_WIDE_INT_1U << (prec - 1))
	/* This case is not handled correctly below.  */
	return NULL;

      choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
      if (ml >= HOST_WIDE_INT_1U << (prec - 1))
	{
	  /* The multiplier is too big for a signed highpart multiply;
	     use ML - 2^prec as the (negative) multiplier and add the
	     dividend back afterwards.  */
	  add = true;
	  ml |= HOST_WIDE_INT_M1U << (prec - 1);
	}
      if (post_shift >= prec)
	return NULL;

      /* t1 = oprnd0 h* ml;  */
      t1 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
				      build_int_cst (itype, ml));

      if (add)
	{
	  /* t2 = t1 + oprnd0;  */
	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
	  t2 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
	}
      else
	t2 = t1;

      if (post_shift)
	{
	  /* t3 = t2 >> post_shift;  */
	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
	  t3 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
					  build_int_cst (itype, post_shift));
	}
      else
	t3 = t2;

      /* MSB is the sign of OPRND0 if known from range info:
	 0 = known non-negative, -1 = known negative, 1 = unknown.  */
      int msb = 1;
      value_range r;
      get_range_query (cfun)->range_of_expr (r, oprnd0);
      if (r.kind () == VR_RANGE)
	{
	  if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
	    msb = 0;
	  else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
	    msb = -1;
	}

      if (msb == 0 && d >= 0)
	{
	  /* q = t3;  */
	  q = t3;
	  pattern_stmt = def_stmt;
	}
      else
	{
	  /* t4 = oprnd0 >> (prec - 1);
	     or if we know from VRP that oprnd0 >= 0
	     t4 = 0;
	     or if we know from VRP that oprnd0 < 0
	     t4 = -1;  */
	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
	  t4 = vect_recog_temp_ssa_var (itype, NULL);
	  if (msb != 1)
	    def_stmt = gimple_build_assign (t4, INTEGER_CST,
					    build_int_cst (itype, msb));
	  else
	    def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
					    build_int_cst (itype, prec - 1));
	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

	  /* q = t3 - t4;  or q = t4 - t3;  */
	  q = vect_recog_temp_ssa_var (itype, NULL);
	  pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
					      d < 0 ? t3 : t4);
	}
    }

  if (rhs_code == TRUNC_MOD_EXPR)
    {
      tree r, t1;

      /* We divided.  Now finish by:
	 t1 = q * oprnd1;
	 r = oprnd0 - t1;  */
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);

      t1 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

      r = vect_recog_temp_ssa_var (itype, NULL);
      pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
    }

  /* Pattern detected.  */
  vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);

  *type_out = vectype;
  return pattern_stmt;
}
   3829 
   3830 /* Function vect_recog_mixed_size_cond_pattern
   3831 
   3832    Try to find the following pattern:
   3833 
   3834      type x_t, y_t;
   3835      TYPE a_T, b_T, c_T;
   3836    loop:
   3837      S1  a_T = x_t CMP y_t ? b_T : c_T;
   3838 
   3839    where type 'TYPE' is an integral type which has different size
   3840    from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
   3841    than 'type', the constants need to fit into an integer type
   3842    with the same width as 'type') or results of conversion from 'type'.
   3843 
   3844    Input:
   3845 
   3846    * STMT_VINFO: The stmt from which the pattern search begins.
   3847 
   3848    Output:
   3849 
   3850    * TYPE_OUT: The type of the output of this pattern.
   3851 
   3852    * Return value: A new stmt that will be used to replace the pattern.
   3853 	Additionally a def_stmt is added.
   3854 
   3855 	a_it = x_t CMP y_t ? b_it : c_it;
   3856 	a_T = (TYPE) a_it;  */
   3857 
static gimple *
vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
				    stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree cond_expr, then_clause, else_clause;
  tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
  gimple *pattern_stmt, *def_stmt;
  tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
  gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
  bool promotion;
  tree comp_scalar_type;

  if (!is_gimple_assign (last_stmt)
      || gimple_assign_rhs_code (last_stmt) != COND_EXPR
      || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;

  cond_expr = gimple_assign_rhs1 (last_stmt);
  then_clause = gimple_assign_rhs2 (last_stmt);
  else_clause = gimple_assign_rhs3 (last_stmt);

  if (!COMPARISON_CLASS_P (cond_expr))
    return NULL;

  comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
  comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
  if (comp_vectype == NULL_TREE)
    return NULL;

  /* The pattern only applies when the result type differs from the
     comparison type and both are integral.  */
  type = TREE_TYPE (gimple_assign_lhs (last_stmt));
  if (types_compatible_p (type, comp_scalar_type)
      || ((TREE_CODE (then_clause) != INTEGER_CST
	   || TREE_CODE (else_clause) != INTEGER_CST)
	  && !INTEGRAL_TYPE_P (comp_scalar_type))
      || !INTEGRAL_TYPE_P (type))
    return NULL;

  /* Non-constant clauses must be conversions from some narrower type.  */
  if ((TREE_CODE (then_clause) != INTEGER_CST
       && !type_conversion_p (vinfo, then_clause, false,
			      &orig_type0, &def_stmt0, &promotion))
      || (TREE_CODE (else_clause) != INTEGER_CST
	  && !type_conversion_p (vinfo, else_clause, false,
				 &orig_type1, &def_stmt1, &promotion)))
    return NULL;

  if (orig_type0 && orig_type1
      && !types_compatible_p (orig_type0, orig_type1))
    return NULL;

  /* Strip the conversion(s) and remember the original narrow type as
     the intermediate type for the COND_EXPR.  */
  if (orig_type0)
    {
      if (!types_compatible_p (orig_type0, comp_scalar_type))
	return NULL;
      then_clause = gimple_assign_rhs1 (def_stmt0);
      itype = orig_type0;
    }

  if (orig_type1)
    {
      if (!types_compatible_p (orig_type1, comp_scalar_type))
	return NULL;
      else_clause = gimple_assign_rhs1 (def_stmt1);
      itype = orig_type1;
    }


  HOST_WIDE_INT cmp_mode_size
    = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));

  /* If the types already have the same unit size there is nothing to
     fix up.  */
  scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
  if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
    return NULL;

  vectype = get_vectype_for_scalar_type (vinfo, type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If the mixed-size VEC_COND_EXPR is directly supported, leave it.  */
  if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
    return NULL;

  /* Fall back to an intermediate type of the comparison's unit size.  */
  if (itype == NULL_TREE)
    itype = build_nonstandard_integer_type (cmp_mode_size,
  					    TYPE_UNSIGNED (type));

  if (itype == NULL_TREE
      || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
    return NULL;

  vecitype = get_vectype_for_scalar_type (vinfo, itype);
  if (vecitype == NULL_TREE)
    return NULL;

  /* The same-size VEC_COND_EXPR must be supported for the rewrite to
     be useful.  */
  if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
    return NULL;

  /* When widening, constant clauses must be representable in ITYPE.  */
  if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
    {
      if ((TREE_CODE (then_clause) == INTEGER_CST
	   && !int_fits_type_p (then_clause, itype))
	  || (TREE_CODE (else_clause) == INTEGER_CST
	      && !int_fits_type_p (else_clause, itype)))
	return NULL;
    }

  /* Emit a_it = cond ? b_it : c_it followed by a_T = (TYPE) a_it.  */
  def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
				  COND_EXPR, unshare_expr (cond_expr),
				  fold_convert (itype, then_clause),
				  fold_convert (itype, else_clause));
  pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
				      NOP_EXPR, gimple_assign_lhs (def_stmt));

  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
  *type_out = vectype;

  vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);

  return pattern_stmt;
}
   3977 
   3978 
   3979 /* Helper function of vect_recog_bool_pattern.  Called recursively, return
   3980    true if bool VAR can and should be optimized that way.  Assume it shouldn't
   3981    in case it's a result of a comparison which can be directly vectorized into
   3982    a vector comparison.  Fills in STMTS with all stmts visited during the
   3983    walk.  */
   3984 
   3985 static bool
   3986 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
   3987 {
   3988   tree rhs1;
   3989   enum tree_code rhs_code;
   3990 
   3991   stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
   3992   if (!def_stmt_info)
   3993     return false;
   3994 
   3995   gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
   3996   if (!def_stmt)
   3997     return false;
   3998 
   3999   if (stmts.contains (def_stmt))
   4000     return true;
   4001 
   4002   rhs1 = gimple_assign_rhs1 (def_stmt);
   4003   rhs_code = gimple_assign_rhs_code (def_stmt);
   4004   switch (rhs_code)
   4005     {
   4006     case SSA_NAME:
   4007       if (! check_bool_pattern (rhs1, vinfo, stmts))
   4008 	return false;
   4009       break;
   4010 
   4011     CASE_CONVERT:
   4012       if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
   4013 	return false;
   4014       if (! check_bool_pattern (rhs1, vinfo, stmts))
   4015 	return false;
   4016       break;
   4017 
   4018     case BIT_NOT_EXPR:
   4019       if (! check_bool_pattern (rhs1, vinfo, stmts))
   4020 	return false;
   4021       break;
   4022 
   4023     case BIT_AND_EXPR:
   4024     case BIT_IOR_EXPR:
   4025     case BIT_XOR_EXPR:
   4026       if (! check_bool_pattern (rhs1, vinfo, stmts)
   4027 	  || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
   4028 	return false;
   4029       break;
   4030 
   4031     default:
   4032       if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
   4033 	{
   4034 	  tree vecitype, comp_vectype;
   4035 
   4036 	  /* If the comparison can throw, then is_gimple_condexpr will be
   4037 	     false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
   4038 	  if (stmt_could_throw_p (cfun, def_stmt))
   4039 	    return false;
   4040 
   4041 	  comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
   4042 	  if (comp_vectype == NULL_TREE)
   4043 	    return false;
   4044 
   4045 	  tree mask_type = get_mask_type_for_scalar_type (vinfo,
   4046 							  TREE_TYPE (rhs1));
   4047 	  if (mask_type
   4048 	      && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
   4049 	    return false;
   4050 
   4051 	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
   4052 	    {
   4053 	      scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
   4054 	      tree itype
   4055 		= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
   4056 	      vecitype = get_vectype_for_scalar_type (vinfo, itype);
   4057 	      if (vecitype == NULL_TREE)
   4058 		return false;
   4059 	    }
   4060 	  else
   4061 	    vecitype = comp_vectype;
   4062 	  if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
   4063 	    return false;
   4064 	}
   4065       else
   4066 	return false;
   4067       break;
   4068     }
   4069 
   4070   bool res = stmts.add (def_stmt);
   4071   /* We can't end up recursing when just visiting SSA defs but not PHIs.  */
   4072   gcc_assert (!res);
   4073 
   4074   return true;
   4075 }
   4076 
   4077 
   4078 /* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
   4079    stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs
   4080    pattern sequence.  */
   4081 
   4082 static tree
   4083 adjust_bool_pattern_cast (vec_info *vinfo,
   4084 			  tree type, tree var, stmt_vec_info stmt_info)
   4085 {
   4086   gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
   4087 					   NOP_EXPR, var);
   4088   append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
   4089 			  get_vectype_for_scalar_type (vinfo, type));
   4090   return gimple_assign_lhs (cast_stmt);
   4091 }
   4092 
   4093 /* Helper function of vect_recog_bool_pattern.  Do the actual transformations.
   4094    VAR is an SSA_NAME that should be transformed from bool to a wider integer
   4095    type, OUT_TYPE is the desired final integer type of the whole pattern.
   4096    STMT_INFO is the info of the pattern root and is where pattern stmts should
   4097    be associated with.  DEFS is a map of pattern defs.  */
   4098 
static void
adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
		     stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
{
  gimple *stmt = SSA_NAME_DEF_STMT (var);
  enum tree_code rhs_code, def_rhs_code;
  tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
  location_t loc;
  gimple *pattern_stmt, *def_stmt;
  /* Non-NULL only on the BIT_AND_EXPR shortcut below, where one
     COND_EXPR's result is reused as the other's true value.  */
  tree trueval = NULL_TREE;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);
  rhs_code = gimple_assign_rhs_code (stmt);
  loc = gimple_location (stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
    CASE_CONVERT:
      /* Copy or bool<->bool conversion: reuse the already-widened def
	 of RHS1 unchanged (DEFS holds it; operands were processed
	 first because adjust_bool_stmts visits stmts in UID order).  */
      irhs1 = *defs.get (rhs1);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       SSA_NAME, irhs1);
      break;

    case BIT_NOT_EXPR:
      /* Boolean negation becomes XOR with 1 in the widened type.  */
      irhs1 = *defs.get (rhs1);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
      break;

    case BIT_AND_EXPR:
      /* Try to optimize x = y & (a < b ? 1 : 0); into
	 x = (a < b ? y : 0);

	 E.g. for:
	   bool a_b, b_b, c_b;
	   TYPE d_T;

	   S1  a_b = x1 CMP1 y1;
	   S2  b_b = x2 CMP2 y2;
	   S3  c_b = a_b & b_b;
	   S4  d_T = (TYPE) c_b;

	 we would normally emit:

	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	   S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	   S3'  c_T = a_T & b_T;
	   S4'  d_T = c_T;

	 but we can save one stmt by using the
	 result of one of the COND_EXPRs in the other COND_EXPR and leave
	 BIT_AND_EXPR stmt out:

	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	   S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	   S4'  d_T = c_T;

	 At least when VEC_COND_EXPR is implemented using masks
	 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
	 computes the comparison masks and ands it, in one case with
	 all ones vector, in the other case with a vector register.
	 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
	 often more expensive.  */
      /* First try: RHS2 defined by a comparison whose operand width
	 matches the widened RHS1 -- fold the AND into that compare.  */
      def_stmt = SSA_NAME_DEF_STMT (rhs2);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
	{
	  irhs1 = *defs.get (rhs1);
	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
	  if (TYPE_PRECISION (TREE_TYPE (irhs1))
	      == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
	    {
	      rhs_code = def_rhs_code;
	      rhs1 = def_rhs1;
	      rhs2 = gimple_assign_rhs2 (def_stmt);
	      trueval = irhs1;
	      goto do_compare;
	    }
	  else
	    irhs2 = *defs.get (rhs2);
	  goto and_ior_xor;
	}
      /* Second try: same shortcut with the operands swapped.  */
      def_stmt = SSA_NAME_DEF_STMT (rhs1);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
	{
	  irhs2 = *defs.get (rhs2);
	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
	  if (TYPE_PRECISION (TREE_TYPE (irhs2))
	      == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
	    {
	      rhs_code = def_rhs_code;
	      rhs1 = def_rhs1;
	      rhs2 = gimple_assign_rhs2 (def_stmt);
	      trueval = irhs2;
	      goto do_compare;
	    }
	  else
	    irhs1 = *defs.get (rhs1);
	  goto and_ior_xor;
	}
      /* FALLTHRU */
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      irhs1 = *defs.get (rhs1);
      irhs2 = *defs.get (rhs2);
    and_ior_xor:
      /* The widened operands may have different precisions; cast the
	 one whose precision is further from OUT_TYPE's (or both to
	 OUT_TYPE on a tie) so the binary op is well-typed.  */
      if (TYPE_PRECISION (TREE_TYPE (irhs1))
	  != TYPE_PRECISION (TREE_TYPE (irhs2)))
	{
	  int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
	  int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
	  int out_prec = TYPE_PRECISION (out_type);
	  if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
	    irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
					      stmt_info);
	  else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
	    irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
					      stmt_info);
	  else
	    {
	      irhs1 = adjust_bool_pattern_cast (vinfo,
						out_type, irhs1, stmt_info);
	      irhs2 = adjust_bool_pattern_cast (vinfo,
						out_type, irhs2, stmt_info);
	    }
	}
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       rhs_code, irhs1, irhs2);
      break;

    default:
    do_compare:
      /* A comparison: emit "cond ? trueval : 0" in an integer type of
	 the same width as the compared operands (check_bool_pattern
	 already verified the target can expand this VEC_COND_EXPR).  */
      gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
      if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
	  || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
	  || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
		       GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
	{
	  scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
	  itype
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	}
      else
	itype = TREE_TYPE (rhs1);
      cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
      if (trueval == NULL_TREE)
	trueval = build_int_cst (itype, 1);
      else
	gcc_checking_assert (useless_type_conversion_p (itype,
							TREE_TYPE (trueval)));
      pattern_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       COND_EXPR, cond_expr, trueval,
			       build_int_cst (itype, 0));
      break;
    }

  gimple_set_location (pattern_stmt, loc);
  append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
			  get_vectype_for_scalar_type (vinfo, itype));
  /* Record the widened replacement so later stmts referencing VAR can
     look it up.  */
  defs.put (var, gimple_assign_lhs (pattern_stmt));
}
   4269 
   4270 /* Comparison function to qsort a vector of gimple stmts after UID.  */
   4271 
   4272 static int
   4273 sort_after_uid (const void *p1, const void *p2)
   4274 {
   4275   const gimple *stmt1 = *(const gimple * const *)p1;
   4276   const gimple *stmt2 = *(const gimple * const *)p2;
   4277   return gimple_uid (stmt1) - gimple_uid (stmt2);
   4278 }
   4279 
   4280 /* Create pattern stmts for all stmts participating in the bool pattern
   4281    specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
   4282    OUT_TYPE.  Return the def of the pattern root.  */
   4283 
   4284 static tree
   4285 adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
   4286 		   tree out_type, stmt_vec_info stmt_info)
   4287 {
   4288   /* Gather original stmts in the bool pattern in their order of appearance
   4289      in the IL.  */
   4290   auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
   4291   for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
   4292        i != bool_stmt_set.end (); ++i)
   4293     bool_stmts.quick_push (*i);
   4294   bool_stmts.qsort (sort_after_uid);
   4295 
   4296   /* Now process them in that order, producing pattern stmts.  */
   4297   hash_map <tree, tree> defs;
   4298   for (unsigned i = 0; i < bool_stmts.length (); ++i)
   4299     adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
   4300 			 out_type, stmt_info, defs);
   4301 
   4302   /* Pop the last pattern seq stmt and install it as pattern root for STMT.  */
   4303   gimple *pattern_stmt
   4304     = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
   4305   return gimple_assign_lhs (pattern_stmt);
   4306 }
   4307 
   4308 /* Return the proper type for converting bool VAR into
   4309    an integer value or NULL_TREE if no such type exists.
   4310    The type is chosen so that the converted value has the
   4311    same number of elements as VAR's vector type.  */
   4312 
   4313 static tree
   4314 integer_type_for_mask (tree var, vec_info *vinfo)
   4315 {
   4316   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
   4317     return NULL_TREE;
   4318 
   4319   stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
   4320   if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
   4321     return NULL_TREE;
   4322 
   4323   return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
   4324 }
   4325 
   4326 /* Function vect_recog_bool_pattern
   4327 
   4328    Try to find pattern like following:
   4329 
   4330      bool a_b, b_b, c_b, d_b, e_b;
   4331      TYPE f_T;
   4332    loop:
   4333      S1  a_b = x1 CMP1 y1;
   4334      S2  b_b = x2 CMP2 y2;
   4335      S3  c_b = a_b & b_b;
   4336      S4  d_b = x3 CMP3 y3;
   4337      S5  e_b = c_b | d_b;
   4338      S6  f_T = (TYPE) e_b;
   4339 
   4340    where type 'TYPE' is an integral type.  Or a similar pattern
   4341    ending in
   4342 
   4343      S6  f_Y = e_b ? r_Y : s_Y;
   4344 
   4345    as results from if-conversion of a complex condition.
   4346 
   4347    Input:
   4348 
   4349    * STMT_VINFO: The stmt at the end from which the pattern
   4350 		 search begins, i.e. cast of a bool to
   4351 		 an integer type.
   4352 
   4353    Output:
   4354 
   4355    * TYPE_OUT: The type of the output of this pattern.
   4356 
   4357    * Return value: A new stmt that will be used to replace the pattern.
   4358 
   4359 	Assuming size of TYPE is the same as size of all comparisons
   4360 	(otherwise some casts would be added where needed), the above
   4361 	sequence we create related pattern stmts:
   4362 	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
   4363 	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
   4364 	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
   4365 	S5'  e_T = c_T | d_T;
   4366 	S6'  f_T = e_T;
   4367 
   4368 	Instead of the above S3' we could emit:
   4369 	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
   4370 	S3'  c_T = a_T | b_T;
   4371 	but the above is more efficient.  */
   4372 
static gimple *
vect_recog_bool_pattern (vec_info *vinfo,
			 stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  enum tree_code rhs_code;
  tree var, lhs, rhs, vectype;
  gimple *pattern_stmt;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  var = gimple_assign_rhs1 (last_stmt);
  lhs = gimple_assign_lhs (last_stmt);
  rhs_code = gimple_assign_rhs_code (last_stmt);

  /* Look through a VIEW_CONVERT_EXPR wrapper to the converted value.  */
  if (rhs_code == VIEW_CONVERT_EXPR)
    var = TREE_OPERAND (var, 0);

  /* The pattern only applies when the converted value is a scalar
     boolean.  */
  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
    return NULL;

  hash_set<gimple *> bool_stmts;

  /* Case 1: a bool-to-integer conversion (S6 f_T = (TYPE) e_b in the
     header comment).  */
  if (CONVERT_EXPR_CODE_P (rhs_code)
      || rhs_code == VIEW_CONVERT_EXPR)
    {
      if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
	  || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
	return NULL;
      /* NOTE(review): VECTYPE is not checked for NULL_TREE here before
	 being stored to *TYPE_OUT below -- presumably the caller
	 rejects a NULL *TYPE_OUT; confirm against vect_pattern_recog_1.  */
      vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));

      if (check_bool_pattern (var, vinfo, bool_stmts))
	{
	  /* The whole bool computation can be widened; rebuild it as
	     integer stmts and copy/convert the result into LHS's type.  */
	  rhs = adjust_bool_stmts (vinfo, bool_stmts,
				   TREE_TYPE (lhs), stmt_vinfo);
	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	    pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
	  else
	    pattern_stmt
	      = gimple_build_assign (lhs, NOP_EXPR, rhs);
	}
      else
	{
	  /* Otherwise treat VAR as a mask and materialize the integer
	     value with a COND_EXPR selecting 1 or 0.  */
	  tree type = integer_type_for_mask (var, vinfo);
	  tree cst0, cst1, tmp;

	  if (!type)
	    return NULL;

	  /* We may directly use cond with narrowed type to avoid
	     multiple cond exprs with following result packing and
	     perform single cond with packed mask instead.  In case
	     of widening we better make cond first and then extract
	     results.  */
	  if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
	    type = TREE_TYPE (lhs);

	  cst0 = build_int_cst (type, 0);
	  cst1 = build_int_cst (type, 1);
	  tmp = vect_recog_temp_ssa_var (type, NULL);
	  pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);

	  if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
	    {
	      /* Narrow COND_EXPR result needs an extra conversion to
		 LHS's type; queue the COND_EXPR as a pattern def.  */
	      tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
	      append_pattern_def_seq (vinfo, stmt_vinfo,
				      pattern_stmt, new_vectype);

	      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	      pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
	    }
	}

      *type_out = vectype;
      vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);

      return pattern_stmt;
    }
  /* Case 2: a COND_EXPR selecting on a bool SSA name
     (S6 f_Y = e_b ? r_Y : s_Y in the header comment).  */
  else if (rhs_code == COND_EXPR
	   && TREE_CODE (var) == SSA_NAME)
    {
      vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
	return NULL;

      /* Build a scalar type for the boolean result that when
         vectorized matches the vector type of the result in
	 size and number of elements.  */
      unsigned prec
	= vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
			       TYPE_VECTOR_SUBPARTS (vectype));

      tree type
	= build_nonstandard_integer_type (prec,
					  TYPE_UNSIGNED (TREE_TYPE (var)));
      if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
	return NULL;

      if (!check_bool_pattern (var, vinfo, bool_stmts))
	return NULL;

      rhs = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);

      /* Select on "widened_bool != 0" instead of the original bool.  */
      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      pattern_stmt
	  = gimple_build_assign (lhs, COND_EXPR,
				 build2 (NE_EXPR, boolean_type_node,
					 rhs, build_int_cst (type, 0)),
				 gimple_assign_rhs2 (last_stmt),
				 gimple_assign_rhs3 (last_stmt));
      *type_out = vectype;
      vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);

      return pattern_stmt;
    }
  /* Case 3: a store of a bool value (LAST_STMT has a data reference);
     store an integer 0/1 instead, view-converted back to bool.  */
  else if (rhs_code == SSA_NAME
	   && STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      stmt_vec_info pattern_stmt_info;
      tree nunits_vectype;
      if (!vect_get_vector_types_for_stmt (vinfo, stmt_vinfo, &vectype,
					   &nunits_vectype)
	  || !VECTOR_MODE_P (TYPE_MODE (vectype)))
	return NULL;

      if (check_bool_pattern (var, vinfo, bool_stmts))
	rhs = adjust_bool_stmts (vinfo, bool_stmts,
				 TREE_TYPE (vectype), stmt_vinfo);
      else
	{
	  /* As in case 1, fall back to a COND_EXPR on the mask.  */
	  tree type = integer_type_for_mask (var, vinfo);
	  tree cst0, cst1, new_vectype;

	  if (!type)
	    return NULL;

	  if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
	    type = TREE_TYPE (vectype);

	  cst0 = build_int_cst (type, 0);
	  cst1 = build_int_cst (type, 1);
	  new_vectype = get_vectype_for_scalar_type (vinfo, type);

	  rhs = vect_recog_temp_ssa_var (type, NULL);
	  pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
	  append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
	}

      /* Store through a VIEW_CONVERT_EXPR of the original lhs so the
	 data reference keeps its bool-typed location.  */
      lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	{
	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
	  append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
	  rhs = rhs2;
	}
      pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
      pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
      /* Transfer the data reference to the pattern stmt.  */
      vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
      *type_out = vectype;
      vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);

      return pattern_stmt;
    }
  else
    return NULL;
}
   4542 
   4543 
   4544 /* A helper for vect_recog_mask_conversion_pattern.  Build
   4545    conversion of MASK to a type suitable for masking VECTYPE.
   4546    Built statement gets required vectype and is appended to
   4547    a pattern sequence of STMT_VINFO.
   4548 
   4549    Return converted mask.  */
   4550 
   4551 static tree
   4552 build_mask_conversion (vec_info *vinfo,
   4553 		       tree mask, tree vectype, stmt_vec_info stmt_vinfo)
   4554 {
   4555   gimple *stmt;
   4556   tree masktype, tmp;
   4557 
   4558   masktype = truth_type_for (vectype);
   4559   tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
   4560   stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
   4561   append_pattern_def_seq (vinfo, stmt_vinfo,
   4562 			  stmt, masktype, TREE_TYPE (vectype));
   4563 
   4564   return tmp;
   4565 }
   4566 
   4567 
   4568 /* Function vect_recog_mask_conversion_pattern
   4569 
   4570    Try to find statements which require boolean type
   conversion.  Additional conversion statements are
   4572    added to handle such cases.  For example:
   4573 
   4574    bool m_1, m_2, m_3;
   4575    int i_4, i_5;
   4576    double d_6, d_7;
   4577    char c_1, c_2, c_3;
   4578 
   4579    S1   m_1 = i_4 > i_5;
   4580    S2   m_2 = d_6 < d_7;
   4581    S3   m_3 = m_1 & m_2;
   4582    S4   c_1 = m_3 ? c_2 : c_3;
   4583 
   4584    Will be transformed into:
   4585 
   4586    S1   m_1 = i_4 > i_5;
   4587    S2   m_2 = d_6 < d_7;
   4588    S3'' m_2' = (_Bool[bitsize=32])m_2
   4589    S3'  m_3' = m_1 & m_2';
   4590    S4'' m_3'' = (_Bool[bitsize=8])m_3'
   4591    S4'  c_1' = m_3'' ? c_2 : c_3;  */
   4592 
   4593 static gimple *
   4594 vect_recog_mask_conversion_pattern (vec_info *vinfo,
   4595 				    stmt_vec_info stmt_vinfo, tree *type_out)
   4596 {
   4597   gimple *last_stmt = stmt_vinfo->stmt;
   4598   enum tree_code rhs_code;
   4599   tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
   4600   tree vectype1, vectype2;
   4601   stmt_vec_info pattern_stmt_info;
   4602   tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
   4603   tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
   4604 
  /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion.  */
   4606   if (is_gimple_call (last_stmt)
   4607       && gimple_call_internal_p (last_stmt))
   4608     {
   4609       gcall *pattern_stmt;
   4610 
   4611       internal_fn ifn = gimple_call_internal_fn (last_stmt);
   4612       int mask_argno = internal_fn_mask_index (ifn);
   4613       if (mask_argno < 0)
   4614 	return NULL;
   4615 
   4616       bool store_p = internal_store_fn_p (ifn);
   4617       if (store_p)
   4618 	{
   4619 	  int rhs_index = internal_fn_stored_value_index (ifn);
   4620 	  tree rhs = gimple_call_arg (last_stmt, rhs_index);
   4621 	  vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
   4622 	}
   4623       else
   4624 	{
   4625 	  lhs = gimple_call_lhs (last_stmt);
   4626 	  if (!lhs)
   4627 	    return NULL;
   4628 	  vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
   4629 	}
   4630 
   4631       tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
   4632       tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
   4633       if (!mask_arg_type)
   4634 	return NULL;
   4635       vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
   4636 
   4637       if (!vectype1 || !vectype2
   4638 	  || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
   4639 		       TYPE_VECTOR_SUBPARTS (vectype2)))
   4640 	return NULL;
   4641 
   4642       tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
   4643 
   4644       auto_vec<tree, 8> args;
   4645       unsigned int nargs = gimple_call_num_args (last_stmt);
   4646       args.safe_grow (nargs, true);
   4647       for (unsigned int i = 0; i < nargs; ++i)
   4648 	args[i] = ((int) i == mask_argno
   4649 		   ? tmp
   4650 		   : gimple_call_arg (last_stmt, i));
   4651       pattern_stmt = gimple_build_call_internal_vec (ifn, args);
   4652 
   4653       if (!store_p)
   4654 	{
   4655 	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
   4656 	  gimple_call_set_lhs (pattern_stmt, lhs);
   4657 	}
   4658       gimple_call_set_nothrow (pattern_stmt, true);
   4659 
   4660       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
   4661       if (STMT_VINFO_DATA_REF (stmt_vinfo))
   4662 	vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
   4663 
   4664       *type_out = vectype1;
   4665       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
   4666 
   4667       return pattern_stmt;
   4668     }
   4669 
   4670   if (!is_gimple_assign (last_stmt))
   4671     return NULL;
   4672 
   4673   gimple *pattern_stmt;
   4674   lhs = gimple_assign_lhs (last_stmt);
   4675   rhs1 = gimple_assign_rhs1 (last_stmt);
   4676   rhs_code = gimple_assign_rhs_code (last_stmt);
   4677 
   4678   /* Check for cond expression requiring mask conversion.  */
   4679   if (rhs_code == COND_EXPR)
   4680     {
   4681       vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
   4682 
   4683       if (TREE_CODE (rhs1) == SSA_NAME)
   4684 	{
   4685 	  rhs1_type = integer_type_for_mask (rhs1, vinfo);
   4686 	  if (!rhs1_type)
   4687 	    return NULL;
   4688 	}
   4689       else if (COMPARISON_CLASS_P (rhs1))
   4690 	{
   4691 	  /* Check whether we're comparing scalar booleans and (if so)
   4692 	     whether a better mask type exists than the mask associated
   4693 	     with boolean-sized elements.  This avoids unnecessary packs
   4694 	     and unpacks if the booleans are set from comparisons of
   4695 	     wider types.  E.g. in:
   4696 
	       int x1, x2, x3, x4, y1, y2;
   4698 	       ...
   4699 	       bool b1 = (x1 == x2);
   4700 	       bool b2 = (x3 == x4);
   4701 	       ... = b1 == b2 ? y1 : y2;
   4702 
   4703 	     it is better for b1 and b2 to use the mask type associated
   4704 	     with int elements rather bool (byte) elements.  */
   4705 	  rhs1_op0 = TREE_OPERAND (rhs1, 0);
   4706 	  rhs1_op1 = TREE_OPERAND (rhs1, 1);
   4707 	  if (!rhs1_op0 || !rhs1_op1)
   4708 	    return NULL;
   4709 	  rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
   4710 	  rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
   4711 
   4712 	  if (!rhs1_op0_type)
   4713 	    rhs1_type = TREE_TYPE (rhs1_op0);
   4714 	  else if (!rhs1_op1_type)
   4715 	    rhs1_type = TREE_TYPE (rhs1_op1);
   4716 	  else if (TYPE_PRECISION (rhs1_op0_type)
   4717 		   != TYPE_PRECISION (rhs1_op1_type))
   4718 	    {
   4719 	      int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
   4720 			 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
   4721 	      int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
   4722 			 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
   4723 	      if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
   4724 		{
   4725 		  if (abs (tmp0) > abs (tmp1))
   4726 		    rhs1_type = rhs1_op1_type;
   4727 		  else
   4728 		    rhs1_type = rhs1_op0_type;
   4729 		}
   4730 	      else
   4731 		rhs1_type = build_nonstandard_integer_type
   4732 		  (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
   4733 	    }
   4734 	  else
   4735 	    rhs1_type = rhs1_op0_type;
   4736 	}
   4737       else
   4738 	return NULL;
   4739 
   4740       vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
   4741 
   4742       if (!vectype1 || !vectype2)
   4743 	return NULL;
   4744 
   4745       /* Continue if a conversion is needed.  Also continue if we have
   4746 	 a comparison whose vector type would normally be different from
   4747 	 VECTYPE2 when considered in isolation.  In that case we'll
   4748 	 replace the comparison with an SSA name (so that we can record
   4749 	 its vector type) and behave as though the comparison was an SSA
   4750 	 name from the outset.  */
   4751       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
   4752 		    TYPE_VECTOR_SUBPARTS (vectype2))
   4753 	  && !rhs1_op0_type
   4754 	  && !rhs1_op1_type)
   4755 	return NULL;
   4756 
   4757       /* If rhs1 is invariant and we can promote it leave the COND_EXPR
   4758          in place, we can handle it in vectorizable_condition.  This avoids
   4759 	 unnecessary promotion stmts and increased vectorization factor.  */
   4760       if (COMPARISON_CLASS_P (rhs1)
   4761 	  && INTEGRAL_TYPE_P (rhs1_type)
   4762 	  && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
   4763 		       TYPE_VECTOR_SUBPARTS (vectype2)))
   4764 	{
   4765 	  enum vect_def_type dt;
   4766 	  if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
   4767 	      && dt == vect_external_def
   4768 	      && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
   4769 	      && (dt == vect_external_def
   4770 		  || dt == vect_constant_def))
   4771 	    {
   4772 	      tree wide_scalar_type = build_nonstandard_integer_type
   4773 		(vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
   4774 	      tree vectype3 = get_vectype_for_scalar_type (vinfo,
   4775 							   wide_scalar_type);
   4776 	      if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
   4777 		return NULL;
   4778 	    }
   4779 	}
   4780 
   4781       /* If rhs1 is a comparison we need to move it into a
   4782 	 separate statement.  */
   4783       if (TREE_CODE (rhs1) != SSA_NAME)
   4784 	{
   4785 	  tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
   4786 	  if (rhs1_op0_type
   4787 	      && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
   4788 	    rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
   4789 					      vectype2, stmt_vinfo);
   4790 	  if (rhs1_op1_type
   4791 	      && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
   4792 	    rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
   4793 				      vectype2, stmt_vinfo);
   4794 	  pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
   4795 					      rhs1_op0, rhs1_op1);
   4796 	  rhs1 = tmp;
   4797 	  append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
   4798 				  rhs1_type);
   4799 	}
   4800 
   4801       if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
   4802 		    TYPE_VECTOR_SUBPARTS (vectype2)))
   4803 	tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
   4804       else
   4805 	tmp = rhs1;
   4806 
   4807       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
   4808       pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
   4809 					  gimple_assign_rhs2 (last_stmt),
   4810 					  gimple_assign_rhs3 (last_stmt));
   4811 
   4812       *type_out = vectype1;
   4813       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
   4814 
   4815       return pattern_stmt;
   4816     }
   4817 
   4818   /* Now check for binary boolean operations requiring conversion for
   4819      one of operands.  */
   4820   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
   4821     return NULL;
   4822 
   4823   if (rhs_code != BIT_IOR_EXPR
   4824       && rhs_code != BIT_XOR_EXPR
   4825       && rhs_code != BIT_AND_EXPR
   4826       && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
   4827     return NULL;
   4828 
   4829   rhs2 = gimple_assign_rhs2 (last_stmt);
   4830 
   4831   rhs1_type = integer_type_for_mask (rhs1, vinfo);
   4832   rhs2_type = integer_type_for_mask (rhs2, vinfo);
   4833 
   4834   if (!rhs1_type || !rhs2_type
   4835       || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
   4836     return NULL;
   4837 
   4838   if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
   4839     {
   4840       vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
   4841       if (!vectype1)
   4842 	return NULL;
   4843       rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
   4844     }
   4845   else
   4846     {
   4847       vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
   4848       if (!vectype1)
   4849 	return NULL;
   4850       rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
   4851     }
   4852 
   4853   lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
   4854   pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
   4855 
   4856   *type_out = vectype1;
   4857   vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
   4858 
   4859   return pattern_stmt;
   4860 }
   4861 
   4862 /* STMT_INFO is a load or store.  If the load or store is conditional, return
   4863    the boolean condition under which it occurs, otherwise return null.  */
   4864 
   4865 static tree
   4866 vect_get_load_store_mask (stmt_vec_info stmt_info)
   4867 {
   4868   if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
   4869     {
   4870       gcc_assert (gimple_assign_single_p (def_assign));
   4871       return NULL_TREE;
   4872     }
   4873 
   4874   if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
   4875     {
   4876       internal_fn ifn = gimple_call_internal_fn (def_call);
   4877       int mask_index = internal_fn_mask_index (ifn);
   4878       return gimple_call_arg (def_call, mask_index);
   4879     }
   4880 
   4881   gcc_unreachable ();
   4882 }
   4883 
   4884 /* Return MASK if MASK is suitable for masking an operation on vectors
   4885    of type VECTYPE, otherwise convert it into such a form and return
   4886    the result.  Associate any conversion statements with STMT_INFO's
   4887    pattern.  */
   4888 
   4889 static tree
   4890 vect_convert_mask_for_vectype (tree mask, tree vectype,
   4891 			       stmt_vec_info stmt_info, vec_info *vinfo)
   4892 {
   4893   tree mask_type = integer_type_for_mask (mask, vinfo);
   4894   if (mask_type)
   4895     {
   4896       tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
   4897       if (mask_vectype
   4898 	  && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
   4899 		       TYPE_VECTOR_SUBPARTS (mask_vectype)))
   4900 	mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
   4901     }
   4902   return mask;
   4903 }
   4904 
   4905 /* Return the equivalent of:
   4906 
   4907      fold_convert (TYPE, VALUE)
   4908 
   4909    with the expectation that the operation will be vectorized.
   4910    If new statements are needed, add them as pattern statements
   4911    to STMT_INFO.  */
   4912 
   4913 static tree
   4914 vect_add_conversion_to_pattern (vec_info *vinfo,
   4915 				tree type, tree value, stmt_vec_info stmt_info)
   4916 {
   4917   if (useless_type_conversion_p (type, TREE_TYPE (value)))
   4918     return value;
   4919 
   4920   tree new_value = vect_recog_temp_ssa_var (type, NULL);
   4921   gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
   4922   append_pattern_def_seq (vinfo, stmt_info, conversion,
   4923 			  get_vectype_for_scalar_type (vinfo, type));
   4924   return new_value;
   4925 }
   4926 
   4927 /* Try to convert STMT_INFO into a call to a gather load or scatter store
   4928    internal function.  Return the final statement on success and set
   4929    *TYPE_OUT to the vector type being loaded or stored.
   4930 
   4931    This function only handles gathers and scatters that were recognized
   4932    as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P).  */
   4933 
static gimple *
vect_recog_gather_scatter_pattern (vec_info *vinfo,
				   stmt_vec_info stmt_info, tree *type_out)
{
  /* Currently we only support this for loop vectorization.  */
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  if (!loop_vinfo)
    return NULL;

  /* Make sure that we're looking at a gather load or scatter store.  */
  data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    return NULL;

  /* Get the boolean that controls whether the load or store happens.
     This is null if the operation is unconditional.  */
  tree mask = vect_get_load_store_mask (stmt_info);

  /* Make sure that the target supports an appropriate internal
     function for the gather/scatter operation.  */
  gather_scatter_info gs_info;
  if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
      || gs_info.ifn == IFN_LAST)
    return NULL;

  /* Convert the mask to the right form.  */
  tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
						 gs_info.element_type);
  if (mask)
    mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
					  loop_vinfo);
  /* If only the masked IFN form is available for an unconditional
     access, synthesize an all-true mask (-1 in the truth type).  */
  else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
	   || gs_info.ifn == IFN_MASK_GATHER_LOAD)
    mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);

  /* Get the invariant base and non-invariant offset, converting the
     latter to the same width as the vector elements.  */
  tree base = gs_info.base;
  tree offset_type = TREE_TYPE (gs_info.offset_vectype);
  tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
						gs_info.offset, stmt_info);

  /* Build the new pattern statement.  The masked variants take the
     mask as a trailing fifth argument.  */
  tree scale = size_int (gs_info.scale);
  gcall *pattern_stmt;
  if (DR_IS_READ (dr))
    {
      tree zero = build_zero_cst (gs_info.element_type);
      if (mask != NULL)
	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
						   offset, scale, zero, mask);
      else
	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
						   offset, scale, zero);
      tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
      gimple_call_set_lhs (pattern_stmt, load_lhs);
    }
  else
    {
      tree rhs = vect_get_store_rhs (stmt_info);
      if (mask != NULL)
	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
						   base, offset, scale, rhs,
						   mask);
      else
	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
						   base, offset, scale, rhs);
    }
  /* Internal gather/scatter calls cannot throw.  */
  gimple_call_set_nothrow (pattern_stmt, true);

  /* Copy across relevant vectorization info and associate DR with the
     new pattern statement instead of the original statement.  */
  stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
  loop_vinfo->move_dr (pattern_stmt_info, stmt_info);

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  *type_out = vectype;
  vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);

  return pattern_stmt;
}
   5015 
   5016 /* Return true if TYPE is a non-boolean integer type.  These are the types
   5017    that we want to consider for narrowing.  */
   5018 
   5019 static bool
   5020 vect_narrowable_type_p (tree type)
   5021 {
   5022   return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
   5023 }
   5024 
   5025 /* Return true if the operation given by CODE can be truncated to N bits
   5026    when only N bits of the output are needed.  This is only true if bit N+1
   5027    of the inputs has no effect on the low N bits of the result.  */
   5028 
   5029 static bool
   5030 vect_truncatable_operation_p (tree_code code)
   5031 {
   5032   switch (code)
   5033     {
   5034     case NEGATE_EXPR:
   5035     case PLUS_EXPR:
   5036     case MINUS_EXPR:
   5037     case MULT_EXPR:
   5038     case BIT_NOT_EXPR:
   5039     case BIT_AND_EXPR:
   5040     case BIT_IOR_EXPR:
   5041     case BIT_XOR_EXPR:
   5042     case COND_EXPR:
   5043       return true;
   5044 
   5045     default:
   5046       return false;
   5047     }
   5048 }
   5049 
   5050 /* Record that STMT_INFO could be changed from operating on TYPE to
   5051    operating on a type with the precision and sign given by PRECISION
   5052    and SIGN respectively.  PRECISION is an arbitrary bit precision;
   5053    it might not be a whole number of bytes.  */
   5054 
   5055 static void
   5056 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
   5057 			 unsigned int precision, signop sign)
   5058 {
   5059   /* Round the precision up to a whole number of bytes.  */
   5060   precision = vect_element_precision (precision);
   5061   if (precision < TYPE_PRECISION (type)
   5062       && (!stmt_info->operation_precision
   5063 	  || stmt_info->operation_precision > precision))
   5064     {
   5065       stmt_info->operation_precision = precision;
   5066       stmt_info->operation_sign = sign;
   5067     }
   5068 }
   5069 
   5070 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
   5071    non-boolean inputs, all of which have type TYPE.  MIN_INPUT_PRECISION
   5072    is an arbitrary bit precision; it might not be a whole number of bytes.  */
   5073 
   5074 static void
   5075 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
   5076 			      unsigned int min_input_precision)
   5077 {
   5078   /* This operation in isolation only requires the inputs to have
   5079      MIN_INPUT_PRECISION of precision,  However, that doesn't mean
   5080      that MIN_INPUT_PRECISION is a natural precision for the chain
   5081      as a whole.  E.g. consider something like:
   5082 
   5083 	 unsigned short *x, *y;
   5084 	 *y = ((*x & 0xf0) >> 4) | (*y << 4);
   5085 
   5086      The right shift can be done on unsigned chars, and only requires the
   5087      result of "*x & 0xf0" to be done on unsigned chars.  But taking that
   5088      approach would mean turning a natural chain of single-vector unsigned
   5089      short operations into one that truncates "*x" and then extends
   5090      "(*x & 0xf0) >> 4", with two vectors for each unsigned short
   5091      operation and one vector for each unsigned char operation.
   5092      This would be a significant pessimization.
   5093 
   5094      Instead only propagate the maximum of this precision and the precision
   5095      required by the users of the result.  This means that we don't pessimize
   5096      the case above but continue to optimize things like:
   5097 
   5098 	 unsigned char *y;
   5099 	 unsigned short *x;
   5100 	 *y = ((*x & 0xf0) >> 4) | (*y << 4);
   5101 
   5102      Here we would truncate two vectors of *x to a single vector of
   5103      unsigned chars and use single-vector unsigned char operations for
   5104      everything else, rather than doing two unsigned short copies of
   5105      "(*x & 0xf0) >> 4" and then truncating the result.  */
   5106   min_input_precision = MAX (min_input_precision,
   5107 			     stmt_info->min_output_precision);
   5108 
   5109   if (min_input_precision < TYPE_PRECISION (type)
   5110       && (!stmt_info->min_input_precision
   5111 	  || stmt_info->min_input_precision > min_input_precision))
   5112     stmt_info->min_input_precision = min_input_precision;
   5113 }
   5114 
   5115 /* Subroutine of vect_determine_min_output_precision.  Return true if
   5116    we can calculate a reduced number of output bits for STMT_INFO,
   5117    whose result is LHS.  */
   5118 
   5119 static bool
   5120 vect_determine_min_output_precision_1 (vec_info *vinfo,
   5121 				       stmt_vec_info stmt_info, tree lhs)
   5122 {
   5123   /* Take the maximum precision required by users of the result.  */
   5124   unsigned int precision = 0;
   5125   imm_use_iterator iter;
   5126   use_operand_p use;
   5127   FOR_EACH_IMM_USE_FAST (use, iter, lhs)
   5128     {
   5129       gimple *use_stmt = USE_STMT (use);
   5130       if (is_gimple_debug (use_stmt))
   5131 	continue;
   5132       stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
   5133       if (!use_stmt_info || !use_stmt_info->min_input_precision)
   5134 	return false;
   5135       /* The input precision recorded for COND_EXPRs applies only to the
   5136 	 "then" and "else" values.  */
   5137       gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
   5138       if (assign
   5139 	  && gimple_assign_rhs_code (assign) == COND_EXPR
   5140 	  && use->use != gimple_assign_rhs2_ptr (assign)
   5141 	  && use->use != gimple_assign_rhs3_ptr (assign))
   5142 	return false;
   5143       precision = MAX (precision, use_stmt_info->min_input_precision);
   5144     }
   5145 
   5146   if (dump_enabled_p ())
   5147     dump_printf_loc (MSG_NOTE, vect_location,
   5148 		     "only the low %d bits of %T are significant\n",
   5149 		     precision, lhs);
   5150   stmt_info->min_output_precision = precision;
   5151   return true;
   5152 }
   5153 
   5154 /* Calculate min_output_precision for STMT_INFO.  */
   5155 
   5156 static void
   5157 vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
   5158 {
   5159   /* We're only interested in statements with a narrowable result.  */
   5160   tree lhs = gimple_get_lhs (stmt_info->stmt);
   5161   if (!lhs
   5162       || TREE_CODE (lhs) != SSA_NAME
   5163       || !vect_narrowable_type_p (TREE_TYPE (lhs)))
   5164     return;
   5165 
   5166   if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
   5167     stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
   5168 }
   5169 
   5170 /* Use range information to decide whether STMT (described by STMT_INFO)
   5171    could be done in a narrower type.  This is effectively a forward
   5172    propagation, since it uses context-independent information that applies
   5173    to all users of an SSA name.  */
   5174 
static void
vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
{
  /* Only SSA results of narrowable (non-boolean integer) type are
     candidates.  */
  tree lhs = gimple_assign_lhs (stmt);
  if (!lhs || TREE_CODE (lhs) != SSA_NAME)
    return;

  tree type = TREE_TYPE (lhs);
  if (!vect_narrowable_type_p (type))
    return;

  /* First see whether we have any useful range information for the result.  */
  unsigned int precision = TYPE_PRECISION (type);
  signop sign = TYPE_SIGN (type);
  wide_int min_value, max_value;
  if (!vect_get_range_info (lhs, &min_value, &max_value))
    return;

  tree_code code = gimple_assign_rhs_code (stmt);
  unsigned int nops = gimple_num_ops (stmt);

  if (!vect_truncatable_operation_p (code))
    {
      /* Handle operations that can be computed in type T if all inputs
	 and outputs can be represented in type T.  Also handle left and
	 right shifts, where (in addition) the maximum shift amount must
	 be less than the number of bits in T.  */
      bool is_shift;
      switch (code)
	{
	case LSHIFT_EXPR:
	case RSHIFT_EXPR:
	  is_shift = true;
	  break;

	case ABS_EXPR:
	case MIN_EXPR:
	case MAX_EXPR:
	case TRUNC_DIV_EXPR:
	case CEIL_DIV_EXPR:
	case FLOOR_DIV_EXPR:
	case ROUND_DIV_EXPR:
	case EXACT_DIV_EXPR:
	  /* Modulus is excluded because it is typically calculated by doing
	     a division, for which minimum signed / -1 isn't representable in
	     the original signed type.  We could take the division range into
	     account instead, if handling modulus ever becomes important.  */
	  is_shift = false;
	  break;

	default:
	  return;
	}
      /* Widen [MIN_VALUE, MAX_VALUE] so that it also covers every input
	 operand, giving up if any operand's range is unknown.  */
      for (unsigned int i = 1; i < nops; ++i)
	{
	  tree op = gimple_op (stmt, i);
	  wide_int op_min_value, op_max_value;
	  if (TREE_CODE (op) == INTEGER_CST)
	    {
	      unsigned int op_precision = TYPE_PRECISION (TREE_TYPE (op));
	      op_min_value = op_max_value = wi::to_wide (op, op_precision);
	    }
	  else if (TREE_CODE (op) == SSA_NAME)
	    {
	      if (!vect_get_range_info (op, &op_min_value, &op_max_value))
		return;
	    }
	  else
	    return;

	  if (is_shift && i == 2)
	    {
	      /* There needs to be one more bit than the maximum shift amount.

		 If the maximum shift amount is already 1 less than PRECISION
		 then we can't narrow the shift further.  Dealing with that
		 case first ensures that we can safely use an unsigned range
		 below.

		 op_min_value isn't relevant, since shifts by negative amounts
		 are UB.  */
	      if (wi::geu_p (op_max_value, precision - 1))
		return;
	      unsigned int min_bits = op_max_value.to_uhwi () + 1;

	      /* As explained below, we can convert a signed shift into an
		 unsigned shift if the sign bit is always clear.  At this
		 point we've already processed the ranges of the output and
		 the first input.  */
	      auto op_sign = sign;
	      if (sign == SIGNED && !wi::neg_p (min_value))
		op_sign = UNSIGNED;
	      /* Replace the shift amount's range with the widest range that
		 the narrowed shift could need to represent.  */
	      op_min_value = wide_int::from (wi::min_value (min_bits, op_sign),
					     precision, op_sign);
	      op_max_value = wide_int::from (wi::max_value (min_bits, op_sign),
					     precision, op_sign);
	    }
	  min_value = wi::min (min_value, op_min_value, sign);
	  max_value = wi::max (max_value, op_max_value, sign);
	}
    }

  /* Try to switch signed types for unsigned types if we can.
     This is better for two reasons.  First, unsigned ops tend
     to be cheaper than signed ops.  Second, it means that we can
     handle things like:

	signed char c;
	int res = (int) c & 0xff00; // range [0x0000, 0xff00]

     as:

	signed char c;
	unsigned short res_1 = (unsigned short) c & 0xff00;
	int res = (int) res_1;

     where the intermediate result res_1 has unsigned rather than
     signed type.  */
  if (sign == SIGNED && !wi::neg_p (min_value))
    sign = UNSIGNED;

  /* See what precision is required for MIN_VALUE and MAX_VALUE.  */
  unsigned int precision1 = wi::min_precision (min_value, sign);
  unsigned int precision2 = wi::min_precision (max_value, sign);
  unsigned int value_precision = MAX (precision1, precision2);
  if (value_precision >= precision)
    return;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
		     " without loss of precision: %G",
		     sign == SIGNED ? "signed" : "unsigned",
		     value_precision, stmt);

  vect_set_operation_type (stmt_info, type, value_precision, sign);
  vect_set_min_input_precision (stmt_info, type, value_precision);
}
   5312 
   5313 /* Use information about the users of STMT's result to decide whether
   5314    STMT (described by STMT_INFO) could be done in a narrower type.
   5315    This is effectively a backward propagation.  */
   5316 
static void
vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
{
  tree_code code = gimple_assign_rhs_code (stmt);
  /* For COND_EXPRs the precisions computed here apply to the "then" and
     "else" values (operand 2 onwards), not to the condition.  */
  unsigned int opno = (code == COND_EXPR ? 2 : 1);
  tree type = TREE_TYPE (gimple_op (stmt, opno));
  if (!vect_narrowable_type_p (type))
    return;

  unsigned int precision = TYPE_PRECISION (type);
  unsigned int operation_precision, min_input_precision;
  switch (code)
    {
    CASE_CONVERT:
      /* Only the bits that contribute to the output matter.  Don't change
	 the precision of the operation itself.  */
      operation_precision = precision;
      min_input_precision = stmt_info->min_output_precision;
      break;

    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
      {
	/* Only constant in-range shift amounts can be analyzed.  */
	tree shift = gimple_assign_rhs2 (stmt);
	if (TREE_CODE (shift) != INTEGER_CST
	    || !wi::ltu_p (wi::to_widest (shift), precision))
	  return;
	unsigned int const_shift = TREE_INT_CST_LOW (shift);
	if (code == LSHIFT_EXPR)
	  {
	    /* Avoid creating an undefined shift.

	       ??? We could instead use min_output_precision as-is and
	       optimize out-of-range shifts to zero.  However, only
	       degenerate testcases shift away all their useful input data,
	       and it isn't natural to drop input operations in the middle
	       of vectorization.  This sort of thing should really be
	       handled before vectorization.  */
	    operation_precision = MAX (stmt_info->min_output_precision,
				       const_shift + 1);
	    /* We need CONST_SHIFT fewer bits of the input.  */
	    min_input_precision = (MAX (operation_precision, const_shift)
				   - const_shift);
	  }
	else
	  {
	    /* We need CONST_SHIFT extra bits to do the operation.  */
	    operation_precision = (stmt_info->min_output_precision
				   + const_shift);
	    min_input_precision = operation_precision;
	  }
	break;
      }

    default:
      if (vect_truncatable_operation_p (code))
	{
	  /* Input bit N has no effect on output bits N-1 and lower.  */
	  operation_precision = stmt_info->min_output_precision;
	  min_input_precision = operation_precision;
	  break;
	}
      return;
    }

  if (operation_precision < precision)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
			 " without affecting users: %G",
			 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
			 operation_precision, stmt);
      vect_set_operation_type (stmt_info, type, operation_precision,
			       TYPE_SIGN (type));
    }
  /* Record the input requirement even if the operation itself cannot
     be narrowed; it still constrains the producers of the operands.  */
  vect_set_min_input_precision (stmt_info, type, min_input_precision);
}
   5394 
   5395 /* Return true if the statement described by STMT_INFO sets a boolean
   5396    SSA_NAME and if we know how to vectorize this kind of statement using
   5397    vector mask types.  */
   5398 
   5399 static bool
   5400 possible_vector_mask_operation_p (stmt_vec_info stmt_info)
   5401 {
   5402   tree lhs = gimple_get_lhs (stmt_info->stmt);
   5403   if (!lhs
   5404       || TREE_CODE (lhs) != SSA_NAME
   5405       || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
   5406     return false;
   5407 
   5408   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
   5409     {
   5410       tree_code rhs_code = gimple_assign_rhs_code (assign);
   5411       switch (rhs_code)
   5412 	{
   5413 	CASE_CONVERT:
   5414 	case SSA_NAME:
   5415 	case BIT_NOT_EXPR:
   5416 	case BIT_IOR_EXPR:
   5417 	case BIT_XOR_EXPR:
   5418 	case BIT_AND_EXPR:
   5419 	  return true;
   5420 
   5421 	default:
   5422 	  return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
   5423 	}
   5424     }
   5425   else if (is_a <gphi *> (stmt_info->stmt))
   5426     return true;
   5427   return false;
   5428 }
   5429 
   5430 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
   5431    a vector mask type instead of a normal vector type.  Record the
   5432    result in STMT_INFO->mask_precision.  */
   5433 
static void
vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
{
  if (!possible_vector_mask_operation_p (stmt_info))
    return;

  /* If at least one boolean input uses a vector mask type,
     pick the mask type with the narrowest elements.

     ??? This is the traditional behavior.  It should always produce
     the smallest number of operations, but isn't necessarily the
     optimal choice.  For example, if we have:

       a = b & c

     where:

       - the user of a wants it to have a mask type for 16-bit elements (M16)
       - b also uses M16
       - c uses a mask type for 8-bit elements (M8)

     then picking M8 gives:

       - 1 M16->M8 pack for b
       - 1 M8 AND for a
       - 2 M8->M16 unpacks for the user of a

     whereas picking M16 would have given:

       - 2 M8->M16 unpacks for c
       - 2 M16 ANDs for a

     The number of operations are equal, but M16 would have given
     a shorter dependency chain and allowed more ILP.  */
  unsigned int precision = ~0U;
  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      /* Scan every boolean operand with a known mask precision and
	 keep the narrowest.  */
      unsigned int nops = gimple_num_ops (assign);
      for (unsigned int i = 1; i < nops; ++i)
	{
	  tree rhs = gimple_op (assign, i);
	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
	    continue;

	  stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
	  if (!def_stmt_info)
	    /* Don't let external or constant operands influence the choice.
	       We can convert them to whichever vector type we pick.  */
	    continue;

	  if (def_stmt_info->mask_precision)
	    {
	      if (precision > def_stmt_info->mask_precision)
		precision = def_stmt_info->mask_precision;
	    }
	}

      /* If the statement compares two values that shouldn't use vector masks,
	 try comparing the values as normal scalars instead.  */
      tree_code rhs_code = gimple_assign_rhs_code (assign);
      if (precision == ~0U
	  && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
	{
	  tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
	  scalar_mode mode;
	  tree vectype, mask_type;
	  if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
	      && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
	      && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
	      && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
	    precision = GET_MODE_BITSIZE (mode);
	}
    }
  /* For PHIs, take the narrowest mask precision of any argument whose
     definition has one, analogously to the assignment case above.  */
  else
    {
      gphi *phi = as_a <gphi *> (stmt_info->stmt);
      for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
	{
	  tree rhs = gimple_phi_arg_def (phi, i);

	  stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
	  if (!def_stmt_info)
	    /* Don't let external or constant operands influence the choice.
	       We can convert them to whichever vector type we pick.  */
	    continue;

	  if (def_stmt_info->mask_precision)
	    {
	      if (precision > def_stmt_info->mask_precision)
		precision = def_stmt_info->mask_precision;
	    }
	}
    }

  if (dump_enabled_p ())
    {
      if (precision == ~0U)
	dump_printf_loc (MSG_NOTE, vect_location,
			 "using normal nonmask vectors for %G",
			 stmt_info->stmt);
      else
	dump_printf_loc (MSG_NOTE, vect_location,
			 "using boolean precision %d for %G",
			 precision, stmt_info->stmt);
    }

  /* ~0U here means "use a normal nonmask vector type".  */
  stmt_info->mask_precision = precision;
}
   5542 
   5543 /* Handle vect_determine_precisions for STMT_INFO, given that we
   5544    have already done so for the users of its result.  */
   5545 
   5546 void
   5547 vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
   5548 {
   5549   vect_determine_min_output_precision (vinfo, stmt_info);
   5550   if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
   5551     {
   5552       vect_determine_precisions_from_range (stmt_info, stmt);
   5553       vect_determine_precisions_from_users (stmt_info, stmt);
   5554     }
   5555 }
   5556 
   5557 /* Walk backwards through the vectorizable region to determine the
   5558    values of these fields:
   5559 
   5560    - min_output_precision
   5561    - min_input_precision
   5562    - operation_precision
   5563    - operation_sign.  */
   5564 
   5565 void
   5566 vect_determine_precisions (vec_info *vinfo)
   5567 {
   5568   DUMP_VECT_SCOPE ("vect_determine_precisions");
   5569 
   5570   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
   5571     {
   5572       class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   5573       basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
   5574       unsigned int nbbs = loop->num_nodes;
   5575 
   5576       for (unsigned int i = 0; i < nbbs; i++)
   5577 	{
   5578 	  basic_block bb = bbs[i];
   5579 	  for (auto gsi = gsi_start_phis (bb);
   5580 	       !gsi_end_p (gsi); gsi_next (&gsi))
   5581 	    {
   5582 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
   5583 	      if (stmt_info)
   5584 		vect_determine_mask_precision (vinfo, stmt_info);
   5585 	    }
   5586 	  for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
   5587 	    if (!is_gimple_debug (gsi_stmt (si)))
   5588 	      vect_determine_mask_precision
   5589 		(vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
   5590 	}
   5591       for (unsigned int i = 0; i < nbbs; i++)
   5592 	{
   5593 	  basic_block bb = bbs[nbbs - i - 1];
   5594 	  for (gimple_stmt_iterator si = gsi_last_bb (bb);
   5595 	       !gsi_end_p (si); gsi_prev (&si))
   5596 	    if (!is_gimple_debug (gsi_stmt (si)))
   5597 	      vect_determine_stmt_precisions
   5598 		(vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
   5599 	  for (auto gsi = gsi_start_phis (bb);
   5600 	       !gsi_end_p (gsi); gsi_next (&gsi))
   5601 	    {
   5602 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
   5603 	      if (stmt_info)
   5604 		vect_determine_stmt_precisions (vinfo, stmt_info);
   5605 	    }
   5606 	}
   5607     }
   5608   else
   5609     {
   5610       bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
   5611       for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
   5612 	{
   5613 	  basic_block bb = bb_vinfo->bbs[i];
   5614 	  for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
   5615 	    {
   5616 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
   5617 	      if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
   5618 		vect_determine_mask_precision (vinfo, stmt_info);
   5619 	    }
   5620 	  for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
   5621 	    {
   5622 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
   5623 	      if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
   5624 		vect_determine_mask_precision (vinfo, stmt_info);
   5625 	    }
   5626 	}
   5627       for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
   5628 	{
   5629 	  for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
   5630 	       !gsi_end_p (gsi); gsi_prev (&gsi))
   5631 	    {
   5632 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
   5633 	      if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
   5634 		vect_determine_stmt_precisions (vinfo, stmt_info);
   5635 	    }
   5636 	  for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
   5637 	       !gsi_end_p (gsi); gsi_next (&gsi))
   5638 	    {
   5639 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
   5640 	      if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
   5641 		vect_determine_stmt_precisions (vinfo, stmt_info);
   5642 	    }
   5643 	}
   5644     }
   5645 }
   5646 
/* Prototype of a pattern-recognition routine: try to recognize a pattern
   rooted at the given statement and return the (last) pattern statement
   on success, storing the vector type of its result through the third
   argument; return null on failure.  */
typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);

/* A pattern-recognition routine paired with a human-readable name that
   is used in dump output.  */
struct vect_recog_func
{
  vect_recog_func_ptr fn;
  const char *name;
};
   5654 
/* Table of all pattern-recognition routines, tried in order on each
   statement.  Note that ordering matters - the first pattern matching on
   a stmt is taken, which means usually the more complex one needs to
   precede the less complex ones (widen_sum only after dot_prod or sad
   for example).  */
static vect_recog_func vect_vect_recog_func_ptrs[] = {
  { vect_recog_over_widening_pattern, "over_widening" },
  /* Must come after over_widening, which narrows the shift as much as
     possible beforehand.  */
  { vect_recog_average_pattern, "average" },
  { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
  { vect_recog_mulhs_pattern, "mult_high" },
  { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
  { vect_recog_widen_mult_pattern, "widen_mult" },
  { vect_recog_dot_prod_pattern, "dot_prod" },
  { vect_recog_sad_pattern, "sad" },
  { vect_recog_widen_sum_pattern, "widen_sum" },
  { vect_recog_pow_pattern, "pow" },
  { vect_recog_popcount_pattern, "popcount" },
  { vect_recog_widen_shift_pattern, "widen_shift" },
  { vect_recog_rotate_pattern, "rotate" },
  { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
  { vect_recog_divmod_pattern, "divmod" },
  { vect_recog_mult_pattern, "mult" },
  { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
  { vect_recog_bool_pattern, "bool" },
  /* This must come before mask conversion, and includes the parts
     of mask conversion that are needed for gather and scatter
     internal functions.  */
  { vect_recog_gather_scatter_pattern, "gather_scatter" },
  { vect_recog_mask_conversion_pattern, "mask_conversion" },
  { vect_recog_widen_plus_pattern, "widen_plus" },
  { vect_recog_widen_minus_pattern, "widen_minus" },
};

/* Number of entries in the pattern table above.  */
const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
   5689 
/* Mark statements that are involved in a pattern: install PATTERN_STMT
   (plus any statements queued in ORIG_STMT_INFO's pattern definition
   sequence) as the pattern replacement of ORIG_STMT_INFO, giving the new
   pattern statements vector type PATTERN_VECTYPE.  ORIG_STMT_INFO may
   itself be a statement inside an existing pattern definition sequence,
   in which case the new statements are spliced into that sequence in its
   place.  Finally, if the original statement was on a reduction path,
   transfer the reduction index to the appropriate pattern statement.  */

void
vect_mark_pattern_stmts (vec_info *vinfo,
			 stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
                         tree pattern_vectype)
{
  /* Remember the statement we started from; ORIG_STMT_INFO may be
     redirected below when replacing inside an existing pattern.  */
  stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
  gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);

  gimple *orig_pattern_stmt = NULL;
  if (is_pattern_stmt_p (orig_stmt_info))
    {
      /* We're replacing a statement in an existing pattern definition
	 sequence.  */
      orig_pattern_stmt = orig_stmt_info->stmt;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "replacing earlier pattern %G", orig_pattern_stmt);

      /* To keep the book-keeping simple, just swap the lhs of the
	 old and new statements, so that the old one has a valid but
	 unused lhs.  */
      tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
      gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
      gimple_set_lhs (pattern_stmt, old_lhs);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);

      /* Switch to the statement that ORIG replaces.  */
      orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);

      /* We shouldn't be replacing the main pattern statement.  */
      gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
		  != orig_pattern_stmt);
    }

  /* Register each statement of the pattern definition sequence as a
     pattern statement of ORIG_STMT_INFO with the chosen vector type.  */
  if (def_seq)
    for (gimple_stmt_iterator si = gsi_start (def_seq);
	 !gsi_end_p (si); gsi_next (&si))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "extra pattern stmt: %G", gsi_stmt (si));
	stmt_vec_info pattern_stmt_info
	  = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
				    orig_stmt_info, pattern_vectype);
	/* Stmts in the def sequence are not vectorizable cycle or
	   induction defs, instead they should all be vect_internal_def
	   feeding the main pattern stmt which retains this def type.  */
	STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
      }

  if (orig_pattern_stmt)
    {
      vect_init_pattern_stmt (vinfo, pattern_stmt,
			      orig_stmt_info, pattern_vectype);

      /* Insert all the new pattern statements before the original one.  */
      gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
      gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
					       orig_def_seq);
      gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
      gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);

      /* Remove the pattern statement that this new pattern replaces.  */
      gsi_remove (&gsi, false);
    }
  else
    /* Simple case: PATTERN_STMT replaces an ordinary statement.  */
    vect_set_pattern_stmt (vinfo,
			   pattern_stmt, orig_stmt_info, pattern_vectype);

  /* Transfer reduction path info to the pattern.  */
  if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
    {
      gimple_match_op op;
      if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
	gcc_unreachable ();
      /* The operand at the recorded reduction index of the replaced stmt.
	 NOTE(review): the op is extracted from ORIG_STMT_INFO_SAVED but the
	 index is read from ORIG_STMT_INFO; the two differ only when
	 replacing inside an existing pattern sequence — confirm this is
	 intentional.  */
      tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
      /* Search the pattern def sequence and the main pattern stmt.  Note
         we may have inserted all into a containing pattern def sequence
	 so the following is a bit awkward.  */
      gimple_stmt_iterator si;
      gimple *s;
      if (def_seq)
	{
	  si = gsi_start (def_seq);
	  s = gsi_stmt (si);
	  gsi_next (&si);
	}
      else
	{
	  si = gsi_none ();
	  s = pattern_stmt;
	}
      /* Follow LOOKFOR along the new statements, updating the reduction
	 index of each statement found to use it and then chasing its
	 lhs, until the main pattern statement has been processed.  */
      do
	{
	  bool found = false;
	  if (gimple_extract_op (s, &op))
	    for (unsigned i = 0; i < op.num_ops; ++i)
	      if (op.ops[i] == lookfor)
		{
		  STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
		  lookfor = gimple_get_lhs (s);
		  found = true;
		  break;
		}
	  if (s == pattern_stmt)
	    {
	      if (!found && dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "failed to update reduction index.\n");
	      break;
	    }
	  if (gsi_end_p (si))
	    s = pattern_stmt;
	  else
	    {
	      s = gsi_stmt (si);
	      if (s == pattern_stmt)
		/* Found the end inside a bigger pattern def seq.  */
		si = gsi_none ();
	      else
		gsi_next (&si);
	    }
	} while (1);
    }
}
   5819 
   5820 /* Function vect_pattern_recog_1
   5821 
   5822    Input:
   5823    PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
   5824         computation pattern.
   5825    STMT_INFO: A stmt from which the pattern search should start.
   5826 
   5827    If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
   5828    a sequence of statements that has the same functionality and can be
   5829    used to replace STMT_INFO.  It returns the last statement in the sequence
   5830    and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
   5831    PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
   5832    statement, having first checked that the target supports the new operation
   5833    in that type.
   5834 
   This function also does some bookkeeping, as explained in the documentation
   for vect_pattern_recog.  */
   5837 
   5838 static void
   5839 vect_pattern_recog_1 (vec_info *vinfo,
   5840 		      vect_recog_func *recog_func, stmt_vec_info stmt_info)
   5841 {
   5842   gimple *pattern_stmt;
   5843   loop_vec_info loop_vinfo;
   5844   tree pattern_vectype;
   5845 
   5846   /* If this statement has already been replaced with pattern statements,
   5847      leave the original statement alone, since the first match wins.
   5848      Instead try to match against the definition statements that feed
   5849      the main pattern statement.  */
   5850   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
   5851     {
   5852       gimple_stmt_iterator gsi;
   5853       for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
   5854 	   !gsi_end_p (gsi); gsi_next (&gsi))
   5855 	vect_pattern_recog_1 (vinfo, recog_func,
   5856 			      vinfo->lookup_stmt (gsi_stmt (gsi)));
   5857       return;
   5858     }
   5859 
   5860   gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
   5861   pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
   5862   if (!pattern_stmt)
   5863     {
   5864       /* Clear any half-formed pattern definition sequence.  */
   5865       STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
   5866       return;
   5867     }
   5868 
   5869   loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   5870 
   5871   /* Found a vectorizable pattern.  */
   5872   if (dump_enabled_p ())
   5873     dump_printf_loc (MSG_NOTE, vect_location,
   5874 		     "%s pattern recognized: %G",
   5875 		     recog_func->name, pattern_stmt);
   5876 
   5877   /* Mark the stmts that are involved in the pattern. */
   5878   vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
   5879 
   5880   /* Patterns cannot be vectorized using SLP, because they change the order of
   5881      computation.  */
   5882   if (loop_vinfo)
   5883     {
   5884       unsigned ix, ix2;
   5885       stmt_vec_info *elem_ptr;
   5886       VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
   5887 			     elem_ptr, *elem_ptr == stmt_info);
   5888     }
   5889 }
   5890 
   5891 
   5892 /* Function vect_pattern_recog
   5893 
   5894    Input:
   5895    LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
   5896         computation idioms.
   5897 
   5898    Output - for each computation idiom that is detected we create a new stmt
   5899         that provides the same functionality and that can be vectorized.  We
   5900         also record some information in the struct_stmt_info of the relevant
   5901         stmts, as explained below:
   5902 
   5903    At the entry to this function we have the following stmts, with the
   5904    following initial value in the STMT_VINFO fields:
   5905 
   5906          stmt                     in_pattern_p  related_stmt    vec_stmt
   5907          S1: a_i = ....                 -       -               -
   5908          S2: a_2 = ..use(a_i)..         -       -               -
   5909          S3: a_1 = ..use(a_2)..         -       -               -
   5910          S4: a_0 = ..use(a_1)..         -       -               -
   5911          S5: ... = ..use(a_0)..         -       -               -
   5912 
   5913    Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   5914    represented by a single stmt.  We then:
   5915    - create a new stmt S6 equivalent to the pattern (the stmt is not
   5916      inserted into the code)
   5917    - fill in the STMT_VINFO fields as follows:
   5918 
   5919                                   in_pattern_p  related_stmt    vec_stmt
   5920          S1: a_i = ....                 -       -               -
   5921          S2: a_2 = ..use(a_i)..         -       -               -
   5922          S3: a_1 = ..use(a_2)..         -       -               -
   5923          S4: a_0 = ..use(a_1)..         true    S6              -
   5924           '---> S6: a_new = ....        -       S4              -
   5925          S5: ... = ..use(a_0)..         -       -               -
   5926 
   5927    (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   5928    to each other through the RELATED_STMT field).
   5929 
   5930    S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   5931    of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   5932    remain irrelevant unless used by stmts other than S4.
   5933 
   5934    If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   5935    (because they are marked as irrelevant).  It will vectorize S6, and record
   5936    a pointer to the new vector stmt VS6 from S6 (as usual).
   5937    S4 will be skipped, and S5 will be vectorized as usual:
   5938 
   5939                                   in_pattern_p  related_stmt    vec_stmt
   5940          S1: a_i = ....                 -       -               -
   5941          S2: a_2 = ..use(a_i)..         -       -               -
   5942          S3: a_1 = ..use(a_2)..         -       -               -
   5943        > VS6: va_new = ....             -       -               -
   5944          S4: a_0 = ..use(a_1)..         true    S6              VS6
   5945           '---> S6: a_new = ....        -       S4              VS6
   5946        > VS5: ... = ..vuse(va_new)..    -       -               -
   5947          S5: ... = ..use(a_0)..         -       -               -
   5948 
   5949    DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   5950    elsewhere), and we'll end up with:
   5951 
   5952         VS6: va_new = ....
   5953         VS5: ... = ..vuse(va_new)..
   5954 
   5955    In case of more than one pattern statements, e.g., widen-mult with
   5956    intermediate type:
   5957 
   5958      S1  a_t = ;
   5959      S2  a_T = (TYPE) a_t;
   5960            '--> S3: a_it = (interm_type) a_t;
   5961      S4  prod_T = a_T * CONST;
   5962            '--> S5: prod_T' = a_it w* CONST;
   5963 
   5964    there may be other users of a_T outside the pattern.  In that case S2 will
   5965    be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   5966    and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   5967    be recorded in S3.  */
   5968 
   5969 void
   5970 vect_pattern_recog (vec_info *vinfo)
   5971 {
   5972   class loop *loop;
   5973   basic_block *bbs;
   5974   unsigned int nbbs;
   5975   gimple_stmt_iterator si;
   5976   unsigned int i, j;
   5977 
   5978   vect_determine_precisions (vinfo);
   5979 
   5980   DUMP_VECT_SCOPE ("vect_pattern_recog");
   5981 
   5982   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
   5983     {
   5984       loop = LOOP_VINFO_LOOP (loop_vinfo);
   5985       bbs = LOOP_VINFO_BBS (loop_vinfo);
   5986       nbbs = loop->num_nodes;
   5987 
   5988       /* Scan through the loop stmts, applying the pattern recognition
   5989 	 functions starting at each stmt visited:  */
   5990       for (i = 0; i < nbbs; i++)
   5991 	{
   5992 	  basic_block bb = bbs[i];
   5993 	  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
   5994 	    {
   5995 	      if (is_gimple_debug (gsi_stmt (si)))
   5996 		continue;
   5997 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
   5998 	      /* Scan over all generic vect_recog_xxx_pattern functions.  */
   5999 	      for (j = 0; j < NUM_PATTERNS; j++)
   6000 		vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
   6001 				      stmt_info);
   6002 	    }
   6003 	}
   6004     }
   6005   else
   6006     {
   6007       bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
   6008       for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
   6009 	for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
   6010 	     !gsi_end_p (gsi); gsi_next (&gsi))
   6011 	  {
   6012 	    stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
   6013 	    if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
   6014 	      continue;
   6015 
   6016 	    /* Scan over all generic vect_recog_xxx_pattern functions.  */
   6017 	    for (j = 0; j < NUM_PATTERNS; j++)
   6018 	      vect_pattern_recog_1 (vinfo,
   6019 				    &vect_vect_recog_func_ptrs[j], stmt_info);
   6020 	  }
   6021     }
   6022 
   6023   /* After this no more add_stmt calls are allowed.  */
   6024   vinfo->stmt_vec_info_ro = true;
   6025 }
   6026