Home | History | Annotate | Line # | Download | only in gcc
tree-vect-patterns.cc revision 1.1
      1  1.1  mrg /* Analysis Utilities for Loop Vectorization.
      2  1.1  mrg    Copyright (C) 2006-2022 Free Software Foundation, Inc.
      3  1.1  mrg    Contributed by Dorit Nuzman <dorit (at) il.ibm.com>
      4  1.1  mrg 
      5  1.1  mrg This file is part of GCC.
      6  1.1  mrg 
      7  1.1  mrg GCC is free software; you can redistribute it and/or modify it under
      8  1.1  mrg the terms of the GNU General Public License as published by the Free
      9  1.1  mrg Software Foundation; either version 3, or (at your option) any later
     10  1.1  mrg version.
     11  1.1  mrg 
     12  1.1  mrg GCC is distributed in the hope that it will be useful, but WITHOUT ANY
     13  1.1  mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or
     14  1.1  mrg FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     15  1.1  mrg for more details.
     16  1.1  mrg 
     17  1.1  mrg You should have received a copy of the GNU General Public License
     18  1.1  mrg along with GCC; see the file COPYING3.  If not see
     19  1.1  mrg <http://www.gnu.org/licenses/>.  */
     20  1.1  mrg 
     21  1.1  mrg #include "config.h"
     22  1.1  mrg #include "system.h"
     23  1.1  mrg #include "coretypes.h"
     24  1.1  mrg #include "backend.h"
     25  1.1  mrg #include "rtl.h"
     26  1.1  mrg #include "tree.h"
     27  1.1  mrg #include "gimple.h"
     28  1.1  mrg #include "ssa.h"
     29  1.1  mrg #include "expmed.h"
     30  1.1  mrg #include "optabs-tree.h"
     31  1.1  mrg #include "insn-config.h"
     32  1.1  mrg #include "recog.h"		/* FIXME: for insn_data */
     33  1.1  mrg #include "fold-const.h"
     34  1.1  mrg #include "stor-layout.h"
     35  1.1  mrg #include "tree-eh.h"
     36  1.1  mrg #include "gimplify.h"
     37  1.1  mrg #include "gimple-iterator.h"
     38  1.1  mrg #include "cfgloop.h"
     39  1.1  mrg #include "tree-vectorizer.h"
     40  1.1  mrg #include "dumpfile.h"
     41  1.1  mrg #include "builtins.h"
     42  1.1  mrg #include "internal-fn.h"
     43  1.1  mrg #include "case-cfn-macros.h"
     44  1.1  mrg #include "fold-const-call.h"
     45  1.1  mrg #include "attribs.h"
     46  1.1  mrg #include "cgraph.h"
     47  1.1  mrg #include "omp-simd-clone.h"
     48  1.1  mrg #include "predict.h"
     49  1.1  mrg #include "tree-vector-builder.h"
     50  1.1  mrg #include "vec-perm-indices.h"
     51  1.1  mrg #include "gimple-range.h"
     52  1.1  mrg 
     53  1.1  mrg /* Return true if we have a useful VR_RANGE range for VAR, storing it
     54  1.1  mrg    in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.  */
     55  1.1  mrg 
     56  1.1  mrg static bool
     57  1.1  mrg vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
     58  1.1  mrg {
     59  1.1  mrg   value_range vr;
     60  1.1  mrg   get_range_query (cfun)->range_of_expr (vr, var);
     61  1.1  mrg   if (vr.undefined_p ())
     62  1.1  mrg     vr.set_varying (TREE_TYPE (var));
     63  1.1  mrg   *min_value = wi::to_wide (vr.min ());
     64  1.1  mrg   *max_value = wi::to_wide (vr.max ());
     65  1.1  mrg   value_range_kind vr_type = vr.kind ();
     66  1.1  mrg   wide_int nonzero = get_nonzero_bits (var);
     67  1.1  mrg   signop sgn = TYPE_SIGN (TREE_TYPE (var));
     68  1.1  mrg   if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
     69  1.1  mrg 					 nonzero, sgn) == VR_RANGE)
     70  1.1  mrg     {
     71  1.1  mrg       if (dump_enabled_p ())
     72  1.1  mrg 	{
     73  1.1  mrg 	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
     74  1.1  mrg 	  dump_printf (MSG_NOTE, " has range [");
     75  1.1  mrg 	  dump_hex (MSG_NOTE, *min_value);
     76  1.1  mrg 	  dump_printf (MSG_NOTE, ", ");
     77  1.1  mrg 	  dump_hex (MSG_NOTE, *max_value);
     78  1.1  mrg 	  dump_printf (MSG_NOTE, "]\n");
     79  1.1  mrg 	}
     80  1.1  mrg       return true;
     81  1.1  mrg     }
     82  1.1  mrg   else
     83  1.1  mrg     {
     84  1.1  mrg       if (dump_enabled_p ())
     85  1.1  mrg 	{
     86  1.1  mrg 	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
     87  1.1  mrg 	  dump_printf (MSG_NOTE, " has no range info\n");
     88  1.1  mrg 	}
     89  1.1  mrg       return false;
     90  1.1  mrg     }
     91  1.1  mrg }
     92  1.1  mrg 
     93  1.1  mrg /* Report that we've found an instance of pattern PATTERN in
     94  1.1  mrg    statement STMT.  */
     95  1.1  mrg 
     96  1.1  mrg static void
     97  1.1  mrg vect_pattern_detected (const char *name, gimple *stmt)
     98  1.1  mrg {
     99  1.1  mrg   if (dump_enabled_p ())
    100  1.1  mrg     dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
    101  1.1  mrg }
    102  1.1  mrg 
    103  1.1  mrg /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
    104  1.1  mrg    return the pattern statement's stmt_vec_info.  Set its vector type to
    105  1.1  mrg    VECTYPE if it doesn't have one already.  */
    106  1.1  mrg 
    107  1.1  mrg static stmt_vec_info
    108  1.1  mrg vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
    109  1.1  mrg 			stmt_vec_info orig_stmt_info, tree vectype)
    110  1.1  mrg {
    111  1.1  mrg   stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
    112  1.1  mrg   if (pattern_stmt_info == NULL)
    113  1.1  mrg     pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
    114  1.1  mrg   gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
    115  1.1  mrg 
    116  1.1  mrg   pattern_stmt_info->pattern_stmt_p = true;
    117  1.1  mrg   STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
    118  1.1  mrg   STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    119  1.1  mrg     = STMT_VINFO_DEF_TYPE (orig_stmt_info);
    120  1.1  mrg   if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
    121  1.1  mrg     {
    122  1.1  mrg       gcc_assert (!vectype
    123  1.1  mrg 		  || (VECTOR_BOOLEAN_TYPE_P (vectype)
    124  1.1  mrg 		      == vect_use_mask_type_p (orig_stmt_info)));
    125  1.1  mrg       STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
    126  1.1  mrg       pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
    127  1.1  mrg     }
    128  1.1  mrg   return pattern_stmt_info;
    129  1.1  mrg }
    130  1.1  mrg 
    131  1.1  mrg /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
    132  1.1  mrg    Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
    133  1.1  mrg    have one already.  */
    134  1.1  mrg 
    135  1.1  mrg static void
    136  1.1  mrg vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
    137  1.1  mrg 		       stmt_vec_info orig_stmt_info, tree vectype)
    138  1.1  mrg {
    139  1.1  mrg   STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
    140  1.1  mrg   STMT_VINFO_RELATED_STMT (orig_stmt_info)
    141  1.1  mrg     = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
    142  1.1  mrg }
    143  1.1  mrg 
    144  1.1  mrg /* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
    145  1.1  mrg    is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
    146  1.1  mrg    be different from the vector type of the final pattern statement.
    147  1.1  mrg    If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
    148  1.1  mrg    from which it was derived.  */
    149  1.1  mrg 
    150  1.1  mrg static inline void
    151  1.1  mrg append_pattern_def_seq (vec_info *vinfo,
    152  1.1  mrg 			stmt_vec_info stmt_info, gimple *new_stmt,
    153  1.1  mrg 			tree vectype = NULL_TREE,
    154  1.1  mrg 			tree scalar_type_for_mask = NULL_TREE)
    155  1.1  mrg {
    156  1.1  mrg   gcc_assert (!scalar_type_for_mask
    157  1.1  mrg 	      == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
    158  1.1  mrg   if (vectype)
    159  1.1  mrg     {
    160  1.1  mrg       stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
    161  1.1  mrg       STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    162  1.1  mrg       if (scalar_type_for_mask)
    163  1.1  mrg 	new_stmt_info->mask_precision
    164  1.1  mrg 	  = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
    165  1.1  mrg     }
    166  1.1  mrg   gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
    167  1.1  mrg 				      new_stmt);
    168  1.1  mrg }
    169  1.1  mrg 
    170  1.1  mrg /* The caller wants to perform new operations on vect_external variable
    171  1.1  mrg    VAR, so that the result of the operations would also be vect_external.
    172  1.1  mrg    Return the edge on which the operations can be performed, if one exists.
    173  1.1  mrg    Return null if the operations should instead be treated as part of
    174  1.1  mrg    the pattern that needs them.  */
    175  1.1  mrg 
    176  1.1  mrg static edge
    177  1.1  mrg vect_get_external_def_edge (vec_info *vinfo, tree var)
    178  1.1  mrg {
    179  1.1  mrg   edge e = NULL;
    180  1.1  mrg   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    181  1.1  mrg     {
    182  1.1  mrg       e = loop_preheader_edge (loop_vinfo->loop);
    183  1.1  mrg       if (!SSA_NAME_IS_DEFAULT_DEF (var))
    184  1.1  mrg 	{
    185  1.1  mrg 	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
    186  1.1  mrg 	  if (bb == NULL
    187  1.1  mrg 	      || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
    188  1.1  mrg 	    e = NULL;
    189  1.1  mrg 	}
    190  1.1  mrg     }
    191  1.1  mrg   return e;
    192  1.1  mrg }
    193  1.1  mrg 
    194  1.1  mrg /* Return true if the target supports a vector version of CODE,
    195  1.1  mrg    where CODE is known to map to a direct optab with the given SUBTYPE.
    196  1.1  mrg    ITYPE specifies the type of (some of) the scalar inputs and OTYPE
    197  1.1  mrg    specifies the type of the scalar result.
    198  1.1  mrg 
    199  1.1  mrg    If CODE allows the inputs and outputs to have different type
    200  1.1  mrg    (such as for WIDEN_SUM_EXPR), it is the input mode rather
    201  1.1  mrg    than the output mode that determines the appropriate target pattern.
    202  1.1  mrg    Operand 0 of the target pattern then specifies the mode that the output
    203  1.1  mrg    must have.
    204  1.1  mrg 
    205  1.1  mrg    When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
    206  1.1  mrg    Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
    207  1.1  mrg    is nonnull.  */
    208  1.1  mrg 
    209  1.1  mrg static bool
    210  1.1  mrg vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
    211  1.1  mrg 				 tree itype, tree *vecotype_out,
    212  1.1  mrg 				 tree *vecitype_out = NULL,
    213  1.1  mrg 				 enum optab_subtype subtype = optab_default)
    214  1.1  mrg {
    215  1.1  mrg   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
    216  1.1  mrg   if (!vecitype)
    217  1.1  mrg     return false;
    218  1.1  mrg 
    219  1.1  mrg   tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
    220  1.1  mrg   if (!vecotype)
    221  1.1  mrg     return false;
    222  1.1  mrg 
    223  1.1  mrg   optab optab = optab_for_tree_code (code, vecitype, subtype);
    224  1.1  mrg   if (!optab)
    225  1.1  mrg     return false;
    226  1.1  mrg 
    227  1.1  mrg   insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
    228  1.1  mrg   if (icode == CODE_FOR_nothing
    229  1.1  mrg       || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
    230  1.1  mrg     return false;
    231  1.1  mrg 
    232  1.1  mrg   *vecotype_out = vecotype;
    233  1.1  mrg   if (vecitype_out)
    234  1.1  mrg     *vecitype_out = vecitype;
    235  1.1  mrg   return true;
    236  1.1  mrg }
    237  1.1  mrg 
    238  1.1  mrg /* Round bit precision PRECISION up to a full element.  */
    239  1.1  mrg 
    240  1.1  mrg static unsigned int
    241  1.1  mrg vect_element_precision (unsigned int precision)
    242  1.1  mrg {
    243  1.1  mrg   precision = 1 << ceil_log2 (precision);
    244  1.1  mrg   return MAX (precision, BITS_PER_UNIT);
    245  1.1  mrg }
    246  1.1  mrg 
    247  1.1  mrg /* If OP is defined by a statement that's being considered for vectorization,
    248  1.1  mrg    return information about that statement, otherwise return NULL.  */
    249  1.1  mrg 
    250  1.1  mrg static stmt_vec_info
    251  1.1  mrg vect_get_internal_def (vec_info *vinfo, tree op)
    252  1.1  mrg {
    253  1.1  mrg   stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
    254  1.1  mrg   if (def_stmt_info
    255  1.1  mrg       && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
    256  1.1  mrg     return def_stmt_info;
    257  1.1  mrg   return NULL;
    258  1.1  mrg }
    259  1.1  mrg 
    260  1.1  mrg /* Check whether NAME, an ssa-name used in STMT_VINFO,
    261  1.1  mrg    is a result of a type promotion, such that:
    262  1.1  mrg      DEF_STMT: NAME = NOP (name0)
    263  1.1  mrg    If CHECK_SIGN is TRUE, check that either both types are signed or both are
    264  1.1  mrg    unsigned.  */
    265  1.1  mrg 
    266  1.1  mrg static bool
    267  1.1  mrg type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
    268  1.1  mrg 		   tree *orig_type, gimple **def_stmt, bool *promotion)
    269  1.1  mrg {
    270  1.1  mrg   tree type = TREE_TYPE (name);
    271  1.1  mrg   tree oprnd0;
    272  1.1  mrg   enum vect_def_type dt;
    273  1.1  mrg 
    274  1.1  mrg   stmt_vec_info def_stmt_info;
    275  1.1  mrg   if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
    276  1.1  mrg     return false;
    277  1.1  mrg 
    278  1.1  mrg   if (dt != vect_internal_def
    279  1.1  mrg       && dt != vect_external_def && dt != vect_constant_def)
    280  1.1  mrg     return false;
    281  1.1  mrg 
    282  1.1  mrg   if (!*def_stmt)
    283  1.1  mrg     return false;
    284  1.1  mrg 
    285  1.1  mrg   if (!is_gimple_assign (*def_stmt))
    286  1.1  mrg     return false;
    287  1.1  mrg 
    288  1.1  mrg   if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    289  1.1  mrg     return false;
    290  1.1  mrg 
    291  1.1  mrg   oprnd0 = gimple_assign_rhs1 (*def_stmt);
    292  1.1  mrg 
    293  1.1  mrg   *orig_type = TREE_TYPE (oprnd0);
    294  1.1  mrg   if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
    295  1.1  mrg       || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    296  1.1  mrg     return false;
    297  1.1  mrg 
    298  1.1  mrg   if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    299  1.1  mrg     *promotion = true;
    300  1.1  mrg   else
    301  1.1  mrg     *promotion = false;
    302  1.1  mrg 
    303  1.1  mrg   if (!vect_is_simple_use (oprnd0, vinfo, &dt))
    304  1.1  mrg     return false;
    305  1.1  mrg 
    306  1.1  mrg   return true;
    307  1.1  mrg }
    308  1.1  mrg 
/* Holds information about an input operand after some sign changes
   and type promotions have been peeled away.  */
class vect_unpromoted_value {
public:
  vect_unpromoted_value ();

  /* Record the operand, its definition type and (optionally) the
     statement that casts it; see set_op's definition below.  */
  void set_op (tree, vect_def_type, stmt_vec_info = NULL);

  /* The value obtained after peeling away zero or more casts.  */
  tree op;

  /* The type of OP.  */
  tree type;

  /* The definition type of OP.  */
  vect_def_type dt;

  /* If OP is the result of peeling at least one cast, and if the cast
     of OP itself is a vectorizable statement, CASTER identifies that
     statement, otherwise it is null.  */
  stmt_vec_info caster;
};
    331  1.1  mrg 
/* Start out with no operand recorded; all fields stay cleared until
   set_op is called.  */

inline vect_unpromoted_value::vect_unpromoted_value ()
  : op (NULL_TREE),
    type (NULL_TREE),
    dt (vect_uninitialized_def),
    caster (NULL)
{
}
    339  1.1  mrg 
    340  1.1  mrg /* Set the operand to OP_IN, its definition type to DT_IN, and the
    341  1.1  mrg    statement that casts it to CASTER_IN.  */
    342  1.1  mrg 
    343  1.1  mrg inline void
    344  1.1  mrg vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
    345  1.1  mrg 			       stmt_vec_info caster_in)
    346  1.1  mrg {
    347  1.1  mrg   op = op_in;
    348  1.1  mrg   type = TREE_TYPE (op);
    349  1.1  mrg   dt = dt_in;
    350  1.1  mrg   caster = caster_in;
    351  1.1  mrg }
    352  1.1  mrg 
/* If OP is a vectorizable SSA name, strip a sequence of integer conversions
   to reach some vectorizable inner operand OP', continuing as long as it
   is possible to convert OP' back to OP using a possible sign change
   followed by a possible promotion P.  Return this OP', or null if OP is
   not a vectorizable SSA name.  If there is a promotion P, describe its
   input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
   is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
   have more than one user.

   A successful return means that it is possible to go from OP' to OP
   via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
   whereas the cast from UNPROM to OP might be a promotion, a sign
   change, or a nop.

   E.g. say we have:

       signed short *ptr = ...;
       signed short C = *ptr;
       unsigned short B = (unsigned short) C;    // sign change
       signed int A = (signed int) B;            // unsigned promotion
       ...possible other uses of A...
       unsigned int OP = (unsigned int) A;       // sign change

   In this case it's possible to go directly from C to OP using:

       OP = (unsigned int) (unsigned short) C;
	    +------------+ +--------------+
	       promotion      sign change

   so OP' would be C.  The input to the promotion is B, so UNPROM
   would describe B.  */

static tree
vect_look_through_possible_promotion (vec_info *vinfo, tree op,
				      vect_unpromoted_value *unprom,
				      bool *single_use_p = NULL)
{
  /* RES tracks the last OP that the walk has validated as reachable;
     MIN_PRECISION tracks the narrowest precision seen so far, so that
     the loop only steps past demotions (never past a widening back
     up).  CASTER remembers the statement that cast the current OP.  */
  tree res = NULL_TREE;
  tree op_type = TREE_TYPE (op);
  unsigned int orig_precision = TYPE_PRECISION (op_type);
  unsigned int min_precision = orig_precision;
  stmt_vec_info caster = NULL;
  while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
    {
      /* See whether OP is simple enough to vectorize.  */
      stmt_vec_info def_stmt_info;
      gimple *def_stmt;
      vect_def_type dt;
      if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
	break;

      /* If OP is the input of a demotion, skip over it to see whether
	 OP is itself the result of a promotion.  If so, the combined
	 effect of the promotion and the demotion might fit the required
	 pattern, otherwise neither operation fits.

	 This copes with cases such as the result of an arithmetic
	 operation being truncated before being stored, and where that
	 arithmetic operation has been recognized as an over-widened one.  */
      if (TYPE_PRECISION (op_type) <= min_precision)
	{
	  /* Use OP as the UNPROM described above if we haven't yet
	     found a promotion, or if using the new input preserves the
	     sign of the previous promotion.  */
	  if (!res
	      || TYPE_PRECISION (unprom->type) == orig_precision
	      || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
	    {
	      unprom->set_op (op, dt, caster);
	      min_precision = TYPE_PRECISION (op_type);
	    }
	  /* Stop if we've already seen a promotion and if this
	     conversion does more than change the sign.  */
	  else if (TYPE_PRECISION (op_type)
		   != TYPE_PRECISION (unprom->type))
	    break;

	  /* The sequence now extends to OP.  */
	  res = op;
	}

      /* See whether OP is defined by a cast.  Record it as CASTER if
	 the cast is potentially vectorizable.  */
      if (!def_stmt)
	break;
      caster = def_stmt_info;

      /* Ignore pattern statements, since we don't link uses for them.  */
      if (caster
	  && single_use_p
	  && !STMT_VINFO_RELATED_STMT (caster)
	  && !has_single_use (res))
	*single_use_p = false;

      /* Only integer conversions can be looked through; any other
	 definition ends the walk.  */
      gassign *assign = dyn_cast <gassign *> (def_stmt);
      if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
	break;

      /* Continue with the input to the cast.  */
      op = gimple_assign_rhs1 (def_stmt);
      op_type = TREE_TYPE (op);
    }
  return res;
}
    457  1.1  mrg 
    458  1.1  mrg /* OP is an integer operand to an operation that returns TYPE, and we
    459  1.1  mrg    want to treat the operation as a widening one.  So far we can treat
    460  1.1  mrg    it as widening from *COMMON_TYPE.
    461  1.1  mrg 
    462  1.1  mrg    Return true if OP is suitable for such a widening operation,
    463  1.1  mrg    either widening from *COMMON_TYPE or from some supertype of it.
    464  1.1  mrg    Update *COMMON_TYPE to the supertype in the latter case.
    465  1.1  mrg 
    466  1.1  mrg    SHIFT_P is true if OP is a shift amount.  */
    467  1.1  mrg 
    468  1.1  mrg static bool
    469  1.1  mrg vect_joust_widened_integer (tree type, bool shift_p, tree op,
    470  1.1  mrg 			    tree *common_type)
    471  1.1  mrg {
    472  1.1  mrg   /* Calculate the minimum precision required by OP, without changing
    473  1.1  mrg      the sign of either operand.  */
    474  1.1  mrg   unsigned int precision;
    475  1.1  mrg   if (shift_p)
    476  1.1  mrg     {
    477  1.1  mrg       if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
    478  1.1  mrg 	return false;
    479  1.1  mrg       precision = TREE_INT_CST_LOW (op);
    480  1.1  mrg     }
    481  1.1  mrg   else
    482  1.1  mrg     {
    483  1.1  mrg       precision = wi::min_precision (wi::to_widest (op),
    484  1.1  mrg 				     TYPE_SIGN (*common_type));
    485  1.1  mrg       if (precision * 2 > TYPE_PRECISION (type))
    486  1.1  mrg 	return false;
    487  1.1  mrg     }
    488  1.1  mrg 
    489  1.1  mrg   /* If OP requires a wider type, switch to that type.  The checks
    490  1.1  mrg      above ensure that this is still narrower than the result.  */
    491  1.1  mrg   precision = vect_element_precision (precision);
    492  1.1  mrg   if (TYPE_PRECISION (*common_type) < precision)
    493  1.1  mrg     *common_type = build_nonstandard_integer_type
    494  1.1  mrg       (precision, TYPE_UNSIGNED (*common_type));
    495  1.1  mrg   return true;
    496  1.1  mrg }
    497  1.1  mrg 
    498  1.1  mrg /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
    499  1.1  mrg    is narrower than type, storing the supertype in *COMMON_TYPE if so.  */
    500  1.1  mrg 
    501  1.1  mrg static bool
    502  1.1  mrg vect_joust_widened_type (tree type, tree new_type, tree *common_type)
    503  1.1  mrg {
    504  1.1  mrg   if (types_compatible_p (*common_type, new_type))
    505  1.1  mrg     return true;
    506  1.1  mrg 
    507  1.1  mrg   /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
    508  1.1  mrg   if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
    509  1.1  mrg       && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
    510  1.1  mrg     return true;
    511  1.1  mrg 
    512  1.1  mrg   /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
    513  1.1  mrg   if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
    514  1.1  mrg       && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
    515  1.1  mrg     {
    516  1.1  mrg       *common_type = new_type;
    517  1.1  mrg       return true;
    518  1.1  mrg     }
    519  1.1  mrg 
    520  1.1  mrg   /* We have mismatched signs, with the signed type being
    521  1.1  mrg      no wider than the unsigned type.  In this case we need
    522  1.1  mrg      a wider signed type.  */
    523  1.1  mrg   unsigned int precision = MAX (TYPE_PRECISION (*common_type),
    524  1.1  mrg 				TYPE_PRECISION (new_type));
    525  1.1  mrg   precision *= 2;
    526  1.1  mrg 
    527  1.1  mrg   if (precision * 2 > TYPE_PRECISION (type))
    528  1.1  mrg     return false;
    529  1.1  mrg 
    530  1.1  mrg   *common_type = build_nonstandard_integer_type (precision, false);
    531  1.1  mrg   return true;
    532  1.1  mrg }
    533  1.1  mrg 
/* Check whether STMT_INFO can be viewed as a tree of integer operations
   in which each node either performs CODE or WIDENED_CODE, and where
   each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
   specifies the maximum number of leaf operands.  SHIFT_P says whether
   CODE and WIDENED_CODE are some sort of shift.

   If STMT_INFO is such a tree, return the number of leaf operands
   and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
   to a type that (a) is narrower than the result of STMT_INFO and
   (b) can hold all leaf operand values.

   If SUBTYPE then allow that the signs of the operands
   may differ in signs but not in precision.  SUBTYPE is updated to reflect
   this.

   Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
   exists.  */

static unsigned int
vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
		      tree_code widened_code, bool shift_p,
		      unsigned int max_nops,
		      vect_unpromoted_value *unprom, tree *common_type,
		      enum optab_subtype *subtype = NULL)
{
  /* Check for an integer operation with the right code.  */
  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign)
    return 0;

  tree_code rhs_code = gimple_assign_rhs_code (assign);
  if (rhs_code != code && rhs_code != widened_code)
    return 0;

  tree type = TREE_TYPE (gimple_assign_lhs (assign));
  if (!INTEGRAL_TYPE_P (type))
    return 0;

  /* Assume that both operands will be leaf operands.  */
  max_nops -= 2;

  /* Check the operands.  NEXT_OP indexes the next free slot in UNPROM;
     NOPS counts how many leaves the current operand contributes.  */
  unsigned int next_op = 0;
  for (unsigned int i = 0; i < 2; ++i)
    {
      vect_unpromoted_value *this_unprom = &unprom[next_op];
      unsigned int nops = 1;
      tree op = gimple_op (assign, i + 1);
      if (i == 1 && TREE_CODE (op) == INTEGER_CST)
	{
	  /* We already have a common type from earlier operands.
	     Update it to account for OP.  */
	  this_unprom->set_op (op, vect_constant_def);
	  if (!vect_joust_widened_integer (type, shift_p, op, common_type))
	    return 0;
	}
      else
	{
	  /* Only allow shifts by constants.  */
	  if (shift_p && i == 1)
	    return 0;

	  if (rhs_code != code)
	    {
	      /* If rhs_code is widened_code, don't look through further
		 possible promotions, there is a promotion already embedded
		 in the WIDEN_*_EXPR.  */
	      if (TREE_CODE (op) != SSA_NAME
		  || !INTEGRAL_TYPE_P (TREE_TYPE (op)))
		return 0;

	      stmt_vec_info def_stmt_info;
	      gimple *def_stmt;
	      vect_def_type dt;
	      if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info,
				       &def_stmt))
		return 0;
	      this_unprom->set_op (op, dt, NULL);
	    }
	  else if (!vect_look_through_possible_promotion (vinfo, op,
							  this_unprom))
	    return 0;

	  if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
	    {
	      /* The operand isn't widened.  If STMT_INFO has the code
		 for an unwidened operation, recursively check whether
		 this operand is a node of the tree.  */
	      if (rhs_code != code
		  || max_nops == 0
		  || this_unprom->dt != vect_internal_def)
		return 0;

	      /* Give back the leaf slot allocated above now that we're
		 not treating this as a leaf operand.  */
	      max_nops += 1;

	      /* Recursively process the definition of the operand.  */
	      stmt_vec_info def_stmt_info
		= vinfo->lookup_def (this_unprom->op);
	      nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
					   widened_code, shift_p, max_nops,
					   this_unprom, common_type,
					   subtype);
	      if (nops == 0)
		return 0;

	      max_nops -= nops;
	    }
	  else
	    {
	      /* Make sure that the operand is narrower than the result.  */
	      if (TYPE_PRECISION (this_unprom->type) * 2
		  > TYPE_PRECISION (type))
		return 0;

	      /* Update COMMON_TYPE for the new operand.  */
	      if (i == 0)
		*common_type = this_unprom->type;
	      else if (!vect_joust_widened_type (type, this_unprom->type,
						 common_type))
		{
		  if (subtype)
		    {
		      /* See if we can sign extend the smaller type.  */
		      if (TYPE_PRECISION (this_unprom->type)
			  > TYPE_PRECISION (*common_type))
			*common_type = this_unprom->type;
		      *subtype = optab_vector_mixed_sign;
		    }
		  else
		    return 0;
		}
	    }
	}
      next_op += nops;
    }
  return next_op;
}
    673  1.1  mrg 
    674  1.1  mrg /* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
    675  1.1  mrg    is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
    676  1.1  mrg 
    677  1.1  mrg static tree
    678  1.1  mrg vect_recog_temp_ssa_var (tree type, gimple *stmt)
    679  1.1  mrg {
    680  1.1  mrg   return make_temp_ssa_name (type, stmt, "patt");
    681  1.1  mrg }
    682  1.1  mrg 
    683  1.1  mrg /* STMT2_INFO describes a type conversion that could be split into STMT1
    684  1.1  mrg    followed by a version of STMT2_INFO that takes NEW_RHS as its first
    685  1.1  mrg    input.  Try to do this using pattern statements, returning true on
    686  1.1  mrg    success.  */
    687  1.1  mrg 
    688  1.1  mrg static bool
    689  1.1  mrg vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
    690  1.1  mrg 		      gimple *stmt1, tree vectype)
    691  1.1  mrg {
    692  1.1  mrg   if (is_pattern_stmt_p (stmt2_info))
    693  1.1  mrg     {
    694  1.1  mrg       /* STMT2_INFO is part of a pattern.  Get the statement to which
    695  1.1  mrg 	 the pattern is attached.  */
    696  1.1  mrg       stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
    697  1.1  mrg       vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
    698  1.1  mrg 
    699  1.1  mrg       if (dump_enabled_p ())
    700  1.1  mrg 	dump_printf_loc (MSG_NOTE, vect_location,
    701  1.1  mrg 			 "Splitting pattern statement: %G", stmt2_info->stmt);
    702  1.1  mrg 
    703  1.1  mrg       /* Since STMT2_INFO is a pattern statement, we can change it
    704  1.1  mrg 	 in-situ without worrying about changing the code for the
    705  1.1  mrg 	 containing block.  */
    706  1.1  mrg       gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
    707  1.1  mrg 
    708  1.1  mrg       if (dump_enabled_p ())
    709  1.1  mrg 	{
    710  1.1  mrg 	  dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
    711  1.1  mrg 	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
    712  1.1  mrg 			   stmt2_info->stmt);
    713  1.1  mrg 	}
    714  1.1  mrg 
    715  1.1  mrg       gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
    716  1.1  mrg       if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
    717  1.1  mrg 	/* STMT2_INFO is the actual pattern statement.  Add STMT1
    718  1.1  mrg 	   to the end of the definition sequence.  */
    719  1.1  mrg 	gimple_seq_add_stmt_without_update (def_seq, stmt1);
    720  1.1  mrg       else
    721  1.1  mrg 	{
    722  1.1  mrg 	  /* STMT2_INFO belongs to the definition sequence.  Insert STMT1
    723  1.1  mrg 	     before it.  */
    724  1.1  mrg 	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
    725  1.1  mrg 	  gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
    726  1.1  mrg 	}
    727  1.1  mrg       return true;
    728  1.1  mrg     }
    729  1.1  mrg   else
    730  1.1  mrg     {
    731  1.1  mrg       /* STMT2_INFO doesn't yet have a pattern.  Try to create a
    732  1.1  mrg 	 two-statement pattern now.  */
    733  1.1  mrg       gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
    734  1.1  mrg       tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
    735  1.1  mrg       tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
    736  1.1  mrg       if (!lhs_vectype)
    737  1.1  mrg 	return false;
    738  1.1  mrg 
    739  1.1  mrg       if (dump_enabled_p ())
    740  1.1  mrg 	dump_printf_loc (MSG_NOTE, vect_location,
    741  1.1  mrg 			 "Splitting statement: %G", stmt2_info->stmt);
    742  1.1  mrg 
    743  1.1  mrg       /* Add STMT1 as a singleton pattern definition sequence.  */
    744  1.1  mrg       gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
    745  1.1  mrg       vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
    746  1.1  mrg       gimple_seq_add_stmt_without_update (def_seq, stmt1);
    747  1.1  mrg 
    748  1.1  mrg       /* Build the second of the two pattern statements.  */
    749  1.1  mrg       tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
    750  1.1  mrg       gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
    751  1.1  mrg       vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
    752  1.1  mrg 
    753  1.1  mrg       if (dump_enabled_p ())
    754  1.1  mrg 	{
    755  1.1  mrg 	  dump_printf_loc (MSG_NOTE, vect_location,
    756  1.1  mrg 			   "into pattern statements: %G", stmt1);
    757  1.1  mrg 	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
    758  1.1  mrg 	}
    759  1.1  mrg 
    760  1.1  mrg       return true;
    761  1.1  mrg     }
    762  1.1  mrg }
    763  1.1  mrg 
    764  1.1  mrg /* Convert UNPROM to TYPE and return the result, adding new statements
    765  1.1  mrg    to STMT_INFO's pattern definition statements if no better way is
    766  1.1  mrg    available.  VECTYPE is the vector form of TYPE.
    767  1.1  mrg 
    768  1.1  mrg    If SUBTYPE then convert the type based on the subtype.  */
    769  1.1  mrg 
    770  1.1  mrg static tree
    771  1.1  mrg vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
    772  1.1  mrg 		    vect_unpromoted_value *unprom, tree vectype,
    773  1.1  mrg 		    enum optab_subtype subtype = optab_default)
    774  1.1  mrg {
    775  1.1  mrg 
    776  1.1  mrg   /* Update the type if the signs differ.  */
    777  1.1  mrg   if (subtype == optab_vector_mixed_sign
    778  1.1  mrg       && TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (unprom->op)))
    779  1.1  mrg     type = build_nonstandard_integer_type (TYPE_PRECISION (type),
    780  1.1  mrg 					   TYPE_SIGN (unprom->type));
    781  1.1  mrg 
    782  1.1  mrg   /* Check for a no-op conversion.  */
    783  1.1  mrg   if (types_compatible_p (type, TREE_TYPE (unprom->op)))
    784  1.1  mrg     return unprom->op;
    785  1.1  mrg 
    786  1.1  mrg   /* Allow the caller to create constant vect_unpromoted_values.  */
    787  1.1  mrg   if (TREE_CODE (unprom->op) == INTEGER_CST)
    788  1.1  mrg     return wide_int_to_tree (type, wi::to_widest (unprom->op));
    789  1.1  mrg 
    790  1.1  mrg   tree input = unprom->op;
    791  1.1  mrg   if (unprom->caster)
    792  1.1  mrg     {
    793  1.1  mrg       tree lhs = gimple_get_lhs (unprom->caster->stmt);
    794  1.1  mrg       tree lhs_type = TREE_TYPE (lhs);
    795  1.1  mrg 
    796  1.1  mrg       /* If the result of the existing cast is the right width, use it
    797  1.1  mrg 	 instead of the source of the cast.  */
    798  1.1  mrg       if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
    799  1.1  mrg 	input = lhs;
    800  1.1  mrg       /* If the precision we want is between the source and result
    801  1.1  mrg 	 precisions of the existing cast, try splitting the cast into
    802  1.1  mrg 	 two and tapping into a mid-way point.  */
    803  1.1  mrg       else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
    804  1.1  mrg 	       && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
    805  1.1  mrg 	{
    806  1.1  mrg 	  /* In order to preserve the semantics of the original cast,
    807  1.1  mrg 	     give the mid-way point the same signedness as the input value.
    808  1.1  mrg 
    809  1.1  mrg 	     It would be possible to use a signed type here instead if
    810  1.1  mrg 	     TYPE is signed and UNPROM->TYPE is unsigned, but that would
    811  1.1  mrg 	     make the sign of the midtype sensitive to the order in
    812  1.1  mrg 	     which we process the statements, since the signedness of
    813  1.1  mrg 	     TYPE is the signedness required by just one of possibly
    814  1.1  mrg 	     many users.  Also, unsigned promotions are usually as cheap
    815  1.1  mrg 	     as or cheaper than signed ones, so it's better to keep an
    816  1.1  mrg 	     unsigned promotion.  */
    817  1.1  mrg 	  tree midtype = build_nonstandard_integer_type
    818  1.1  mrg 	    (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
    819  1.1  mrg 	  tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
    820  1.1  mrg 	  if (vec_midtype)
    821  1.1  mrg 	    {
    822  1.1  mrg 	      input = vect_recog_temp_ssa_var (midtype, NULL);
    823  1.1  mrg 	      gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
    824  1.1  mrg 						       unprom->op);
    825  1.1  mrg 	      if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
    826  1.1  mrg 					 vec_midtype))
    827  1.1  mrg 		append_pattern_def_seq (vinfo, stmt_info,
    828  1.1  mrg 					new_stmt, vec_midtype);
    829  1.1  mrg 	    }
    830  1.1  mrg 	}
    831  1.1  mrg 
    832  1.1  mrg       /* See if we can reuse an existing result.  */
    833  1.1  mrg       if (types_compatible_p (type, TREE_TYPE (input)))
    834  1.1  mrg 	return input;
    835  1.1  mrg     }
    836  1.1  mrg 
    837  1.1  mrg   /* We need a new conversion statement.  */
    838  1.1  mrg   tree new_op = vect_recog_temp_ssa_var (type, NULL);
    839  1.1  mrg   gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
    840  1.1  mrg 
    841  1.1  mrg   /* If OP is an external value, see if we can insert the new statement
    842  1.1  mrg      on an incoming edge.  */
    843  1.1  mrg   if (input == unprom->op && unprom->dt == vect_external_def)
    844  1.1  mrg     if (edge e = vect_get_external_def_edge (vinfo, input))
    845  1.1  mrg       {
    846  1.1  mrg 	basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
    847  1.1  mrg 	gcc_assert (!new_bb);
    848  1.1  mrg 	return new_op;
    849  1.1  mrg       }
    850  1.1  mrg 
    851  1.1  mrg   /* As a (common) last resort, add the statement to the pattern itself.  */
    852  1.1  mrg   append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
    853  1.1  mrg   return new_op;
    854  1.1  mrg }
    855  1.1  mrg 
    856  1.1  mrg /* Invoke vect_convert_input for N elements of UNPROM and store the
    857  1.1  mrg    result in the corresponding elements of RESULT.
    858  1.1  mrg 
    859  1.1  mrg    If SUBTYPE then convert the type based on the subtype.  */
    860  1.1  mrg 
    861  1.1  mrg static void
    862  1.1  mrg vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
    863  1.1  mrg 		     tree *result, tree type, vect_unpromoted_value *unprom,
    864  1.1  mrg 		     tree vectype, enum optab_subtype subtype = optab_default)
    865  1.1  mrg {
    866  1.1  mrg   for (unsigned int i = 0; i < n; ++i)
    867  1.1  mrg     {
    868  1.1  mrg       unsigned int j;
    869  1.1  mrg       for (j = 0; j < i; ++j)
    870  1.1  mrg 	if (unprom[j].op == unprom[i].op)
    871  1.1  mrg 	  break;
    872  1.1  mrg 
    873  1.1  mrg       if (j < i)
    874  1.1  mrg 	result[i] = result[j];
    875  1.1  mrg       else
    876  1.1  mrg 	result[i] = vect_convert_input (vinfo, stmt_info,
    877  1.1  mrg 					type, &unprom[i], vectype, subtype);
    878  1.1  mrg     }
    879  1.1  mrg }
    880  1.1  mrg 
    881  1.1  mrg /* The caller has created a (possibly empty) sequence of pattern definition
    882  1.1  mrg    statements followed by a single statement PATTERN_STMT.  Cast the result
    883  1.1  mrg    of this final statement to TYPE.  If a new statement is needed, add
    884  1.1  mrg    PATTERN_STMT to the end of STMT_INFO's pattern definition statements
    885  1.1  mrg    and return the new statement, otherwise return PATTERN_STMT as-is.
    886  1.1  mrg    VECITYPE is the vector form of PATTERN_STMT's result type.  */
    887  1.1  mrg 
    888  1.1  mrg static gimple *
    889  1.1  mrg vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
    890  1.1  mrg 		     gimple *pattern_stmt, tree vecitype)
    891  1.1  mrg {
    892  1.1  mrg   tree lhs = gimple_get_lhs (pattern_stmt);
    893  1.1  mrg   if (!types_compatible_p (type, TREE_TYPE (lhs)))
    894  1.1  mrg     {
    895  1.1  mrg       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
    896  1.1  mrg       tree cast_var = vect_recog_temp_ssa_var (type, NULL);
    897  1.1  mrg       pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
    898  1.1  mrg     }
    899  1.1  mrg   return pattern_stmt;
    900  1.1  mrg }
    901  1.1  mrg 
    902  1.1  mrg /* Return true if STMT_VINFO describes a reduction for which reassociation
    903  1.1  mrg    is allowed.  If STMT_INFO is part of a group, assume that it's part of
    904  1.1  mrg    a reduction chain and optimistically assume that all statements
    905  1.1  mrg    except the last allow reassociation.
    906  1.1  mrg    Also require it to have code CODE and to be a reduction
    907  1.1  mrg    in the outermost loop.  When returning true, store the operands in
    908  1.1  mrg    *OP0_OUT and *OP1_OUT.  */
    909  1.1  mrg 
    910  1.1  mrg static bool
    911  1.1  mrg vect_reassociating_reduction_p (vec_info *vinfo,
    912  1.1  mrg 				stmt_vec_info stmt_info, tree_code code,
    913  1.1  mrg 				tree *op0_out, tree *op1_out)
    914  1.1  mrg {
    915  1.1  mrg   loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
    916  1.1  mrg   if (!loop_info)
    917  1.1  mrg     return false;
    918  1.1  mrg 
    919  1.1  mrg   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
    920  1.1  mrg   if (!assign || gimple_assign_rhs_code (assign) != code)
    921  1.1  mrg     return false;
    922  1.1  mrg 
    923  1.1  mrg   /* We don't allow changing the order of the computation in the inner-loop
    924  1.1  mrg      when doing outer-loop vectorization.  */
    925  1.1  mrg   class loop *loop = LOOP_VINFO_LOOP (loop_info);
    926  1.1  mrg   if (loop && nested_in_vect_loop_p (loop, stmt_info))
    927  1.1  mrg     return false;
    928  1.1  mrg 
    929  1.1  mrg   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
    930  1.1  mrg     {
    931  1.1  mrg       if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
    932  1.1  mrg 				       code))
    933  1.1  mrg 	return false;
    934  1.1  mrg     }
    935  1.1  mrg   else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
    936  1.1  mrg     return false;
    937  1.1  mrg 
    938  1.1  mrg   *op0_out = gimple_assign_rhs1 (assign);
    939  1.1  mrg   *op1_out = gimple_assign_rhs2 (assign);
    940  1.1  mrg   if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
    941  1.1  mrg     std::swap (*op0_out, *op1_out);
    942  1.1  mrg   return true;
    943  1.1  mrg }
    944  1.1  mrg 
    945  1.1  mrg /* match.pd function to match
    946  1.1  mrg    (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
    947  1.1  mrg    with conditions:
    948  1.1  mrg    1) @1, @2, c, d, a, b are all integral type.
    949  1.1  mrg    2) There's single_use for both @1 and @2.
    950  1.1  mrg    3) a, c have same precision.
    951  1.1  mrg    4) c and @1 have different precision.
    952  1.1  mrg    5) c, d are the same type or they can differ in sign when convert is
    953  1.1  mrg    truncation.
    954  1.1  mrg 
    955  1.1  mrg    record a and c and d and @3.  */
    956  1.1  mrg 
    957  1.1  mrg extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
    958  1.1  mrg 
    959  1.1  mrg /* Function vect_recog_cond_expr_convert
    960  1.1  mrg 
    961  1.1  mrg    Try to find the following pattern:
    962  1.1  mrg 
    963  1.1  mrg    TYPE_AB A,B;
    964  1.1  mrg    TYPE_CD C,D;
    965  1.1  mrg    TYPE_E E;
    966  1.1  mrg    TYPE_E op_true = (TYPE_E) A;
    967  1.1  mrg    TYPE_E op_false = (TYPE_E) B;
    968  1.1  mrg 
    969  1.1  mrg    E = C cmp D ? op_true : op_false;
    970  1.1  mrg 
    971  1.1  mrg    where
    972  1.1  mrg    TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
    973  1.1  mrg    TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
    974  1.1  mrg    single_use of op_true and op_false.
    975  1.1  mrg    TYPE_AB could differ in sign when (TYPE_E) A is a truncation.
    976  1.1  mrg 
    977  1.1  mrg    Input:
    978  1.1  mrg 
    979  1.1  mrg    * STMT_VINFO: The stmt from which the pattern search begins.
    980  1.1  mrg    here it starts with E = c cmp D ? op_true : op_false;
    981  1.1  mrg 
    982  1.1  mrg    Output:
    983  1.1  mrg 
    984  1.1  mrg    TYPE1 E' = C cmp D ? A : B;
    985  1.1  mrg    TYPE3 E = (TYPE3) E';
    986  1.1  mrg 
    987  1.1  mrg    There may extra nop_convert for A or B to handle different signness.
    988  1.1  mrg 
    989  1.1  mrg    * TYPE_OUT: The vector type of the output of this pattern.
    990  1.1  mrg 
    991  1.1  mrg    * Return value: A new stmt that will be used to replace the sequence of
    992  1.1  mrg    stmts that constitute the pattern. In this case it will be:
    993  1.1  mrg    E = (TYPE3)E';
    994  1.1  mrg    E' = C cmp D ? A : B; is recorded in pattern definition statements;  */
    995  1.1  mrg 
    996  1.1  mrg static gimple *
    997  1.1  mrg vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
    998  1.1  mrg 				      stmt_vec_info stmt_vinfo, tree *type_out)
    999  1.1  mrg {
   1000  1.1  mrg   gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
   1001  1.1  mrg   tree lhs, match[4], temp, type, new_lhs, op2;
   1002  1.1  mrg   gimple *cond_stmt;
   1003  1.1  mrg   gimple *pattern_stmt;
   1004  1.1  mrg 
   1005  1.1  mrg   if (!last_stmt)
   1006  1.1  mrg     return NULL;
   1007  1.1  mrg 
   1008  1.1  mrg   lhs = gimple_assign_lhs (last_stmt);
   1009  1.1  mrg 
   1010  1.1  mrg   /* Find E = C cmp D ? (TYPE3) A ? (TYPE3) B;
   1011  1.1  mrg      TYPE_PRECISION (A) == TYPE_PRECISION (C).  */
   1012  1.1  mrg   if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
   1013  1.1  mrg     return NULL;
   1014  1.1  mrg 
   1015  1.1  mrg   vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);
   1016  1.1  mrg 
   1017  1.1  mrg   op2 = match[2];
   1018  1.1  mrg   type = TREE_TYPE (match[1]);
   1019  1.1  mrg   if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
   1020  1.1  mrg     {
   1021  1.1  mrg       op2 = vect_recog_temp_ssa_var (type, NULL);
   1022  1.1  mrg       gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
   1023  1.1  mrg       append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
   1024  1.1  mrg 			      get_vectype_for_scalar_type (vinfo, type));
   1025  1.1  mrg     }
   1026  1.1  mrg 
   1027  1.1  mrg   temp = vect_recog_temp_ssa_var (type, NULL);
   1028  1.1  mrg   cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
   1029  1.1  mrg 						 match[1], op2));
   1030  1.1  mrg   append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
   1031  1.1  mrg 			  get_vectype_for_scalar_type (vinfo, type));
   1032  1.1  mrg   new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
   1033  1.1  mrg   pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
   1034  1.1  mrg   *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
   1035  1.1  mrg 
   1036  1.1  mrg   if (dump_enabled_p ())
   1037  1.1  mrg     dump_printf_loc (MSG_NOTE, vect_location,
   1038  1.1  mrg 		     "created pattern stmt: %G", pattern_stmt);
   1039  1.1  mrg   return pattern_stmt;
   1040  1.1  mrg }
   1041  1.1  mrg 
   1042  1.1  mrg /* Function vect_recog_dot_prod_pattern
   1043  1.1  mrg 
   1044  1.1  mrg    Try to find the following pattern:
   1045  1.1  mrg 
   1046  1.1  mrg      type1a x_t
   1047  1.1  mrg      type1b y_t;
   1048  1.1  mrg      TYPE1 prod;
   1049  1.1  mrg      TYPE2 sum = init;
   1050  1.1  mrg    loop:
   1051  1.1  mrg      sum_0 = phi <init, sum_1>
   1052  1.1  mrg      S1  x_t = ...
   1053  1.1  mrg      S2  y_t = ...
   1054  1.1  mrg      S3  x_T = (TYPE1) x_t;
   1055  1.1  mrg      S4  y_T = (TYPE1) y_t;
   1056  1.1  mrg      S5  prod = x_T * y_T;
   1057  1.1  mrg      [S6  prod = (TYPE2) prod;  #optional]
   1058  1.1  mrg      S7  sum_1 = prod + sum_0;
   1059  1.1  mrg 
   1060  1.1  mrg    where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
   1061  1.1  mrg    the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of
   1062  1.1  mrg    'type1a' and 'type1b' can differ.
   1063  1.1  mrg 
   1064  1.1  mrg    Input:
   1065  1.1  mrg 
   1066  1.1  mrg    * STMT_VINFO: The stmt from which the pattern search begins.  In the
   1067  1.1  mrg    example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   1068  1.1  mrg    will be detected.
   1069  1.1  mrg 
   1070  1.1  mrg    Output:
   1071  1.1  mrg 
   1072  1.1  mrg    * TYPE_OUT: The type of the output  of this pattern.
   1073  1.1  mrg 
   1074  1.1  mrg    * Return value: A new stmt that will be used to replace the sequence of
   1075  1.1  mrg    stmts that constitute the pattern. In this case it will be:
   1076  1.1  mrg         WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
   1077  1.1  mrg 
   1078  1.1  mrg    Note: The dot-prod idiom is a widening reduction pattern that is
   1079  1.1  mrg          vectorized without preserving all the intermediate results. It
   1080  1.1  mrg          produces only N/2 (widened) results (by summing up pairs of
   1081  1.1  mrg          intermediate results) rather than all N results.  Therefore, we
   1082  1.1  mrg          cannot allow this pattern when we want to get all the results and in
   1083  1.1  mrg          the correct order (as is the case when this computation is in an
   1084  1.1  mrg          inner-loop nested in an outer-loop that us being vectorized).  */
   1085  1.1  mrg 
   1086  1.1  mrg static gimple *
   1087  1.1  mrg vect_recog_dot_prod_pattern (vec_info *vinfo,
   1088  1.1  mrg 			     stmt_vec_info stmt_vinfo, tree *type_out)
   1089  1.1  mrg {
   1090  1.1  mrg   tree oprnd0, oprnd1;
   1091  1.1  mrg   gimple *last_stmt = stmt_vinfo->stmt;
   1092  1.1  mrg   tree type, half_type;
   1093  1.1  mrg   gimple *pattern_stmt;
   1094  1.1  mrg   tree var;
   1095  1.1  mrg 
   1096  1.1  mrg   /* Look for the following pattern
   1097  1.1  mrg           DX = (TYPE1) X;
   1098  1.1  mrg           DY = (TYPE1) Y;
   1099  1.1  mrg           DPROD = DX * DY;
   1100  1.1  mrg           DDPROD = (TYPE2) DPROD;
   1101  1.1  mrg           sum_1 = DDPROD + sum_0;
   1102  1.1  mrg      In which
   1103  1.1  mrg      - DX is double the size of X
   1104  1.1  mrg      - DY is double the size of Y
   1105  1.1  mrg      - DX, DY, DPROD all have the same type but the sign
   1106  1.1  mrg        between X, Y and DPROD can differ.
   1107  1.1  mrg      - sum is the same size of DPROD or bigger
   1108  1.1  mrg      - sum has been recognized as a reduction variable.
   1109  1.1  mrg 
   1110  1.1  mrg      This is equivalent to:
   1111  1.1  mrg        DPROD = X w* Y;          #widen mult
   1112  1.1  mrg        sum_1 = DPROD w+ sum_0;  #widen summation
   1113  1.1  mrg      or
   1114  1.1  mrg        DPROD = X w* Y;          #widen mult
   1115  1.1  mrg        sum_1 = DPROD + sum_0;   #summation
   1116  1.1  mrg    */
   1117  1.1  mrg 
   1118  1.1  mrg   /* Starting from LAST_STMT, follow the defs of its uses in search
   1119  1.1  mrg      of the above pattern.  */
   1120  1.1  mrg 
   1121  1.1  mrg   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
   1122  1.1  mrg 				       &oprnd0, &oprnd1))
   1123  1.1  mrg     return NULL;
   1124  1.1  mrg 
   1125  1.1  mrg   type = TREE_TYPE (gimple_get_lhs (last_stmt));
   1126  1.1  mrg 
   1127  1.1  mrg   vect_unpromoted_value unprom_mult;
   1128  1.1  mrg   oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
   1129  1.1  mrg 
   1130  1.1  mrg   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
   1131  1.1  mrg      we know that oprnd1 is the reduction variable (defined by a loop-header
   1132  1.1  mrg      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
   1133  1.1  mrg      Left to check that oprnd0 is defined by a (widen_)mult_expr  */
   1134  1.1  mrg   if (!oprnd0)
   1135  1.1  mrg     return NULL;
   1136  1.1  mrg 
   1137  1.1  mrg   stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
   1138  1.1  mrg   if (!mult_vinfo)
   1139  1.1  mrg     return NULL;
   1140  1.1  mrg 
   1141  1.1  mrg   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
   1142  1.1  mrg      inside the loop (in case we are analyzing an outer-loop).  */
   1143  1.1  mrg   vect_unpromoted_value unprom0[2];
   1144  1.1  mrg   enum optab_subtype subtype = optab_vector;
   1145  1.1  mrg   if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
   1146  1.1  mrg 			     false, 2, unprom0, &half_type, &subtype))
   1147  1.1  mrg     return NULL;
   1148  1.1  mrg 
   1149  1.1  mrg   /* If there are two widening operations, make sure they agree on the sign
   1150  1.1  mrg      of the extension.  The result of an optab_vector_mixed_sign operation
   1151  1.1  mrg      is signed; otherwise, the result has the same sign as the operands.  */
   1152  1.1  mrg   if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
   1153  1.1  mrg       && (subtype == optab_vector_mixed_sign
   1154  1.1  mrg 	? TYPE_UNSIGNED (unprom_mult.type)
   1155  1.1  mrg 	: TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
   1156  1.1  mrg     return NULL;
   1157  1.1  mrg 
   1158  1.1  mrg   vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
   1159  1.1  mrg 
   1160  1.1  mrg   tree half_vectype;
   1161  1.1  mrg   if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
   1162  1.1  mrg 					type_out, &half_vectype, subtype))
   1163  1.1  mrg     return NULL;
   1164  1.1  mrg 
   1165  1.1  mrg   /* Get the inputs in the appropriate types.  */
   1166  1.1  mrg   tree mult_oprnd[2];
   1167  1.1  mrg   vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
   1168  1.1  mrg 		       unprom0, half_vectype, subtype);
   1169  1.1  mrg 
   1170  1.1  mrg   var = vect_recog_temp_ssa_var (type, NULL);
   1171  1.1  mrg   pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
   1172  1.1  mrg 				      mult_oprnd[0], mult_oprnd[1], oprnd1);
   1173  1.1  mrg 
   1174  1.1  mrg   return pattern_stmt;
   1175  1.1  mrg }
   1176  1.1  mrg 
   1177  1.1  mrg 
   1178  1.1  mrg /* Function vect_recog_sad_pattern
   1179  1.1  mrg 
   1180  1.1  mrg    Try to find the following Sum of Absolute Difference (SAD) pattern:
   1181  1.1  mrg 
   1182  1.1  mrg      type x_t, y_t;
   1183  1.1  mrg      signed TYPE1 diff, abs_diff;
   1184  1.1  mrg      TYPE2 sum = init;
   1185  1.1  mrg    loop:
   1186  1.1  mrg      sum_0 = phi <init, sum_1>
   1187  1.1  mrg      S1  x_t = ...
   1188  1.1  mrg      S2  y_t = ...
   1189  1.1  mrg      S3  x_T = (TYPE1) x_t;
   1190  1.1  mrg      S4  y_T = (TYPE1) y_t;
   1191  1.1  mrg      S5  diff = x_T - y_T;
   1192  1.1  mrg      S6  abs_diff = ABS_EXPR <diff>;
   1193  1.1  mrg      [S7  abs_diff = (TYPE2) abs_diff;  #optional]
   1194  1.1  mrg      S8  sum_1 = abs_diff + sum_0;
   1195  1.1  mrg 
   1196  1.1  mrg    where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
   1197  1.1  mrg    same size of 'TYPE1' or bigger. This is a special case of a reduction
   1198  1.1  mrg    computation.
   1199  1.1  mrg 
   1200  1.1  mrg    Input:
   1201  1.1  mrg 
   1202  1.1  mrg    * STMT_VINFO: The stmt from which the pattern search begins.  In the
   1203  1.1  mrg    example, when this function is called with S8, the pattern
   1204  1.1  mrg    {S3,S4,S5,S6,S7,S8} will be detected.
   1205  1.1  mrg 
   1206  1.1  mrg    Output:
   1207  1.1  mrg 
   1208  1.1  mrg    * TYPE_OUT: The type of the output of this pattern.
   1209  1.1  mrg 
   1210  1.1  mrg    * Return value: A new stmt that will be used to replace the sequence of
   1211  1.1  mrg    stmts that constitute the pattern. In this case it will be:
   1212  1.1  mrg         SAD_EXPR <x_t, y_t, sum_0>
   1213  1.1  mrg   */
   1214  1.1  mrg 
static gimple *
vect_recog_sad_pattern (vec_info *vinfo,
			stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree half_type;

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size of DAD or bigger
     - sum has been recognized as a reduction variable.
     (A conversion of DAD to the type of sum may appear before the
     summation.)

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  /* The reduction addition must be reassociable for the pattern to be
     valid; this also gives us the two addends.  */
  tree plus_oprnd0, plus_oprnd1;
  if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
				       &plus_oprnd0, &plus_oprnd1))
    return NULL;

  tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));

  /* Any non-truncating sequence of conversions is OK here, since
     with a successful match, the result of the ABS(U) is known to fit
     within the nonnegative range of the result type.  (It cannot be the
     negative of the minimum signed value due to the range of the widening
     MINUS_EXPR.)  */
  vect_unpromoted_value unprom_abs;
  plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
						      &unprom_abs);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a loop-header
     phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
     Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (!plus_oprnd0)
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
  if (!abs_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
  if (!abs_stmt
      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
    return NULL;

  /* Reject an unsigned ABS(U) input: the pattern needs a signed
     difference under the absolute value.  */
  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  tree abs_type = TREE_TYPE (abs_oprnd);
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* Peel off conversions from the ABS input.  This can involve sign
     changes (e.g. from an unsigned subtraction to a signed ABS input)
     or signed promotion, but it can't include unsigned promotion.
     (Note that ABS of an unsigned promotion should have been folded
     away before now anyway.)  */
  vect_unpromoted_value unprom_diff;
  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
						    &unprom_diff);
  if (!abs_oprnd)
    return NULL;
  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
      && TYPE_UNSIGNED (unprom_diff.type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
  if (!diff_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom[2];
  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
			     false, 2, unprom, &half_type))
    return NULL;

  vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);

  /* The target must provide a direct SAD optab for (sum_type, half_type)
     for the replacement to be profitable.  */
  tree half_vectype;
  if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
					type_out, &half_vectype))
    return NULL;

  /* Get the inputs to the SAD_EXPR in the appropriate types.  */
  tree sad_oprnd[2];
  vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
		       unprom, half_vectype);

  /* Emit sum = SAD_EXPR <x, y, sum_0> to replace the matched sequence.  */
  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
					      sad_oprnd[1], plus_oprnd1);

  return pattern_stmt;
}
   1334  1.1  mrg 
   1335  1.1  mrg /* Recognize an operation that performs ORIG_CODE on widened inputs,
   1336  1.1  mrg    so that it can be treated as though it had the form:
   1337  1.1  mrg 
   1338  1.1  mrg       A_TYPE a;
   1339  1.1  mrg       B_TYPE b;
   1340  1.1  mrg       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
   1341  1.1  mrg       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
   1342  1.1  mrg     | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
   1343  1.1  mrg     | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
   1344  1.1  mrg     | RES_TYPE res = a_extend ORIG_CODE b_extend;
   1345  1.1  mrg 
   1346  1.1  mrg    Try to replace the pattern with:
   1347  1.1  mrg 
   1348  1.1  mrg       A_TYPE a;
   1349  1.1  mrg       B_TYPE b;
   1350  1.1  mrg       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
   1351  1.1  mrg       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
   1352  1.1  mrg     | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
   1353  1.1  mrg     | RES_TYPE res = (EXT_TYPE) ext;  // possible no-op
   1354  1.1  mrg 
   1355  1.1  mrg    where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
   1356  1.1  mrg 
   1357  1.1  mrg    SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
   1358  1.1  mrg    name of the pattern being matched, for dump purposes.  */
   1359  1.1  mrg 
static gimple *
vect_recog_widen_op_pattern (vec_info *vinfo,
			     stmt_vec_info last_stmt_info, tree *type_out,
			     tree_code orig_code, tree_code wide_code,
			     bool shift_p, const char *name)
{
  gimple *last_stmt = last_stmt_info->stmt;

  /* Check that LAST_STMT is ORIG_CODE applied to two operands that are
     both promotions from a common narrower type HALF_TYPE.  */
  vect_unpromoted_value unprom[2];
  tree half_type;
  if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
			     shift_p, 2, unprom, &half_type))
    return NULL;

  /* Pattern detected.  */
  vect_pattern_detected (name, last_stmt);

  /* ITYPE is the natural result type of the widening operation: twice
     the precision of HALF_TYPE with the same signedness.  Reuse the
     result type of LAST_STMT when it already has that form.  */
  tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
  tree itype = type;
  if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
      || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
    itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
					    TYPE_UNSIGNED (half_type));

  /* Check target support  */
  tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
  tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
  tree ctype = itype;
  tree vecctype = vecitype;
  if (orig_code == MINUS_EXPR
      && TYPE_UNSIGNED (itype)
      && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
    {
      /* Subtraction is special, even if half_type is unsigned and no matter
	 whether type is signed or unsigned, if type is wider than itype,
	 we need to sign-extend from the widening operation result to the
	 result type.
	 Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
	 itype unsigned short and type either int or unsigned int.
	 Widened (unsigned short) 0xfe - (unsigned short) 0xff is
	 (unsigned short) 0xffff, but for type int we want the result -1
	 and for type unsigned int 0xffffffff rather than 0xffff.  */
      ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
      vecctype = get_vectype_for_scalar_type (vinfo, ctype);
    }

  /* The widening operation itself must be supported by the target.  */
  enum tree_code dummy_code;
  int dummy_int;
  auto_vec<tree> dummy_vec;
  if (!vectype
      || !vecitype
      || !vecctype
      || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_out = get_vectype_for_scalar_type (vinfo, type);
  if (!*type_out)
    return NULL;

  /* Convert both inputs to HALF_TYPE, inserting conversions if needed.  */
  tree oprnd[2];
  vect_convert_inputs (vinfo, last_stmt_info,
		       2, oprnd, half_type, unprom, vectype);

  /* Emit the widening operation in ITYPE ...  */
  tree var = vect_recog_temp_ssa_var (itype, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, wide_code,
					      oprnd[0], oprnd[1]);

  /* ... then the extra sign-extension step for unsigned subtraction
     (see above), if required ...  */
  if (vecctype != vecitype)
    pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
					pattern_stmt, vecitype);

  /* ... and finally a conversion to the original result type.  */
  return vect_convert_output (vinfo, last_stmt_info,
			      type, pattern_stmt, vecctype);
}
   1437  1.1  mrg 
   1438  1.1  mrg /* Try to detect multiplication on widened inputs, converting MULT_EXPR
   1439  1.1  mrg    to WIDEN_MULT_EXPR.  See vect_recog_widen_op_pattern for details.  */
   1440  1.1  mrg 
   1441  1.1  mrg static gimple *
   1442  1.1  mrg vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
   1443  1.1  mrg 			       tree *type_out)
   1444  1.1  mrg {
   1445  1.1  mrg   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
   1446  1.1  mrg 				      MULT_EXPR, WIDEN_MULT_EXPR, false,
   1447  1.1  mrg 				      "vect_recog_widen_mult_pattern");
   1448  1.1  mrg }
   1449  1.1  mrg 
   1450  1.1  mrg /* Try to detect addition on widened inputs, converting PLUS_EXPR
   1451  1.1  mrg    to WIDEN_PLUS_EXPR.  See vect_recog_widen_op_pattern for details.  */
   1452  1.1  mrg 
   1453  1.1  mrg static gimple *
   1454  1.1  mrg vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
   1455  1.1  mrg 			       tree *type_out)
   1456  1.1  mrg {
   1457  1.1  mrg   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
   1458  1.1  mrg 				      PLUS_EXPR, WIDEN_PLUS_EXPR, false,
   1459  1.1  mrg 				      "vect_recog_widen_plus_pattern");
   1460  1.1  mrg }
   1461  1.1  mrg 
   1462  1.1  mrg /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
   1463  1.1  mrg    to WIDEN_MINUS_EXPR.  See vect_recog_widen_op_pattern for details.  */
   1464  1.1  mrg static gimple *
   1465  1.1  mrg vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
   1466  1.1  mrg 			       tree *type_out)
   1467  1.1  mrg {
   1468  1.1  mrg   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
   1469  1.1  mrg 				      MINUS_EXPR, WIDEN_MINUS_EXPR, false,
   1470  1.1  mrg 				      "vect_recog_widen_minus_pattern");
   1471  1.1  mrg }
   1472  1.1  mrg 
   1473  1.1  mrg /* Function vect_recog_popcount_pattern
   1474  1.1  mrg 
   1475  1.1  mrg    Try to find the following pattern:
   1476  1.1  mrg 
   1477  1.1  mrg    UTYPE1 A;
   1478  1.1  mrg    TYPE1 B;
   1479  1.1  mrg    UTYPE2 temp_in;
   1480  1.1  mrg    TYPE3 temp_out;
   1481  1.1  mrg    temp_in = (UTYPE2)A;
   1482  1.1  mrg 
   1483  1.1  mrg    temp_out = __builtin_popcount{,l,ll} (temp_in);
   1484  1.1  mrg    B = (TYPE1) temp_out;
   1485  1.1  mrg 
   1486  1.1  mrg    TYPE2 may or may not be equal to TYPE3.
   1487  1.1  mrg    i.e. TYPE2 is equal to TYPE3 for __builtin_popcount
   1488  1.1  mrg    i.e. TYPE2 is not equal to TYPE3 for __builtin_popcountll
   1489  1.1  mrg 
   1490  1.1  mrg    Input:
   1491  1.1  mrg 
   1492  1.1  mrg    * STMT_VINFO: The stmt from which the pattern search begins.
   1493  1.1  mrg    here it starts with B = (TYPE1) temp_out;
   1494  1.1  mrg 
   1495  1.1  mrg    Output:
   1496  1.1  mrg 
   1497  1.1  mrg    * TYPE_OUT: The vector type of the output of this pattern.
   1498  1.1  mrg 
   1499  1.1  mrg    * Return value: A new stmt that will be used to replace the sequence of
   1500  1.1  mrg    stmts that constitute the pattern. In this case it will be:
   1501  1.1  mrg    B = .POPCOUNT (A);
   1502  1.1  mrg */
   1503  1.1  mrg 
   1504  1.1  mrg static gimple *
   1505  1.1  mrg vect_recog_popcount_pattern (vec_info *vinfo,
   1506  1.1  mrg 			     stmt_vec_info stmt_vinfo, tree *type_out)
   1507  1.1  mrg {
   1508  1.1  mrg   gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
   1509  1.1  mrg   gimple *popcount_stmt, *pattern_stmt;
   1510  1.1  mrg   tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
   1511  1.1  mrg   auto_vec<tree> vargs;
   1512  1.1  mrg 
   1513  1.1  mrg   /* Find B = (TYPE1) temp_out. */
   1514  1.1  mrg   if (!last_stmt)
   1515  1.1  mrg     return NULL;
   1516  1.1  mrg   tree_code code = gimple_assign_rhs_code (last_stmt);
   1517  1.1  mrg   if (!CONVERT_EXPR_CODE_P (code))
   1518  1.1  mrg     return NULL;
   1519  1.1  mrg 
   1520  1.1  mrg   lhs_oprnd = gimple_assign_lhs (last_stmt);
   1521  1.1  mrg   lhs_type = TREE_TYPE (lhs_oprnd);
   1522  1.1  mrg   if (!INTEGRAL_TYPE_P (lhs_type))
   1523  1.1  mrg     return NULL;
   1524  1.1  mrg 
   1525  1.1  mrg   rhs_oprnd = gimple_assign_rhs1 (last_stmt);
   1526  1.1  mrg   if (TREE_CODE (rhs_oprnd) != SSA_NAME
   1527  1.1  mrg       || !has_single_use (rhs_oprnd))
   1528  1.1  mrg     return NULL;
   1529  1.1  mrg   popcount_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
   1530  1.1  mrg 
   1531  1.1  mrg   /* Find temp_out = __builtin_popcount{,l,ll} (temp_in);  */
   1532  1.1  mrg   if (!is_gimple_call (popcount_stmt))
   1533  1.1  mrg     return NULL;
   1534  1.1  mrg   switch (gimple_call_combined_fn (popcount_stmt))
   1535  1.1  mrg     {
   1536  1.1  mrg     CASE_CFN_POPCOUNT:
   1537  1.1  mrg       break;
   1538  1.1  mrg     default:
   1539  1.1  mrg       return NULL;
   1540  1.1  mrg     }
   1541  1.1  mrg 
   1542  1.1  mrg   if (gimple_call_num_args (popcount_stmt) != 1)
   1543  1.1  mrg     return NULL;
   1544  1.1  mrg 
   1545  1.1  mrg   rhs_oprnd = gimple_call_arg (popcount_stmt, 0);
   1546  1.1  mrg   vect_unpromoted_value unprom_diff;
   1547  1.1  mrg   rhs_origin = vect_look_through_possible_promotion (vinfo, rhs_oprnd,
   1548  1.1  mrg 						    &unprom_diff);
   1549  1.1  mrg 
   1550  1.1  mrg   if (!rhs_origin)
   1551  1.1  mrg     return NULL;
   1552  1.1  mrg 
   1553  1.1  mrg   /* Input and output of .POPCOUNT should be same-precision integer.
   1554  1.1  mrg      Also A should be unsigned or same precision as temp_in,
   1555  1.1  mrg      otherwise there would be sign_extend from A to temp_in.  */
   1556  1.1  mrg   if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type)
   1557  1.1  mrg       || (!TYPE_UNSIGNED (unprom_diff.type)
   1558  1.1  mrg 	  && (TYPE_PRECISION (unprom_diff.type)
   1559  1.1  mrg 	      != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))))
   1560  1.1  mrg     return NULL;
   1561  1.1  mrg   vargs.safe_push (unprom_diff.op);
   1562  1.1  mrg 
   1563  1.1  mrg   vect_pattern_detected ("vec_regcog_popcount_pattern", popcount_stmt);
   1564  1.1  mrg   vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
   1565  1.1  mrg   /* Do it only if the backend has popcount<vector_mode>2 pattern.  */
   1566  1.1  mrg   if (!vec_type
   1567  1.1  mrg       || !direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
   1568  1.1  mrg 					  OPTIMIZE_FOR_SPEED))
   1569  1.1  mrg     return NULL;
   1570  1.1  mrg 
   1571  1.1  mrg   /* Create B = .POPCOUNT (A).  */
   1572  1.1  mrg   new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
   1573  1.1  mrg   pattern_stmt = gimple_build_call_internal_vec (IFN_POPCOUNT, vargs);
   1574  1.1  mrg   gimple_call_set_lhs (pattern_stmt, new_var);
   1575  1.1  mrg   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
   1576  1.1  mrg   *type_out = vec_type;
   1577  1.1  mrg 
   1578  1.1  mrg   if (dump_enabled_p ())
   1579  1.1  mrg     dump_printf_loc (MSG_NOTE, vect_location,
   1580  1.1  mrg 		     "created pattern stmt: %G", pattern_stmt);
   1581  1.1  mrg   return pattern_stmt;
   1582  1.1  mrg }
   1583  1.1  mrg 
   1584  1.1  mrg /* Function vect_recog_pow_pattern
   1585  1.1  mrg 
   1586  1.1  mrg    Try to find the following pattern:
   1587  1.1  mrg 
   1588  1.1  mrg      x = POW (y, N);
   1589  1.1  mrg 
   1590  1.1  mrg    with POW being one of pow, powf, powi, powif and N being
   1591  1.1  mrg    either 2 or 0.5.
   1592  1.1  mrg 
   1593  1.1  mrg    Input:
   1594  1.1  mrg 
   1595  1.1  mrg    * STMT_VINFO: The stmt from which the pattern search begins.
   1596  1.1  mrg 
   1597  1.1  mrg    Output:
   1598  1.1  mrg 
   1599  1.1  mrg    * TYPE_OUT: The type of the output of this pattern.
   1600  1.1  mrg 
   1601  1.1  mrg    * Return value: A new stmt that will be used to replace the sequence of
   1602  1.1  mrg    stmts that constitute the pattern. In this case it will be:
   1603  1.1  mrg         x = x * x
   1604  1.1  mrg    or
   1605  1.1  mrg 	x = sqrt (x)
   1606  1.1  mrg */
   1607  1.1  mrg 
static gimple *
vect_recog_pow_pattern (vec_info *vinfo,
			stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree base, exp;
  gimple *stmt;
  tree var;

  /* The pattern root must be a pow/powi call whose result is used.  */
  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  switch (gimple_call_combined_fn (last_stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      break;

    default:
      return NULL;
    }

  base = gimple_call_arg (last_stmt, 0);
  exp = gimple_call_arg (last_stmt, 1);
  if (TREE_CODE (exp) != REAL_CST
      && TREE_CODE (exp) != INTEGER_CST)
    {
      /* Non-constant exponent: the only case handled is pow (C, x) with
	 constant base C, rewritten as exp (log (C) * x) when a SIMD
	 clone of exp is available.  */
      if (flag_unsafe_math_optimizations
	  && TREE_CODE (base) == REAL_CST
	  && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
	{
	  combined_fn log_cfn;
	  built_in_function exp_bfn;
	  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
	    {
	    case BUILT_IN_POW:
	      log_cfn = CFN_BUILT_IN_LOG;
	      exp_bfn = BUILT_IN_EXP;
	      break;
	    case BUILT_IN_POWF:
	      log_cfn = CFN_BUILT_IN_LOGF;
	      exp_bfn = BUILT_IN_EXPF;
	      break;
	    case BUILT_IN_POWL:
	      log_cfn = CFN_BUILT_IN_LOGL;
	      exp_bfn = BUILT_IN_EXPL;
	      break;
	    default:
	      return NULL;
	    }
	  /* Fold log (C) at compile time; the rewrite is only valid if
	     this yields a constant.  */
	  tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
	  tree exp_decl = builtin_decl_implicit (exp_bfn);
	  /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
	     does that, but if C is a power of 2, we want to use
	     exp2 (log2 (C) * x) in the non-vectorized version, but for
	     vectorization we don't have vectorized exp2.  */
	  if (logc
	      && TREE_CODE (logc) == REAL_CST
	      && exp_decl
	      && lookup_attribute ("omp declare simd",
				   DECL_ATTRIBUTES (exp_decl)))
	    {
	      /* Make sure SIMD clones of exp exist (creating them on
		 demand if the target can), since the vectorizer will
		 need one to vectorize the emitted call.  */
	      cgraph_node *node = cgraph_node::get_create (exp_decl);
	      if (node->simd_clones == NULL)
		{
		  if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
		      || node->definition)
		    return NULL;
		  expand_simd_clones (node);
		  if (node->simd_clones == NULL)
		    return NULL;
		}
	      *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
	      if (!*type_out)
		return NULL;
	      /* Emit def = log (C) * x; res = exp (def).  */
	      tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
	      gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
	      append_pattern_def_seq (vinfo, stmt_vinfo, g);
	      tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
	      g = gimple_build_call (exp_decl, 1, def);
	      gimple_call_set_lhs (g, res);
	      return g;
	    }
	}

      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    {
      if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
					    TREE_TYPE (base), type_out))
	return NULL;

      /* pow (x, 2) -> x * x.  */
      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign (var, MULT_EXPR, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    {
      *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
      if (*type_out
	  && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
					     OPTIMIZE_FOR_SPEED))
	{
	  /* pow (x, 0.5) -> .SQRT (x).  */
	  gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
	  var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	  gimple_call_set_lhs (stmt, var);
	  gimple_call_set_nothrow (stmt, true);
	  return stmt;
	}
    }

  return NULL;
}
   1733  1.1  mrg 
   1734  1.1  mrg 
   1735  1.1  mrg /* Function vect_recog_widen_sum_pattern
   1736  1.1  mrg 
   1737  1.1  mrg    Try to find the following pattern:
   1738  1.1  mrg 
   1739  1.1  mrg      type x_t;
   1740  1.1  mrg      TYPE x_T, sum = init;
   1741  1.1  mrg    loop:
   1742  1.1  mrg      sum_0 = phi <init, sum_1>
   1743  1.1  mrg      S1  x_t = *p;
   1744  1.1  mrg      S2  x_T = (TYPE) x_t;
   1745  1.1  mrg      S3  sum_1 = x_T + sum_0;
   1746  1.1  mrg 
   1747  1.1  mrg    where type 'TYPE' is at least double the size of type 'type', i.e - we're
   1748  1.1  mrg    summing elements of type 'type' into an accumulator of type 'TYPE'. This is
   1749  1.1  mrg    a special case of a reduction computation.
   1750  1.1  mrg 
   1751  1.1  mrg    Input:
   1752  1.1  mrg 
   1753  1.1  mrg    * STMT_VINFO: The stmt from which the pattern search begins. In the example,
   1754  1.1  mrg    when this function is called with S3, the pattern {S2,S3} will be detected.
   1755  1.1  mrg 
   1756  1.1  mrg    Output:
   1757  1.1  mrg 
   1758  1.1  mrg    * TYPE_OUT: The type of the output of this pattern.
   1759  1.1  mrg 
   1760  1.1  mrg    * Return value: A new stmt that will be used to replace the sequence of
   1761  1.1  mrg    stmts that constitute the pattern. In this case it will be:
   1762  1.1  mrg         WIDEN_SUM <x_t, sum_0>
   1763  1.1  mrg 
   1764  1.1  mrg    Note: The widening-sum idiom is a widening reduction pattern that is
   1765  1.1  mrg 	 vectorized without preserving all the intermediate results. It
   1766  1.1  mrg          produces only N/2 (widened) results (by summing up pairs of
   1767  1.1  mrg 	 intermediate results) rather than all N results.  Therefore, we
   1768  1.1  mrg 	 cannot allow this pattern when we want to get all the results and in
   1769  1.1  mrg 	 the correct order (as is the case when this computation is in an
   1770  1.1  mrg 	 inner-loop nested in an outer-loop that us being vectorized).  */
   1771  1.1  mrg 
   1772  1.1  mrg static gimple *
   1773  1.1  mrg vect_recog_widen_sum_pattern (vec_info *vinfo,
   1774  1.1  mrg 			      stmt_vec_info stmt_vinfo, tree *type_out)
   1775  1.1  mrg {
   1776  1.1  mrg   gimple *last_stmt = stmt_vinfo->stmt;
   1777  1.1  mrg   tree oprnd0, oprnd1;
   1778  1.1  mrg   tree type;
   1779  1.1  mrg   gimple *pattern_stmt;
   1780  1.1  mrg   tree var;
   1781  1.1  mrg 
   1782  1.1  mrg   /* Look for the following pattern
   1783  1.1  mrg           DX = (TYPE) X;
   1784  1.1  mrg           sum_1 = DX + sum_0;
   1785  1.1  mrg      In which DX is at least double the size of X, and sum_1 has been
   1786  1.1  mrg      recognized as a reduction variable.
   1787  1.1  mrg    */
   1788  1.1  mrg 
   1789  1.1  mrg   /* Starting from LAST_STMT, follow the defs of its uses in search
   1790  1.1  mrg      of the above pattern.  */
   1791  1.1  mrg 
   1792  1.1  mrg   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
   1793  1.1  mrg 				       &oprnd0, &oprnd1)
   1794  1.1  mrg       || TREE_CODE (oprnd0) != SSA_NAME
   1795  1.1  mrg       || !vinfo->lookup_def (oprnd0))
   1796  1.1  mrg     return NULL;
   1797  1.1  mrg 
   1798  1.1  mrg   type = TREE_TYPE (gimple_get_lhs (last_stmt));
   1799  1.1  mrg 
   1800  1.1  mrg   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
   1801  1.1  mrg      we know that oprnd1 is the reduction variable (defined by a loop-header
   1802  1.1  mrg      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
   1803  1.1  mrg      Left to check that oprnd0 is defined by a cast from type 'type' to type
   1804  1.1  mrg      'TYPE'.  */
   1805  1.1  mrg 
   1806  1.1  mrg   vect_unpromoted_value unprom0;
   1807  1.1  mrg   if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
   1808  1.1  mrg       || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
   1809  1.1  mrg     return NULL;
   1810  1.1  mrg 
   1811  1.1  mrg   vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
   1812  1.1  mrg 
   1813  1.1  mrg   if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
   1814  1.1  mrg 					unprom0.type, type_out))
   1815  1.1  mrg     return NULL;
   1816  1.1  mrg 
   1817  1.1  mrg   var = vect_recog_temp_ssa_var (type, NULL);
   1818  1.1  mrg   pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
   1819  1.1  mrg 
   1820  1.1  mrg   return pattern_stmt;
   1821  1.1  mrg }
   1822  1.1  mrg 
   1823  1.1  mrg /* Recognize cases in which an operation is performed in one type WTYPE
   1824  1.1  mrg    but could be done more efficiently in a narrower type NTYPE.  For example,
   1825  1.1  mrg    if we have:
   1826  1.1  mrg 
   1827  1.1  mrg      ATYPE a;  // narrower than NTYPE
   1828  1.1  mrg      BTYPE b;  // narrower than NTYPE
   1829  1.1  mrg      WTYPE aw = (WTYPE) a;
   1830  1.1  mrg      WTYPE bw = (WTYPE) b;
   1831  1.1  mrg      WTYPE res = aw + bw;  // only uses of aw and bw
   1832  1.1  mrg 
   1833  1.1  mrg    then it would be more efficient to do:
   1834  1.1  mrg 
   1835  1.1  mrg      NTYPE an = (NTYPE) a;
   1836  1.1  mrg      NTYPE bn = (NTYPE) b;
   1837  1.1  mrg      NTYPE resn = an + bn;
   1838  1.1  mrg      WTYPE res = (WTYPE) resn;
   1839  1.1  mrg 
   1840  1.1  mrg    Other situations include things like:
   1841  1.1  mrg 
   1842  1.1  mrg      ATYPE a;  // NTYPE or narrower
   1843  1.1  mrg      WTYPE aw = (WTYPE) a;
   1844  1.1  mrg      WTYPE res = aw + b;
   1845  1.1  mrg 
   1846  1.1  mrg    when only "(NTYPE) res" is significant.  In that case it's more efficient
   1847  1.1  mrg    to truncate "b" and do the operation on NTYPE instead:
   1848  1.1  mrg 
   1849  1.1  mrg      NTYPE an = (NTYPE) a;
   1850  1.1  mrg      NTYPE bn = (NTYPE) b;  // truncation
   1851  1.1  mrg      NTYPE resn = an + bn;
   1852  1.1  mrg      WTYPE res = (WTYPE) resn;
   1853  1.1  mrg 
   1854  1.1  mrg    All users of "res" should then use "resn" instead, making the final
   1855  1.1  mrg    statement dead (not marked as relevant).  The final statement is still
   1856  1.1  mrg    needed to maintain the type correctness of the IR.
   1857  1.1  mrg 
   1858  1.1  mrg    vect_determine_precisions has already determined the minimum
   1859  1.1  mrg    precison of the operation and the minimum precision required
   1860  1.1  mrg    by users of the result.  */
   1861  1.1  mrg 
    1862  1.1  mrg static gimple *
    1863  1.1  mrg vect_recog_over_widening_pattern (vec_info *vinfo,
    1864  1.1  mrg 				  stmt_vec_info last_stmt_info, tree *type_out)
    1865  1.1  mrg {
    1866  1.1  mrg   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
    1867  1.1  mrg   if (!last_stmt)
    1868  1.1  mrg     return NULL;
    1869  1.1  mrg 
    1870  1.1  mrg   /* See whether we have found that this operation can be done on a
    1871  1.1  mrg      narrower type without changing its semantics.  */
    1872  1.1  mrg   unsigned int new_precision = last_stmt_info->operation_precision;
    1873  1.1  mrg   if (!new_precision)
    1874  1.1  mrg     return NULL;
    1875  1.1  mrg 
    1876  1.1  mrg   tree lhs = gimple_assign_lhs (last_stmt);
    1877  1.1  mrg   tree type = TREE_TYPE (lhs);
    1878  1.1  mrg   tree_code code = gimple_assign_rhs_code (last_stmt);
    1879  1.1  mrg 
    1880  1.1  mrg   /* Punt for reductions where we don't handle the type conversions.  */
    1881  1.1  mrg   if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
    1882  1.1  mrg     return NULL;
    1883  1.1  mrg 
    1884  1.1  mrg   /* Keep the first operand of a COND_EXPR as-is: only the other two
    1885  1.1  mrg      operands are interesting.  */
    1886  1.1  mrg   unsigned int first_op = (code == COND_EXPR ? 2 : 1);
    1887  1.1  mrg 
    1888  1.1  mrg   /* Check the operands.  */
    1889  1.1  mrg   unsigned int nops = gimple_num_ops (last_stmt) - first_op;
    1890  1.1  mrg   auto_vec <vect_unpromoted_value, 3> unprom (nops);
    1891  1.1  mrg   unprom.quick_grow (nops);
    1892  1.1  mrg   unsigned int min_precision = 0;
    1893  1.1  mrg   bool single_use_p = false;
    1894  1.1  mrg   for (unsigned int i = 0; i < nops; ++i)
    1895  1.1  mrg     {
    1896  1.1  mrg       tree op = gimple_op (last_stmt, first_op + i);
    1897  1.1  mrg       if (TREE_CODE (op) == INTEGER_CST)
    1898  1.1  mrg 	unprom[i].set_op (op, vect_constant_def);
    1899  1.1  mrg       else if (TREE_CODE (op) == SSA_NAME)
    1900  1.1  mrg 	{
    1901  1.1  mrg 	  bool op_single_use_p = true;
    1902  1.1  mrg 	  if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
    1903  1.1  mrg 						     &op_single_use_p))
    1904  1.1  mrg 	    return NULL;
    1905  1.1  mrg 	  /* If:
    1906  1.1  mrg 
    1907  1.1  mrg 	     (1) N bits of the result are needed;
    1908  1.1  mrg 	     (2) all inputs are widened from M<N bits; and
    1909  1.1  mrg 	     (3) one operand OP is a single-use SSA name
    1910  1.1  mrg 
    1911  1.1  mrg 	     we can shift the M->N widening from OP to the output
    1912  1.1  mrg 	     without changing the number or type of extensions involved.
    1913  1.1  mrg 	     This then reduces the number of copies of STMT_INFO.
    1914  1.1  mrg 
    1915  1.1  mrg 	     If instead of (3) more than one operand is a single-use SSA name,
    1916  1.1  mrg 	     shifting the extension to the output is even more of a win.
    1917  1.1  mrg 
    1918  1.1  mrg 	     If instead:
    1919  1.1  mrg 
    1920  1.1  mrg 	     (1) N bits of the result are needed;
    1921  1.1  mrg 	     (2) one operand OP2 is widened from M2<N bits;
    1922  1.1  mrg 	     (3) another operand OP1 is widened from M1<M2 bits; and
    1923  1.1  mrg 	     (4) both OP1 and OP2 are single-use
    1924  1.1  mrg 
    1925  1.1  mrg 	     the choice is between:
    1926  1.1  mrg 
    1927  1.1  mrg 	     (a) truncating OP2 to M1, doing the operation on M1,
    1928  1.1  mrg 		 and then widening the result to N
    1929  1.1  mrg 
    1930  1.1  mrg 	     (b) widening OP1 to M2, doing the operation on M2, and then
    1931  1.1  mrg 		 widening the result to N
    1932  1.1  mrg 
    1933  1.1  mrg 	     Both shift the M2->N widening of the inputs to the output.
    1934  1.1  mrg 	     (a) additionally shifts the M1->M2 widening to the output;
    1935  1.1  mrg 	     it requires fewer copies of STMT_INFO but requires an extra
    1936  1.1  mrg 	     M2->M1 truncation.
    1937  1.1  mrg 
    1938  1.1  mrg 	     Which is better will depend on the complexity and cost of
    1939  1.1  mrg 	     STMT_INFO, which is hard to predict at this stage.  However,
    1940  1.1  mrg 	     a clear tie-breaker in favor of (b) is the fact that the
    1941  1.1  mrg 	     truncation in (a) increases the length of the operation chain.
    1942  1.1  mrg 
    1943  1.1  mrg 	     If instead of (4) only one of OP1 or OP2 is single-use,
    1944  1.1  mrg 	     (b) is still a win over doing the operation in N bits:
    1945  1.1  mrg 	     it still shifts the M2->N widening on the single-use operand
    1946  1.1  mrg 	     to the output and reduces the number of STMT_INFO copies.
    1947  1.1  mrg 
    1948  1.1  mrg 	     If neither operand is single-use then operating on fewer than
    1949  1.1  mrg 	     N bits might lead to more extensions overall.  Whether it does
    1950  1.1  mrg 	     or not depends on global information about the vectorization
    1951  1.1  mrg 	     region, and whether that's a good trade-off would again
    1952  1.1  mrg 	     depend on the complexity and cost of the statements involved,
    1953  1.1  mrg 	     as well as things like register pressure that are not normally
    1954  1.1  mrg 	     modelled at this stage.  We therefore ignore these cases
    1955  1.1  mrg 	     and just optimize the clear single-use wins above.
    1956  1.1  mrg 
    1957  1.1  mrg 	     Thus we take the maximum precision of the unpromoted operands
    1958  1.1  mrg 	     and record whether any operand is single-use.  */
    1959  1.1  mrg 	  if (unprom[i].dt == vect_internal_def)
    1960  1.1  mrg 	    {
    1961  1.1  mrg 	      min_precision = MAX (min_precision,
    1962  1.1  mrg 				   TYPE_PRECISION (unprom[i].type));
    1963  1.1  mrg 	      single_use_p |= op_single_use_p;
    1964  1.1  mrg 	    }
    1965  1.1  mrg 	}
    1966  1.1  mrg       else
    1967  1.1  mrg 	return NULL;
    1968  1.1  mrg     }
    1969  1.1  mrg 
    1970  1.1  mrg   /* Although the operation could be done in operation_precision, we have
    1971  1.1  mrg      to balance that against introducing extra truncations or extensions.
    1972  1.1  mrg      Calculate the minimum precision that can be handled efficiently.
    1973  1.1  mrg 
    1974  1.1  mrg      The loop above determined that the operation could be handled
    1975  1.1  mrg      efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
    1976  1.1  mrg      extension from the inputs to the output without introducing more
    1977  1.1  mrg      instructions, and would reduce the number of instructions required
    1978  1.1  mrg      for STMT_INFO itself.
    1979  1.1  mrg 
    1980  1.1  mrg      vect_determine_precisions has also determined that the result only
    1981  1.1  mrg      needs min_output_precision bits.  Truncating by a factor of N times
    1982  1.1  mrg      requires a tree of N - 1 instructions, so if TYPE is N times wider
    1983  1.1  mrg      than min_output_precision, doing the operation in TYPE and truncating
    1984  1.1  mrg      the result requires N + (N - 1) = 2N - 1 instructions per output vector.
    1985  1.1  mrg      In contrast:
    1986  1.1  mrg 
    1987  1.1  mrg      - truncating the input to a unary operation and doing the operation
    1988  1.1  mrg        in the new type requires at most N - 1 + 1 = N instructions per
    1989  1.1  mrg        output vector
    1990  1.1  mrg 
    1991  1.1  mrg      - doing the same for a binary operation requires at most
    1992  1.1  mrg        (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
    1993  1.1  mrg 
    1994  1.1  mrg      Both unary and binary operations require fewer instructions than
    1995  1.1  mrg      this if the operands were extended from a suitable truncated form.
    1996  1.1  mrg      Thus there is usually nothing to lose by doing operations in
    1997  1.1  mrg      min_output_precision bits, but there can be something to gain.  */
    1998  1.1  mrg   if (!single_use_p)
    1999  1.1  mrg     min_precision = last_stmt_info->min_output_precision;
    2000  1.1  mrg   else
    2001  1.1  mrg     min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
    2002  1.1  mrg 
    2003  1.1  mrg   /* Apply the minimum efficient precision we just calculated.  */
    2004  1.1  mrg   if (new_precision < min_precision)
    2005  1.1  mrg     new_precision = min_precision;
    2006  1.1  mrg   new_precision = vect_element_precision (new_precision);
    2007  1.1  mrg   if (new_precision >= TYPE_PRECISION (type))
    2008  1.1  mrg     return NULL;
    2009  1.1  mrg 
    2010  1.1  mrg   vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
    2011  1.1  mrg 
    2012  1.1  mrg   *type_out = get_vectype_for_scalar_type (vinfo, type);
    2013  1.1  mrg   if (!*type_out)
    2014  1.1  mrg     return NULL;
    2015  1.1  mrg 
    2016  1.1  mrg   /* We've found a viable pattern.  Get the new type of the operation.  */
    2017  1.1  mrg   bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
    2018  1.1  mrg   tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
    2019  1.1  mrg 
    2020  1.1  mrg   /* If we're truncating an operation, we need to make sure that we
    2021  1.1  mrg      don't introduce new undefined overflow.  The codes tested here are
    2022  1.1  mrg      a subset of those accepted by vect_truncatable_operation_p.  */
    2023  1.1  mrg   tree op_type = new_type;
    2024  1.1  mrg   if (TYPE_OVERFLOW_UNDEFINED (new_type)
    2025  1.1  mrg       && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
    2026  1.1  mrg     op_type = build_nonstandard_integer_type (new_precision, true);
    2027  1.1  mrg 
    2028  1.1  mrg   /* We specifically don't check here whether the target supports the
    2029  1.1  mrg      new operation, since it might be something that a later pattern
    2030  1.1  mrg      wants to rewrite anyway.  If targets have a minimum element size
    2031  1.1  mrg      for some optabs, we should pattern-match smaller ops to larger ops
    2032  1.1  mrg      where beneficial.  */
    2033  1.1  mrg   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
    2034  1.1  mrg   tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
    2035  1.1  mrg   if (!new_vectype || !op_vectype)
    2036  1.1  mrg     return NULL;
    2037  1.1  mrg 
    2038  1.1  mrg   if (dump_enabled_p ())
    2039  1.1  mrg     dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
    2040  1.1  mrg 		     type, new_type);
    2041  1.1  mrg 
    2042  1.1  mrg   /* Calculate the rhs operands for an operation on OP_TYPE.  */
                       /* For a COND_EXPR the loop below copies the condition operand
                          (operand 1 of the assignment) through unchanged, per the
                          FIRST_OP handling above; only the remaining NOPS value
                          operands are narrowed by vect_convert_inputs.  */
    2043  1.1  mrg   tree ops[3] = {};
    2044  1.1  mrg   for (unsigned int i = 1; i < first_op; ++i)
    2045  1.1  mrg     ops[i - 1] = gimple_op (last_stmt, i);
    2046  1.1  mrg   vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
    2047  1.1  mrg 		       op_type, &unprom[0], op_vectype);
    2048  1.1  mrg 
    2049  1.1  mrg   /* Use the operation to produce a result of type OP_TYPE.  */
    2050  1.1  mrg   tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
    2051  1.1  mrg   gimple *pattern_stmt = gimple_build_assign (new_var, code,
    2052  1.1  mrg 					      ops[0], ops[1], ops[2]);
    2053  1.1  mrg   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
    2054  1.1  mrg 
    2055  1.1  mrg   if (dump_enabled_p ())
    2056  1.1  mrg     dump_printf_loc (MSG_NOTE, vect_location,
    2057  1.1  mrg 		     "created pattern stmt: %G", pattern_stmt);
    2058  1.1  mrg 
    2059  1.1  mrg   /* Convert back to the original signedness, if OP_TYPE is different
    2060  1.1  mrg      from NEW_TYPE.  */
    2061  1.1  mrg   if (op_type != new_type)
    2062  1.1  mrg     pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
    2063  1.1  mrg 					pattern_stmt, op_vectype);
    2064  1.1  mrg 
    2065  1.1  mrg   /* Promote the result to the original type.  */
    2066  1.1  mrg   pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
    2067  1.1  mrg 				      pattern_stmt, new_vectype);
    2068  1.1  mrg 
    2069  1.1  mrg   return pattern_stmt;
    2070  1.1  mrg }
   2071  1.1  mrg 
   2072  1.1  mrg /* Recognize the following patterns:
   2073  1.1  mrg 
   2074  1.1  mrg      ATYPE a;  // narrower than TYPE
   2075  1.1  mrg      BTYPE b;  // narrower than TYPE
   2076  1.1  mrg 
   2077  1.1  mrg    1) Multiply high with scaling
   2078  1.1  mrg      TYPE res = ((TYPE) a * (TYPE) b) >> c;
   2079  1.1  mrg      Here, c is bitsize (TYPE) / 2 - 1.
   2080  1.1  mrg 
   2081  1.1  mrg    2) ... or also with rounding
    2082  1.1  mrg      TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
   2083  1.1  mrg      Here, d is bitsize (TYPE) / 2 - 2.
   2084  1.1  mrg 
   2085  1.1  mrg    3) Normal multiply high
   2086  1.1  mrg      TYPE res = ((TYPE) a * (TYPE) b) >> e;
   2087  1.1  mrg      Here, e is bitsize (TYPE) / 2.
   2088  1.1  mrg 
   2089  1.1  mrg    where only the bottom half of res is used.  */
   2090  1.1  mrg 
    2091  1.1  mrg static gimple *
    2092  1.1  mrg vect_recog_mulhs_pattern (vec_info *vinfo,
    2093  1.1  mrg 			  stmt_vec_info last_stmt_info, tree *type_out)
    2094  1.1  mrg {
    2095  1.1  mrg   /* Check for a right shift.  */
    2096  1.1  mrg   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
    2097  1.1  mrg   if (!last_stmt
    2098  1.1  mrg       || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
    2099  1.1  mrg     return NULL;
    2100  1.1  mrg 
    2101  1.1  mrg   /* Check that the shift result is wider than the users of the
    2102  1.1  mrg      result need (i.e. that narrowing would be a natural choice).  */
    2103  1.1  mrg   tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
    2104  1.1  mrg   unsigned int target_precision
    2105  1.1  mrg     = vect_element_precision (last_stmt_info->min_output_precision);
    2106  1.1  mrg   if (!INTEGRAL_TYPE_P (lhs_type)
    2107  1.1  mrg       || target_precision >= TYPE_PRECISION (lhs_type))
    2108  1.1  mrg     return NULL;
    2109  1.1  mrg 
    2110  1.1  mrg   /* Look through any change in sign on the outer shift input.  */
    2111  1.1  mrg   vect_unpromoted_value unprom_rshift_input;
    2112  1.1  mrg   tree rshift_input = vect_look_through_possible_promotion
    2113  1.1  mrg     (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
    2114  1.1  mrg   if (!rshift_input
    2115  1.1  mrg       || TYPE_PRECISION (TREE_TYPE (rshift_input))
    2116  1.1  mrg 	   != TYPE_PRECISION (lhs_type))
    2117  1.1  mrg     return NULL;
    2118  1.1  mrg 
    2119  1.1  mrg   /* Get the definition of the shift input.  */
    2120  1.1  mrg   stmt_vec_info rshift_input_stmt_info
    2121  1.1  mrg     = vect_get_internal_def (vinfo, rshift_input);
    2122  1.1  mrg   if (!rshift_input_stmt_info)
    2123  1.1  mrg     return NULL;
    2124  1.1  mrg   gassign *rshift_input_stmt
    2125  1.1  mrg     = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
    2126  1.1  mrg   if (!rshift_input_stmt)
    2127  1.1  mrg     return NULL;
    2128  1.1  mrg 
    2129  1.1  mrg   stmt_vec_info mulh_stmt_info;
    2130  1.1  mrg   tree scale_term;
    2131  1.1  mrg   bool rounding_p = false;
    2132  1.1  mrg 
    2133  1.1  mrg   /* Check for the presence of the rounding term.  */
    2134  1.1  mrg   if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
    2135  1.1  mrg     {
    2136  1.1  mrg       /* Check that the outer shift was by 1.  */
    2137  1.1  mrg       if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
    2138  1.1  mrg 	return NULL;
    2139  1.1  mrg 
    2140  1.1  mrg       /* Check that the second operand of the PLUS_EXPR is 1.  */
    2141  1.1  mrg       if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
    2142  1.1  mrg 	return NULL;
    2143  1.1  mrg 
    2144  1.1  mrg       /* Look through any change in sign on the addition input.  */
    2145  1.1  mrg       vect_unpromoted_value unprom_plus_input;
    2146  1.1  mrg       tree plus_input = vect_look_through_possible_promotion
    2147  1.1  mrg 	(vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
    2148  1.1  mrg       if (!plus_input
    2149  1.1  mrg 	   || TYPE_PRECISION (TREE_TYPE (plus_input))
    2150  1.1  mrg 		!= TYPE_PRECISION (TREE_TYPE (rshift_input)))
    2151  1.1  mrg 	return NULL;
    2152  1.1  mrg 
    2153  1.1  mrg       /* Get the definition of the multiply-high-scale part.  */
    2154  1.1  mrg       stmt_vec_info plus_input_stmt_info
    2155  1.1  mrg 	= vect_get_internal_def (vinfo, plus_input);
    2156  1.1  mrg       if (!plus_input_stmt_info)
    2157  1.1  mrg 	return NULL;
    2158  1.1  mrg       gassign *plus_input_stmt
    2159  1.1  mrg 	= dyn_cast <gassign *> (plus_input_stmt_info->stmt);
    2160  1.1  mrg       if (!plus_input_stmt
    2161  1.1  mrg 	  || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
    2162  1.1  mrg 	return NULL;
    2163  1.1  mrg 
    2164  1.1  mrg       /* Look through any change in sign on the scaling input.  */
    2165  1.1  mrg       vect_unpromoted_value unprom_scale_input;
    2166  1.1  mrg       tree scale_input = vect_look_through_possible_promotion
    2167  1.1  mrg 	(vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
    2168  1.1  mrg       if (!scale_input
    2169  1.1  mrg 	  || TYPE_PRECISION (TREE_TYPE (scale_input))
    2170  1.1  mrg 	       != TYPE_PRECISION (TREE_TYPE (plus_input)))
    2171  1.1  mrg 	return NULL;
    2172  1.1  mrg 
    2173  1.1  mrg       /* Get the definition of the multiply-high part.  */
    2174  1.1  mrg       mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
    2175  1.1  mrg       if (!mulh_stmt_info)
    2176  1.1  mrg 	return NULL;
    2177  1.1  mrg 
    2178  1.1  mrg       /* Get the scaling term.  */
    2179  1.1  mrg       scale_term = gimple_assign_rhs2 (plus_input_stmt);
    2180  1.1  mrg       rounding_p = true;
    2181  1.1  mrg     }
    2182  1.1  mrg   else
    2183  1.1  mrg     {
    2184  1.1  mrg       mulh_stmt_info = rshift_input_stmt_info;
    2185  1.1  mrg       scale_term = gimple_assign_rhs2 (last_stmt);
    2186  1.1  mrg     }
    2187  1.1  mrg 
    2188  1.1  mrg   /* Check that the scaling factor is constant.  */
    2189  1.1  mrg   if (TREE_CODE (scale_term) != INTEGER_CST)
    2190  1.1  mrg     return NULL;
    2191  1.1  mrg 
    2192  1.1  mrg   /* Check whether the scaling input term can be seen as two widened
    2193  1.1  mrg      inputs multiplied together.  */
    2194  1.1  mrg   vect_unpromoted_value unprom_mult[2];
    2195  1.1  mrg   tree new_type;
    2196  1.1  mrg   unsigned int nops
    2197  1.1  mrg     = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
    2198  1.1  mrg 			    false, 2, unprom_mult, &new_type);
    2199  1.1  mrg   if (nops != 2)
    2200  1.1  mrg     return NULL;
    2201  1.1  mrg 
    2202  1.1  mrg   /* Adjust output precision.  */
    2203  1.1  mrg   if (TYPE_PRECISION (new_type) < target_precision)
    2204  1.1  mrg     new_type = build_nonstandard_integer_type
    2205  1.1  mrg       (target_precision, TYPE_UNSIGNED (new_type));
    2206  1.1  mrg 
    2207  1.1  mrg   unsigned mult_precision = TYPE_PRECISION (new_type);
    2208  1.1  mrg   internal_fn ifn;
    2209  1.1  mrg   /* Check that the scaling factor is expected.  Instead of
    2210  1.1  mrg      target_precision, we should use the one that we actually
    2211  1.1  mrg      use for internal function.  */
    2212  1.1  mrg   if (rounding_p)
    2213  1.1  mrg     {
    2214  1.1  mrg       /* Check pattern 2).  */
    2215  1.1  mrg       if (wi::to_widest (scale_term) + mult_precision + 2
    2216  1.1  mrg 	  != TYPE_PRECISION (lhs_type))
    2217  1.1  mrg 	return NULL;
    2218  1.1  mrg 
    2219  1.1  mrg       ifn = IFN_MULHRS;
    2220  1.1  mrg     }
    2221  1.1  mrg   else
    2222  1.1  mrg     {
    2223  1.1  mrg       /* Check for pattern 1).  */
    2224  1.1  mrg       if (wi::to_widest (scale_term) + mult_precision + 1
    2225  1.1  mrg 	  == TYPE_PRECISION (lhs_type))
    2226  1.1  mrg 	ifn = IFN_MULHS;
    2227  1.1  mrg       /* Check for pattern 3).  */
    2228  1.1  mrg       else if (wi::to_widest (scale_term) + mult_precision
    2229  1.1  mrg 	       == TYPE_PRECISION (lhs_type))
    2230  1.1  mrg 	ifn = IFN_MULH;
    2231  1.1  mrg       else
    2232  1.1  mrg 	return NULL;
    2233  1.1  mrg     }
    2234  1.1  mrg 
    2235  1.1  mrg   vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
    2236  1.1  mrg 
    2237  1.1  mrg   /* Check for target support.  */
    2238  1.1  mrg   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
    2239  1.1  mrg   if (!new_vectype
    2240  1.1  mrg       || !direct_internal_fn_supported_p
    2241  1.1  mrg 	    (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
    2242  1.1  mrg     return NULL;
    2243  1.1  mrg 
    2244  1.1  mrg   /* The IR requires a valid vector type for the cast result, even though
    2245  1.1  mrg      it's likely to be discarded.  */
    2246  1.1  mrg   *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
    2247  1.1  mrg   if (!*type_out)
    2248  1.1  mrg     return NULL;
    2249  1.1  mrg 
    2250  1.1  mrg   /* Generate the IFN_MULHS, IFN_MULHRS or IFN_MULH call, as selected
    2251  1.1  mrg      above.  */
    2251  1.1  mrg   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
    2252  1.1  mrg   tree new_ops[2];
    2253  1.1  mrg   vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
    2254  1.1  mrg 		       unprom_mult, new_vectype);
    2255  1.1  mrg   gcall *mulhrs_stmt
    2256  1.1  mrg     = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
    2257  1.1  mrg   gimple_call_set_lhs (mulhrs_stmt, new_var);
    2258  1.1  mrg   gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
    2259  1.1  mrg 
    2260  1.1  mrg   if (dump_enabled_p ())
    2261  1.1  mrg     dump_printf_loc (MSG_NOTE, vect_location,
    2262  1.1  mrg 		     "created pattern stmt: %G", mulhrs_stmt);
    2263  1.1  mrg 
    2264  1.1  mrg   return vect_convert_output (vinfo, last_stmt_info, lhs_type,
    2265  1.1  mrg 			      mulhrs_stmt, new_vectype);
    2266  1.1  mrg }
   2267  1.1  mrg 
   2268  1.1  mrg /* Recognize the patterns:
   2269  1.1  mrg 
   2270  1.1  mrg 	    ATYPE a;  // narrower than TYPE
   2271  1.1  mrg 	    BTYPE b;  // narrower than TYPE
   2272  1.1  mrg 	(1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
   2273  1.1  mrg      or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
   2274  1.1  mrg 
   2275  1.1  mrg    where only the bottom half of avg is used.  Try to transform them into:
   2276  1.1  mrg 
   2277  1.1  mrg 	(1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
   2278  1.1  mrg      or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
   2279  1.1  mrg 
   2280  1.1  mrg   followed by:
   2281  1.1  mrg 
   2282  1.1  mrg 	    TYPE avg = (TYPE) avg';
   2283  1.1  mrg 
   2284  1.1  mrg   where NTYPE is no wider than half of TYPE.  Since only the bottom half
   2285  1.1  mrg   of avg is used, all or part of the cast of avg' should become redundant.
   2286  1.1  mrg 
   2287  1.1  mrg   If there is no target support available, generate code to distribute rshift
   2288  1.1  mrg   over plus and add a carry.  */
   2289  1.1  mrg 
    2290  1.1  mrg static gimple *
    2291  1.1  mrg vect_recog_average_pattern (vec_info *vinfo,
    2292  1.1  mrg 			    stmt_vec_info last_stmt_info, tree *type_out)
    2293  1.1  mrg {
    2294  1.1  mrg   /* Check for a shift right by one bit.  */
    2295  1.1  mrg   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
    2296  1.1  mrg   if (!last_stmt
    2297  1.1  mrg       || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
    2298  1.1  mrg       || !integer_onep (gimple_assign_rhs2 (last_stmt)))
    2299  1.1  mrg     return NULL;
    2300  1.1  mrg 
    2301  1.1  mrg   /* Check that the shift result is wider than the users of the
    2302  1.1  mrg      result need (i.e. that narrowing would be a natural choice).  */
    2303  1.1  mrg   tree lhs = gimple_assign_lhs (last_stmt);
    2304  1.1  mrg   tree type = TREE_TYPE (lhs);
    2305  1.1  mrg   unsigned int target_precision
    2306  1.1  mrg     = vect_element_precision (last_stmt_info->min_output_precision);
    2307  1.1  mrg   if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
    2308  1.1  mrg     return NULL;
    2309  1.1  mrg 
    2310  1.1  mrg   /* Look through any change in sign on the shift input.  */
    2311  1.1  mrg   tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
    2312  1.1  mrg   vect_unpromoted_value unprom_plus;
    2313  1.1  mrg   rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
    2314  1.1  mrg 						     &unprom_plus);
    2315  1.1  mrg   if (!rshift_rhs
    2316  1.1  mrg       || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
    2317  1.1  mrg     return NULL;
    2318  1.1  mrg 
    2319  1.1  mrg   /* Get the definition of the shift input.  */
    2320  1.1  mrg   stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
    2321  1.1  mrg   if (!plus_stmt_info)
    2322  1.1  mrg     return NULL;
    2323  1.1  mrg 
    2324  1.1  mrg   /* Check whether the shift input can be seen as a tree of additions on
    2325  1.1  mrg      2 or 3 widened inputs.
    2326  1.1  mrg 
    2327  1.1  mrg      Note that the pattern should be a win even if the result of one or
    2328  1.1  mrg      more additions is reused elsewhere: if the pattern matches, we'd be
    2329  1.1  mrg      replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s.  */
    2330  1.1  mrg   internal_fn ifn = IFN_AVG_FLOOR;
    2331  1.1  mrg   vect_unpromoted_value unprom[3];
    2332  1.1  mrg   tree new_type;
    2333  1.1  mrg   unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
    2334  1.1  mrg 					    WIDEN_PLUS_EXPR, false, 3,
    2335  1.1  mrg 					    unprom, &new_type);
    2336  1.1  mrg   if (nops == 0)
    2337  1.1  mrg     return NULL;
    2338  1.1  mrg   if (nops == 3)
    2339  1.1  mrg     {
    2340  1.1  mrg       /* Check that one operand is 1.  */
    2341  1.1  mrg       unsigned int i;
    2342  1.1  mrg       for (i = 0; i < 3; ++i)
    2343  1.1  mrg 	if (integer_onep (unprom[i].op))
    2344  1.1  mrg 	  break;
    2345  1.1  mrg       if (i == 3)
    2346  1.1  mrg 	return NULL;
    2347  1.1  mrg       /* Throw away the 1 operand and keep the other two.  */
    2348  1.1  mrg       if (i < 2)
    2349  1.1  mrg 	unprom[i] = unprom[2];
    2350  1.1  mrg       ifn = IFN_AVG_CEIL;
    2351  1.1  mrg     }
    2352  1.1  mrg 
    2353  1.1  mrg   vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
    2354  1.1  mrg 
    2355  1.1  mrg   /* We know that:
    2356  1.1  mrg 
    2357  1.1  mrg      (a) the operation can be viewed as:
    2358  1.1  mrg 
    2359  1.1  mrg 	   TYPE widened0 = (TYPE) UNPROM[0];
    2360  1.1  mrg 	   TYPE widened1 = (TYPE) UNPROM[1];
    2361  1.1  mrg 	   TYPE tmp1 = widened0 + widened1 {+ 1};
    2362  1.1  mrg 	   TYPE tmp2 = tmp1 >> 1;   // LAST_STMT_INFO
    2363  1.1  mrg 
    2364  1.1  mrg      (b) the first two statements are equivalent to:
    2365  1.1  mrg 
    2366  1.1  mrg 	   TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
    2367  1.1  mrg 	   TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
    2368  1.1  mrg 
    2369  1.1  mrg      (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
    2370  1.1  mrg 	 where sensible;
    2371  1.1  mrg 
    2372  1.1  mrg      (d) all the operations can be performed correctly at twice the width of
    2373  1.1  mrg 	 NEW_TYPE, due to the nature of the average operation; and
    2374  1.1  mrg 
    2375  1.1  mrg      (e) users of the result of the right shift need only TARGET_PRECISION
    2376  1.1  mrg 	 bits, where TARGET_PRECISION is no more than half of TYPE's
    2377  1.1  mrg 	 precision.
    2378  1.1  mrg 
    2379  1.1  mrg      Under these circumstances, the only situation in which NEW_TYPE
    2380  1.1  mrg      could be narrower than TARGET_PRECISION is if widened0, widened1
    2381  1.1  mrg      and an addition result are all used more than once.  Thus we can
    2382  1.1  mrg      treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
    2383  1.1  mrg      as "free", whereas widening the result of the average instruction
    2384  1.1  mrg      from NEW_TYPE to TARGET_PRECISION would be a new operation.  It's
    2385  1.1  mrg      therefore better not to go narrower than TARGET_PRECISION.  */
    2386  1.1  mrg   if (TYPE_PRECISION (new_type) < target_precision)
    2387  1.1  mrg     new_type = build_nonstandard_integer_type (target_precision,
    2388  1.1  mrg 					       TYPE_UNSIGNED (new_type));
    2389  1.1  mrg 
    2390  1.1  mrg   /* Check for target support.  */
    2391  1.1  mrg   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
    2392  1.1  mrg   if (!new_vectype)
    2393  1.1  mrg     return NULL;
    2394  1.1  mrg 
                       /* Prefer a direct IFN_AVG_* instruction; otherwise fall back to
                          the open-coded shift/add/carry sequence below, which requires
                          an unsigned NEW_TYPE and the listed vector optabs.  */
    2395  1.1  mrg   bool fallback_p = false;
    2396  1.1  mrg 
    2397  1.1  mrg   if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
    2398  1.1  mrg     ;
    2399  1.1  mrg   else if (TYPE_UNSIGNED (new_type)
    2400  1.1  mrg 	   && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
    2401  1.1  mrg 	   && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
    2402  1.1  mrg 	   && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
    2403  1.1  mrg 	   && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
    2404  1.1  mrg     fallback_p = true;
    2405  1.1  mrg   else
    2406  1.1  mrg     return NULL;
    2407  1.1  mrg 
    2408  1.1  mrg   /* The IR requires a valid vector type for the cast result, even though
    2409  1.1  mrg      it's likely to be discarded.  */
    2410  1.1  mrg   *type_out = get_vectype_for_scalar_type (vinfo, type);
    2411  1.1  mrg   if (!*type_out)
    2412  1.1  mrg     return NULL;
    2413  1.1  mrg 
    2414  1.1  mrg   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
    2415  1.1  mrg   tree new_ops[2];
    2416  1.1  mrg   vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
    2417  1.1  mrg 		       unprom, new_vectype);
    2418  1.1  mrg 
    2419  1.1  mrg   if (fallback_p)
    2420  1.1  mrg     {
    2421  1.1  mrg       /* As a fallback, generate code for following sequence:
    2422  1.1  mrg 
    2423  1.1  mrg 	 shifted_op0 = new_ops[0] >> 1;
    2424  1.1  mrg 	 shifted_op1 = new_ops[1] >> 1;
    2425  1.1  mrg 	 sum_of_shifted = shifted_op0 + shifted_op1;
    2426  1.1  mrg 	 unmasked_carry = new_ops[0] and/or new_ops[1];
    2427  1.1  mrg 	 carry = unmasked_carry & 1;
    2428  1.1  mrg 	 new_var = sum_of_shifted + carry;
    2429  1.1  mrg       */
    2430  1.1  mrg 
    2431  1.1  mrg       tree one_cst = build_one_cst (new_type);
    2432  1.1  mrg       gassign *g;
    2433  1.1  mrg 
    2434  1.1  mrg       tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
    2435  1.1  mrg       g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
    2436  1.1  mrg       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    2437  1.1  mrg 
    2438  1.1  mrg       tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
    2439  1.1  mrg       g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
    2440  1.1  mrg       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    2441  1.1  mrg 
    2442  1.1  mrg       tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
    2443  1.1  mrg       g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
    2444  1.1  mrg 			       shifted_op0, shifted_op1);
    2445  1.1  mrg       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    2446  1.1  mrg 
    2447  1.1  mrg       tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
    2448  1.1  mrg       tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
    2449  1.1  mrg       g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
    2450  1.1  mrg       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    2451  1.1  mrg 
    2452  1.1  mrg       tree carry = vect_recog_temp_ssa_var (new_type, NULL);
    2453  1.1  mrg       g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
    2454  1.1  mrg       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    2455  1.1  mrg 
    2456  1.1  mrg       g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
    2457  1.1  mrg       return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
    2458  1.1  mrg     }
    2459  1.1  mrg 
    2460  1.1  mrg   /* Generate the IFN_AVG* call.  */
    2461  1.1  mrg   gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
    2462  1.1  mrg 						    new_ops[1]);
    2463  1.1  mrg   gimple_call_set_lhs (average_stmt, new_var);
    2464  1.1  mrg   gimple_set_location (average_stmt, gimple_location (last_stmt));
    2465  1.1  mrg 
    2466  1.1  mrg   if (dump_enabled_p ())
    2467  1.1  mrg     dump_printf_loc (MSG_NOTE, vect_location,
    2468  1.1  mrg 		     "created pattern stmt: %G", average_stmt);
    2469  1.1  mrg 
    2470  1.1  mrg   return vect_convert_output (vinfo, last_stmt_info,
    2471  1.1  mrg 			      type, average_stmt, new_vectype);
    2472  1.1  mrg }
   2473  1.1  mrg 
   2474  1.1  mrg /* Recognize cases in which the input to a cast is wider than its
   2475  1.1  mrg    output, and the input is fed by a widening operation.  Fold this
   2476  1.1  mrg    by removing the unnecessary intermediate widening.  E.g.:
   2477  1.1  mrg 
   2478  1.1  mrg      unsigned char a;
   2479  1.1  mrg      unsigned int b = (unsigned int) a;
   2480  1.1  mrg      unsigned short c = (unsigned short) b;
   2481  1.1  mrg 
   2482  1.1  mrg    -->
   2483  1.1  mrg 
   2484  1.1  mrg      unsigned short c = (unsigned short) a;
   2485  1.1  mrg 
   2486  1.1  mrg    Although this is rare in input IR, it is an expected side-effect
   2487  1.1  mrg    of the over-widening pattern above.
   2488  1.1  mrg 
   2489  1.1  mrg    This is beneficial also for integer-to-float conversions, if the
   2490  1.1  mrg    widened integer has more bits than the float, and if the unwidened
   2491  1.1  mrg    input doesn't.  */
   2492  1.1  mrg 
   2493  1.1  mrg static gimple *
   2494  1.1  mrg vect_recog_cast_forwprop_pattern (vec_info *vinfo,
   2495  1.1  mrg 				  stmt_vec_info last_stmt_info, tree *type_out)
   2496  1.1  mrg {
   2497  1.1  mrg   /* Check for a cast, including an integer-to-float conversion.  */
   2498  1.1  mrg   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
   2499  1.1  mrg   if (!last_stmt)
   2500  1.1  mrg     return NULL;
   2501  1.1  mrg   tree_code code = gimple_assign_rhs_code (last_stmt);
   2502  1.1  mrg   if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
   2503  1.1  mrg     return NULL;
   2504  1.1  mrg 
   2505  1.1  mrg   /* Make sure that the rhs is a scalar with a natural bitsize.  */
   2506  1.1  mrg   tree lhs = gimple_assign_lhs (last_stmt);
   2507  1.1  mrg   if (!lhs)
   2508  1.1  mrg     return NULL;
   2509  1.1  mrg   tree lhs_type = TREE_TYPE (lhs);
   2510  1.1  mrg   scalar_mode lhs_mode;
   2511  1.1  mrg   if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
   2512  1.1  mrg       || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
   2513  1.1  mrg     return NULL;
   2514  1.1  mrg 
   2515  1.1  mrg   /* Check for a narrowing operation (from a vector point of view).  */
   2516  1.1  mrg   tree rhs = gimple_assign_rhs1 (last_stmt);
   2517  1.1  mrg   tree rhs_type = TREE_TYPE (rhs);
   2518  1.1  mrg   if (!INTEGRAL_TYPE_P (rhs_type)
   2519  1.1  mrg       || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
   2520  1.1  mrg       || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
   2521  1.1  mrg     return NULL;
   2522  1.1  mrg 
   2523  1.1  mrg   /* Try to find an unpromoted input.  */
   2524  1.1  mrg   vect_unpromoted_value unprom;
   2525  1.1  mrg   if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
   2526  1.1  mrg       || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
   2527  1.1  mrg     return NULL;
   2528  1.1  mrg 
   2529  1.1  mrg   /* If the bits above RHS_TYPE matter, make sure that they're the
   2530  1.1  mrg      same when extending from UNPROM as they are when extending from RHS.  */
   2531  1.1  mrg   if (!INTEGRAL_TYPE_P (lhs_type)
   2532  1.1  mrg       && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
   2533  1.1  mrg     return NULL;
   2534  1.1  mrg 
   2535  1.1  mrg   /* We can get the same result by casting UNPROM directly, to avoid
   2536  1.1  mrg      the unnecessary widening and narrowing.  */
   2537  1.1  mrg   vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
   2538  1.1  mrg 
   2539  1.1  mrg   *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
   2540  1.1  mrg   if (!*type_out)
   2541  1.1  mrg     return NULL;
   2542  1.1  mrg 
   2543  1.1  mrg   tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
   2544  1.1  mrg   gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
   2545  1.1  mrg   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
   2546  1.1  mrg 
   2547  1.1  mrg   return pattern_stmt;
   2548  1.1  mrg }
   2549  1.1  mrg 
   2550  1.1  mrg /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
   2551  1.1  mrg    to WIDEN_LSHIFT_EXPR.  See vect_recog_widen_op_pattern for details.  */
   2552  1.1  mrg 
   2553  1.1  mrg static gimple *
   2554  1.1  mrg vect_recog_widen_shift_pattern (vec_info *vinfo,
   2555  1.1  mrg 				stmt_vec_info last_stmt_info, tree *type_out)
   2556  1.1  mrg {
   2557  1.1  mrg   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
   2558  1.1  mrg 				      LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
   2559  1.1  mrg 				      "vect_recog_widen_shift_pattern");
   2560  1.1  mrg }
   2561  1.1  mrg 
/* Detect a rotate pattern that wouldn't otherwise be vectorized:
   2563  1.1  mrg 
   2564  1.1  mrg    type a_t, b_t, c_t;
   2565  1.1  mrg 
   2566  1.1  mrg    S0 a_t = b_t r<< c_t;
   2567  1.1  mrg 
   2568  1.1  mrg   Input/Output:
   2569  1.1  mrg 
   2570  1.1  mrg   * STMT_VINFO: The stmt from which the pattern search begins,
   2571  1.1  mrg     i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
   2572  1.1  mrg     with a sequence:
   2573  1.1  mrg 
   2574  1.1  mrg    S1 d_t = -c_t;
   2575  1.1  mrg    S2 e_t = d_t & (B - 1);
   2576  1.1  mrg    S3 f_t = b_t << c_t;
   2577  1.1  mrg    S4 g_t = b_t >> e_t;
   2578  1.1  mrg    S0 a_t = f_t | g_t;
   2579  1.1  mrg 
   2580  1.1  mrg     where B is element bitsize of type.
   2581  1.1  mrg 
   2582  1.1  mrg   Output:
   2583  1.1  mrg 
   2584  1.1  mrg   * TYPE_OUT: The type of the output of this pattern.
   2585  1.1  mrg 
   2586  1.1  mrg   * Return value: A new stmt that will be used to replace the rotate
   2587  1.1  mrg     S0 stmt.  */
   2588  1.1  mrg 
static gimple *
vect_recog_rotate_pattern (vec_info *vinfo,
			   stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
  gimple *pattern_stmt, *def_stmt;
  enum tree_code rhs_code;
  enum vect_def_type dt;
  optab optab1, optab2;
  edge ext_def = NULL;
  bool bswap16_p = false;

  /* Accept either an explicit rotate assignment or a __builtin_bswap16
     call (handled below as an equivalent rotate).  */
  if (is_gimple_assign (last_stmt))
    {
      rhs_code = gimple_assign_rhs_code (last_stmt);
      switch (rhs_code)
	{
	case LROTATE_EXPR:
	case RROTATE_EXPR:
	  break;
	default:
	  return NULL;
	}

      lhs = gimple_assign_lhs (last_stmt);
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      type = TREE_TYPE (oprnd0);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
    }
  else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
    {
      /* __builtin_bswap16 (x) is another form of x r>> 8.
	 The vectorizer has bswap support, but only if the argument isn't
	 promoted.  */
      lhs = gimple_call_lhs (last_stmt);
      oprnd0 = gimple_call_arg (last_stmt, 0);
      type = TREE_TYPE (oprnd0);
      /* Require an unsigned 16-bit result whose argument has been
	 promoted to a wider integer SSA name, with 8-bit units.  */
      if (!lhs
	  || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
	  || TYPE_PRECISION (type) <= 16
	  || TREE_CODE (oprnd0) != SSA_NAME
	  || BITS_PER_UNIT != 8
	  || !TYPE_UNSIGNED (TREE_TYPE (lhs)))
	return NULL;

      stmt_vec_info def_stmt_info;
      if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
	return NULL;

      if (dt != vect_internal_def)
	return NULL;

      /* Look through the promoting cast to find the original 16-bit
	 value, if there is one.  */
      if (gimple_assign_cast_p (def_stmt))
	{
	  def = gimple_assign_rhs1 (def_stmt);
	  if (INTEGRAL_TYPE_P (TREE_TYPE (def))
	      && TYPE_PRECISION (TREE_TYPE (def)) == 16)
	    oprnd0 = def;
	}

      type = TREE_TYPE (lhs);
      vectype = get_vectype_for_scalar_type (vinfo, type);
      if (vectype == NULL_TREE)
	return NULL;

      /* Prefer keeping the bswap as a bswap: if the target can do the
	 byte-swapping permutation on a same-sized char vector, emit a
	 direct __builtin_bswap16 call on the unpromoted argument.  */
      if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
	{
	  /* The encoding uses one stepped pattern for each byte in the
	     16-bit word.  */
	  vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
	  for (unsigned i = 0; i < 3; ++i)
	    for (unsigned j = 0; j < 2; ++j)
	      elts.quick_push ((i + 1) * 2 - j - 1);

	  vec_perm_indices indices (elts, 1,
				    TYPE_VECTOR_SUBPARTS (char_vectype));
	  if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
	    {
	      /* vectorizable_bswap can handle the __builtin_bswap16 if we
		 undo the argument promotion.  */
	      if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
		{
		  def = vect_recog_temp_ssa_var (type, NULL);
		  def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
		  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
		  oprnd0 = def;
		}

	      /* Pattern detected.  */
	      vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);

	      *type_out = vectype;

	      /* Pattern supported.  Create a stmt to be used to replace the
		 pattern, with the unpromoted argument.  */
	      var = vect_recog_temp_ssa_var (type, NULL);
	      pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
						1, oprnd0);
	      gimple_call_set_lhs (pattern_stmt, var);
	      gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
				      gimple_call_fntype (last_stmt));
	      return pattern_stmt;
	    }
	}

      /* Otherwise treat the bswap16 as a 16-bit rotate left by 8 and
	 fall through to the generic rotate lowering.  */
      oprnd1 = build_int_cst (integer_type_node, 8);
      rhs_code = LROTATE_EXPR;
      bswap16_p = true;
    }
  else
    return NULL;

  /* The rotated value must be an unsigned integral SSA name with the
     same precision as the result.  */
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
      || !INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type))
    return NULL;

  stmt_vec_info def_stmt_info;
  if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
    return NULL;

  if (dt != vect_internal_def
      && dt != vect_constant_def
      && dt != vect_external_def)
    return NULL;

  vectype = get_vectype_for_scalar_type (vinfo, type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If vector/vector or vector/scalar rotate is supported by the target,
     don't do anything here.  */
  optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
  if (optab1
      && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    {
     use_rotate:
      if (bswap16_p)
	{
	  /* For bswap16 we still need to undo the argument promotion
	     before emitting the rotate.  */
	  if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
	    {
	      def = vect_recog_temp_ssa_var (type, NULL);
	      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
	      oprnd0 = def;
	    }

	  /* Pattern detected.  */
	  vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);

	  *type_out = vectype;

	  /* Pattern supported.  Create a stmt to be used to replace the
	     pattern.  */
	  var = vect_recog_temp_ssa_var (type, NULL);
	  pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
					      oprnd1);
	  return pattern_stmt;
	}
      return NULL;
    }

  /* A vector/scalar rotate also suffices when the shift amount is not
     an internal def (or for BB vectorization).  */
  if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
    {
      optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
      if (optab2
	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	goto use_rotate;
    }

  /* If vector/vector or vector/scalar shifts aren't supported by the target,
     don't do anything here either.  */
  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
  if (!optab1
      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
      || !optab2
      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
	return NULL;
      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
      if (!optab1
	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
	  || !optab2
	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
	return NULL;
    }

  *type_out = vectype;

  if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
    {
      def = vect_recog_temp_ssa_var (type, NULL);
      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
      oprnd0 = def;
    }

  if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
    ext_def = vect_get_external_def_edge (vinfo, oprnd1);

  /* DEF is the shift amount in TYPE's mode: either OPRND1 itself, the
     input of a mode-preserving cast that defines it, or a fresh
     conversion appended to the pattern sequence.  */
  def = NULL_TREE;
  scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
  if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
    def = oprnd1;
  else if (def_stmt && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (type))
	def = rhs1;
    }

  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (type, NULL);
      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    }
  stype = TREE_TYPE (def);

  if (TREE_CODE (def) == INTEGER_CST)
    {
      /* For a constant amount the complementary shift count is also a
	 constant; reject zero or out-of-range rotates.  */
      if (!tree_fits_uhwi_p (def)
	  || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
	  || integer_zerop (def))
	return NULL;
      def2 = build_int_cst (stype,
			    GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
    }
  else
    {
      /* Otherwise compute DEF2 = (-DEF) & (prec - 1).  When the amount
	 is an external def with a known edge, insert the computation on
	 that edge instead of the pattern sequence.  */
      tree vecstype = get_vectype_for_scalar_type (vinfo, stype);

      if (vecstype == NULL_TREE)
	return NULL;
      def2 = vect_recog_temp_ssa_var (stype, NULL);
      def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);

      def2 = vect_recog_temp_ssa_var (stype, NULL);
      tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
      def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
				      gimple_assign_lhs (def_stmt), mask);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
    }

  /* Lower the rotate to two shifts: by DEF in the rotate's direction
     and by DEF2 in the opposite direction.  */
  var1 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
					? LSHIFT_EXPR : RSHIFT_EXPR,
				  oprnd0, def);
  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

  var2 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
					? RSHIFT_EXPR : LSHIFT_EXPR,
				  oprnd0, def2);
  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

  /* Pattern detected.  */
  vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);

  /* Pattern supported.  Create a stmt to be used to replace the pattern:
     the OR of the two shifted halves.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);

  return pattern_stmt;
}
   2876  1.1  mrg 
   2877  1.1  mrg /* Detect a vector by vector shift pattern that wouldn't be otherwise
   2878  1.1  mrg    vectorized:
   2879  1.1  mrg 
   2880  1.1  mrg    type a_t;
   2881  1.1  mrg    TYPE b_T, res_T;
   2882  1.1  mrg 
   2883  1.1  mrg    S1 a_t = ;
   2884  1.1  mrg    S2 b_T = ;
   2885  1.1  mrg    S3 res_T = b_T op a_t;
   2886  1.1  mrg 
   2887  1.1  mrg   where type 'TYPE' is a type with different size than 'type',
   2888  1.1  mrg   and op is <<, >> or rotate.
   2889  1.1  mrg 
   2890  1.1  mrg   Also detect cases:
   2891  1.1  mrg 
   2892  1.1  mrg    type a_t;
   2893  1.1  mrg    TYPE b_T, c_T, res_T;
   2894  1.1  mrg 
   2895  1.1  mrg    S0 c_T = ;
   2896  1.1  mrg    S1 a_t = (type) c_T;
   2897  1.1  mrg    S2 b_T = ;
   2898  1.1  mrg    S3 res_T = b_T op a_t;
   2899  1.1  mrg 
   2900  1.1  mrg   Input/Output:
   2901  1.1  mrg 
   2902  1.1  mrg   * STMT_VINFO: The stmt from which the pattern search begins,
   2903  1.1  mrg     i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
   2904  1.1  mrg     with a shift/rotate which has same type on both operands, in the
   2905  1.1  mrg     second case just b_T op c_T, in the first case with added cast
   2906  1.1  mrg     from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
   2907  1.1  mrg 
   2908  1.1  mrg   Output:
   2909  1.1  mrg 
   2910  1.1  mrg   * TYPE_OUT: The type of the output of this pattern.
   2911  1.1  mrg 
   2912  1.1  mrg   * Return value: A new stmt that will be used to replace the shift/rotate
   2913  1.1  mrg     S3 stmt.  */
   2914  1.1  mrg 
static gimple *
vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
					stmt_vec_info stmt_vinfo,
					tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree oprnd0, oprnd1, lhs, var;
  gimple *pattern_stmt;
  enum tree_code rhs_code;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  /* Only shifts and rotates are candidates.  */
  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  /* Both operands must be SSA names, the shift amount must have a
     different mode than the shifted operand (otherwise there is nothing
     to fix up), its type must have mode precision, and the result must
     have the same precision as the shifted operand.  */
  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (lhs))
	 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    return NULL;

  /* The shift amount must be defined inside the vectorized region.  */
  stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
  if (!def_vinfo)
    return NULL;

  *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
  if (*type_out == NULL_TREE)
    return NULL;

  /* If the shift amount is a cast from a value whose mode and precision
     already match OPRND0's, reuse that value directly (the S1 a_t =
     (type) c_T case).  */
  tree def = NULL_TREE;
  gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
  if (def_stmt && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (TREE_TYPE (oprnd0)))
	{
	  if (TYPE_PRECISION (TREE_TYPE (oprnd1))
	      >= TYPE_PRECISION (TREE_TYPE (rhs1)))
	    def = rhs1;
	  else
	    {
	      /* The cast truncated RHS1, so only OPRND1's precision worth
		 of low bits is meaningful: mask RHS1 down to it.  */
	      tree mask
		= build_low_bits_mask (TREE_TYPE (rhs1),
				       TYPE_PRECISION (TREE_TYPE (oprnd1)));
	      def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
	      def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
	      tree vecstype = get_vectype_for_scalar_type (vinfo,
							   TREE_TYPE (rhs1));
	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
	    }
	}
    }

  /* Otherwise convert the shift amount to OPRND0's type explicitly.  */
  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    }

  /* Pattern detected.  */
  vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);

  /* Pattern supported.  Create a stmt to be used to replace the pattern:
     the same shift/rotate with both operands in the same type.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);

  return pattern_stmt;
}
   3001  1.1  mrg 
   3002  1.1  mrg /* Return true iff the target has a vector optab implementing the operation
   3003  1.1  mrg    CODE on type VECTYPE.  */
   3004  1.1  mrg 
   3005  1.1  mrg static bool
   3006  1.1  mrg target_has_vecop_for_code (tree_code code, tree vectype)
   3007  1.1  mrg {
   3008  1.1  mrg   optab voptab = optab_for_tree_code (code, vectype, optab_vector);
   3009  1.1  mrg   return voptab
   3010  1.1  mrg 	 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
   3011  1.1  mrg }
   3012  1.1  mrg 
   3013  1.1  mrg /* Verify that the target has optabs of VECTYPE to perform all the steps
   3014  1.1  mrg    needed by the multiplication-by-immediate synthesis algorithm described by
   3015  1.1  mrg    ALG and VAR.  If SYNTH_SHIFT_P is true ensure that vector addition is
   3016  1.1  mrg    present.  Return true iff the target supports all the steps.  */
   3017  1.1  mrg 
   3018  1.1  mrg static bool
   3019  1.1  mrg target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
   3020  1.1  mrg 				 tree vectype, bool synth_shift_p)
   3021  1.1  mrg {
   3022  1.1  mrg   if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
   3023  1.1  mrg     return false;
   3024  1.1  mrg 
   3025  1.1  mrg   bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
   3026  1.1  mrg   bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
   3027  1.1  mrg 
   3028  1.1  mrg   if (var == negate_variant
   3029  1.1  mrg       && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
   3030  1.1  mrg     return false;
   3031  1.1  mrg 
   3032  1.1  mrg   /* If we must synthesize shifts with additions make sure that vector
   3033  1.1  mrg      addition is available.  */
   3034  1.1  mrg   if ((var == add_variant || synth_shift_p) && !supports_vplus)
   3035  1.1  mrg     return false;
   3036  1.1  mrg 
   3037  1.1  mrg   for (int i = 1; i < alg->ops; i++)
   3038  1.1  mrg     {
   3039  1.1  mrg       switch (alg->op[i])
   3040  1.1  mrg 	{
   3041  1.1  mrg 	case alg_shift:
   3042  1.1  mrg 	  break;
   3043  1.1  mrg 	case alg_add_t_m2:
   3044  1.1  mrg 	case alg_add_t2_m:
   3045  1.1  mrg 	case alg_add_factor:
   3046  1.1  mrg 	  if (!supports_vplus)
   3047  1.1  mrg 	    return false;
   3048  1.1  mrg 	  break;
   3049  1.1  mrg 	case alg_sub_t_m2:
   3050  1.1  mrg 	case alg_sub_t2_m:
   3051  1.1  mrg 	case alg_sub_factor:
   3052  1.1  mrg 	  if (!supports_vminus)
   3053  1.1  mrg 	    return false;
   3054  1.1  mrg 	  break;
   3055  1.1  mrg 	case alg_unknown:
   3056  1.1  mrg 	case alg_m:
   3057  1.1  mrg 	case alg_zero:
   3058  1.1  mrg 	case alg_impossible:
   3059  1.1  mrg 	  return false;
   3060  1.1  mrg 	default:
   3061  1.1  mrg 	  gcc_unreachable ();
   3062  1.1  mrg 	}
   3063  1.1  mrg     }
   3064  1.1  mrg 
   3065  1.1  mrg   return true;
   3066  1.1  mrg }
   3067  1.1  mrg 
   3068  1.1  mrg /* Synthesize a left shift of OP by AMNT bits using a series of additions and
   3069  1.1  mrg    putting the final result in DEST.  Append all statements but the last into
   3070  1.1  mrg    VINFO.  Return the last statement.  */
   3071  1.1  mrg 
   3072  1.1  mrg static gimple *
   3073  1.1  mrg synth_lshift_by_additions (vec_info *vinfo,
   3074  1.1  mrg 			   tree dest, tree op, HOST_WIDE_INT amnt,
   3075  1.1  mrg 			   stmt_vec_info stmt_info)
   3076  1.1  mrg {
   3077  1.1  mrg   HOST_WIDE_INT i;
   3078  1.1  mrg   tree itype = TREE_TYPE (op);
   3079  1.1  mrg   tree prev_res = op;
   3080  1.1  mrg   gcc_assert (amnt >= 0);
   3081  1.1  mrg   for (i = 0; i < amnt; i++)
   3082  1.1  mrg     {
   3083  1.1  mrg       tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
   3084  1.1  mrg 		      : dest;
   3085  1.1  mrg       gimple *stmt
   3086  1.1  mrg         = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
   3087  1.1  mrg       prev_res = tmp_var;
   3088  1.1  mrg       if (i < amnt - 1)
   3089  1.1  mrg 	append_pattern_def_seq (vinfo, stmt_info, stmt);
   3090  1.1  mrg       else
   3091  1.1  mrg 	return stmt;
   3092  1.1  mrg     }
   3093  1.1  mrg   gcc_unreachable ();
   3094  1.1  mrg   return NULL;
   3095  1.1  mrg }
   3096  1.1  mrg 
   3097  1.1  mrg /* Helper for vect_synth_mult_by_constant.  Apply a binary operation
   3098  1.1  mrg    CODE to operands OP1 and OP2, creating a new temporary SSA var in
   3099  1.1  mrg    the process if necessary.  Append the resulting assignment statements
   3100  1.1  mrg    to the sequence in STMT_VINFO.  Return the SSA variable that holds the
   3101  1.1  mrg    result of the binary operation.  If SYNTH_SHIFT_P is true synthesize
   3102  1.1  mrg    left shifts using additions.  */
   3103  1.1  mrg 
   3104  1.1  mrg static tree
   3105  1.1  mrg apply_binop_and_append_stmt (vec_info *vinfo,
   3106  1.1  mrg 			     tree_code code, tree op1, tree op2,
   3107  1.1  mrg 			     stmt_vec_info stmt_vinfo, bool synth_shift_p)
   3108  1.1  mrg {
   3109  1.1  mrg   if (integer_zerop (op2)
   3110  1.1  mrg       && (code == LSHIFT_EXPR
   3111  1.1  mrg 	  || code == PLUS_EXPR))
   3112  1.1  mrg     {
   3113  1.1  mrg       gcc_assert (TREE_CODE (op1) == SSA_NAME);
   3114  1.1  mrg       return op1;
   3115  1.1  mrg     }
   3116  1.1  mrg 
   3117  1.1  mrg   gimple *stmt;
   3118  1.1  mrg   tree itype = TREE_TYPE (op1);
   3119  1.1  mrg   tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
   3120  1.1  mrg 
   3121  1.1  mrg   if (code == LSHIFT_EXPR
   3122  1.1  mrg       && synth_shift_p)
   3123  1.1  mrg     {
   3124  1.1  mrg       stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
   3125  1.1  mrg 					TREE_INT_CST_LOW (op2), stmt_vinfo);
   3126  1.1  mrg       append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
   3127  1.1  mrg       return tmp_var;
   3128  1.1  mrg     }
   3129  1.1  mrg 
   3130  1.1  mrg   stmt = gimple_build_assign (tmp_var, code, op1, op2);
   3131  1.1  mrg   append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
   3132  1.1  mrg   return tmp_var;
   3133  1.1  mrg }
   3134  1.1  mrg 
   3135  1.1  mrg /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
   3136  1.1  mrg    and simple arithmetic operations to be vectorized.  Record the statements
   3137  1.1  mrg    produced in STMT_VINFO and return the last statement in the sequence or
   3138  1.1  mrg    NULL if it's not possible to synthesize such a multiplication.
   3139  1.1  mrg    This function mirrors the behavior of expand_mult_const in expmed.cc but
   3140  1.1  mrg    works on tree-ssa form.  */
   3141  1.1  mrg 
   3142  1.1  mrg static gimple *
   3143  1.1  mrg vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
   3144  1.1  mrg 			     stmt_vec_info stmt_vinfo)
   3145  1.1  mrg {
   3146  1.1  mrg   tree itype = TREE_TYPE (op);
   3147  1.1  mrg   machine_mode mode = TYPE_MODE (itype);
   3148  1.1  mrg   struct algorithm alg;
   3149  1.1  mrg   mult_variant variant;
   3150  1.1  mrg   if (!tree_fits_shwi_p (val))
   3151  1.1  mrg     return NULL;
   3152  1.1  mrg 
   3153  1.1  mrg   /* Multiplication synthesis by shifts, adds and subs can introduce
   3154  1.1  mrg      signed overflow where the original operation didn't.  Perform the
   3155  1.1  mrg      operations on an unsigned type and cast back to avoid this.
   3156  1.1  mrg      In the future we may want to relax this for synthesis algorithms
   3157  1.1  mrg      that we can prove do not cause unexpected overflow.  */
   3158  1.1  mrg   bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
   3159  1.1  mrg 
   3160  1.1  mrg   tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
   3161  1.1  mrg   tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
   3162  1.1  mrg   if (!vectype)
   3163  1.1  mrg     return NULL;
   3164  1.1  mrg 
   3165  1.1  mrg   /* Targets that don't support vector shifts but support vector additions
   3166  1.1  mrg      can synthesize shifts that way.  */
   3167  1.1  mrg   bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
   3168  1.1  mrg 
   3169  1.1  mrg   HOST_WIDE_INT hwval = tree_to_shwi (val);
   3170  1.1  mrg   /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
   3171  1.1  mrg      The vectorizer's benefit analysis will decide whether it's beneficial
   3172  1.1  mrg      to do this.  */
   3173  1.1  mrg   bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
   3174  1.1  mrg 				       ? TYPE_MODE (vectype) : mode,
   3175  1.1  mrg 				       hwval, &alg, &variant, MAX_COST);
   3176  1.1  mrg   if (!possible)
   3177  1.1  mrg     return NULL;
   3178  1.1  mrg 
   3179  1.1  mrg   if (!target_supports_mult_synth_alg (&alg, variant, vectype, synth_shift_p))
   3180  1.1  mrg     return NULL;
   3181  1.1  mrg 
   3182  1.1  mrg   tree accumulator;
   3183  1.1  mrg 
   3184  1.1  mrg   /* Clear out the sequence of statements so we can populate it below.  */
   3185  1.1  mrg   gimple *stmt = NULL;
   3186  1.1  mrg 
   3187  1.1  mrg   if (cast_to_unsigned_p)
   3188  1.1  mrg     {
   3189  1.1  mrg       tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
   3190  1.1  mrg       stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
   3191  1.1  mrg       append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
   3192  1.1  mrg       op = tmp_op;
   3193  1.1  mrg     }
   3194  1.1  mrg 
   3195  1.1  mrg   if (alg.op[0] == alg_zero)
   3196  1.1  mrg     accumulator = build_int_cst (multtype, 0);
   3197  1.1  mrg   else
   3198  1.1  mrg     accumulator = op;
   3199  1.1  mrg 
   3200  1.1  mrg   bool needs_fixup = (variant == negate_variant)
   3201  1.1  mrg 		      || (variant == add_variant);
   3202  1.1  mrg 
   3203  1.1  mrg   for (int i = 1; i < alg.ops; i++)
   3204  1.1  mrg     {
   3205  1.1  mrg       tree shft_log = build_int_cst (multtype, alg.log[i]);
   3206  1.1  mrg       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
   3207  1.1  mrg       tree tmp_var = NULL_TREE;
   3208  1.1  mrg 
   3209  1.1  mrg       switch (alg.op[i])
   3210  1.1  mrg 	{
   3211  1.1  mrg 	case alg_shift:
   3212  1.1  mrg 	  if (synth_shift_p)
   3213  1.1  mrg 	    stmt
   3214  1.1  mrg 	      = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
   3215  1.1  mrg 					   alg.log[i], stmt_vinfo);
   3216  1.1  mrg 	  else
   3217  1.1  mrg 	    stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
   3218  1.1  mrg 					 shft_log);
   3219  1.1  mrg 	  break;
   3220  1.1  mrg 	case alg_add_t_m2:
   3221  1.1  mrg 	  tmp_var
   3222  1.1  mrg 	    = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
   3223  1.1  mrg 					   stmt_vinfo, synth_shift_p);
   3224  1.1  mrg 	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
   3225  1.1  mrg 				       tmp_var);
   3226  1.1  mrg 	  break;
   3227  1.1  mrg 	case alg_sub_t_m2:
   3228  1.1  mrg 	  tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
   3229  1.1  mrg 						 shft_log, stmt_vinfo,
   3230  1.1  mrg 						 synth_shift_p);
   3231  1.1  mrg 	  /* In some algorithms the first step involves zeroing the
   3232  1.1  mrg 	     accumulator.  If subtracting from such an accumulator
   3233  1.1  mrg 	     just emit the negation directly.  */
   3234  1.1  mrg 	  if (integer_zerop (accumulator))
   3235  1.1  mrg 	    stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
   3236  1.1  mrg 	  else
   3237  1.1  mrg 	    stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
   3238  1.1  mrg 					tmp_var);
   3239  1.1  mrg 	  break;
   3240  1.1  mrg 	case alg_add_t2_m:
   3241  1.1  mrg 	  tmp_var
   3242  1.1  mrg 	    = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
   3243  1.1  mrg 					   shft_log, stmt_vinfo, synth_shift_p);
   3244  1.1  mrg 	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
   3245  1.1  mrg 	  break;
   3246  1.1  mrg 	case alg_sub_t2_m:
   3247  1.1  mrg 	  tmp_var
   3248  1.1  mrg 	    = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
   3249  1.1  mrg 					   shft_log, stmt_vinfo, synth_shift_p);
   3250  1.1  mrg 	  stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
   3251  1.1  mrg 	  break;
   3252  1.1  mrg 	case alg_add_factor:
   3253  1.1  mrg 	  tmp_var
   3254  1.1  mrg 	    = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
   3255  1.1  mrg 					   shft_log, stmt_vinfo, synth_shift_p);
   3256  1.1  mrg 	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
   3257  1.1  mrg 				       tmp_var);
   3258  1.1  mrg 	  break;
   3259  1.1  mrg 	case alg_sub_factor:
   3260  1.1  mrg 	  tmp_var
   3261  1.1  mrg 	    = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
   3262  1.1  mrg 					   shft_log, stmt_vinfo, synth_shift_p);
   3263  1.1  mrg 	  stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
   3264  1.1  mrg 				      accumulator);
   3265  1.1  mrg 	  break;
   3266  1.1  mrg 	default:
   3267  1.1  mrg 	  gcc_unreachable ();
   3268  1.1  mrg 	}
   3269  1.1  mrg       /* We don't want to append the last stmt in the sequence to stmt_vinfo
   3270  1.1  mrg 	 but rather return it directly.  */
   3271  1.1  mrg 
   3272  1.1  mrg       if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
   3273  1.1  mrg 	append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
   3274  1.1  mrg       accumulator = accum_tmp;
   3275  1.1  mrg     }
   3276  1.1  mrg   if (variant == negate_variant)
   3277  1.1  mrg     {
   3278  1.1  mrg       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
   3279  1.1  mrg       stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
   3280  1.1  mrg       accumulator = accum_tmp;
   3281  1.1  mrg       if (cast_to_unsigned_p)
   3282  1.1  mrg 	append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
   3283  1.1  mrg     }
   3284  1.1  mrg   else if (variant == add_variant)
   3285  1.1  mrg     {
   3286  1.1  mrg       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
   3287  1.1  mrg       stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
   3288  1.1  mrg       accumulator = accum_tmp;
   3289  1.1  mrg       if (cast_to_unsigned_p)
   3290  1.1  mrg 	append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
   3291  1.1  mrg     }
   3292  1.1  mrg   /* Move back to a signed if needed.  */
   3293  1.1  mrg   if (cast_to_unsigned_p)
   3294  1.1  mrg     {
   3295  1.1  mrg       tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
   3296  1.1  mrg       stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
   3297  1.1  mrg     }
   3298  1.1  mrg 
   3299  1.1  mrg   return stmt;
   3300  1.1  mrg }
   3301  1.1  mrg 
   3302  1.1  mrg /* Detect multiplication by constant and convert it into a sequence of
   3303  1.1  mrg    shifts and additions, subtractions, negations.  We reuse the
   3304  1.1  mrg    choose_mult_variant algorithms from expmed.cc
   3305  1.1  mrg 
   3306  1.1  mrg    Input/Output:
   3307  1.1  mrg 
   3308  1.1  mrg    STMT_VINFO: The stmt from which the pattern search begins,
   3309  1.1  mrg    i.e. the mult stmt.
   3310  1.1  mrg 
   3311  1.1  mrg  Output:
   3312  1.1  mrg 
   3313  1.1  mrg   * TYPE_OUT: The type of the output of this pattern.
   3314  1.1  mrg 
   3315  1.1  mrg   * Return value: A new stmt that will be used to replace
   3316  1.1  mrg     the multiplication.  */
   3317  1.1  mrg 
   3318  1.1  mrg static gimple *
   3319  1.1  mrg vect_recog_mult_pattern (vec_info *vinfo,
   3320  1.1  mrg 			 stmt_vec_info stmt_vinfo, tree *type_out)
   3321  1.1  mrg {
   3322  1.1  mrg   gimple *last_stmt = stmt_vinfo->stmt;
   3323  1.1  mrg   tree oprnd0, oprnd1, vectype, itype;
   3324  1.1  mrg   gimple *pattern_stmt;
   3325  1.1  mrg 
   3326  1.1  mrg   if (!is_gimple_assign (last_stmt))
   3327  1.1  mrg     return NULL;
   3328  1.1  mrg 
   3329  1.1  mrg   if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
   3330  1.1  mrg     return NULL;
   3331  1.1  mrg 
   3332  1.1  mrg   oprnd0 = gimple_assign_rhs1 (last_stmt);
   3333  1.1  mrg   oprnd1 = gimple_assign_rhs2 (last_stmt);
   3334  1.1  mrg   itype = TREE_TYPE (oprnd0);
   3335  1.1  mrg 
   3336  1.1  mrg   if (TREE_CODE (oprnd0) != SSA_NAME
   3337  1.1  mrg       || TREE_CODE (oprnd1) != INTEGER_CST
   3338  1.1  mrg       || !INTEGRAL_TYPE_P (itype)
   3339  1.1  mrg       || !type_has_mode_precision_p (itype))
   3340  1.1  mrg     return NULL;
   3341  1.1  mrg 
   3342  1.1  mrg   vectype = get_vectype_for_scalar_type (vinfo, itype);
   3343  1.1  mrg   if (vectype == NULL_TREE)
   3344  1.1  mrg     return NULL;
   3345  1.1  mrg 
   3346  1.1  mrg   /* If the target can handle vectorized multiplication natively,
   3347  1.1  mrg      don't attempt to optimize this.  */
   3348  1.1  mrg   optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
   3349  1.1  mrg   if (mul_optab != unknown_optab)
   3350  1.1  mrg     {
   3351  1.1  mrg       machine_mode vec_mode = TYPE_MODE (vectype);
   3352  1.1  mrg       int icode = (int) optab_handler (mul_optab, vec_mode);
   3353  1.1  mrg       if (icode != CODE_FOR_nothing)
   3354  1.1  mrg        return NULL;
   3355  1.1  mrg     }
   3356  1.1  mrg 
   3357  1.1  mrg   pattern_stmt = vect_synth_mult_by_constant (vinfo,
   3358  1.1  mrg 					      oprnd0, oprnd1, stmt_vinfo);
   3359  1.1  mrg   if (!pattern_stmt)
   3360  1.1  mrg     return NULL;
   3361  1.1  mrg 
   3362  1.1  mrg   /* Pattern detected.  */
   3363  1.1  mrg   vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
   3364  1.1  mrg 
   3365  1.1  mrg   *type_out = vectype;
   3366  1.1  mrg 
   3367  1.1  mrg   return pattern_stmt;
   3368  1.1  mrg }
   3369  1.1  mrg 
   3370  1.1  mrg /* Detect a signed division by a constant that wouldn't be
   3371  1.1  mrg    otherwise vectorized:
   3372  1.1  mrg 
   3373  1.1  mrg    type a_t, b_t;
   3374  1.1  mrg 
   3375  1.1  mrg    S1 a_t = b_t / N;
   3376  1.1  mrg 
   3377  1.1  mrg   where type 'type' is an integral type and N is a constant.
   3378  1.1  mrg 
   3379  1.1  mrg   Similarly handle modulo by a constant:
   3380  1.1  mrg 
   3381  1.1  mrg    S4 a_t = b_t % N;
   3382  1.1  mrg 
   3383  1.1  mrg   Input/Output:
   3384  1.1  mrg 
   3385  1.1  mrg   * STMT_VINFO: The stmt from which the pattern search begins,
   3386  1.1  mrg     i.e. the division stmt.  S1 is replaced by if N is a power
   3387  1.1  mrg     of two constant and type is signed:
   3388  1.1  mrg   S3  y_t = b_t < 0 ? N - 1 : 0;
   3389  1.1  mrg   S2  x_t = b_t + y_t;
   3390  1.1  mrg   S1' a_t = x_t >> log2 (N);
   3391  1.1  mrg 
   3392  1.1  mrg     S4 is replaced if N is a power of two constant and
   3393  1.1  mrg     type is signed by (where *_T temporaries have unsigned type):
   3394  1.1  mrg   S9  y_T = b_t < 0 ? -1U : 0U;
   3395  1.1  mrg   S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
   3396  1.1  mrg   S7  z_t = (type) z_T;
   3397  1.1  mrg   S6  w_t = b_t + z_t;
   3398  1.1  mrg   S5  x_t = w_t & (N - 1);
   3399  1.1  mrg   S4' a_t = x_t - z_t;
   3400  1.1  mrg 
   3401  1.1  mrg   Output:
   3402  1.1  mrg 
   3403  1.1  mrg   * TYPE_OUT: The type of the output of this pattern.
   3404  1.1  mrg 
   3405  1.1  mrg   * Return value: A new stmt that will be used to replace the division
   3406  1.1  mrg     S1 or modulo S4 stmt.  */
   3407  1.1  mrg 
   3408  1.1  mrg static gimple *
   3409  1.1  mrg vect_recog_divmod_pattern (vec_info *vinfo,
   3410  1.1  mrg 			   stmt_vec_info stmt_vinfo, tree *type_out)
   3411  1.1  mrg {
   3412  1.1  mrg   gimple *last_stmt = stmt_vinfo->stmt;
   3413  1.1  mrg   tree oprnd0, oprnd1, vectype, itype, cond;
   3414  1.1  mrg   gimple *pattern_stmt, *def_stmt;
   3415  1.1  mrg   enum tree_code rhs_code;
   3416  1.1  mrg   optab optab;
   3417  1.1  mrg   tree q;
   3418  1.1  mrg   int dummy_int, prec;
   3419  1.1  mrg 
   3420  1.1  mrg   if (!is_gimple_assign (last_stmt))
   3421  1.1  mrg     return NULL;
   3422  1.1  mrg 
   3423  1.1  mrg   rhs_code = gimple_assign_rhs_code (last_stmt);
   3424  1.1  mrg   switch (rhs_code)
   3425  1.1  mrg     {
   3426  1.1  mrg     case TRUNC_DIV_EXPR:
   3427  1.1  mrg     case EXACT_DIV_EXPR:
   3428  1.1  mrg     case TRUNC_MOD_EXPR:
   3429  1.1  mrg       break;
   3430  1.1  mrg     default:
   3431  1.1  mrg       return NULL;
   3432  1.1  mrg     }
   3433  1.1  mrg 
   3434  1.1  mrg   oprnd0 = gimple_assign_rhs1 (last_stmt);
   3435  1.1  mrg   oprnd1 = gimple_assign_rhs2 (last_stmt);
   3436  1.1  mrg   itype = TREE_TYPE (oprnd0);
   3437  1.1  mrg   if (TREE_CODE (oprnd0) != SSA_NAME
   3438  1.1  mrg       || TREE_CODE (oprnd1) != INTEGER_CST
   3439  1.1  mrg       || TREE_CODE (itype) != INTEGER_TYPE
   3440  1.1  mrg       || !type_has_mode_precision_p (itype))
   3441  1.1  mrg     return NULL;
   3442  1.1  mrg 
   3443  1.1  mrg   scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
   3444  1.1  mrg   vectype = get_vectype_for_scalar_type (vinfo, itype);
   3445  1.1  mrg   if (vectype == NULL_TREE)
   3446  1.1  mrg     return NULL;
   3447  1.1  mrg 
   3448  1.1  mrg   if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
   3449  1.1  mrg     {
   3450  1.1  mrg       /* If the target can handle vectorized division or modulo natively,
   3451  1.1  mrg 	 don't attempt to optimize this, since native division is likely
   3452  1.1  mrg 	 to give smaller code.  */
   3453  1.1  mrg       optab = optab_for_tree_code (rhs_code, vectype, optab_default);
   3454  1.1  mrg       if (optab != unknown_optab)
   3455  1.1  mrg 	{
   3456  1.1  mrg 	  machine_mode vec_mode = TYPE_MODE (vectype);
   3457  1.1  mrg 	  int icode = (int) optab_handler (optab, vec_mode);
   3458  1.1  mrg 	  if (icode != CODE_FOR_nothing)
   3459  1.1  mrg 	    return NULL;
   3460  1.1  mrg 	}
   3461  1.1  mrg     }
   3462  1.1  mrg 
   3463  1.1  mrg   prec = TYPE_PRECISION (itype);
   3464  1.1  mrg   if (integer_pow2p (oprnd1))
   3465  1.1  mrg     {
   3466  1.1  mrg       if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
   3467  1.1  mrg 	return NULL;
   3468  1.1  mrg 
   3469  1.1  mrg       /* Pattern detected.  */
   3470  1.1  mrg       vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
   3471  1.1  mrg 
   3472  1.1  mrg       *type_out = vectype;
   3473  1.1  mrg 
   3474  1.1  mrg       /* Check if the target supports this internal function.  */
   3475  1.1  mrg       internal_fn ifn = IFN_DIV_POW2;
   3476  1.1  mrg       if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
   3477  1.1  mrg 	{
   3478  1.1  mrg 	  tree shift = build_int_cst (itype, tree_log2 (oprnd1));
   3479  1.1  mrg 
   3480  1.1  mrg 	  tree var_div = vect_recog_temp_ssa_var (itype, NULL);
   3481  1.1  mrg 	  gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
   3482  1.1  mrg 	  gimple_call_set_lhs (div_stmt, var_div);
   3483  1.1  mrg 
   3484  1.1  mrg 	  if (rhs_code == TRUNC_MOD_EXPR)
   3485  1.1  mrg 	    {
   3486  1.1  mrg 	      append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
   3487  1.1  mrg 	      def_stmt
   3488  1.1  mrg 		= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
   3489  1.1  mrg 				       LSHIFT_EXPR, var_div, shift);
   3490  1.1  mrg 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
   3491  1.1  mrg 	      pattern_stmt
   3492  1.1  mrg 		= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
   3493  1.1  mrg 				       MINUS_EXPR, oprnd0,
   3494  1.1  mrg 				       gimple_assign_lhs (def_stmt));
   3495  1.1  mrg 	    }
   3496  1.1  mrg 	  else
   3497  1.1  mrg 	    pattern_stmt = div_stmt;
   3498  1.1  mrg 	  gimple_set_location (pattern_stmt, gimple_location (last_stmt));
   3499  1.1  mrg 
   3500  1.1  mrg 	  return pattern_stmt;
   3501  1.1  mrg 	}
   3502  1.1  mrg 
   3503  1.1  mrg       cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
   3504  1.1  mrg 		     build_int_cst (itype, 0));
   3505  1.1  mrg       if (rhs_code == TRUNC_DIV_EXPR
   3506  1.1  mrg 	  || rhs_code == EXACT_DIV_EXPR)
   3507  1.1  mrg 	{
   3508  1.1  mrg 	  tree var = vect_recog_temp_ssa_var (itype, NULL);
   3509  1.1  mrg 	  tree shift;
   3510  1.1  mrg 	  def_stmt
   3511  1.1  mrg 	    = gimple_build_assign (var, COND_EXPR, cond,
   3512  1.1  mrg 				   fold_build2 (MINUS_EXPR, itype, oprnd1,
   3513  1.1  mrg 						build_int_cst (itype, 1)),
   3514  1.1  mrg 				   build_int_cst (itype, 0));
   3515  1.1  mrg 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
   3516  1.1  mrg 	  var = vect_recog_temp_ssa_var (itype, NULL);
   3517  1.1  mrg 	  def_stmt
   3518  1.1  mrg 	    = gimple_build_assign (var, PLUS_EXPR, oprnd0,
   3519  1.1  mrg 				   gimple_assign_lhs (def_stmt));
   3520  1.1  mrg 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
   3521  1.1  mrg 
   3522  1.1  mrg 	  shift = build_int_cst (itype, tree_log2 (oprnd1));
   3523  1.1  mrg 	  pattern_stmt
   3524  1.1  mrg 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
   3525  1.1  mrg 				   RSHIFT_EXPR, var, shift);
   3526  1.1  mrg 	}
   3527  1.1  mrg       else
   3528  1.1  mrg 	{
   3529  1.1  mrg 	  tree signmask;
   3530  1.1  mrg 	  if (compare_tree_int (oprnd1, 2) == 0)
   3531  1.1  mrg 	    {
   3532  1.1  mrg 	      signmask = vect_recog_temp_ssa_var (itype, NULL);
   3533  1.1  mrg 	      def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
   3534  1.1  mrg 					      build_int_cst (itype, 1),
   3535  1.1  mrg 					      build_int_cst (itype, 0));
   3536  1.1  mrg 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
   3537  1.1  mrg 	    }
   3538  1.1  mrg 	  else
   3539  1.1  mrg 	    {
   3540  1.1  mrg 	      tree utype
   3541  1.1  mrg 		= build_nonstandard_integer_type (prec, 1);
   3542  1.1  mrg 	      tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
   3543  1.1  mrg 	      tree shift
   3544  1.1  mrg 		= build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
   3545  1.1  mrg 					- tree_log2 (oprnd1));
   3546  1.1  mrg 	      tree var = vect_recog_temp_ssa_var (utype, NULL);
   3547  1.1  mrg 
   3548  1.1  mrg 	      def_stmt = gimple_build_assign (var, COND_EXPR, cond,
   3549  1.1  mrg 					      build_int_cst (utype, -1),
   3550  1.1  mrg 					      build_int_cst (utype, 0));
   3551  1.1  mrg 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
   3552  1.1  mrg 	      var = vect_recog_temp_ssa_var (utype, NULL);
   3553  1.1  mrg 	      def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
   3554  1.1  mrg 					      gimple_assign_lhs (def_stmt),
   3555  1.1  mrg 					      shift);
   3556  1.1  mrg 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
   3557  1.1  mrg 	      signmask = vect_recog_temp_ssa_var (itype, NULL);
   3558  1.1  mrg 	      def_stmt
   3559  1.1  mrg 		= gimple_build_assign (signmask, NOP_EXPR, var);
   3560  1.1  mrg 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
   3561  1.1  mrg 	    }
   3562  1.1  mrg 	  def_stmt
   3563  1.1  mrg 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
   3564  1.1  mrg 				   PLUS_EXPR, oprnd0, signmask);
   3565  1.1  mrg 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
   3566  1.1  mrg 	  def_stmt
   3567  1.1  mrg 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
   3568  1.1  mrg 				   BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
   3569  1.1  mrg 				   fold_build2 (MINUS_EXPR, itype, oprnd1,
   3570  1.1  mrg 						build_int_cst (itype, 1)));
   3571  1.1  mrg 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
   3572  1.1  mrg 
   3573  1.1  mrg 	  pattern_stmt
   3574  1.1  mrg 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
   3575  1.1  mrg 				   MINUS_EXPR, gimple_assign_lhs (def_stmt),
   3576  1.1  mrg 				   signmask);
   3577  1.1  mrg 	}
   3578  1.1  mrg 
   3579  1.1  mrg       return pattern_stmt;
   3580  1.1  mrg     }
   3581  1.1  mrg 
   3582  1.1  mrg   if (prec > HOST_BITS_PER_WIDE_INT
   3583  1.1  mrg       || integer_zerop (oprnd1))
   3584  1.1  mrg     return NULL;
   3585  1.1  mrg 
   3586  1.1  mrg   if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
   3587  1.1  mrg     return NULL;
   3588  1.1  mrg 
   3589  1.1  mrg   if (TYPE_UNSIGNED (itype))
   3590  1.1  mrg     {
   3591  1.1  mrg       unsigned HOST_WIDE_INT mh, ml;
   3592  1.1  mrg       int pre_shift, post_shift;
   3593  1.1  mrg       unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
   3594  1.1  mrg 				  & GET_MODE_MASK (itype_mode));
   3595  1.1  mrg       tree t1, t2, t3, t4;
   3596  1.1  mrg 
   3597  1.1  mrg       if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
   3598  1.1  mrg 	/* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
   3599  1.1  mrg 	return NULL;
   3600  1.1  mrg 
   3601  1.1  mrg       /* Find a suitable multiplier and right shift count
   3602  1.1  mrg 	 instead of multiplying with D.  */
   3603  1.1  mrg       mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
   3604  1.1  mrg 
   3605  1.1  mrg       /* If the suggested multiplier is more than SIZE bits, we can do better
   3606  1.1  mrg 	 for even divisors, using an initial right shift.  */
   3607  1.1  mrg       if (mh != 0 && (d & 1) == 0)
   3608  1.1  mrg 	{
   3609  1.1  mrg 	  pre_shift = ctz_or_zero (d);
   3610  1.1  mrg 	  mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
   3611  1.1  mrg 				  &ml, &post_shift, &dummy_int);
   3612  1.1  mrg 	  gcc_assert (!mh);
   3613  1.1  mrg 	}
   3614  1.1  mrg       else
   3615  1.1  mrg 	pre_shift = 0;
   3616  1.1  mrg 
   3617  1.1  mrg       if (mh != 0)
   3618  1.1  mrg 	{
   3619  1.1  mrg 	  if (post_shift - 1 >= prec)
   3620  1.1  mrg 	    return NULL;
   3621  1.1  mrg 
   3622  1.1  mrg 	  /* t1 = oprnd0 h* ml;
   3623  1.1  mrg 	     t2 = oprnd0 - t1;
   3624  1.1  mrg 	     t3 = t2 >> 1;
   3625  1.1  mrg 	     t4 = t1 + t3;
   3626  1.1  mrg 	     q = t4 >> (post_shift - 1);  */
   3627  1.1  mrg 	  t1 = vect_recog_temp_ssa_var (itype, NULL);
   3628  1.1  mrg 	  def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
   3629  1.1  mrg 					  build_int_cst (itype, ml));
   3630  1.1  mrg 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
   3631  1.1  mrg 
   3632  1.1  mrg 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
   3633  1.1  mrg 	  def_stmt
   3634  1.1  mrg 	    = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
   3635  1.1  mrg 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
   3636  1.1  mrg 
   3637  1.1  mrg 	  t3 = vect_recog_temp_ssa_var (itype, NULL);
   3638  1.1  mrg 	  def_stmt
   3639  1.1  mrg 	    = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
   3640  1.1  mrg 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
   3641  1.1  mrg 
   3642  1.1  mrg 	  t4 = vect_recog_temp_ssa_var (itype, NULL);
   3643  1.1  mrg 	  def_stmt
   3644  1.1  mrg 	    = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
   3645  1.1  mrg 
   3646  1.1  mrg 	  if (post_shift != 1)
   3647  1.1  mrg 	    {
   3648  1.1  mrg 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
   3649  1.1  mrg 
   3650  1.1  mrg 	      q = vect_recog_temp_ssa_var (itype, NULL);
   3651  1.1  mrg 	      pattern_stmt
   3652  1.1  mrg 		= gimple_build_assign (q, RSHIFT_EXPR, t4,
   3653  1.1  mrg 				       build_int_cst (itype, post_shift - 1));
   3654  1.1  mrg 	    }
   3655  1.1  mrg 	  else
   3656  1.1  mrg 	    {
   3657  1.1  mrg 	      q = t4;
   3658  1.1  mrg 	      pattern_stmt = def_stmt;
   3659  1.1  mrg 	    }
   3660  1.1  mrg 	}
   3661  1.1  mrg       else
   3662  1.1  mrg 	{
   3663  1.1  mrg 	  if (pre_shift >= prec || post_shift >= prec)
   3664  1.1  mrg 	    return NULL;
   3665  1.1  mrg 
   3666  1.1  mrg 	  /* t1 = oprnd0 >> pre_shift;
   3667  1.1  mrg 	     t2 = t1 h* ml;
   3668  1.1  mrg 	     q = t2 >> post_shift;  */
   3669  1.1  mrg 	  if (pre_shift)
   3670  1.1  mrg 	    {
   3671  1.1  mrg 	      t1 = vect_recog_temp_ssa_var (itype, NULL);
   3672  1.1  mrg 	      def_stmt
   3673  1.1  mrg 		= gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
   3674  1.1  mrg 				       build_int_cst (NULL, pre_shift));
   3675  1.1  mrg 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
   3676  1.1  mrg 	    }
   3677  1.1  mrg 	  else
   3678  1.1  mrg 	    t1 = oprnd0;
   3679  1.1  mrg 
   3680  1.1  mrg 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
   3681  1.1  mrg 	  def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
   3682  1.1  mrg 					  build_int_cst (itype, ml));
   3683  1.1  mrg 
   3684  1.1  mrg 	  if (post_shift)
   3685  1.1  mrg 	    {
   3686  1.1  mrg 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
   3687  1.1  mrg 
   3688  1.1  mrg 	      q = vect_recog_temp_ssa_var (itype, NULL);
   3689  1.1  mrg 	      def_stmt
   3690  1.1  mrg 		= gimple_build_assign (q, RSHIFT_EXPR, t2,
   3691  1.1  mrg 				       build_int_cst (itype, post_shift));
   3692  1.1  mrg 	    }
   3693  1.1  mrg 	  else
   3694  1.1  mrg 	    q = t2;
   3695  1.1  mrg 
   3696  1.1  mrg 	  pattern_stmt = def_stmt;
   3697  1.1  mrg 	}
   3698  1.1  mrg     }
   3699  1.1  mrg   else
   3700  1.1  mrg     {
   3701  1.1  mrg       unsigned HOST_WIDE_INT ml;
   3702  1.1  mrg       int post_shift;
   3703  1.1  mrg       HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
   3704  1.1  mrg       unsigned HOST_WIDE_INT abs_d;
   3705  1.1  mrg       bool add = false;
   3706  1.1  mrg       tree t1, t2, t3, t4;
   3707  1.1  mrg 
   3708  1.1  mrg       /* Give up for -1.  */
   3709  1.1  mrg       if (d == -1)
   3710  1.1  mrg 	return NULL;
   3711  1.1  mrg 
   3712  1.1  mrg       /* Since d might be INT_MIN, we have to cast to
   3713  1.1  mrg 	 unsigned HOST_WIDE_INT before negating to avoid
   3714  1.1  mrg 	 undefined signed overflow.  */
   3715  1.1  mrg       abs_d = (d >= 0
   3716  1.1  mrg 	       ? (unsigned HOST_WIDE_INT) d
   3717  1.1  mrg 	       : - (unsigned HOST_WIDE_INT) d);
   3718  1.1  mrg 
   3719  1.1  mrg       /* n rem d = n rem -d */
   3720  1.1  mrg       if (rhs_code == TRUNC_MOD_EXPR && d < 0)
   3721  1.1  mrg 	{
   3722  1.1  mrg 	  d = abs_d;
   3723  1.1  mrg 	  oprnd1 = build_int_cst (itype, abs_d);
   3724  1.1  mrg 	}
   3725  1.1  mrg       if (HOST_BITS_PER_WIDE_INT >= prec
   3726  1.1  mrg 	  && abs_d == HOST_WIDE_INT_1U << (prec - 1))
   3727  1.1  mrg 	/* This case is not handled correctly below.  */
   3728  1.1  mrg 	return NULL;
   3729  1.1  mrg 
   3730  1.1  mrg       choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
   3731  1.1  mrg       if (ml >= HOST_WIDE_INT_1U << (prec - 1))
   3732  1.1  mrg 	{
   3733  1.1  mrg 	  add = true;
   3734  1.1  mrg 	  ml |= HOST_WIDE_INT_M1U << (prec - 1);
   3735  1.1  mrg 	}
   3736  1.1  mrg       if (post_shift >= prec)
   3737  1.1  mrg 	return NULL;
   3738  1.1  mrg 
   3739  1.1  mrg       /* t1 = oprnd0 h* ml;  */
   3740  1.1  mrg       t1 = vect_recog_temp_ssa_var (itype, NULL);
   3741  1.1  mrg       def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
   3742  1.1  mrg 				      build_int_cst (itype, ml));
   3743  1.1  mrg 
   3744  1.1  mrg       if (add)
   3745  1.1  mrg 	{
   3746  1.1  mrg 	  /* t2 = t1 + oprnd0;  */
   3747  1.1  mrg 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
   3748  1.1  mrg 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
   3749  1.1  mrg 	  def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
   3750  1.1  mrg 	}
   3751  1.1  mrg       else
   3752  1.1  mrg 	t2 = t1;
   3753  1.1  mrg 
   3754  1.1  mrg       if (post_shift)
   3755  1.1  mrg 	{
   3756  1.1  mrg 	  /* t3 = t2 >> post_shift;  */
   3757  1.1  mrg 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
   3758  1.1  mrg 	  t3 = vect_recog_temp_ssa_var (itype, NULL);
   3759  1.1  mrg 	  def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
   3760  1.1  mrg 					  build_int_cst (itype, post_shift));
   3761  1.1  mrg 	}
   3762  1.1  mrg       else
   3763  1.1  mrg 	t3 = t2;
   3764  1.1  mrg 
   3765  1.1  mrg       int msb = 1;
   3766  1.1  mrg       value_range r;
   3767  1.1  mrg       get_range_query (cfun)->range_of_expr (r, oprnd0);
   3768  1.1  mrg       if (r.kind () == VR_RANGE)
   3769  1.1  mrg 	{
   3770  1.1  mrg 	  if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
   3771  1.1  mrg 	    msb = 0;
   3772  1.1  mrg 	  else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
   3773  1.1  mrg 	    msb = -1;
   3774  1.1  mrg 	}
   3775  1.1  mrg 
   3776  1.1  mrg       if (msb == 0 && d >= 0)
   3777  1.1  mrg 	{
   3778  1.1  mrg 	  /* q = t3;  */
   3779  1.1  mrg 	  q = t3;
   3780  1.1  mrg 	  pattern_stmt = def_stmt;
   3781  1.1  mrg 	}
   3782  1.1  mrg       else
   3783  1.1  mrg 	{
   3784  1.1  mrg 	  /* t4 = oprnd0 >> (prec - 1);
   3785  1.1  mrg 	     or if we know from VRP that oprnd0 >= 0
   3786  1.1  mrg 	     t4 = 0;
   3787  1.1  mrg 	     or if we know from VRP that oprnd0 < 0
   3788  1.1  mrg 	     t4 = -1;  */
   3789  1.1  mrg 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
   3790  1.1  mrg 	  t4 = vect_recog_temp_ssa_var (itype, NULL);
   3791  1.1  mrg 	  if (msb != 1)
   3792  1.1  mrg 	    def_stmt = gimple_build_assign (t4, INTEGER_CST,
   3793  1.1  mrg 					    build_int_cst (itype, msb));
   3794  1.1  mrg 	  else
   3795  1.1  mrg 	    def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
   3796  1.1  mrg 					    build_int_cst (itype, prec - 1));
   3797  1.1  mrg 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
   3798  1.1  mrg 
   3799  1.1  mrg 	  /* q = t3 - t4;  or q = t4 - t3;  */
   3800  1.1  mrg 	  q = vect_recog_temp_ssa_var (itype, NULL);
   3801  1.1  mrg 	  pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
   3802  1.1  mrg 					      d < 0 ? t3 : t4);
   3803  1.1  mrg 	}
   3804  1.1  mrg     }
   3805  1.1  mrg 
   3806  1.1  mrg   if (rhs_code == TRUNC_MOD_EXPR)
   3807  1.1  mrg     {
   3808  1.1  mrg       tree r, t1;
   3809  1.1  mrg 
   3810  1.1  mrg       /* We divided.  Now finish by:
   3811  1.1  mrg 	 t1 = q * oprnd1;
   3812  1.1  mrg 	 r = oprnd0 - t1;  */
   3813  1.1  mrg       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
   3814  1.1  mrg 
   3815  1.1  mrg       t1 = vect_recog_temp_ssa_var (itype, NULL);
   3816  1.1  mrg       def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
   3817  1.1  mrg       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
   3818  1.1  mrg 
   3819  1.1  mrg       r = vect_recog_temp_ssa_var (itype, NULL);
   3820  1.1  mrg       pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
   3821  1.1  mrg     }
   3822  1.1  mrg 
   3823  1.1  mrg   /* Pattern detected.  */
   3824  1.1  mrg   vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
   3825  1.1  mrg 
   3826  1.1  mrg   *type_out = vectype;
   3827  1.1  mrg   return pattern_stmt;
   3828  1.1  mrg }
   3829  1.1  mrg 
   3830  1.1  mrg /* Function vect_recog_mixed_size_cond_pattern
   3831  1.1  mrg 
   3832  1.1  mrg    Try to find the following pattern:
   3833  1.1  mrg 
   3834  1.1  mrg      type x_t, y_t;
   3835  1.1  mrg      TYPE a_T, b_T, c_T;
   3836  1.1  mrg    loop:
   3837  1.1  mrg      S1  a_T = x_t CMP y_t ? b_T : c_T;
   3838  1.1  mrg 
   3839  1.1  mrg    where type 'TYPE' is an integral type which has different size
   3840  1.1  mrg    from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
   3841  1.1  mrg    than 'type', the constants need to fit into an integer type
   3842  1.1  mrg    with the same width as 'type') or results of conversion from 'type'.
   3843  1.1  mrg 
   3844  1.1  mrg    Input:
   3845  1.1  mrg 
   3846  1.1  mrg    * STMT_VINFO: The stmt from which the pattern search begins.
   3847  1.1  mrg 
   3848  1.1  mrg    Output:
   3849  1.1  mrg 
   3850  1.1  mrg    * TYPE_OUT: The type of the output of this pattern.
   3851  1.1  mrg 
   3852  1.1  mrg    * Return value: A new stmt that will be used to replace the pattern.
   3853  1.1  mrg 	Additionally a def_stmt is added.
   3854  1.1  mrg 
   3855  1.1  mrg 	a_it = x_t CMP y_t ? b_it : c_it;
   3856  1.1  mrg 	a_T = (TYPE) a_it;  */
   3857  1.1  mrg 
   3858  1.1  mrg static gimple *
   3859  1.1  mrg vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
   3860  1.1  mrg 				    stmt_vec_info stmt_vinfo, tree *type_out)
   3861  1.1  mrg {
   3862  1.1  mrg   gimple *last_stmt = stmt_vinfo->stmt;
   3863  1.1  mrg   tree cond_expr, then_clause, else_clause;
   3864  1.1  mrg   tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
   3865  1.1  mrg   gimple *pattern_stmt, *def_stmt;
   3866  1.1  mrg   tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
   3867  1.1  mrg   gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
   3868  1.1  mrg   bool promotion;
   3869  1.1  mrg   tree comp_scalar_type;
   3870  1.1  mrg 
   3871  1.1  mrg   if (!is_gimple_assign (last_stmt)
   3872  1.1  mrg       || gimple_assign_rhs_code (last_stmt) != COND_EXPR
   3873  1.1  mrg       || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
   3874  1.1  mrg     return NULL;
   3875  1.1  mrg 
   3876  1.1  mrg   cond_expr = gimple_assign_rhs1 (last_stmt);
   3877  1.1  mrg   then_clause = gimple_assign_rhs2 (last_stmt);
   3878  1.1  mrg   else_clause = gimple_assign_rhs3 (last_stmt);
   3879  1.1  mrg 
   3880  1.1  mrg   if (!COMPARISON_CLASS_P (cond_expr))
   3881  1.1  mrg     return NULL;
   3882  1.1  mrg 
   3883  1.1  mrg   comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
   3884  1.1  mrg   comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
   3885  1.1  mrg   if (comp_vectype == NULL_TREE)
   3886  1.1  mrg     return NULL;
   3887  1.1  mrg 
   3888  1.1  mrg   type = TREE_TYPE (gimple_assign_lhs (last_stmt));
   3889  1.1  mrg   if (types_compatible_p (type, comp_scalar_type)
   3890  1.1  mrg       || ((TREE_CODE (then_clause) != INTEGER_CST
   3891  1.1  mrg 	   || TREE_CODE (else_clause) != INTEGER_CST)
   3892  1.1  mrg 	  && !INTEGRAL_TYPE_P (comp_scalar_type))
   3893  1.1  mrg       || !INTEGRAL_TYPE_P (type))
   3894  1.1  mrg     return NULL;
   3895  1.1  mrg 
   3896  1.1  mrg   if ((TREE_CODE (then_clause) != INTEGER_CST
   3897  1.1  mrg        && !type_conversion_p (vinfo, then_clause, false,
   3898  1.1  mrg 			      &orig_type0, &def_stmt0, &promotion))
   3899  1.1  mrg       || (TREE_CODE (else_clause) != INTEGER_CST
   3900  1.1  mrg 	  && !type_conversion_p (vinfo, else_clause, false,
   3901  1.1  mrg 				 &orig_type1, &def_stmt1, &promotion)))
   3902  1.1  mrg     return NULL;
   3903  1.1  mrg 
   3904  1.1  mrg   if (orig_type0 && orig_type1
   3905  1.1  mrg       && !types_compatible_p (orig_type0, orig_type1))
   3906  1.1  mrg     return NULL;
   3907  1.1  mrg 
   3908  1.1  mrg   if (orig_type0)
   3909  1.1  mrg     {
   3910  1.1  mrg       if (!types_compatible_p (orig_type0, comp_scalar_type))
   3911  1.1  mrg 	return NULL;
   3912  1.1  mrg       then_clause = gimple_assign_rhs1 (def_stmt0);
   3913  1.1  mrg       itype = orig_type0;
   3914  1.1  mrg     }
   3915  1.1  mrg 
   3916  1.1  mrg   if (orig_type1)
   3917  1.1  mrg     {
   3918  1.1  mrg       if (!types_compatible_p (orig_type1, comp_scalar_type))
   3919  1.1  mrg 	return NULL;
   3920  1.1  mrg       else_clause = gimple_assign_rhs1 (def_stmt1);
   3921  1.1  mrg       itype = orig_type1;
   3922  1.1  mrg     }
   3923  1.1  mrg 
   3924  1.1  mrg 
   3925  1.1  mrg   HOST_WIDE_INT cmp_mode_size
   3926  1.1  mrg     = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
   3927  1.1  mrg 
   3928  1.1  mrg   scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
   3929  1.1  mrg   if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
   3930  1.1  mrg     return NULL;
   3931  1.1  mrg 
   3932  1.1  mrg   vectype = get_vectype_for_scalar_type (vinfo, type);
   3933  1.1  mrg   if (vectype == NULL_TREE)
   3934  1.1  mrg     return NULL;
   3935  1.1  mrg 
   3936  1.1  mrg   if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
   3937  1.1  mrg     return NULL;
   3938  1.1  mrg 
   3939  1.1  mrg   if (itype == NULL_TREE)
   3940  1.1  mrg     itype = build_nonstandard_integer_type (cmp_mode_size,
   3941  1.1  mrg   					    TYPE_UNSIGNED (type));
   3942  1.1  mrg 
   3943  1.1  mrg   if (itype == NULL_TREE
   3944  1.1  mrg       || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
   3945  1.1  mrg     return NULL;
   3946  1.1  mrg 
   3947  1.1  mrg   vecitype = get_vectype_for_scalar_type (vinfo, itype);
   3948  1.1  mrg   if (vecitype == NULL_TREE)
   3949  1.1  mrg     return NULL;
   3950  1.1  mrg 
   3951  1.1  mrg   if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
   3952  1.1  mrg     return NULL;
   3953  1.1  mrg 
   3954  1.1  mrg   if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
   3955  1.1  mrg     {
   3956  1.1  mrg       if ((TREE_CODE (then_clause) == INTEGER_CST
   3957  1.1  mrg 	   && !int_fits_type_p (then_clause, itype))
   3958  1.1  mrg 	  || (TREE_CODE (else_clause) == INTEGER_CST
   3959  1.1  mrg 	      && !int_fits_type_p (else_clause, itype)))
   3960  1.1  mrg 	return NULL;
   3961  1.1  mrg     }
   3962  1.1  mrg 
   3963  1.1  mrg   def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
   3964  1.1  mrg 				  COND_EXPR, unshare_expr (cond_expr),
   3965  1.1  mrg 				  fold_convert (itype, then_clause),
   3966  1.1  mrg 				  fold_convert (itype, else_clause));
   3967  1.1  mrg   pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
   3968  1.1  mrg 				      NOP_EXPR, gimple_assign_lhs (def_stmt));
   3969  1.1  mrg 
   3970  1.1  mrg   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
   3971  1.1  mrg   *type_out = vectype;
   3972  1.1  mrg 
   3973  1.1  mrg   vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
   3974  1.1  mrg 
   3975  1.1  mrg   return pattern_stmt;
   3976  1.1  mrg }
   3977  1.1  mrg 
   3978  1.1  mrg 
   3979  1.1  mrg /* Helper function of vect_recog_bool_pattern.  Called recursively, return
   3980  1.1  mrg    true if bool VAR can and should be optimized that way.  Assume it shouldn't
   3981  1.1  mrg    in case it's a result of a comparison which can be directly vectorized into
   3982  1.1  mrg    a vector comparison.  Fills in STMTS with all stmts visited during the
   3983  1.1  mrg    walk.  */
   3984  1.1  mrg 
   3985  1.1  mrg static bool
   3986  1.1  mrg check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
   3987  1.1  mrg {
   3988  1.1  mrg   tree rhs1;
   3989  1.1  mrg   enum tree_code rhs_code;
   3990  1.1  mrg 
   3991  1.1  mrg   stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
   3992  1.1  mrg   if (!def_stmt_info)
   3993  1.1  mrg     return false;
   3994  1.1  mrg 
   3995  1.1  mrg   gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
   3996  1.1  mrg   if (!def_stmt)
   3997  1.1  mrg     return false;
   3998  1.1  mrg 
   3999  1.1  mrg   if (stmts.contains (def_stmt))
   4000  1.1  mrg     return true;
   4001  1.1  mrg 
   4002  1.1  mrg   rhs1 = gimple_assign_rhs1 (def_stmt);
   4003  1.1  mrg   rhs_code = gimple_assign_rhs_code (def_stmt);
   4004  1.1  mrg   switch (rhs_code)
   4005  1.1  mrg     {
   4006  1.1  mrg     case SSA_NAME:
   4007  1.1  mrg       if (! check_bool_pattern (rhs1, vinfo, stmts))
   4008  1.1  mrg 	return false;
   4009  1.1  mrg       break;
   4010  1.1  mrg 
   4011  1.1  mrg     CASE_CONVERT:
   4012  1.1  mrg       if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
   4013  1.1  mrg 	return false;
   4014  1.1  mrg       if (! check_bool_pattern (rhs1, vinfo, stmts))
   4015  1.1  mrg 	return false;
   4016  1.1  mrg       break;
   4017  1.1  mrg 
   4018  1.1  mrg     case BIT_NOT_EXPR:
   4019  1.1  mrg       if (! check_bool_pattern (rhs1, vinfo, stmts))
   4020  1.1  mrg 	return false;
   4021  1.1  mrg       break;
   4022  1.1  mrg 
   4023  1.1  mrg     case BIT_AND_EXPR:
   4024  1.1  mrg     case BIT_IOR_EXPR:
   4025  1.1  mrg     case BIT_XOR_EXPR:
   4026  1.1  mrg       if (! check_bool_pattern (rhs1, vinfo, stmts)
   4027  1.1  mrg 	  || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
   4028  1.1  mrg 	return false;
   4029  1.1  mrg       break;
   4030  1.1  mrg 
   4031  1.1  mrg     default:
   4032  1.1  mrg       if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
   4033  1.1  mrg 	{
   4034  1.1  mrg 	  tree vecitype, comp_vectype;
   4035  1.1  mrg 
   4036  1.1  mrg 	  /* If the comparison can throw, then is_gimple_condexpr will be
   4037  1.1  mrg 	     false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
   4038  1.1  mrg 	  if (stmt_could_throw_p (cfun, def_stmt))
   4039  1.1  mrg 	    return false;
   4040  1.1  mrg 
   4041  1.1  mrg 	  comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
   4042  1.1  mrg 	  if (comp_vectype == NULL_TREE)
   4043  1.1  mrg 	    return false;
   4044  1.1  mrg 
   4045  1.1  mrg 	  tree mask_type = get_mask_type_for_scalar_type (vinfo,
   4046  1.1  mrg 							  TREE_TYPE (rhs1));
   4047  1.1  mrg 	  if (mask_type
   4048  1.1  mrg 	      && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
   4049  1.1  mrg 	    return false;
   4050  1.1  mrg 
   4051  1.1  mrg 	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
   4052  1.1  mrg 	    {
   4053  1.1  mrg 	      scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
   4054  1.1  mrg 	      tree itype
   4055  1.1  mrg 		= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
   4056  1.1  mrg 	      vecitype = get_vectype_for_scalar_type (vinfo, itype);
   4057  1.1  mrg 	      if (vecitype == NULL_TREE)
   4058  1.1  mrg 		return false;
   4059  1.1  mrg 	    }
   4060  1.1  mrg 	  else
   4061  1.1  mrg 	    vecitype = comp_vectype;
   4062  1.1  mrg 	  if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
   4063  1.1  mrg 	    return false;
   4064  1.1  mrg 	}
   4065  1.1  mrg       else
   4066  1.1  mrg 	return false;
   4067  1.1  mrg       break;
   4068  1.1  mrg     }
   4069  1.1  mrg 
   4070  1.1  mrg   bool res = stmts.add (def_stmt);
   4071  1.1  mrg   /* We can't end up recursing when just visiting SSA defs but not PHIs.  */
   4072  1.1  mrg   gcc_assert (!res);
   4073  1.1  mrg 
   4074  1.1  mrg   return true;
   4075  1.1  mrg }
   4076  1.1  mrg 
   4077  1.1  mrg 
   4078  1.1  mrg /* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
   4079  1.1  mrg    stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs
   4080  1.1  mrg    pattern sequence.  */
   4081  1.1  mrg 
   4082  1.1  mrg static tree
   4083  1.1  mrg adjust_bool_pattern_cast (vec_info *vinfo,
   4084  1.1  mrg 			  tree type, tree var, stmt_vec_info stmt_info)
   4085  1.1  mrg {
   4086  1.1  mrg   gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
   4087  1.1  mrg 					   NOP_EXPR, var);
   4088  1.1  mrg   append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
   4089  1.1  mrg 			  get_vectype_for_scalar_type (vinfo, type));
   4090  1.1  mrg   return gimple_assign_lhs (cast_stmt);
   4091  1.1  mrg }
   4092  1.1  mrg 
   4093  1.1  mrg /* Helper function of vect_recog_bool_pattern.  Do the actual transformations.
   4094  1.1  mrg    VAR is an SSA_NAME that should be transformed from bool to a wider integer
   4095  1.1  mrg    type, OUT_TYPE is the desired final integer type of the whole pattern.
   4096  1.1  mrg    STMT_INFO is the info of the pattern root and is where pattern stmts should
   4097  1.1  mrg    be associated with.  DEFS is a map of pattern defs.  */
   4098  1.1  mrg 
   4099  1.1  mrg static void
   4100  1.1  mrg adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
   4101  1.1  mrg 		     stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
   4102  1.1  mrg {
   4103  1.1  mrg   gimple *stmt = SSA_NAME_DEF_STMT (var);
   4104  1.1  mrg   enum tree_code rhs_code, def_rhs_code;
   4105  1.1  mrg   tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
   4106  1.1  mrg   location_t loc;
   4107  1.1  mrg   gimple *pattern_stmt, *def_stmt;
   4108  1.1  mrg   tree trueval = NULL_TREE;
   4109  1.1  mrg 
   4110  1.1  mrg   rhs1 = gimple_assign_rhs1 (stmt);
   4111  1.1  mrg   rhs2 = gimple_assign_rhs2 (stmt);
   4112  1.1  mrg   rhs_code = gimple_assign_rhs_code (stmt);
   4113  1.1  mrg   loc = gimple_location (stmt);
   4114  1.1  mrg   switch (rhs_code)
   4115  1.1  mrg     {
   4116  1.1  mrg     case SSA_NAME:
   4117  1.1  mrg     CASE_CONVERT:
   4118  1.1  mrg       irhs1 = *defs.get (rhs1);
   4119  1.1  mrg       itype = TREE_TYPE (irhs1);
   4120  1.1  mrg       pattern_stmt
   4121  1.1  mrg 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
   4122  1.1  mrg 			       SSA_NAME, irhs1);
   4123  1.1  mrg       break;
   4124  1.1  mrg 
   4125  1.1  mrg     case BIT_NOT_EXPR:
   4126  1.1  mrg       irhs1 = *defs.get (rhs1);
   4127  1.1  mrg       itype = TREE_TYPE (irhs1);
   4128  1.1  mrg       pattern_stmt
   4129  1.1  mrg 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
   4130  1.1  mrg 			       BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
   4131  1.1  mrg       break;
   4132  1.1  mrg 
   4133  1.1  mrg     case BIT_AND_EXPR:
   4134  1.1  mrg       /* Try to optimize x = y & (a < b ? 1 : 0); into
   4135  1.1  mrg 	 x = (a < b ? y : 0);
   4136  1.1  mrg 
   4137  1.1  mrg 	 E.g. for:
   4138  1.1  mrg 	   bool a_b, b_b, c_b;
   4139  1.1  mrg 	   TYPE d_T;
   4140  1.1  mrg 
   4141  1.1  mrg 	   S1  a_b = x1 CMP1 y1;
   4142  1.1  mrg 	   S2  b_b = x2 CMP2 y2;
   4143  1.1  mrg 	   S3  c_b = a_b & b_b;
   4144  1.1  mrg 	   S4  d_T = (TYPE) c_b;
   4145  1.1  mrg 
   4146  1.1  mrg 	 we would normally emit:
   4147  1.1  mrg 
   4148  1.1  mrg 	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
   4149  1.1  mrg 	   S2'  b_T = x2 CMP2 y2 ? 1 : 0;
   4150  1.1  mrg 	   S3'  c_T = a_T & b_T;
   4151  1.1  mrg 	   S4'  d_T = c_T;
   4152  1.1  mrg 
   4153  1.1  mrg 	 but we can save one stmt by using the
   4154  1.1  mrg 	 result of one of the COND_EXPRs in the other COND_EXPR and leave
   4155  1.1  mrg 	 BIT_AND_EXPR stmt out:
   4156  1.1  mrg 
   4157  1.1  mrg 	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
   4158  1.1  mrg 	   S3'  c_T = x2 CMP2 y2 ? a_T : 0;
   4159  1.1  mrg 	   S4'  f_T = c_T;
   4160  1.1  mrg 
   4161  1.1  mrg 	 At least when VEC_COND_EXPR is implemented using masks
   4162  1.1  mrg 	 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
   4163  1.1  mrg 	 computes the comparison masks and ands it, in one case with
   4164  1.1  mrg 	 all ones vector, in the other case with a vector register.
   4165  1.1  mrg 	 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
   4166  1.1  mrg 	 often more expensive.  */
   4167  1.1  mrg       def_stmt = SSA_NAME_DEF_STMT (rhs2);
   4168  1.1  mrg       def_rhs_code = gimple_assign_rhs_code (def_stmt);
   4169  1.1  mrg       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
   4170  1.1  mrg 	{
   4171  1.1  mrg 	  irhs1 = *defs.get (rhs1);
   4172  1.1  mrg 	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
   4173  1.1  mrg 	  if (TYPE_PRECISION (TREE_TYPE (irhs1))
   4174  1.1  mrg 	      == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
   4175  1.1  mrg 	    {
   4176  1.1  mrg 	      rhs_code = def_rhs_code;
   4177  1.1  mrg 	      rhs1 = def_rhs1;
   4178  1.1  mrg 	      rhs2 = gimple_assign_rhs2 (def_stmt);
   4179  1.1  mrg 	      trueval = irhs1;
   4180  1.1  mrg 	      goto do_compare;
   4181  1.1  mrg 	    }
   4182  1.1  mrg 	  else
   4183  1.1  mrg 	    irhs2 = *defs.get (rhs2);
   4184  1.1  mrg 	  goto and_ior_xor;
   4185  1.1  mrg 	}
   4186  1.1  mrg       def_stmt = SSA_NAME_DEF_STMT (rhs1);
   4187  1.1  mrg       def_rhs_code = gimple_assign_rhs_code (def_stmt);
   4188  1.1  mrg       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
   4189  1.1  mrg 	{
   4190  1.1  mrg 	  irhs2 = *defs.get (rhs2);
   4191  1.1  mrg 	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
   4192  1.1  mrg 	  if (TYPE_PRECISION (TREE_TYPE (irhs2))
   4193  1.1  mrg 	      == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
   4194  1.1  mrg 	    {
   4195  1.1  mrg 	      rhs_code = def_rhs_code;
   4196  1.1  mrg 	      rhs1 = def_rhs1;
   4197  1.1  mrg 	      rhs2 = gimple_assign_rhs2 (def_stmt);
   4198  1.1  mrg 	      trueval = irhs2;
   4199  1.1  mrg 	      goto do_compare;
   4200  1.1  mrg 	    }
   4201  1.1  mrg 	  else
   4202  1.1  mrg 	    irhs1 = *defs.get (rhs1);
   4203  1.1  mrg 	  goto and_ior_xor;
   4204  1.1  mrg 	}
   4205  1.1  mrg       /* FALLTHRU */
   4206  1.1  mrg     case BIT_IOR_EXPR:
   4207  1.1  mrg     case BIT_XOR_EXPR:
   4208  1.1  mrg       irhs1 = *defs.get (rhs1);
   4209  1.1  mrg       irhs2 = *defs.get (rhs2);
   4210  1.1  mrg     and_ior_xor:
   4211  1.1  mrg       if (TYPE_PRECISION (TREE_TYPE (irhs1))
   4212  1.1  mrg 	  != TYPE_PRECISION (TREE_TYPE (irhs2)))
   4213  1.1  mrg 	{
   4214  1.1  mrg 	  int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
   4215  1.1  mrg 	  int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
   4216  1.1  mrg 	  int out_prec = TYPE_PRECISION (out_type);
   4217  1.1  mrg 	  if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
   4218  1.1  mrg 	    irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
   4219  1.1  mrg 					      stmt_info);
   4220  1.1  mrg 	  else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
   4221  1.1  mrg 	    irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
   4222  1.1  mrg 					      stmt_info);
   4223  1.1  mrg 	  else
   4224  1.1  mrg 	    {
   4225  1.1  mrg 	      irhs1 = adjust_bool_pattern_cast (vinfo,
   4226  1.1  mrg 						out_type, irhs1, stmt_info);
   4227  1.1  mrg 	      irhs2 = adjust_bool_pattern_cast (vinfo,
   4228  1.1  mrg 						out_type, irhs2, stmt_info);
   4229  1.1  mrg 	    }
   4230  1.1  mrg 	}
   4231  1.1  mrg       itype = TREE_TYPE (irhs1);
   4232  1.1  mrg       pattern_stmt
   4233  1.1  mrg 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
   4234  1.1  mrg 			       rhs_code, irhs1, irhs2);
   4235  1.1  mrg       break;
   4236  1.1  mrg 
   4237  1.1  mrg     default:
   4238  1.1  mrg     do_compare:
   4239  1.1  mrg       gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
   4240  1.1  mrg       if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
   4241  1.1  mrg 	  || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
   4242  1.1  mrg 	  || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
   4243  1.1  mrg 		       GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
   4244  1.1  mrg 	{
   4245  1.1  mrg 	  scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
   4246  1.1  mrg 	  itype
   4247  1.1  mrg 	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
   4248  1.1  mrg 	}
   4249  1.1  mrg       else
   4250  1.1  mrg 	itype = TREE_TYPE (rhs1);
   4251  1.1  mrg       cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
   4252  1.1  mrg       if (trueval == NULL_TREE)
   4253  1.1  mrg 	trueval = build_int_cst (itype, 1);
   4254  1.1  mrg       else
   4255  1.1  mrg 	gcc_checking_assert (useless_type_conversion_p (itype,
   4256  1.1  mrg 							TREE_TYPE (trueval)));
   4257  1.1  mrg       pattern_stmt
   4258  1.1  mrg 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
   4259  1.1  mrg 			       COND_EXPR, cond_expr, trueval,
   4260  1.1  mrg 			       build_int_cst (itype, 0));
   4261  1.1  mrg       break;
   4262  1.1  mrg     }
   4263  1.1  mrg 
   4264  1.1  mrg   gimple_set_location (pattern_stmt, loc);
   4265  1.1  mrg   append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
   4266  1.1  mrg 			  get_vectype_for_scalar_type (vinfo, itype));
   4267  1.1  mrg   defs.put (var, gimple_assign_lhs (pattern_stmt));
   4268  1.1  mrg }
   4269  1.1  mrg 
   4270  1.1  mrg /* Comparison function to qsort a vector of gimple stmts after UID.  */
   4271  1.1  mrg 
   4272  1.1  mrg static int
   4273  1.1  mrg sort_after_uid (const void *p1, const void *p2)
   4274  1.1  mrg {
   4275  1.1  mrg   const gimple *stmt1 = *(const gimple * const *)p1;
   4276  1.1  mrg   const gimple *stmt2 = *(const gimple * const *)p2;
   4277  1.1  mrg   return gimple_uid (stmt1) - gimple_uid (stmt2);
   4278  1.1  mrg }
   4279  1.1  mrg 
   4280  1.1  mrg /* Create pattern stmts for all stmts participating in the bool pattern
   4281  1.1  mrg    specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
   4282  1.1  mrg    OUT_TYPE.  Return the def of the pattern root.  */
   4283  1.1  mrg 
   4284  1.1  mrg static tree
   4285  1.1  mrg adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
   4286  1.1  mrg 		   tree out_type, stmt_vec_info stmt_info)
   4287  1.1  mrg {
   4288  1.1  mrg   /* Gather original stmts in the bool pattern in their order of appearance
   4289  1.1  mrg      in the IL.  */
   4290  1.1  mrg   auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
   4291  1.1  mrg   for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
   4292  1.1  mrg        i != bool_stmt_set.end (); ++i)
   4293  1.1  mrg     bool_stmts.quick_push (*i);
   4294  1.1  mrg   bool_stmts.qsort (sort_after_uid);
   4295  1.1  mrg 
   4296  1.1  mrg   /* Now process them in that order, producing pattern stmts.  */
   4297  1.1  mrg   hash_map <tree, tree> defs;
   4298  1.1  mrg   for (unsigned i = 0; i < bool_stmts.length (); ++i)
   4299  1.1  mrg     adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
   4300  1.1  mrg 			 out_type, stmt_info, defs);
   4301  1.1  mrg 
   4302  1.1  mrg   /* Pop the last pattern seq stmt and install it as pattern root for STMT.  */
   4303  1.1  mrg   gimple *pattern_stmt
   4304  1.1  mrg     = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
   4305  1.1  mrg   return gimple_assign_lhs (pattern_stmt);
   4306  1.1  mrg }
   4307  1.1  mrg 
   4308  1.1  mrg /* Return the proper type for converting bool VAR into
   4309  1.1  mrg    an integer value or NULL_TREE if no such type exists.
   4310  1.1  mrg    The type is chosen so that the converted value has the
   4311  1.1  mrg    same number of elements as VAR's vector type.  */
   4312  1.1  mrg 
   4313  1.1  mrg static tree
   4314  1.1  mrg integer_type_for_mask (tree var, vec_info *vinfo)
   4315  1.1  mrg {
   4316  1.1  mrg   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
   4317  1.1  mrg     return NULL_TREE;
   4318  1.1  mrg 
   4319  1.1  mrg   stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
   4320  1.1  mrg   if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
   4321  1.1  mrg     return NULL_TREE;
   4322  1.1  mrg 
   4323  1.1  mrg   return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
   4324  1.1  mrg }
   4325  1.1  mrg 
   4326  1.1  mrg /* Function vect_recog_bool_pattern
   4327  1.1  mrg 
   4328  1.1  mrg    Try to find pattern like following:
   4329  1.1  mrg 
   4330  1.1  mrg      bool a_b, b_b, c_b, d_b, e_b;
   4331  1.1  mrg      TYPE f_T;
   4332  1.1  mrg    loop:
   4333  1.1  mrg      S1  a_b = x1 CMP1 y1;
   4334  1.1  mrg      S2  b_b = x2 CMP2 y2;
   4335  1.1  mrg      S3  c_b = a_b & b_b;
   4336  1.1  mrg      S4  d_b = x3 CMP3 y3;
   4337  1.1  mrg      S5  e_b = c_b | d_b;
   4338  1.1  mrg      S6  f_T = (TYPE) e_b;
   4339  1.1  mrg 
   4340  1.1  mrg    where type 'TYPE' is an integral type.  Or a similar pattern
   4341  1.1  mrg    ending in
   4342  1.1  mrg 
   4343  1.1  mrg      S6  f_Y = e_b ? r_Y : s_Y;
   4344  1.1  mrg 
   4345  1.1  mrg    as results from if-conversion of a complex condition.
   4346  1.1  mrg 
   4347  1.1  mrg    Input:
   4348  1.1  mrg 
   4349  1.1  mrg    * STMT_VINFO: The stmt at the end from which the pattern
   4350  1.1  mrg 		 search begins, i.e. cast of a bool to
   4351  1.1  mrg 		 an integer type.
   4352  1.1  mrg 
   4353  1.1  mrg    Output:
   4354  1.1  mrg 
   4355  1.1  mrg    * TYPE_OUT: The type of the output of this pattern.
   4356  1.1  mrg 
   4357  1.1  mrg    * Return value: A new stmt that will be used to replace the pattern.
   4358  1.1  mrg 
   4359  1.1  mrg 	Assuming size of TYPE is the same as size of all comparisons
   4360  1.1  mrg 	(otherwise some casts would be added where needed), the above
   4361  1.1  mrg 	sequence we create related pattern stmts:
   4362  1.1  mrg 	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
   4363  1.1  mrg 	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
   4364  1.1  mrg 	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
   4365  1.1  mrg 	S5'  e_T = c_T | d_T;
   4366  1.1  mrg 	S6'  f_T = e_T;
   4367  1.1  mrg 
   4368  1.1  mrg 	Instead of the above S3' we could emit:
   4369  1.1  mrg 	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
   4370  1.1  mrg 	S3'  c_T = a_T | b_T;
   4371  1.1  mrg 	but the above is more efficient.  */
   4372  1.1  mrg 
   4373  1.1  mrg static gimple *
   4374  1.1  mrg vect_recog_bool_pattern (vec_info *vinfo,
   4375  1.1  mrg 			 stmt_vec_info stmt_vinfo, tree *type_out)
   4376  1.1  mrg {
   4377  1.1  mrg   gimple *last_stmt = stmt_vinfo->stmt;
   4378  1.1  mrg   enum tree_code rhs_code;
   4379  1.1  mrg   tree var, lhs, rhs, vectype;
   4380  1.1  mrg   gimple *pattern_stmt;
   4381  1.1  mrg 
   4382  1.1  mrg   if (!is_gimple_assign (last_stmt))
   4383  1.1  mrg     return NULL;
   4384  1.1  mrg 
   4385  1.1  mrg   var = gimple_assign_rhs1 (last_stmt);
   4386  1.1  mrg   lhs = gimple_assign_lhs (last_stmt);
   4387  1.1  mrg   rhs_code = gimple_assign_rhs_code (last_stmt);
   4388  1.1  mrg 
   4389  1.1  mrg   if (rhs_code == VIEW_CONVERT_EXPR)
   4390  1.1  mrg     var = TREE_OPERAND (var, 0);
   4391  1.1  mrg 
   4392  1.1  mrg   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
   4393  1.1  mrg     return NULL;
   4394  1.1  mrg 
   4395  1.1  mrg   hash_set<gimple *> bool_stmts;
   4396  1.1  mrg 
   4397  1.1  mrg   if (CONVERT_EXPR_CODE_P (rhs_code)
   4398  1.1  mrg       || rhs_code == VIEW_CONVERT_EXPR)
   4399  1.1  mrg     {
   4400  1.1  mrg       if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
   4401  1.1  mrg 	  || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
   4402  1.1  mrg 	return NULL;
   4403  1.1  mrg       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
   4404  1.1  mrg 
   4405  1.1  mrg       if (check_bool_pattern (var, vinfo, bool_stmts))
   4406  1.1  mrg 	{
   4407  1.1  mrg 	  rhs = adjust_bool_stmts (vinfo, bool_stmts,
   4408  1.1  mrg 				   TREE_TYPE (lhs), stmt_vinfo);
   4409  1.1  mrg 	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
   4410  1.1  mrg 	  if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
   4411  1.1  mrg 	    pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
   4412  1.1  mrg 	  else
   4413  1.1  mrg 	    pattern_stmt
   4414  1.1  mrg 	      = gimple_build_assign (lhs, NOP_EXPR, rhs);
   4415  1.1  mrg 	}
   4416  1.1  mrg       else
   4417  1.1  mrg 	{
   4418  1.1  mrg 	  tree type = integer_type_for_mask (var, vinfo);
   4419  1.1  mrg 	  tree cst0, cst1, tmp;
   4420  1.1  mrg 
   4421  1.1  mrg 	  if (!type)
   4422  1.1  mrg 	    return NULL;
   4423  1.1  mrg 
   4424  1.1  mrg 	  /* We may directly use cond with narrowed type to avoid
   4425  1.1  mrg 	     multiple cond exprs with following result packing and
   4426  1.1  mrg 	     perform single cond with packed mask instead.  In case
   4427  1.1  mrg 	     of widening we better make cond first and then extract
   4428  1.1  mrg 	     results.  */
   4429  1.1  mrg 	  if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
   4430  1.1  mrg 	    type = TREE_TYPE (lhs);
   4431  1.1  mrg 
   4432  1.1  mrg 	  cst0 = build_int_cst (type, 0);
   4433  1.1  mrg 	  cst1 = build_int_cst (type, 1);
   4434  1.1  mrg 	  tmp = vect_recog_temp_ssa_var (type, NULL);
   4435  1.1  mrg 	  pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
   4436  1.1  mrg 
   4437  1.1  mrg 	  if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
   4438  1.1  mrg 	    {
   4439  1.1  mrg 	      tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
   4440  1.1  mrg 	      append_pattern_def_seq (vinfo, stmt_vinfo,
   4441  1.1  mrg 				      pattern_stmt, new_vectype);
   4442  1.1  mrg 
   4443  1.1  mrg 	      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
   4444  1.1  mrg 	      pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
   4445  1.1  mrg 	    }
   4446  1.1  mrg 	}
   4447  1.1  mrg 
   4448  1.1  mrg       *type_out = vectype;
   4449  1.1  mrg       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
   4450  1.1  mrg 
   4451  1.1  mrg       return pattern_stmt;
   4452  1.1  mrg     }
   4453  1.1  mrg   else if (rhs_code == COND_EXPR
   4454  1.1  mrg 	   && TREE_CODE (var) == SSA_NAME)
   4455  1.1  mrg     {
   4456  1.1  mrg       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
   4457  1.1  mrg       if (vectype == NULL_TREE)
   4458  1.1  mrg 	return NULL;
   4459  1.1  mrg 
   4460  1.1  mrg       /* Build a scalar type for the boolean result that when
   4461  1.1  mrg          vectorized matches the vector type of the result in
   4462  1.1  mrg 	 size and number of elements.  */
   4463  1.1  mrg       unsigned prec
   4464  1.1  mrg 	= vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
   4465  1.1  mrg 			       TYPE_VECTOR_SUBPARTS (vectype));
   4466  1.1  mrg 
   4467  1.1  mrg       tree type
   4468  1.1  mrg 	= build_nonstandard_integer_type (prec,
   4469  1.1  mrg 					  TYPE_UNSIGNED (TREE_TYPE (var)));
   4470  1.1  mrg       if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
   4471  1.1  mrg 	return NULL;
   4472  1.1  mrg 
   4473  1.1  mrg       if (!check_bool_pattern (var, vinfo, bool_stmts))
   4474  1.1  mrg 	return NULL;
   4475  1.1  mrg 
   4476  1.1  mrg       rhs = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
   4477  1.1  mrg 
   4478  1.1  mrg       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
   4479  1.1  mrg       pattern_stmt
   4480  1.1  mrg 	  = gimple_build_assign (lhs, COND_EXPR,
   4481  1.1  mrg 				 build2 (NE_EXPR, boolean_type_node,
   4482  1.1  mrg 					 rhs, build_int_cst (type, 0)),
   4483  1.1  mrg 				 gimple_assign_rhs2 (last_stmt),
   4484  1.1  mrg 				 gimple_assign_rhs3 (last_stmt));
   4485  1.1  mrg       *type_out = vectype;
   4486  1.1  mrg       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
   4487  1.1  mrg 
   4488  1.1  mrg       return pattern_stmt;
   4489  1.1  mrg     }
   4490  1.1  mrg   else if (rhs_code == SSA_NAME
   4491  1.1  mrg 	   && STMT_VINFO_DATA_REF (stmt_vinfo))
   4492  1.1  mrg     {
   4493  1.1  mrg       stmt_vec_info pattern_stmt_info;
   4494  1.1  mrg       tree nunits_vectype;
   4495  1.1  mrg       if (!vect_get_vector_types_for_stmt (vinfo, stmt_vinfo, &vectype,
   4496  1.1  mrg 					   &nunits_vectype)
   4497  1.1  mrg 	  || !VECTOR_MODE_P (TYPE_MODE (vectype)))
   4498  1.1  mrg 	return NULL;
   4499  1.1  mrg 
   4500  1.1  mrg       if (check_bool_pattern (var, vinfo, bool_stmts))
   4501  1.1  mrg 	rhs = adjust_bool_stmts (vinfo, bool_stmts,
   4502  1.1  mrg 				 TREE_TYPE (vectype), stmt_vinfo);
   4503  1.1  mrg       else
   4504  1.1  mrg 	{
   4505  1.1  mrg 	  tree type = integer_type_for_mask (var, vinfo);
   4506  1.1  mrg 	  tree cst0, cst1, new_vectype;
   4507  1.1  mrg 
   4508  1.1  mrg 	  if (!type)
   4509  1.1  mrg 	    return NULL;
   4510  1.1  mrg 
   4511  1.1  mrg 	  if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
   4512  1.1  mrg 	    type = TREE_TYPE (vectype);
   4513  1.1  mrg 
   4514  1.1  mrg 	  cst0 = build_int_cst (type, 0);
   4515  1.1  mrg 	  cst1 = build_int_cst (type, 1);
   4516  1.1  mrg 	  new_vectype = get_vectype_for_scalar_type (vinfo, type);
   4517  1.1  mrg 
   4518  1.1  mrg 	  rhs = vect_recog_temp_ssa_var (type, NULL);
   4519  1.1  mrg 	  pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
   4520  1.1  mrg 	  append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
   4521  1.1  mrg 	}
   4522  1.1  mrg 
   4523  1.1  mrg       lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
   4524  1.1  mrg       if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
   4525  1.1  mrg 	{
   4526  1.1  mrg 	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
   4527  1.1  mrg 	  gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
   4528  1.1  mrg 	  append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
   4529  1.1  mrg 	  rhs = rhs2;
   4530  1.1  mrg 	}
   4531  1.1  mrg       pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
   4532  1.1  mrg       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
   4533  1.1  mrg       vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
   4534  1.1  mrg       *type_out = vectype;
   4535  1.1  mrg       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
   4536  1.1  mrg 
   4537  1.1  mrg       return pattern_stmt;
   4538  1.1  mrg     }
   4539  1.1  mrg   else
   4540  1.1  mrg     return NULL;
   4541  1.1  mrg }
   4542  1.1  mrg 
   4543  1.1  mrg 
   4544  1.1  mrg /* A helper for vect_recog_mask_conversion_pattern.  Build
   4545  1.1  mrg    conversion of MASK to a type suitable for masking VECTYPE.
   4546  1.1  mrg    Built statement gets required vectype and is appended to
   4547  1.1  mrg    a pattern sequence of STMT_VINFO.
   4548  1.1  mrg 
   4549  1.1  mrg    Return converted mask.  */
   4550  1.1  mrg 
   4551  1.1  mrg static tree
   4552  1.1  mrg build_mask_conversion (vec_info *vinfo,
   4553  1.1  mrg 		       tree mask, tree vectype, stmt_vec_info stmt_vinfo)
   4554  1.1  mrg {
   4555  1.1  mrg   gimple *stmt;
   4556  1.1  mrg   tree masktype, tmp;
   4557  1.1  mrg 
   4558  1.1  mrg   masktype = truth_type_for (vectype);
   4559  1.1  mrg   tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
   4560  1.1  mrg   stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
   4561  1.1  mrg   append_pattern_def_seq (vinfo, stmt_vinfo,
   4562  1.1  mrg 			  stmt, masktype, TREE_TYPE (vectype));
   4563  1.1  mrg 
   4564  1.1  mrg   return tmp;
   4565  1.1  mrg }
   4566  1.1  mrg 
   4567  1.1  mrg 
   4568  1.1  mrg /* Function vect_recog_mask_conversion_pattern
   4569  1.1  mrg 
   4570  1.1  mrg    Try to find statements which require boolean type
   4571  1.1  mrg    converison.  Additional conversion statements are
   4572  1.1  mrg    added to handle such cases.  For example:
   4573  1.1  mrg 
   4574  1.1  mrg    bool m_1, m_2, m_3;
   4575  1.1  mrg    int i_4, i_5;
   4576  1.1  mrg    double d_6, d_7;
   4577  1.1  mrg    char c_1, c_2, c_3;
   4578  1.1  mrg 
   4579  1.1  mrg    S1   m_1 = i_4 > i_5;
   4580  1.1  mrg    S2   m_2 = d_6 < d_7;
   4581  1.1  mrg    S3   m_3 = m_1 & m_2;
   4582  1.1  mrg    S4   c_1 = m_3 ? c_2 : c_3;
   4583  1.1  mrg 
   4584  1.1  mrg    Will be transformed into:
   4585  1.1  mrg 
   4586  1.1  mrg    S1   m_1 = i_4 > i_5;
   4587  1.1  mrg    S2   m_2 = d_6 < d_7;
   4588  1.1  mrg    S3'' m_2' = (_Bool[bitsize=32])m_2
   4589  1.1  mrg    S3'  m_3' = m_1 & m_2';
   4590  1.1  mrg    S4'' m_3'' = (_Bool[bitsize=8])m_3'
   4591  1.1  mrg    S4'  c_1' = m_3'' ? c_2 : c_3;  */
   4592  1.1  mrg 
   4593  1.1  mrg static gimple *
   4594  1.1  mrg vect_recog_mask_conversion_pattern (vec_info *vinfo,
   4595  1.1  mrg 				    stmt_vec_info stmt_vinfo, tree *type_out)
   4596  1.1  mrg {
   4597  1.1  mrg   gimple *last_stmt = stmt_vinfo->stmt;
   4598  1.1  mrg   enum tree_code rhs_code;
   4599  1.1  mrg   tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
   4600  1.1  mrg   tree vectype1, vectype2;
   4601  1.1  mrg   stmt_vec_info pattern_stmt_info;
   4602  1.1  mrg   tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
   4603  1.1  mrg   tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
   4604  1.1  mrg 
   4605  1.1  mrg   /* Check for MASK_LOAD ans MASK_STORE calls requiring mask conversion.  */
   4606  1.1  mrg   if (is_gimple_call (last_stmt)
   4607  1.1  mrg       && gimple_call_internal_p (last_stmt))
   4608  1.1  mrg     {
   4609  1.1  mrg       gcall *pattern_stmt;
   4610  1.1  mrg 
   4611  1.1  mrg       internal_fn ifn = gimple_call_internal_fn (last_stmt);
   4612  1.1  mrg       int mask_argno = internal_fn_mask_index (ifn);
   4613  1.1  mrg       if (mask_argno < 0)
   4614  1.1  mrg 	return NULL;
   4615  1.1  mrg 
   4616  1.1  mrg       bool store_p = internal_store_fn_p (ifn);
   4617  1.1  mrg       if (store_p)
   4618  1.1  mrg 	{
   4619  1.1  mrg 	  int rhs_index = internal_fn_stored_value_index (ifn);
   4620  1.1  mrg 	  tree rhs = gimple_call_arg (last_stmt, rhs_index);
   4621  1.1  mrg 	  vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
   4622  1.1  mrg 	}
   4623  1.1  mrg       else
   4624  1.1  mrg 	{
   4625  1.1  mrg 	  lhs = gimple_call_lhs (last_stmt);
   4626  1.1  mrg 	  if (!lhs)
   4627  1.1  mrg 	    return NULL;
   4628  1.1  mrg 	  vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
   4629  1.1  mrg 	}
   4630  1.1  mrg 
   4631  1.1  mrg       tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
   4632  1.1  mrg       tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
   4633  1.1  mrg       if (!mask_arg_type)
   4634  1.1  mrg 	return NULL;
   4635  1.1  mrg       vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
   4636  1.1  mrg 
   4637  1.1  mrg       if (!vectype1 || !vectype2
   4638  1.1  mrg 	  || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
   4639  1.1  mrg 		       TYPE_VECTOR_SUBPARTS (vectype2)))
   4640  1.1  mrg 	return NULL;
   4641  1.1  mrg 
   4642  1.1  mrg       tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
   4643  1.1  mrg 
   4644  1.1  mrg       auto_vec<tree, 8> args;
   4645  1.1  mrg       unsigned int nargs = gimple_call_num_args (last_stmt);
   4646  1.1  mrg       args.safe_grow (nargs, true);
   4647  1.1  mrg       for (unsigned int i = 0; i < nargs; ++i)
   4648  1.1  mrg 	args[i] = ((int) i == mask_argno
   4649  1.1  mrg 		   ? tmp
   4650  1.1  mrg 		   : gimple_call_arg (last_stmt, i));
   4651  1.1  mrg       pattern_stmt = gimple_build_call_internal_vec (ifn, args);
   4652  1.1  mrg 
   4653  1.1  mrg       if (!store_p)
   4654  1.1  mrg 	{
   4655  1.1  mrg 	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
   4656  1.1  mrg 	  gimple_call_set_lhs (pattern_stmt, lhs);
   4657  1.1  mrg 	}
   4658  1.1  mrg       gimple_call_set_nothrow (pattern_stmt, true);
   4659  1.1  mrg 
   4660  1.1  mrg       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
   4661  1.1  mrg       if (STMT_VINFO_DATA_REF (stmt_vinfo))
   4662  1.1  mrg 	vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
   4663  1.1  mrg 
   4664  1.1  mrg       *type_out = vectype1;
   4665  1.1  mrg       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
   4666  1.1  mrg 
   4667  1.1  mrg       return pattern_stmt;
   4668  1.1  mrg     }
   4669  1.1  mrg 
   4670  1.1  mrg   if (!is_gimple_assign (last_stmt))
   4671  1.1  mrg     return NULL;
   4672  1.1  mrg 
   4673  1.1  mrg   gimple *pattern_stmt;
   4674  1.1  mrg   lhs = gimple_assign_lhs (last_stmt);
   4675  1.1  mrg   rhs1 = gimple_assign_rhs1 (last_stmt);
   4676  1.1  mrg   rhs_code = gimple_assign_rhs_code (last_stmt);
   4677  1.1  mrg 
   4678  1.1  mrg   /* Check for cond expression requiring mask conversion.  */
   4679  1.1  mrg   if (rhs_code == COND_EXPR)
   4680  1.1  mrg     {
   4681  1.1  mrg       vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
   4682  1.1  mrg 
   4683  1.1  mrg       if (TREE_CODE (rhs1) == SSA_NAME)
   4684  1.1  mrg 	{
   4685  1.1  mrg 	  rhs1_type = integer_type_for_mask (rhs1, vinfo);
   4686  1.1  mrg 	  if (!rhs1_type)
   4687  1.1  mrg 	    return NULL;
   4688  1.1  mrg 	}
   4689  1.1  mrg       else if (COMPARISON_CLASS_P (rhs1))
   4690  1.1  mrg 	{
   4691  1.1  mrg 	  /* Check whether we're comparing scalar booleans and (if so)
   4692  1.1  mrg 	     whether a better mask type exists than the mask associated
   4693  1.1  mrg 	     with boolean-sized elements.  This avoids unnecessary packs
   4694  1.1  mrg 	     and unpacks if the booleans are set from comparisons of
   4695  1.1  mrg 	     wider types.  E.g. in:
   4696  1.1  mrg 
   4697  1.1  mrg 	       int x1, x2, x3, x4, y1, y1;
   4698  1.1  mrg 	       ...
   4699  1.1  mrg 	       bool b1 = (x1 == x2);
   4700  1.1  mrg 	       bool b2 = (x3 == x4);
   4701  1.1  mrg 	       ... = b1 == b2 ? y1 : y2;
   4702  1.1  mrg 
   4703  1.1  mrg 	     it is better for b1 and b2 to use the mask type associated
   4704  1.1  mrg 	     with int elements rather bool (byte) elements.  */
   4705  1.1  mrg 	  rhs1_op0 = TREE_OPERAND (rhs1, 0);
   4706  1.1  mrg 	  rhs1_op1 = TREE_OPERAND (rhs1, 1);
   4707  1.1  mrg 	  if (!rhs1_op0 || !rhs1_op1)
   4708  1.1  mrg 	    return NULL;
   4709  1.1  mrg 	  rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
   4710  1.1  mrg 	  rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
   4711  1.1  mrg 
   4712  1.1  mrg 	  if (!rhs1_op0_type)
   4713  1.1  mrg 	    rhs1_type = TREE_TYPE (rhs1_op0);
   4714  1.1  mrg 	  else if (!rhs1_op1_type)
   4715  1.1  mrg 	    rhs1_type = TREE_TYPE (rhs1_op1);
   4716  1.1  mrg 	  else if (TYPE_PRECISION (rhs1_op0_type)
   4717  1.1  mrg 		   != TYPE_PRECISION (rhs1_op1_type))
   4718  1.1  mrg 	    {
   4719  1.1  mrg 	      int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
   4720  1.1  mrg 			 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
   4721  1.1  mrg 	      int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
   4722  1.1  mrg 			 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
   4723  1.1  mrg 	      if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
   4724  1.1  mrg 		{
   4725  1.1  mrg 		  if (abs (tmp0) > abs (tmp1))
   4726  1.1  mrg 		    rhs1_type = rhs1_op1_type;
   4727  1.1  mrg 		  else
   4728  1.1  mrg 		    rhs1_type = rhs1_op0_type;
   4729  1.1  mrg 		}
   4730  1.1  mrg 	      else
   4731  1.1  mrg 		rhs1_type = build_nonstandard_integer_type
   4732  1.1  mrg 		  (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
   4733  1.1  mrg 	    }
   4734  1.1  mrg 	  else
   4735  1.1  mrg 	    rhs1_type = rhs1_op0_type;
   4736  1.1  mrg 	}
   4737  1.1  mrg       else
   4738  1.1  mrg 	return NULL;
   4739  1.1  mrg 
   4740  1.1  mrg       vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
   4741  1.1  mrg 
   4742  1.1  mrg       if (!vectype1 || !vectype2)
   4743  1.1  mrg 	return NULL;
   4744  1.1  mrg 
   4745  1.1  mrg       /* Continue if a conversion is needed.  Also continue if we have
   4746  1.1  mrg 	 a comparison whose vector type would normally be different from
   4747  1.1  mrg 	 VECTYPE2 when considered in isolation.  In that case we'll
   4748  1.1  mrg 	 replace the comparison with an SSA name (so that we can record
   4749  1.1  mrg 	 its vector type) and behave as though the comparison was an SSA
   4750  1.1  mrg 	 name from the outset.  */
   4751  1.1  mrg       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
   4752  1.1  mrg 		    TYPE_VECTOR_SUBPARTS (vectype2))
   4753  1.1  mrg 	  && !rhs1_op0_type
   4754  1.1  mrg 	  && !rhs1_op1_type)
   4755  1.1  mrg 	return NULL;
   4756  1.1  mrg 
   4757  1.1  mrg       /* If rhs1 is invariant and we can promote it leave the COND_EXPR
   4758  1.1  mrg          in place, we can handle it in vectorizable_condition.  This avoids
   4759  1.1  mrg 	 unnecessary promotion stmts and increased vectorization factor.  */
   4760  1.1  mrg       if (COMPARISON_CLASS_P (rhs1)
   4761  1.1  mrg 	  && INTEGRAL_TYPE_P (rhs1_type)
   4762  1.1  mrg 	  && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
   4763  1.1  mrg 		       TYPE_VECTOR_SUBPARTS (vectype2)))
   4764  1.1  mrg 	{
   4765  1.1  mrg 	  enum vect_def_type dt;
   4766  1.1  mrg 	  if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
   4767  1.1  mrg 	      && dt == vect_external_def
   4768  1.1  mrg 	      && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
   4769  1.1  mrg 	      && (dt == vect_external_def
   4770  1.1  mrg 		  || dt == vect_constant_def))
   4771  1.1  mrg 	    {
   4772  1.1  mrg 	      tree wide_scalar_type = build_nonstandard_integer_type
   4773  1.1  mrg 		(vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
   4774  1.1  mrg 	      tree vectype3 = get_vectype_for_scalar_type (vinfo,
   4775  1.1  mrg 							   wide_scalar_type);
   4776  1.1  mrg 	      if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
   4777  1.1  mrg 		return NULL;
   4778  1.1  mrg 	    }
   4779  1.1  mrg 	}
   4780  1.1  mrg 
   4781  1.1  mrg       /* If rhs1 is a comparison we need to move it into a
   4782  1.1  mrg 	 separate statement.  */
   4783  1.1  mrg       if (TREE_CODE (rhs1) != SSA_NAME)
   4784  1.1  mrg 	{
   4785  1.1  mrg 	  tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
   4786  1.1  mrg 	  if (rhs1_op0_type
   4787  1.1  mrg 	      && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
   4788  1.1  mrg 	    rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
   4789  1.1  mrg 					      vectype2, stmt_vinfo);
   4790  1.1  mrg 	  if (rhs1_op1_type
   4791  1.1  mrg 	      && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
   4792  1.1  mrg 	    rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
   4793  1.1  mrg 				      vectype2, stmt_vinfo);
   4794  1.1  mrg 	  pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
   4795  1.1  mrg 					      rhs1_op0, rhs1_op1);
   4796  1.1  mrg 	  rhs1 = tmp;
   4797  1.1  mrg 	  append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
   4798  1.1  mrg 				  rhs1_type);
   4799  1.1  mrg 	}
   4800  1.1  mrg 
   4801  1.1  mrg       if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
   4802  1.1  mrg 		    TYPE_VECTOR_SUBPARTS (vectype2)))
   4803  1.1  mrg 	tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
   4804  1.1  mrg       else
   4805  1.1  mrg 	tmp = rhs1;
   4806  1.1  mrg 
   4807  1.1  mrg       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
   4808  1.1  mrg       pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
   4809  1.1  mrg 					  gimple_assign_rhs2 (last_stmt),
   4810  1.1  mrg 					  gimple_assign_rhs3 (last_stmt));
   4811  1.1  mrg 
   4812  1.1  mrg       *type_out = vectype1;
   4813  1.1  mrg       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
   4814  1.1  mrg 
   4815  1.1  mrg       return pattern_stmt;
   4816  1.1  mrg     }
   4817  1.1  mrg 
   4818  1.1  mrg   /* Now check for binary boolean operations requiring conversion for
   4819  1.1  mrg      one of operands.  */
   4820  1.1  mrg   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
   4821  1.1  mrg     return NULL;
   4822  1.1  mrg 
   4823  1.1  mrg   if (rhs_code != BIT_IOR_EXPR
   4824  1.1  mrg       && rhs_code != BIT_XOR_EXPR
   4825  1.1  mrg       && rhs_code != BIT_AND_EXPR
   4826  1.1  mrg       && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
   4827  1.1  mrg     return NULL;
   4828  1.1  mrg 
   4829  1.1  mrg   rhs2 = gimple_assign_rhs2 (last_stmt);
   4830  1.1  mrg 
   4831  1.1  mrg   rhs1_type = integer_type_for_mask (rhs1, vinfo);
   4832  1.1  mrg   rhs2_type = integer_type_for_mask (rhs2, vinfo);
   4833  1.1  mrg 
   4834  1.1  mrg   if (!rhs1_type || !rhs2_type
   4835  1.1  mrg       || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
   4836  1.1  mrg     return NULL;
   4837  1.1  mrg 
   4838  1.1  mrg   if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
   4839  1.1  mrg     {
   4840  1.1  mrg       vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
   4841  1.1  mrg       if (!vectype1)
   4842  1.1  mrg 	return NULL;
   4843  1.1  mrg       rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
   4844  1.1  mrg     }
   4845  1.1  mrg   else
   4846  1.1  mrg     {
   4847  1.1  mrg       vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
   4848  1.1  mrg       if (!vectype1)
   4849  1.1  mrg 	return NULL;
   4850  1.1  mrg       rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
   4851  1.1  mrg     }
   4852  1.1  mrg 
   4853  1.1  mrg   lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
   4854  1.1  mrg   pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
   4855  1.1  mrg 
   4856  1.1  mrg   *type_out = vectype1;
   4857  1.1  mrg   vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
   4858  1.1  mrg 
   4859  1.1  mrg   return pattern_stmt;
   4860  1.1  mrg }
   4861  1.1  mrg 
   4862  1.1  mrg /* STMT_INFO is a load or store.  If the load or store is conditional, return
   4863  1.1  mrg    the boolean condition under which it occurs, otherwise return null.  */
   4864  1.1  mrg 
   4865  1.1  mrg static tree
   4866  1.1  mrg vect_get_load_store_mask (stmt_vec_info stmt_info)
   4867  1.1  mrg {
   4868  1.1  mrg   if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
   4869  1.1  mrg     {
   4870  1.1  mrg       gcc_assert (gimple_assign_single_p (def_assign));
   4871  1.1  mrg       return NULL_TREE;
   4872  1.1  mrg     }
   4873  1.1  mrg 
   4874  1.1  mrg   if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
   4875  1.1  mrg     {
   4876  1.1  mrg       internal_fn ifn = gimple_call_internal_fn (def_call);
   4877  1.1  mrg       int mask_index = internal_fn_mask_index (ifn);
   4878  1.1  mrg       return gimple_call_arg (def_call, mask_index);
   4879  1.1  mrg     }
   4880  1.1  mrg 
   4881  1.1  mrg   gcc_unreachable ();
   4882  1.1  mrg }
   4883  1.1  mrg 
   4884  1.1  mrg /* Return MASK if MASK is suitable for masking an operation on vectors
   4885  1.1  mrg    of type VECTYPE, otherwise convert it into such a form and return
   4886  1.1  mrg    the result.  Associate any conversion statements with STMT_INFO's
   4887  1.1  mrg    pattern.  */
   4888  1.1  mrg 
   4889  1.1  mrg static tree
   4890  1.1  mrg vect_convert_mask_for_vectype (tree mask, tree vectype,
   4891  1.1  mrg 			       stmt_vec_info stmt_info, vec_info *vinfo)
   4892  1.1  mrg {
   4893  1.1  mrg   tree mask_type = integer_type_for_mask (mask, vinfo);
   4894  1.1  mrg   if (mask_type)
   4895  1.1  mrg     {
   4896  1.1  mrg       tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
   4897  1.1  mrg       if (mask_vectype
   4898  1.1  mrg 	  && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
   4899  1.1  mrg 		       TYPE_VECTOR_SUBPARTS (mask_vectype)))
   4900  1.1  mrg 	mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
   4901  1.1  mrg     }
   4902  1.1  mrg   return mask;
   4903  1.1  mrg }
   4904  1.1  mrg 
   4905  1.1  mrg /* Return the equivalent of:
   4906  1.1  mrg 
   4907  1.1  mrg      fold_convert (TYPE, VALUE)
   4908  1.1  mrg 
   4909  1.1  mrg    with the expectation that the operation will be vectorized.
   4910  1.1  mrg    If new statements are needed, add them as pattern statements
   4911  1.1  mrg    to STMT_INFO.  */
   4912  1.1  mrg 
   4913  1.1  mrg static tree
   4914  1.1  mrg vect_add_conversion_to_pattern (vec_info *vinfo,
   4915  1.1  mrg 				tree type, tree value, stmt_vec_info stmt_info)
   4916  1.1  mrg {
   4917  1.1  mrg   if (useless_type_conversion_p (type, TREE_TYPE (value)))
   4918  1.1  mrg     return value;
   4919  1.1  mrg 
   4920  1.1  mrg   tree new_value = vect_recog_temp_ssa_var (type, NULL);
   4921  1.1  mrg   gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
   4922  1.1  mrg   append_pattern_def_seq (vinfo, stmt_info, conversion,
   4923  1.1  mrg 			  get_vectype_for_scalar_type (vinfo, type));
   4924  1.1  mrg   return new_value;
   4925  1.1  mrg }
   4926  1.1  mrg 
   4927  1.1  mrg /* Try to convert STMT_INFO into a call to a gather load or scatter store
   4928  1.1  mrg    internal function.  Return the final statement on success and set
   4929  1.1  mrg    *TYPE_OUT to the vector type being loaded or stored.
   4930  1.1  mrg 
   4931  1.1  mrg    This function only handles gathers and scatters that were recognized
   4932  1.1  mrg    as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P).  */
   4933  1.1  mrg 
   4934  1.1  mrg static gimple *
   4935  1.1  mrg vect_recog_gather_scatter_pattern (vec_info *vinfo,
   4936  1.1  mrg 				   stmt_vec_info stmt_info, tree *type_out)
   4937  1.1  mrg {
   4938  1.1  mrg   /* Currently we only support this for loop vectorization.  */
   4939  1.1  mrg   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   4940  1.1  mrg   if (!loop_vinfo)
   4941  1.1  mrg     return NULL;
   4942  1.1  mrg 
   4943  1.1  mrg   /* Make sure that we're looking at a gather load or scatter store.  */
   4944  1.1  mrg   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
   4945  1.1  mrg   if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
   4946  1.1  mrg     return NULL;
   4947  1.1  mrg 
   4948  1.1  mrg   /* Get the boolean that controls whether the load or store happens.
   4949  1.1  mrg      This is null if the operation is unconditional.  */
   4950  1.1  mrg   tree mask = vect_get_load_store_mask (stmt_info);
   4951  1.1  mrg 
   4952  1.1  mrg   /* Make sure that the target supports an appropriate internal
   4953  1.1  mrg      function for the gather/scatter operation.  */
   4954  1.1  mrg   gather_scatter_info gs_info;
   4955  1.1  mrg   if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
   4956  1.1  mrg       || gs_info.ifn == IFN_LAST)
   4957  1.1  mrg     return NULL;
   4958  1.1  mrg 
   4959  1.1  mrg   /* Convert the mask to the right form.  */
   4960  1.1  mrg   tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
   4961  1.1  mrg 						 gs_info.element_type);
   4962  1.1  mrg   if (mask)
   4963  1.1  mrg     mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
   4964  1.1  mrg 					  loop_vinfo);
   4965  1.1  mrg   else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
   4966  1.1  mrg 	   || gs_info.ifn == IFN_MASK_GATHER_LOAD)
   4967  1.1  mrg     mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
   4968  1.1  mrg 
   4969  1.1  mrg   /* Get the invariant base and non-invariant offset, converting the
   4970  1.1  mrg      latter to the same width as the vector elements.  */
   4971  1.1  mrg   tree base = gs_info.base;
   4972  1.1  mrg   tree offset_type = TREE_TYPE (gs_info.offset_vectype);
   4973  1.1  mrg   tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
   4974  1.1  mrg 						gs_info.offset, stmt_info);
   4975  1.1  mrg 
   4976  1.1  mrg   /* Build the new pattern statement.  */
   4977  1.1  mrg   tree scale = size_int (gs_info.scale);
   4978  1.1  mrg   gcall *pattern_stmt;
   4979  1.1  mrg   if (DR_IS_READ (dr))
   4980  1.1  mrg     {
   4981  1.1  mrg       tree zero = build_zero_cst (gs_info.element_type);
   4982  1.1  mrg       if (mask != NULL)
   4983  1.1  mrg 	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
   4984  1.1  mrg 						   offset, scale, zero, mask);
   4985  1.1  mrg       else
   4986  1.1  mrg 	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
   4987  1.1  mrg 						   offset, scale, zero);
   4988  1.1  mrg       tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
   4989  1.1  mrg       gimple_call_set_lhs (pattern_stmt, load_lhs);
   4990  1.1  mrg     }
   4991  1.1  mrg   else
   4992  1.1  mrg     {
   4993  1.1  mrg       tree rhs = vect_get_store_rhs (stmt_info);
   4994  1.1  mrg       if (mask != NULL)
   4995  1.1  mrg 	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
   4996  1.1  mrg 						   base, offset, scale, rhs,
   4997  1.1  mrg 						   mask);
   4998  1.1  mrg       else
   4999  1.1  mrg 	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
   5000  1.1  mrg 						   base, offset, scale, rhs);
   5001  1.1  mrg     }
   5002  1.1  mrg   gimple_call_set_nothrow (pattern_stmt, true);
   5003  1.1  mrg 
   5004  1.1  mrg   /* Copy across relevant vectorization info and associate DR with the
   5005  1.1  mrg      new pattern statement instead of the original statement.  */
   5006  1.1  mrg   stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
   5007  1.1  mrg   loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
   5008  1.1  mrg 
   5009  1.1  mrg   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   5010  1.1  mrg   *type_out = vectype;
   5011  1.1  mrg   vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
   5012  1.1  mrg 
   5013  1.1  mrg   return pattern_stmt;
   5014  1.1  mrg }
   5015  1.1  mrg 
   5016  1.1  mrg /* Return true if TYPE is a non-boolean integer type.  These are the types
   5017  1.1  mrg    that we want to consider for narrowing.  */
   5018  1.1  mrg 
   5019  1.1  mrg static bool
   5020  1.1  mrg vect_narrowable_type_p (tree type)
   5021  1.1  mrg {
   5022  1.1  mrg   return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
   5023  1.1  mrg }
   5024  1.1  mrg 
   5025  1.1  mrg /* Return true if the operation given by CODE can be truncated to N bits
   5026  1.1  mrg    when only N bits of the output are needed.  This is only true if bit N+1
   5027  1.1  mrg    of the inputs has no effect on the low N bits of the result.  */
   5028  1.1  mrg 
   5029  1.1  mrg static bool
   5030  1.1  mrg vect_truncatable_operation_p (tree_code code)
   5031  1.1  mrg {
   5032  1.1  mrg   switch (code)
   5033  1.1  mrg     {
   5034  1.1  mrg     case NEGATE_EXPR:
   5035  1.1  mrg     case PLUS_EXPR:
   5036  1.1  mrg     case MINUS_EXPR:
   5037  1.1  mrg     case MULT_EXPR:
   5038  1.1  mrg     case BIT_NOT_EXPR:
   5039  1.1  mrg     case BIT_AND_EXPR:
   5040  1.1  mrg     case BIT_IOR_EXPR:
   5041  1.1  mrg     case BIT_XOR_EXPR:
   5042  1.1  mrg     case COND_EXPR:
   5043  1.1  mrg       return true;
   5044  1.1  mrg 
   5045  1.1  mrg     default:
   5046  1.1  mrg       return false;
   5047  1.1  mrg     }
   5048  1.1  mrg }
   5049  1.1  mrg 
   5050  1.1  mrg /* Record that STMT_INFO could be changed from operating on TYPE to
   5051  1.1  mrg    operating on a type with the precision and sign given by PRECISION
   5052  1.1  mrg    and SIGN respectively.  PRECISION is an arbitrary bit precision;
   5053  1.1  mrg    it might not be a whole number of bytes.  */
   5054  1.1  mrg 
   5055  1.1  mrg static void
   5056  1.1  mrg vect_set_operation_type (stmt_vec_info stmt_info, tree type,
   5057  1.1  mrg 			 unsigned int precision, signop sign)
   5058  1.1  mrg {
   5059  1.1  mrg   /* Round the precision up to a whole number of bytes.  */
   5060  1.1  mrg   precision = vect_element_precision (precision);
   5061  1.1  mrg   if (precision < TYPE_PRECISION (type)
   5062  1.1  mrg       && (!stmt_info->operation_precision
   5063  1.1  mrg 	  || stmt_info->operation_precision > precision))
   5064  1.1  mrg     {
   5065  1.1  mrg       stmt_info->operation_precision = precision;
   5066  1.1  mrg       stmt_info->operation_sign = sign;
   5067  1.1  mrg     }
   5068  1.1  mrg }
   5069  1.1  mrg 
   5070  1.1  mrg /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
   5071  1.1  mrg    non-boolean inputs, all of which have type TYPE.  MIN_INPUT_PRECISION
   5072  1.1  mrg    is an arbitrary bit precision; it might not be a whole number of bytes.  */
   5073  1.1  mrg 
   5074  1.1  mrg static void
   5075  1.1  mrg vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
   5076  1.1  mrg 			      unsigned int min_input_precision)
   5077  1.1  mrg {
   5078  1.1  mrg   /* This operation in isolation only requires the inputs to have
   5079  1.1  mrg      MIN_INPUT_PRECISION of precision,  However, that doesn't mean
   5080  1.1  mrg      that MIN_INPUT_PRECISION is a natural precision for the chain
   5081  1.1  mrg      as a whole.  E.g. consider something like:
   5082  1.1  mrg 
   5083  1.1  mrg 	 unsigned short *x, *y;
   5084  1.1  mrg 	 *y = ((*x & 0xf0) >> 4) | (*y << 4);
   5085  1.1  mrg 
   5086  1.1  mrg      The right shift can be done on unsigned chars, and only requires the
   5087  1.1  mrg      result of "*x & 0xf0" to be done on unsigned chars.  But taking that
   5088  1.1  mrg      approach would mean turning a natural chain of single-vector unsigned
   5089  1.1  mrg      short operations into one that truncates "*x" and then extends
   5090  1.1  mrg      "(*x & 0xf0) >> 4", with two vectors for each unsigned short
   5091  1.1  mrg      operation and one vector for each unsigned char operation.
   5092  1.1  mrg      This would be a significant pessimization.
   5093  1.1  mrg 
   5094  1.1  mrg      Instead only propagate the maximum of this precision and the precision
   5095  1.1  mrg      required by the users of the result.  This means that we don't pessimize
   5096  1.1  mrg      the case above but continue to optimize things like:
   5097  1.1  mrg 
   5098  1.1  mrg 	 unsigned char *y;
   5099  1.1  mrg 	 unsigned short *x;
   5100  1.1  mrg 	 *y = ((*x & 0xf0) >> 4) | (*y << 4);
   5101  1.1  mrg 
   5102  1.1  mrg      Here we would truncate two vectors of *x to a single vector of
   5103  1.1  mrg      unsigned chars and use single-vector unsigned char operations for
   5104  1.1  mrg      everything else, rather than doing two unsigned short copies of
   5105  1.1  mrg      "(*x & 0xf0) >> 4" and then truncating the result.  */
   5106  1.1  mrg   min_input_precision = MAX (min_input_precision,
   5107  1.1  mrg 			     stmt_info->min_output_precision);
   5108  1.1  mrg 
   5109  1.1  mrg   if (min_input_precision < TYPE_PRECISION (type)
   5110  1.1  mrg       && (!stmt_info->min_input_precision
   5111  1.1  mrg 	  || stmt_info->min_input_precision > min_input_precision))
   5112  1.1  mrg     stmt_info->min_input_precision = min_input_precision;
   5113  1.1  mrg }
   5114  1.1  mrg 
   5115  1.1  mrg /* Subroutine of vect_determine_min_output_precision.  Return true if
   5116  1.1  mrg    we can calculate a reduced number of output bits for STMT_INFO,
   5117  1.1  mrg    whose result is LHS.  */
   5118  1.1  mrg 
   5119  1.1  mrg static bool
   5120  1.1  mrg vect_determine_min_output_precision_1 (vec_info *vinfo,
   5121  1.1  mrg 				       stmt_vec_info stmt_info, tree lhs)
   5122  1.1  mrg {
   5123  1.1  mrg   /* Take the maximum precision required by users of the result.  */
   5124  1.1  mrg   unsigned int precision = 0;
   5125  1.1  mrg   imm_use_iterator iter;
   5126  1.1  mrg   use_operand_p use;
   5127  1.1  mrg   FOR_EACH_IMM_USE_FAST (use, iter, lhs)
   5128  1.1  mrg     {
   5129  1.1  mrg       gimple *use_stmt = USE_STMT (use);
   5130  1.1  mrg       if (is_gimple_debug (use_stmt))
   5131  1.1  mrg 	continue;
   5132  1.1  mrg       stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
   5133  1.1  mrg       if (!use_stmt_info || !use_stmt_info->min_input_precision)
   5134  1.1  mrg 	return false;
   5135  1.1  mrg       /* The input precision recorded for COND_EXPRs applies only to the
   5136  1.1  mrg 	 "then" and "else" values.  */
   5137  1.1  mrg       gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
   5138  1.1  mrg       if (assign
   5139  1.1  mrg 	  && gimple_assign_rhs_code (assign) == COND_EXPR
   5140  1.1  mrg 	  && use->use != gimple_assign_rhs2_ptr (assign)
   5141  1.1  mrg 	  && use->use != gimple_assign_rhs3_ptr (assign))
   5142  1.1  mrg 	return false;
   5143  1.1  mrg       precision = MAX (precision, use_stmt_info->min_input_precision);
   5144  1.1  mrg     }
   5145  1.1  mrg 
   5146  1.1  mrg   if (dump_enabled_p ())
   5147  1.1  mrg     dump_printf_loc (MSG_NOTE, vect_location,
   5148  1.1  mrg 		     "only the low %d bits of %T are significant\n",
   5149  1.1  mrg 		     precision, lhs);
   5150  1.1  mrg   stmt_info->min_output_precision = precision;
   5151  1.1  mrg   return true;
   5152  1.1  mrg }
   5153  1.1  mrg 
   5154  1.1  mrg /* Calculate min_output_precision for STMT_INFO.  */
   5155  1.1  mrg 
   5156  1.1  mrg static void
   5157  1.1  mrg vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
   5158  1.1  mrg {
   5159  1.1  mrg   /* We're only interested in statements with a narrowable result.  */
   5160  1.1  mrg   tree lhs = gimple_get_lhs (stmt_info->stmt);
   5161  1.1  mrg   if (!lhs
   5162  1.1  mrg       || TREE_CODE (lhs) != SSA_NAME
   5163  1.1  mrg       || !vect_narrowable_type_p (TREE_TYPE (lhs)))
   5164  1.1  mrg     return;
   5165  1.1  mrg 
   5166  1.1  mrg   if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
   5167  1.1  mrg     stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
   5168  1.1  mrg }
   5169  1.1  mrg 
   5170  1.1  mrg /* Use range information to decide whether STMT (described by STMT_INFO)
   5171  1.1  mrg    could be done in a narrower type.  This is effectively a forward
   5172  1.1  mrg    propagation, since it uses context-independent information that applies
   5173  1.1  mrg    to all users of an SSA name.  */
   5174  1.1  mrg 
   5175  1.1  mrg static void
   5176  1.1  mrg vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
   5177  1.1  mrg {
   5178  1.1  mrg   tree lhs = gimple_assign_lhs (stmt);
   5179  1.1  mrg   if (!lhs || TREE_CODE (lhs) != SSA_NAME)
   5180  1.1  mrg     return;
   5181  1.1  mrg 
   5182  1.1  mrg   tree type = TREE_TYPE (lhs);
   5183  1.1  mrg   if (!vect_narrowable_type_p (type))
   5184  1.1  mrg     return;
   5185  1.1  mrg 
   5186  1.1  mrg   /* First see whether we have any useful range information for the result.  */
   5187  1.1  mrg   unsigned int precision = TYPE_PRECISION (type);
   5188  1.1  mrg   signop sign = TYPE_SIGN (type);
   5189  1.1  mrg   wide_int min_value, max_value;
   5190  1.1  mrg   if (!vect_get_range_info (lhs, &min_value, &max_value))
   5191  1.1  mrg     return;
   5192  1.1  mrg 
   5193  1.1  mrg   tree_code code = gimple_assign_rhs_code (stmt);
   5194  1.1  mrg   unsigned int nops = gimple_num_ops (stmt);
   5195  1.1  mrg 
   5196  1.1  mrg   if (!vect_truncatable_operation_p (code))
   5197  1.1  mrg     {
   5198  1.1  mrg       /* Handle operations that can be computed in type T if all inputs
   5199  1.1  mrg 	 and outputs can be represented in type T.  Also handle left and
   5200  1.1  mrg 	 right shifts, where (in addition) the maximum shift amount must
   5201  1.1  mrg 	 be less than the number of bits in T.  */
   5202  1.1  mrg       bool is_shift;
   5203  1.1  mrg       switch (code)
   5204  1.1  mrg 	{
   5205  1.1  mrg 	case LSHIFT_EXPR:
   5206  1.1  mrg 	case RSHIFT_EXPR:
   5207  1.1  mrg 	  is_shift = true;
   5208  1.1  mrg 	  break;
   5209  1.1  mrg 
   5210  1.1  mrg 	case ABS_EXPR:
   5211  1.1  mrg 	case MIN_EXPR:
   5212  1.1  mrg 	case MAX_EXPR:
   5213  1.1  mrg 	case TRUNC_DIV_EXPR:
   5214  1.1  mrg 	case CEIL_DIV_EXPR:
   5215  1.1  mrg 	case FLOOR_DIV_EXPR:
   5216  1.1  mrg 	case ROUND_DIV_EXPR:
   5217  1.1  mrg 	case EXACT_DIV_EXPR:
   5218  1.1  mrg 	  /* Modulus is excluded because it is typically calculated by doing
   5219  1.1  mrg 	     a division, for which minimum signed / -1 isn't representable in
   5220  1.1  mrg 	     the original signed type.  We could take the division range into
   5221  1.1  mrg 	     account instead, if handling modulus ever becomes important.  */
   5222  1.1  mrg 	  is_shift = false;
   5223  1.1  mrg 	  break;
   5224  1.1  mrg 
   5225  1.1  mrg 	default:
   5226  1.1  mrg 	  return;
   5227  1.1  mrg 	}
   5228  1.1  mrg       for (unsigned int i = 1; i < nops; ++i)
   5229  1.1  mrg 	{
   5230  1.1  mrg 	  tree op = gimple_op (stmt, i);
   5231  1.1  mrg 	  wide_int op_min_value, op_max_value;
   5232  1.1  mrg 	  if (TREE_CODE (op) == INTEGER_CST)
   5233  1.1  mrg 	    {
   5234  1.1  mrg 	      unsigned int op_precision = TYPE_PRECISION (TREE_TYPE (op));
   5235  1.1  mrg 	      op_min_value = op_max_value = wi::to_wide (op, op_precision);
   5236  1.1  mrg 	    }
   5237  1.1  mrg 	  else if (TREE_CODE (op) == SSA_NAME)
   5238  1.1  mrg 	    {
   5239  1.1  mrg 	      if (!vect_get_range_info (op, &op_min_value, &op_max_value))
   5240  1.1  mrg 		return;
   5241  1.1  mrg 	    }
   5242  1.1  mrg 	  else
   5243  1.1  mrg 	    return;
   5244  1.1  mrg 
   5245  1.1  mrg 	  if (is_shift && i == 2)
   5246  1.1  mrg 	    {
   5247  1.1  mrg 	      /* There needs to be one more bit than the maximum shift amount.
   5248  1.1  mrg 
   5249  1.1  mrg 		 If the maximum shift amount is already 1 less than PRECISION
   5250  1.1  mrg 		 then we can't narrow the shift further.  Dealing with that
   5251  1.1  mrg 		 case first ensures that we can safely use an unsigned range
   5252  1.1  mrg 		 below.
   5253  1.1  mrg 
   5254  1.1  mrg 		 op_min_value isn't relevant, since shifts by negative amounts
   5255  1.1  mrg 		 are UB.  */
   5256  1.1  mrg 	      if (wi::geu_p (op_max_value, precision - 1))
   5257  1.1  mrg 		return;
   5258  1.1  mrg 	      unsigned int min_bits = op_max_value.to_uhwi () + 1;
   5259  1.1  mrg 
   5260  1.1  mrg 	      /* As explained below, we can convert a signed shift into an
   5261  1.1  mrg 		 unsigned shift if the sign bit is always clear.  At this
   5262  1.1  mrg 		 point we've already processed the ranges of the output and
   5263  1.1  mrg 		 the first input.  */
   5264  1.1  mrg 	      auto op_sign = sign;
   5265  1.1  mrg 	      if (sign == SIGNED && !wi::neg_p (min_value))
   5266  1.1  mrg 		op_sign = UNSIGNED;
   5267  1.1  mrg 	      op_min_value = wide_int::from (wi::min_value (min_bits, op_sign),
   5268  1.1  mrg 					     precision, op_sign);
   5269  1.1  mrg 	      op_max_value = wide_int::from (wi::max_value (min_bits, op_sign),
   5270  1.1  mrg 					     precision, op_sign);
   5271  1.1  mrg 	    }
   5272  1.1  mrg 	  min_value = wi::min (min_value, op_min_value, sign);
   5273  1.1  mrg 	  max_value = wi::max (max_value, op_max_value, sign);
   5274  1.1  mrg 	}
   5275  1.1  mrg     }
   5276  1.1  mrg 
   5277  1.1  mrg   /* Try to switch signed types for unsigned types if we can.
   5278  1.1  mrg      This is better for two reasons.  First, unsigned ops tend
   5279  1.1  mrg      to be cheaper than signed ops.  Second, it means that we can
   5280  1.1  mrg      handle things like:
   5281  1.1  mrg 
   5282  1.1  mrg 	signed char c;
   5283  1.1  mrg 	int res = (int) c & 0xff00; // range [0x0000, 0xff00]
   5284  1.1  mrg 
   5285  1.1  mrg      as:
   5286  1.1  mrg 
   5287  1.1  mrg 	signed char c;
   5288  1.1  mrg 	unsigned short res_1 = (unsigned short) c & 0xff00;
   5289  1.1  mrg 	int res = (int) res_1;
   5290  1.1  mrg 
   5291  1.1  mrg      where the intermediate result res_1 has unsigned rather than
   5292  1.1  mrg      signed type.  */
   5293  1.1  mrg   if (sign == SIGNED && !wi::neg_p (min_value))
   5294  1.1  mrg     sign = UNSIGNED;
   5295  1.1  mrg 
   5296  1.1  mrg   /* See what precision is required for MIN_VALUE and MAX_VALUE.  */
   5297  1.1  mrg   unsigned int precision1 = wi::min_precision (min_value, sign);
   5298  1.1  mrg   unsigned int precision2 = wi::min_precision (max_value, sign);
   5299  1.1  mrg   unsigned int value_precision = MAX (precision1, precision2);
   5300  1.1  mrg   if (value_precision >= precision)
   5301  1.1  mrg     return;
   5302  1.1  mrg 
   5303  1.1  mrg   if (dump_enabled_p ())
   5304  1.1  mrg     dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
   5305  1.1  mrg 		     " without loss of precision: %G",
   5306  1.1  mrg 		     sign == SIGNED ? "signed" : "unsigned",
   5307  1.1  mrg 		     value_precision, stmt);
   5308  1.1  mrg 
   5309  1.1  mrg   vect_set_operation_type (stmt_info, type, value_precision, sign);
   5310  1.1  mrg   vect_set_min_input_precision (stmt_info, type, value_precision);
   5311  1.1  mrg }
   5312  1.1  mrg 
   5313  1.1  mrg /* Use information about the users of STMT's result to decide whether
   5314  1.1  mrg    STMT (described by STMT_INFO) could be done in a narrower type.
   5315  1.1  mrg    This is effectively a backward propagation.  */
   5316  1.1  mrg 
   5317  1.1  mrg static void
   5318  1.1  mrg vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
   5319  1.1  mrg {
   5320  1.1  mrg   tree_code code = gimple_assign_rhs_code (stmt);
   5321  1.1  mrg   unsigned int opno = (code == COND_EXPR ? 2 : 1);
   5322  1.1  mrg   tree type = TREE_TYPE (gimple_op (stmt, opno));
   5323  1.1  mrg   if (!vect_narrowable_type_p (type))
   5324  1.1  mrg     return;
   5325  1.1  mrg 
   5326  1.1  mrg   unsigned int precision = TYPE_PRECISION (type);
   5327  1.1  mrg   unsigned int operation_precision, min_input_precision;
   5328  1.1  mrg   switch (code)
   5329  1.1  mrg     {
   5330  1.1  mrg     CASE_CONVERT:
   5331  1.1  mrg       /* Only the bits that contribute to the output matter.  Don't change
   5332  1.1  mrg 	 the precision of the operation itself.  */
   5333  1.1  mrg       operation_precision = precision;
   5334  1.1  mrg       min_input_precision = stmt_info->min_output_precision;
   5335  1.1  mrg       break;
   5336  1.1  mrg 
   5337  1.1  mrg     case LSHIFT_EXPR:
   5338  1.1  mrg     case RSHIFT_EXPR:
   5339  1.1  mrg       {
   5340  1.1  mrg 	tree shift = gimple_assign_rhs2 (stmt);
   5341  1.1  mrg 	if (TREE_CODE (shift) != INTEGER_CST
   5342  1.1  mrg 	    || !wi::ltu_p (wi::to_widest (shift), precision))
   5343  1.1  mrg 	  return;
   5344  1.1  mrg 	unsigned int const_shift = TREE_INT_CST_LOW (shift);
   5345  1.1  mrg 	if (code == LSHIFT_EXPR)
   5346  1.1  mrg 	  {
   5347  1.1  mrg 	    /* Avoid creating an undefined shift.
   5348  1.1  mrg 
   5349  1.1  mrg 	       ??? We could instead use min_output_precision as-is and
   5350  1.1  mrg 	       optimize out-of-range shifts to zero.  However, only
   5351  1.1  mrg 	       degenerate testcases shift away all their useful input data,
   5352  1.1  mrg 	       and it isn't natural to drop input operations in the middle
   5353  1.1  mrg 	       of vectorization.  This sort of thing should really be
   5354  1.1  mrg 	       handled before vectorization.  */
   5355  1.1  mrg 	    operation_precision = MAX (stmt_info->min_output_precision,
   5356  1.1  mrg 				       const_shift + 1);
   5357  1.1  mrg 	    /* We need CONST_SHIFT fewer bits of the input.  */
   5358  1.1  mrg 	    min_input_precision = (MAX (operation_precision, const_shift)
   5359  1.1  mrg 				   - const_shift);
   5360  1.1  mrg 	  }
   5361  1.1  mrg 	else
   5362  1.1  mrg 	  {
   5363  1.1  mrg 	    /* We need CONST_SHIFT extra bits to do the operation.  */
   5364  1.1  mrg 	    operation_precision = (stmt_info->min_output_precision
   5365  1.1  mrg 				   + const_shift);
   5366  1.1  mrg 	    min_input_precision = operation_precision;
   5367  1.1  mrg 	  }
   5368  1.1  mrg 	break;
   5369  1.1  mrg       }
   5370  1.1  mrg 
   5371  1.1  mrg     default:
   5372  1.1  mrg       if (vect_truncatable_operation_p (code))
   5373  1.1  mrg 	{
   5374  1.1  mrg 	  /* Input bit N has no effect on output bits N-1 and lower.  */
   5375  1.1  mrg 	  operation_precision = stmt_info->min_output_precision;
   5376  1.1  mrg 	  min_input_precision = operation_precision;
   5377  1.1  mrg 	  break;
   5378  1.1  mrg 	}
   5379  1.1  mrg       return;
   5380  1.1  mrg     }
   5381  1.1  mrg 
   5382  1.1  mrg   if (operation_precision < precision)
   5383  1.1  mrg     {
   5384  1.1  mrg       if (dump_enabled_p ())
   5385  1.1  mrg 	dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
   5386  1.1  mrg 			 " without affecting users: %G",
   5387  1.1  mrg 			 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
   5388  1.1  mrg 			 operation_precision, stmt);
   5389  1.1  mrg       vect_set_operation_type (stmt_info, type, operation_precision,
   5390  1.1  mrg 			       TYPE_SIGN (type));
   5391  1.1  mrg     }
   5392  1.1  mrg   vect_set_min_input_precision (stmt_info, type, min_input_precision);
   5393  1.1  mrg }
   5394  1.1  mrg 
   5395  1.1  mrg /* Return true if the statement described by STMT_INFO sets a boolean
   5396  1.1  mrg    SSA_NAME and if we know how to vectorize this kind of statement using
   5397  1.1  mrg    vector mask types.  */
   5398  1.1  mrg 
   5399  1.1  mrg static bool
   5400  1.1  mrg possible_vector_mask_operation_p (stmt_vec_info stmt_info)
   5401  1.1  mrg {
   5402  1.1  mrg   tree lhs = gimple_get_lhs (stmt_info->stmt);
   5403  1.1  mrg   if (!lhs
   5404  1.1  mrg       || TREE_CODE (lhs) != SSA_NAME
   5405  1.1  mrg       || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
   5406  1.1  mrg     return false;
   5407  1.1  mrg 
   5408  1.1  mrg   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
   5409  1.1  mrg     {
   5410  1.1  mrg       tree_code rhs_code = gimple_assign_rhs_code (assign);
   5411  1.1  mrg       switch (rhs_code)
   5412  1.1  mrg 	{
   5413  1.1  mrg 	CASE_CONVERT:
   5414  1.1  mrg 	case SSA_NAME:
   5415  1.1  mrg 	case BIT_NOT_EXPR:
   5416  1.1  mrg 	case BIT_IOR_EXPR:
   5417  1.1  mrg 	case BIT_XOR_EXPR:
   5418  1.1  mrg 	case BIT_AND_EXPR:
   5419  1.1  mrg 	  return true;
   5420  1.1  mrg 
   5421  1.1  mrg 	default:
   5422  1.1  mrg 	  return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
   5423  1.1  mrg 	}
   5424  1.1  mrg     }
   5425  1.1  mrg   else if (is_a <gphi *> (stmt_info->stmt))
   5426  1.1  mrg     return true;
   5427  1.1  mrg   return false;
   5428  1.1  mrg }
   5429  1.1  mrg 
   5430  1.1  mrg /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
   5431  1.1  mrg    a vector mask type instead of a normal vector type.  Record the
   5432  1.1  mrg    result in STMT_INFO->mask_precision.  */
   5433  1.1  mrg 
   5434  1.1  mrg static void
   5435  1.1  mrg vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
   5436  1.1  mrg {
   5437  1.1  mrg   if (!possible_vector_mask_operation_p (stmt_info))
   5438  1.1  mrg     return;
   5439  1.1  mrg 
   5440  1.1  mrg   /* If at least one boolean input uses a vector mask type,
   5441  1.1  mrg      pick the mask type with the narrowest elements.
   5442  1.1  mrg 
   5443  1.1  mrg      ??? This is the traditional behavior.  It should always produce
   5444  1.1  mrg      the smallest number of operations, but isn't necessarily the
   5445  1.1  mrg      optimal choice.  For example, if we have:
   5446  1.1  mrg 
   5447  1.1  mrg        a = b & c
   5448  1.1  mrg 
   5449  1.1  mrg      where:
   5450  1.1  mrg 
   5451  1.1  mrg        - the user of a wants it to have a mask type for 16-bit elements (M16)
   5452  1.1  mrg        - b also uses M16
   5453  1.1  mrg        - c uses a mask type for 8-bit elements (M8)
   5454  1.1  mrg 
   5455  1.1  mrg      then picking M8 gives:
   5456  1.1  mrg 
   5457  1.1  mrg        - 1 M16->M8 pack for b
   5458  1.1  mrg        - 1 M8 AND for a
   5459  1.1  mrg        - 2 M8->M16 unpacks for the user of a
   5460  1.1  mrg 
   5461  1.1  mrg      whereas picking M16 would have given:
   5462  1.1  mrg 
   5463  1.1  mrg        - 2 M8->M16 unpacks for c
   5464  1.1  mrg        - 2 M16 ANDs for a
   5465  1.1  mrg 
   5466  1.1  mrg      The number of operations are equal, but M16 would have given
   5467  1.1  mrg      a shorter dependency chain and allowed more ILP.  */
   5468  1.1  mrg   unsigned int precision = ~0U;
   5469  1.1  mrg   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
   5470  1.1  mrg     {
   5471  1.1  mrg       unsigned int nops = gimple_num_ops (assign);
   5472  1.1  mrg       for (unsigned int i = 1; i < nops; ++i)
   5473  1.1  mrg 	{
   5474  1.1  mrg 	  tree rhs = gimple_op (assign, i);
   5475  1.1  mrg 	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
   5476  1.1  mrg 	    continue;
   5477  1.1  mrg 
   5478  1.1  mrg 	  stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
   5479  1.1  mrg 	  if (!def_stmt_info)
   5480  1.1  mrg 	    /* Don't let external or constant operands influence the choice.
   5481  1.1  mrg 	       We can convert them to whichever vector type we pick.  */
   5482  1.1  mrg 	    continue;
   5483  1.1  mrg 
   5484  1.1  mrg 	  if (def_stmt_info->mask_precision)
   5485  1.1  mrg 	    {
   5486  1.1  mrg 	      if (precision > def_stmt_info->mask_precision)
   5487  1.1  mrg 		precision = def_stmt_info->mask_precision;
   5488  1.1  mrg 	    }
   5489  1.1  mrg 	}
   5490  1.1  mrg 
   5491  1.1  mrg       /* If the statement compares two values that shouldn't use vector masks,
   5492  1.1  mrg 	 try comparing the values as normal scalars instead.  */
   5493  1.1  mrg       tree_code rhs_code = gimple_assign_rhs_code (assign);
   5494  1.1  mrg       if (precision == ~0U
   5495  1.1  mrg 	  && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
   5496  1.1  mrg 	{
   5497  1.1  mrg 	  tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
   5498  1.1  mrg 	  scalar_mode mode;
   5499  1.1  mrg 	  tree vectype, mask_type;
   5500  1.1  mrg 	  if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
   5501  1.1  mrg 	      && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
   5502  1.1  mrg 	      && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
   5503  1.1  mrg 	      && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
   5504  1.1  mrg 	    precision = GET_MODE_BITSIZE (mode);
   5505  1.1  mrg 	}
   5506  1.1  mrg     }
   5507  1.1  mrg   else
   5508  1.1  mrg     {
   5509  1.1  mrg       gphi *phi = as_a <gphi *> (stmt_info->stmt);
   5510  1.1  mrg       for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
   5511  1.1  mrg 	{
   5512  1.1  mrg 	  tree rhs = gimple_phi_arg_def (phi, i);
   5513  1.1  mrg 
   5514  1.1  mrg 	  stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
   5515  1.1  mrg 	  if (!def_stmt_info)
   5516  1.1  mrg 	    /* Don't let external or constant operands influence the choice.
   5517  1.1  mrg 	       We can convert them to whichever vector type we pick.  */
   5518  1.1  mrg 	    continue;
   5519  1.1  mrg 
   5520  1.1  mrg 	  if (def_stmt_info->mask_precision)
   5521  1.1  mrg 	    {
   5522  1.1  mrg 	      if (precision > def_stmt_info->mask_precision)
   5523  1.1  mrg 		precision = def_stmt_info->mask_precision;
   5524  1.1  mrg 	    }
   5525  1.1  mrg 	}
   5526  1.1  mrg     }
   5527  1.1  mrg 
   5528  1.1  mrg   if (dump_enabled_p ())
   5529  1.1  mrg     {
   5530  1.1  mrg       if (precision == ~0U)
   5531  1.1  mrg 	dump_printf_loc (MSG_NOTE, vect_location,
   5532  1.1  mrg 			 "using normal nonmask vectors for %G",
   5533  1.1  mrg 			 stmt_info->stmt);
   5534  1.1  mrg       else
   5535  1.1  mrg 	dump_printf_loc (MSG_NOTE, vect_location,
   5536  1.1  mrg 			 "using boolean precision %d for %G",
   5537  1.1  mrg 			 precision, stmt_info->stmt);
   5538  1.1  mrg     }
   5539  1.1  mrg 
   5540  1.1  mrg   stmt_info->mask_precision = precision;
   5541  1.1  mrg }
   5542  1.1  mrg 
   5543  1.1  mrg /* Handle vect_determine_precisions for STMT_INFO, given that we
   5544  1.1  mrg    have already done so for the users of its result.  */
   5545  1.1  mrg 
   5546  1.1  mrg void
   5547  1.1  mrg vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
   5548  1.1  mrg {
   5549  1.1  mrg   vect_determine_min_output_precision (vinfo, stmt_info);
   5550  1.1  mrg   if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
   5551  1.1  mrg     {
   5552  1.1  mrg       vect_determine_precisions_from_range (stmt_info, stmt);
   5553  1.1  mrg       vect_determine_precisions_from_users (stmt_info, stmt);
   5554  1.1  mrg     }
   5555  1.1  mrg }
   5556  1.1  mrg 
   5557  1.1  mrg /* Walk backwards through the vectorizable region to determine the
   5558  1.1  mrg    values of these fields:
   5559  1.1  mrg 
   5560  1.1  mrg    - min_output_precision
   5561  1.1  mrg    - min_input_precision
   5562  1.1  mrg    - operation_precision
   5563  1.1  mrg    - operation_sign.  */
   5564  1.1  mrg 
   5565  1.1  mrg void
   5566  1.1  mrg vect_determine_precisions (vec_info *vinfo)
   5567  1.1  mrg {
   5568  1.1  mrg   DUMP_VECT_SCOPE ("vect_determine_precisions");
   5569  1.1  mrg 
   5570  1.1  mrg   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
   5571  1.1  mrg     {
   5572  1.1  mrg       class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   5573  1.1  mrg       basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
   5574  1.1  mrg       unsigned int nbbs = loop->num_nodes;
   5575  1.1  mrg 
   5576  1.1  mrg       for (unsigned int i = 0; i < nbbs; i++)
   5577  1.1  mrg 	{
   5578  1.1  mrg 	  basic_block bb = bbs[i];
   5579  1.1  mrg 	  for (auto gsi = gsi_start_phis (bb);
   5580  1.1  mrg 	       !gsi_end_p (gsi); gsi_next (&gsi))
   5581  1.1  mrg 	    {
   5582  1.1  mrg 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
   5583  1.1  mrg 	      if (stmt_info)
   5584  1.1  mrg 		vect_determine_mask_precision (vinfo, stmt_info);
   5585  1.1  mrg 	    }
   5586  1.1  mrg 	  for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
   5587  1.1  mrg 	    if (!is_gimple_debug (gsi_stmt (si)))
   5588  1.1  mrg 	      vect_determine_mask_precision
   5589  1.1  mrg 		(vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
   5590  1.1  mrg 	}
   5591  1.1  mrg       for (unsigned int i = 0; i < nbbs; i++)
   5592  1.1  mrg 	{
   5593  1.1  mrg 	  basic_block bb = bbs[nbbs - i - 1];
   5594  1.1  mrg 	  for (gimple_stmt_iterator si = gsi_last_bb (bb);
   5595  1.1  mrg 	       !gsi_end_p (si); gsi_prev (&si))
   5596  1.1  mrg 	    if (!is_gimple_debug (gsi_stmt (si)))
   5597  1.1  mrg 	      vect_determine_stmt_precisions
   5598  1.1  mrg 		(vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
   5599  1.1  mrg 	  for (auto gsi = gsi_start_phis (bb);
   5600  1.1  mrg 	       !gsi_end_p (gsi); gsi_next (&gsi))
   5601  1.1  mrg 	    {
   5602  1.1  mrg 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
   5603  1.1  mrg 	      if (stmt_info)
   5604  1.1  mrg 		vect_determine_stmt_precisions (vinfo, stmt_info);
   5605  1.1  mrg 	    }
   5606  1.1  mrg 	}
   5607  1.1  mrg     }
   5608  1.1  mrg   else
   5609  1.1  mrg     {
   5610  1.1  mrg       bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
   5611  1.1  mrg       for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
   5612  1.1  mrg 	{
   5613  1.1  mrg 	  basic_block bb = bb_vinfo->bbs[i];
   5614  1.1  mrg 	  for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
   5615  1.1  mrg 	    {
   5616  1.1  mrg 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
   5617  1.1  mrg 	      if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
   5618  1.1  mrg 		vect_determine_mask_precision (vinfo, stmt_info);
   5619  1.1  mrg 	    }
   5620  1.1  mrg 	  for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
   5621  1.1  mrg 	    {
   5622  1.1  mrg 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
   5623  1.1  mrg 	      if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
   5624  1.1  mrg 		vect_determine_mask_precision (vinfo, stmt_info);
   5625  1.1  mrg 	    }
   5626  1.1  mrg 	}
   5627  1.1  mrg       for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
   5628  1.1  mrg 	{
   5629  1.1  mrg 	  for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
   5630  1.1  mrg 	       !gsi_end_p (gsi); gsi_prev (&gsi))
   5631  1.1  mrg 	    {
   5632  1.1  mrg 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
   5633  1.1  mrg 	      if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
   5634  1.1  mrg 		vect_determine_stmt_precisions (vinfo, stmt_info);
   5635  1.1  mrg 	    }
   5636  1.1  mrg 	  for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
   5637  1.1  mrg 	       !gsi_end_p (gsi); gsi_next (&gsi))
   5638  1.1  mrg 	    {
   5639  1.1  mrg 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
   5640  1.1  mrg 	      if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
   5641  1.1  mrg 		vect_determine_stmt_precisions (vinfo, stmt_info);
   5642  1.1  mrg 	    }
   5643  1.1  mrg 	}
   5644  1.1  mrg     }
   5645  1.1  mrg }
   5646  1.1  mrg 
   5647  1.1  mrg typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
   5648  1.1  mrg 
   5649  1.1  mrg struct vect_recog_func
   5650  1.1  mrg {
   5651  1.1  mrg   vect_recog_func_ptr fn;
   5652  1.1  mrg   const char *name;
   5653  1.1  mrg };
   5654  1.1  mrg 
   5655  1.1  mrg /* Note that ordering matters - the first pattern matching on a stmt is
   5656  1.1  mrg    taken which means usually the more complex one needs to preceed the
   5657  1.1  mrg    less comples onex (widen_sum only after dot_prod or sad for example).  */
   5658  1.1  mrg static vect_recog_func vect_vect_recog_func_ptrs[] = {
   5659  1.1  mrg   { vect_recog_over_widening_pattern, "over_widening" },
   5660  1.1  mrg   /* Must come after over_widening, which narrows the shift as much as
   5661  1.1  mrg      possible beforehand.  */
   5662  1.1  mrg   { vect_recog_average_pattern, "average" },
   5663  1.1  mrg   { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
   5664  1.1  mrg   { vect_recog_mulhs_pattern, "mult_high" },
   5665  1.1  mrg   { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
   5666  1.1  mrg   { vect_recog_widen_mult_pattern, "widen_mult" },
   5667  1.1  mrg   { vect_recog_dot_prod_pattern, "dot_prod" },
   5668  1.1  mrg   { vect_recog_sad_pattern, "sad" },
   5669  1.1  mrg   { vect_recog_widen_sum_pattern, "widen_sum" },
   5670  1.1  mrg   { vect_recog_pow_pattern, "pow" },
   5671  1.1  mrg   { vect_recog_popcount_pattern, "popcount" },
   5672  1.1  mrg   { vect_recog_widen_shift_pattern, "widen_shift" },
   5673  1.1  mrg   { vect_recog_rotate_pattern, "rotate" },
   5674  1.1  mrg   { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
   5675  1.1  mrg   { vect_recog_divmod_pattern, "divmod" },
   5676  1.1  mrg   { vect_recog_mult_pattern, "mult" },
   5677  1.1  mrg   { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
   5678  1.1  mrg   { vect_recog_bool_pattern, "bool" },
   5679  1.1  mrg   /* This must come before mask conversion, and includes the parts
   5680  1.1  mrg      of mask conversion that are needed for gather and scatter
   5681  1.1  mrg      internal functions.  */
   5682  1.1  mrg   { vect_recog_gather_scatter_pattern, "gather_scatter" },
   5683  1.1  mrg   { vect_recog_mask_conversion_pattern, "mask_conversion" },
   5684  1.1  mrg   { vect_recog_widen_plus_pattern, "widen_plus" },
   5685  1.1  mrg   { vect_recog_widen_minus_pattern, "widen_minus" },
   5686  1.1  mrg };
   5687  1.1  mrg 
   5688  1.1  mrg const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
   5689  1.1  mrg 
   5690  1.1  mrg /* Mark statements that are involved in a pattern.  */
   5691  1.1  mrg 
   5692  1.1  mrg void
   5693  1.1  mrg vect_mark_pattern_stmts (vec_info *vinfo,
   5694  1.1  mrg 			 stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
   5695  1.1  mrg                          tree pattern_vectype)
   5696  1.1  mrg {
   5697  1.1  mrg   stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
   5698  1.1  mrg   gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
   5699  1.1  mrg 
   5700  1.1  mrg   gimple *orig_pattern_stmt = NULL;
   5701  1.1  mrg   if (is_pattern_stmt_p (orig_stmt_info))
   5702  1.1  mrg     {
   5703  1.1  mrg       /* We're replacing a statement in an existing pattern definition
   5704  1.1  mrg 	 sequence.  */
   5705  1.1  mrg       orig_pattern_stmt = orig_stmt_info->stmt;
   5706  1.1  mrg       if (dump_enabled_p ())
   5707  1.1  mrg 	dump_printf_loc (MSG_NOTE, vect_location,
   5708  1.1  mrg 			 "replacing earlier pattern %G", orig_pattern_stmt);
   5709  1.1  mrg 
   5710  1.1  mrg       /* To keep the book-keeping simple, just swap the lhs of the
   5711  1.1  mrg 	 old and new statements, so that the old one has a valid but
   5712  1.1  mrg 	 unused lhs.  */
   5713  1.1  mrg       tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
   5714  1.1  mrg       gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
   5715  1.1  mrg       gimple_set_lhs (pattern_stmt, old_lhs);
   5716  1.1  mrg 
   5717  1.1  mrg       if (dump_enabled_p ())
   5718  1.1  mrg 	dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
   5719  1.1  mrg 
   5720  1.1  mrg       /* Switch to the statement that ORIG replaces.  */
   5721  1.1  mrg       orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
   5722  1.1  mrg 
   5723  1.1  mrg       /* We shouldn't be replacing the main pattern statement.  */
   5724  1.1  mrg       gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
   5725  1.1  mrg 		  != orig_pattern_stmt);
   5726  1.1  mrg     }
   5727  1.1  mrg 
   5728  1.1  mrg   if (def_seq)
   5729  1.1  mrg     for (gimple_stmt_iterator si = gsi_start (def_seq);
   5730  1.1  mrg 	 !gsi_end_p (si); gsi_next (&si))
   5731  1.1  mrg       {
   5732  1.1  mrg 	if (dump_enabled_p ())
   5733  1.1  mrg 	  dump_printf_loc (MSG_NOTE, vect_location,
   5734  1.1  mrg 			   "extra pattern stmt: %G", gsi_stmt (si));
   5735  1.1  mrg 	stmt_vec_info pattern_stmt_info
   5736  1.1  mrg 	  = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
   5737  1.1  mrg 				    orig_stmt_info, pattern_vectype);
   5738  1.1  mrg 	/* Stmts in the def sequence are not vectorizable cycle or
   5739  1.1  mrg 	   induction defs, instead they should all be vect_internal_def
   5740  1.1  mrg 	   feeding the main pattern stmt which retains this def type.  */
   5741  1.1  mrg 	STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
   5742  1.1  mrg       }
   5743  1.1  mrg 
   5744  1.1  mrg   if (orig_pattern_stmt)
   5745  1.1  mrg     {
   5746  1.1  mrg       vect_init_pattern_stmt (vinfo, pattern_stmt,
   5747  1.1  mrg 			      orig_stmt_info, pattern_vectype);
   5748  1.1  mrg 
   5749  1.1  mrg       /* Insert all the new pattern statements before the original one.  */
   5750  1.1  mrg       gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
   5751  1.1  mrg       gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
   5752  1.1  mrg 					       orig_def_seq);
   5753  1.1  mrg       gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
   5754  1.1  mrg       gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
   5755  1.1  mrg 
   5756  1.1  mrg       /* Remove the pattern statement that this new pattern replaces.  */
   5757  1.1  mrg       gsi_remove (&gsi, false);
   5758  1.1  mrg     }
   5759  1.1  mrg   else
   5760  1.1  mrg     vect_set_pattern_stmt (vinfo,
   5761  1.1  mrg 			   pattern_stmt, orig_stmt_info, pattern_vectype);
   5762  1.1  mrg 
   5763  1.1  mrg   /* Transfer reduction path info to the pattern.  */
   5764  1.1  mrg   if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
   5765  1.1  mrg     {
   5766  1.1  mrg       gimple_match_op op;
   5767  1.1  mrg       if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
   5768  1.1  mrg 	gcc_unreachable ();
   5769  1.1  mrg       tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
   5770  1.1  mrg       /* Search the pattern def sequence and the main pattern stmt.  Note
   5771  1.1  mrg          we may have inserted all into a containing pattern def sequence
   5772  1.1  mrg 	 so the following is a bit awkward.  */
   5773  1.1  mrg       gimple_stmt_iterator si;
   5774  1.1  mrg       gimple *s;
   5775  1.1  mrg       if (def_seq)
   5776  1.1  mrg 	{
   5777  1.1  mrg 	  si = gsi_start (def_seq);
   5778  1.1  mrg 	  s = gsi_stmt (si);
   5779  1.1  mrg 	  gsi_next (&si);
   5780  1.1  mrg 	}
   5781  1.1  mrg       else
   5782  1.1  mrg 	{
   5783  1.1  mrg 	  si = gsi_none ();
   5784  1.1  mrg 	  s = pattern_stmt;
   5785  1.1  mrg 	}
   5786  1.1  mrg       do
   5787  1.1  mrg 	{
   5788  1.1  mrg 	  bool found = false;
   5789  1.1  mrg 	  if (gimple_extract_op (s, &op))
   5790  1.1  mrg 	    for (unsigned i = 0; i < op.num_ops; ++i)
   5791  1.1  mrg 	      if (op.ops[i] == lookfor)
   5792  1.1  mrg 		{
   5793  1.1  mrg 		  STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
   5794  1.1  mrg 		  lookfor = gimple_get_lhs (s);
   5795  1.1  mrg 		  found = true;
   5796  1.1  mrg 		  break;
   5797  1.1  mrg 		}
   5798  1.1  mrg 	  if (s == pattern_stmt)
   5799  1.1  mrg 	    {
   5800  1.1  mrg 	      if (!found && dump_enabled_p ())
   5801  1.1  mrg 		dump_printf_loc (MSG_NOTE, vect_location,
   5802  1.1  mrg 				 "failed to update reduction index.\n");
   5803  1.1  mrg 	      break;
   5804  1.1  mrg 	    }
   5805  1.1  mrg 	  if (gsi_end_p (si))
   5806  1.1  mrg 	    s = pattern_stmt;
   5807  1.1  mrg 	  else
   5808  1.1  mrg 	    {
   5809  1.1  mrg 	      s = gsi_stmt (si);
   5810  1.1  mrg 	      if (s == pattern_stmt)
   5811  1.1  mrg 		/* Found the end inside a bigger pattern def seq.  */
   5812  1.1  mrg 		si = gsi_none ();
   5813  1.1  mrg 	      else
   5814  1.1  mrg 		gsi_next (&si);
   5815  1.1  mrg 	    }
   5816  1.1  mrg 	} while (1);
   5817  1.1  mrg     }
   5818  1.1  mrg }
   5819  1.1  mrg 
   5820  1.1  mrg /* Function vect_pattern_recog_1
   5821  1.1  mrg 
   5822  1.1  mrg    Input:
   5823  1.1  mrg    PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
   5824  1.1  mrg         computation pattern.
   5825  1.1  mrg    STMT_INFO: A stmt from which the pattern search should start.
   5826  1.1  mrg 
   5827  1.1  mrg    If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
   5828  1.1  mrg    a sequence of statements that has the same functionality and can be
   5829  1.1  mrg    used to replace STMT_INFO.  It returns the last statement in the sequence
   5830  1.1  mrg    and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
   5831  1.1  mrg    PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
   5832  1.1  mrg    statement, having first checked that the target supports the new operation
   5833  1.1  mrg    in that type.
   5834  1.1  mrg 
   5835  1.1  mrg    This function also does some bookkeeping, as explained in the documentation
   5836  1.1  mrg    for vect_recog_pattern.  */
   5837  1.1  mrg 
   5838  1.1  mrg static void
   5839  1.1  mrg vect_pattern_recog_1 (vec_info *vinfo,
   5840  1.1  mrg 		      vect_recog_func *recog_func, stmt_vec_info stmt_info)
   5841  1.1  mrg {
   5842  1.1  mrg   gimple *pattern_stmt;
   5843  1.1  mrg   loop_vec_info loop_vinfo;
   5844  1.1  mrg   tree pattern_vectype;
   5845  1.1  mrg 
   5846  1.1  mrg   /* If this statement has already been replaced with pattern statements,
   5847  1.1  mrg      leave the original statement alone, since the first match wins.
   5848  1.1  mrg      Instead try to match against the definition statements that feed
   5849  1.1  mrg      the main pattern statement.  */
   5850  1.1  mrg   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
   5851  1.1  mrg     {
   5852  1.1  mrg       gimple_stmt_iterator gsi;
   5853  1.1  mrg       for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
   5854  1.1  mrg 	   !gsi_end_p (gsi); gsi_next (&gsi))
   5855  1.1  mrg 	vect_pattern_recog_1 (vinfo, recog_func,
   5856  1.1  mrg 			      vinfo->lookup_stmt (gsi_stmt (gsi)));
   5857  1.1  mrg       return;
   5858  1.1  mrg     }
   5859  1.1  mrg 
   5860  1.1  mrg   gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
   5861  1.1  mrg   pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
   5862  1.1  mrg   if (!pattern_stmt)
   5863  1.1  mrg     {
   5864  1.1  mrg       /* Clear any half-formed pattern definition sequence.  */
   5865  1.1  mrg       STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
   5866  1.1  mrg       return;
   5867  1.1  mrg     }
   5868  1.1  mrg 
   5869  1.1  mrg   loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   5870  1.1  mrg 
   5871  1.1  mrg   /* Found a vectorizable pattern.  */
   5872  1.1  mrg   if (dump_enabled_p ())
   5873  1.1  mrg     dump_printf_loc (MSG_NOTE, vect_location,
   5874  1.1  mrg 		     "%s pattern recognized: %G",
   5875  1.1  mrg 		     recog_func->name, pattern_stmt);
   5876  1.1  mrg 
   5877  1.1  mrg   /* Mark the stmts that are involved in the pattern. */
   5878  1.1  mrg   vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
   5879  1.1  mrg 
   5880  1.1  mrg   /* Patterns cannot be vectorized using SLP, because they change the order of
   5881  1.1  mrg      computation.  */
   5882  1.1  mrg   if (loop_vinfo)
   5883  1.1  mrg     {
   5884  1.1  mrg       unsigned ix, ix2;
   5885  1.1  mrg       stmt_vec_info *elem_ptr;
   5886  1.1  mrg       VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
   5887  1.1  mrg 			     elem_ptr, *elem_ptr == stmt_info);
   5888  1.1  mrg     }
   5889  1.1  mrg }
   5890  1.1  mrg 
   5891  1.1  mrg 
   5892  1.1  mrg /* Function vect_pattern_recog
   5893  1.1  mrg 
   5894  1.1  mrg    Input:
   5895  1.1  mrg    LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
   5896  1.1  mrg         computation idioms.
   5897  1.1  mrg 
   5898  1.1  mrg    Output - for each computation idiom that is detected we create a new stmt
   5899  1.1  mrg         that provides the same functionality and that can be vectorized.  We
   5900  1.1  mrg         also record some information in the struct_stmt_info of the relevant
   5901  1.1  mrg         stmts, as explained below:
   5902  1.1  mrg 
   5903  1.1  mrg    At the entry to this function we have the following stmts, with the
   5904  1.1  mrg    following initial value in the STMT_VINFO fields:
   5905  1.1  mrg 
   5906  1.1  mrg          stmt                     in_pattern_p  related_stmt    vec_stmt
   5907  1.1  mrg          S1: a_i = ....                 -       -               -
   5908  1.1  mrg          S2: a_2 = ..use(a_i)..         -       -               -
   5909  1.1  mrg          S3: a_1 = ..use(a_2)..         -       -               -
   5910  1.1  mrg          S4: a_0 = ..use(a_1)..         -       -               -
   5911  1.1  mrg          S5: ... = ..use(a_0)..         -       -               -
   5912  1.1  mrg 
   5913  1.1  mrg    Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   5914  1.1  mrg    represented by a single stmt.  We then:
   5915  1.1  mrg    - create a new stmt S6 equivalent to the pattern (the stmt is not
   5916  1.1  mrg      inserted into the code)
   5917  1.1  mrg    - fill in the STMT_VINFO fields as follows:
   5918  1.1  mrg 
   5919  1.1  mrg                                   in_pattern_p  related_stmt    vec_stmt
   5920  1.1  mrg          S1: a_i = ....                 -       -               -
   5921  1.1  mrg          S2: a_2 = ..use(a_i)..         -       -               -
   5922  1.1  mrg          S3: a_1 = ..use(a_2)..         -       -               -
   5923  1.1  mrg          S4: a_0 = ..use(a_1)..         true    S6              -
   5924  1.1  mrg           '---> S6: a_new = ....        -       S4              -
   5925  1.1  mrg          S5: ... = ..use(a_0)..         -       -               -
   5926  1.1  mrg 
   5927  1.1  mrg    (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   5928  1.1  mrg    to each other through the RELATED_STMT field).
   5929  1.1  mrg 
   5930  1.1  mrg    S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   5931  1.1  mrg    of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   5932  1.1  mrg    remain irrelevant unless used by stmts other than S4.
   5933  1.1  mrg 
   5934  1.1  mrg    If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   5935  1.1  mrg    (because they are marked as irrelevant).  It will vectorize S6, and record
   5936  1.1  mrg    a pointer to the new vector stmt VS6 from S6 (as usual).
   5937  1.1  mrg    S4 will be skipped, and S5 will be vectorized as usual:
   5938  1.1  mrg 
   5939  1.1  mrg                                   in_pattern_p  related_stmt    vec_stmt
   5940  1.1  mrg          S1: a_i = ....                 -       -               -
   5941  1.1  mrg          S2: a_2 = ..use(a_i)..         -       -               -
   5942  1.1  mrg          S3: a_1 = ..use(a_2)..         -       -               -
   5943  1.1  mrg        > VS6: va_new = ....             -       -               -
   5944  1.1  mrg          S4: a_0 = ..use(a_1)..         true    S6              VS6
   5945  1.1  mrg           '---> S6: a_new = ....        -       S4              VS6
   5946  1.1  mrg        > VS5: ... = ..vuse(va_new)..    -       -               -
   5947  1.1  mrg          S5: ... = ..use(a_0)..         -       -               -
   5948  1.1  mrg 
   5949  1.1  mrg    DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   5950  1.1  mrg    elsewhere), and we'll end up with:
   5951  1.1  mrg 
   5952  1.1  mrg         VS6: va_new = ....
   5953  1.1  mrg         VS5: ... = ..vuse(va_new)..
   5954  1.1  mrg 
   5955  1.1  mrg    In case of more than one pattern statements, e.g., widen-mult with
   5956  1.1  mrg    intermediate type:
   5957  1.1  mrg 
   5958  1.1  mrg      S1  a_t = ;
   5959  1.1  mrg      S2  a_T = (TYPE) a_t;
   5960  1.1  mrg            '--> S3: a_it = (interm_type) a_t;
   5961  1.1  mrg      S4  prod_T = a_T * CONST;
   5962  1.1  mrg            '--> S5: prod_T' = a_it w* CONST;
   5963  1.1  mrg 
   5964  1.1  mrg    there may be other users of a_T outside the pattern.  In that case S2 will
   5965  1.1  mrg    be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   5966  1.1  mrg    and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   5967  1.1  mrg    be recorded in S3.  */
   5968  1.1  mrg 
   5969  1.1  mrg void
   5970  1.1  mrg vect_pattern_recog (vec_info *vinfo)
   5971  1.1  mrg {
   5972  1.1  mrg   class loop *loop;
   5973  1.1  mrg   basic_block *bbs;
   5974  1.1  mrg   unsigned int nbbs;
   5975  1.1  mrg   gimple_stmt_iterator si;
   5976  1.1  mrg   unsigned int i, j;
   5977  1.1  mrg 
   5978  1.1  mrg   vect_determine_precisions (vinfo);
   5979  1.1  mrg 
   5980  1.1  mrg   DUMP_VECT_SCOPE ("vect_pattern_recog");
   5981  1.1  mrg 
   5982  1.1  mrg   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
   5983  1.1  mrg     {
   5984  1.1  mrg       loop = LOOP_VINFO_LOOP (loop_vinfo);
   5985  1.1  mrg       bbs = LOOP_VINFO_BBS (loop_vinfo);
   5986  1.1  mrg       nbbs = loop->num_nodes;
   5987  1.1  mrg 
   5988  1.1  mrg       /* Scan through the loop stmts, applying the pattern recognition
   5989  1.1  mrg 	 functions starting at each stmt visited:  */
   5990  1.1  mrg       for (i = 0; i < nbbs; i++)
   5991  1.1  mrg 	{
   5992  1.1  mrg 	  basic_block bb = bbs[i];
   5993  1.1  mrg 	  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
   5994  1.1  mrg 	    {
   5995  1.1  mrg 	      if (is_gimple_debug (gsi_stmt (si)))
   5996  1.1  mrg 		continue;
   5997  1.1  mrg 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
   5998  1.1  mrg 	      /* Scan over all generic vect_recog_xxx_pattern functions.  */
   5999  1.1  mrg 	      for (j = 0; j < NUM_PATTERNS; j++)
   6000  1.1  mrg 		vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
   6001  1.1  mrg 				      stmt_info);
   6002  1.1  mrg 	    }
   6003  1.1  mrg 	}
   6004  1.1  mrg     }
   6005  1.1  mrg   else
   6006  1.1  mrg     {
   6007  1.1  mrg       bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
   6008  1.1  mrg       for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
   6009  1.1  mrg 	for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
   6010  1.1  mrg 	     !gsi_end_p (gsi); gsi_next (&gsi))
   6011  1.1  mrg 	  {
   6012  1.1  mrg 	    stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
   6013  1.1  mrg 	    if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
   6014  1.1  mrg 	      continue;
   6015  1.1  mrg 
   6016  1.1  mrg 	    /* Scan over all generic vect_recog_xxx_pattern functions.  */
   6017  1.1  mrg 	    for (j = 0; j < NUM_PATTERNS; j++)
   6018  1.1  mrg 	      vect_pattern_recog_1 (vinfo,
   6019  1.1  mrg 				    &vect_vect_recog_func_ptrs[j], stmt_info);
   6020  1.1  mrg 	  }
   6021  1.1  mrg     }
   6022  1.1  mrg 
   6023  1.1  mrg   /* After this no more add_stmt calls are allowed.  */
   6024  1.1  mrg   vinfo->stmt_vec_info_ro = true;
   6025  1.1  mrg }
   6026