/* Data References Analysis and Manipulation Utilities for Vectorization.
   Copyright (C) 2003-2022 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit (at) il.ibm.com>
   and Ira Rosen <irar (at) il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define INCLUDE_ALGORITHM
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "memmodel.h"
#include "tm_p.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "expr.h"
#include "builtins.h"
#include "tree-cfg.h"
#include "tree-hash-traits.h"
#include "vec-perm-indices.h"
#include "internal-fn.h"
#include "gimple-fold.h"

/* Return true if load- or store-lanes optab OPTAB is implemented for
   COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */

static bool
vect_lanes_optab_supported_p (const char *name, convert_optab optab,
			      tree vectype, unsigned HOST_WIDE_INT count)
{
  machine_mode mode, array_mode;
  bool limit_p;

  mode = TYPE_MODE (vectype);
  if (!targetm.array_mode (mode, count).exists (&array_mode))
    {
      poly_uint64 bits = count * GET_MODE_BITSIZE (mode);
      limit_p = !targetm.array_mode_supported_p (mode, count);
      if (!int_mode_for_size (bits, limit_p).exists (&array_mode))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "no array mode for %s[%wu]\n",
			     GET_MODE_NAME (mode), count);
	  return false;
	}
    }

  if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "cannot use %s<%s><%s>\n", name,
			 GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
      return false;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "can use %s<%s><%s>\n", name, GET_MODE_NAME (array_mode),
		     GET_MODE_NAME (mode));

  return true;
}
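
/* Illustrative example (not from the original sources): with COUNT == 2,
   a loop with two interleaved loads such as

	for (i = 0; i < n; i++)
	  {
	    x[i] = a[2 * i];
	    y[i] = a[2 * i + 1];
	  }

   can use one load-lanes operation per vector iteration, provided the
   target defines an array mode wrapping two vectors of the element type
   and implements the corresponding optab for it.  */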


/* Return the smallest scalar part of STMT_INFO.
   This is used to determine the vectype of the stmt.  We generally set the
   vectype according to the type of the result (lhs).  For stmts whose
   result-type is different than the type of the arguments (e.g., demotion,
   promotion), vectype will be reset appropriately (later).  Note that we have
   to visit the smallest datatype in this function, because that determines the
   VF.  If the smallest datatype in the loop is present only as the rhs of a
   promotion operation - we'd miss it.
   Such a case, where a variable of this datatype does not appear in the lhs
   anywhere in the loop, can only occur if it's an invariant: e.g.:
   'int_x = (int) short_inv', which we'd expect to have been optimized away by
   invariant motion.  However, we cannot rely on invariant motion to always
   take invariants out of the loop, and so in the case of promotion we also
   have to check the rhs.
   SCALAR_TYPE is the scalar type determined by the caller so far; return
   it, or the smaller type of an rhs operand if the stmt has one.  */

tree
vect_get_smallest_scalar_type (stmt_vec_info stmt_info, tree scalar_type)
{
  HOST_WIDE_INT lhs, rhs;

  /* During the analysis phase, this function is called on arbitrary
     statements that might not have scalar results.  */
  if (!tree_fits_uhwi_p (TYPE_SIZE_UNIT (scalar_type)))
    return scalar_type;

  lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (assign)
    {
      scalar_type = TREE_TYPE (gimple_assign_lhs (assign));
      if (gimple_assign_cast_p (assign)
	  || gimple_assign_rhs_code (assign) == DOT_PROD_EXPR
	  || gimple_assign_rhs_code (assign) == WIDEN_SUM_EXPR
	  || gimple_assign_rhs_code (assign) == WIDEN_MULT_EXPR
	  || gimple_assign_rhs_code (assign) == WIDEN_LSHIFT_EXPR
	  || gimple_assign_rhs_code (assign) == WIDEN_PLUS_EXPR
	  || gimple_assign_rhs_code (assign) == WIDEN_MINUS_EXPR
	  || gimple_assign_rhs_code (assign) == FLOAT_EXPR)
	{
	  tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (assign));

	  rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
	  if (rhs < lhs)
	    scalar_type = rhs_type;
	}
    }
  else if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
    {
      unsigned int i = 0;
      if (gimple_call_internal_p (call))
	{
	  internal_fn ifn = gimple_call_internal_fn (call);
	  if (internal_load_fn_p (ifn))
	    /* For loads the LHS type does the trick.  */
	    i = ~0U;
	  else if (internal_store_fn_p (ifn))
	    {
	      /* For stores use the type of the stored value.  */
	      i = internal_fn_stored_value_index (ifn);
	      scalar_type = TREE_TYPE (gimple_call_arg (call, i));
	      i = ~0U;
	    }
	  else if (internal_fn_mask_index (ifn) == 0)
	    i = 1;
	}
      if (i < gimple_call_num_args (call))
	{
	  tree rhs_type = TREE_TYPE (gimple_call_arg (call, i));
	  if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (rhs_type)))
	    {
	      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
	      if (rhs < lhs)
		scalar_type = rhs_type;
	    }
	}
    }

  return scalar_type;
}
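
/* Illustrative example: with 128-bit vectors, a loop such as

	for (i = 0; i < n; i++)
	  int_out[i] = (int) short_in[i];

   must base the VF on the smaller type 'short' (8 lanes per vector)
   rather than 'int' (4 lanes); the int computation then simply consumes
   two vectors per vector iteration.  */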


/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
   tested at run-time.  Return TRUE if DDR was successfully inserted.
   Return false if versioning is not supported.  */

static opt_result
vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((unsigned) param_vect_max_version_for_alias_checks == 0)
    return opt_result::failure_at (vect_location,
				   "will not create alias checks, as"
				   " --param vect-max-version-for-alias-checks"
				   " == 0\n");

  opt_result res
    = runtime_alias_check_p (ddr, loop,
			     optimize_loop_nest_for_speed_p (loop));
  if (!res)
    return res;

  LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (ddr);
  return opt_result::success ();
}
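
/* The versioning condition eventually generated for the marked DDRs is
   conceptually of the form

	if (&a[0] + seg_len_a <= &b[0] || &b[0] + seg_len_b <= &a[0])
	  ... vectorized loop ...
	else
	  ... scalar fallback loop ...

   i.e. the two accessed segments must not overlap.  (Illustrative
   sketch only; the segment lengths depend on the VF and the access
   sizes, and adjacent checks are merged where possible.)  */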

/* Record that loop LOOP_VINFO needs to check that VALUE is nonzero.  */

static void
vect_check_nonzero_value (loop_vec_info loop_vinfo, tree value)
{
  const vec<tree> &checks = LOOP_VINFO_CHECK_NONZERO (loop_vinfo);
  for (unsigned int i = 0; i < checks.length(); ++i)
    if (checks[i] == value)
      return;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "need run-time check that %T is nonzero\n",
		     value);
  LOOP_VINFO_CHECK_NONZERO (loop_vinfo).safe_push (value);
}
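
/* Illustrative example: for

	for (i = 0; i < n; i++)
	  a[i * s] += 1;

   the step 's' might be zero at run time, in which case every iteration
   reads and writes the same element; recording 's' here lets loop
   versioning guard the vectorized loop with an 's != 0' test.  */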

/* Return true if we know that the order of vectorized DR_INFO_A and
   vectorized DR_INFO_B will be the same as the order of DR_INFO_A and
   DR_INFO_B.  At least one of the accesses is a write.  */

static bool
vect_preserves_scalar_order_p (dr_vec_info *dr_info_a, dr_vec_info *dr_info_b)
{
  stmt_vec_info stmtinfo_a = dr_info_a->stmt;
  stmt_vec_info stmtinfo_b = dr_info_b->stmt;

  /* Single statements are always kept in their original order.  */
  if (!STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
      && !STMT_VINFO_GROUPED_ACCESS (stmtinfo_b))
    return true;

  /* STMT_A and STMT_B belong to overlapping groups.  All loads are
     emitted at the position of the first scalar load.
     Stores in a group are emitted at the position of the last scalar store.
     Compute that position and check whether the resulting order matches
     the current one.  */
  stmt_vec_info il_a = DR_GROUP_FIRST_ELEMENT (stmtinfo_a);
  if (il_a)
    {
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_a)))
	for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s;
	     s = DR_GROUP_NEXT_ELEMENT (s))
	  il_a = get_later_stmt (il_a, s);
      else /* DR_IS_READ */
	for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s;
	     s = DR_GROUP_NEXT_ELEMENT (s))
	  if (get_later_stmt (il_a, s) == il_a)
	    il_a = s;
    }
  else
    il_a = stmtinfo_a;
  stmt_vec_info il_b = DR_GROUP_FIRST_ELEMENT (stmtinfo_b);
  if (il_b)
    {
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_b)))
	for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s;
	     s = DR_GROUP_NEXT_ELEMENT (s))
	  il_b = get_later_stmt (il_b, s);
      else /* DR_IS_READ */
	for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s;
	     s = DR_GROUP_NEXT_ELEMENT (s))
	  if (get_later_stmt (il_b, s) == il_b)
	    il_b = s;
    }
  else
    il_b = stmtinfo_b;
  bool a_after_b = (get_later_stmt (stmtinfo_a, stmtinfo_b) == stmtinfo_a);
  return (get_later_stmt (il_a, il_b) == il_a) == a_after_b;
}
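
/* E.g., for the grouped accesses

	... = a[i];	<-- vectorized load emitted here
	... = a[i+1];
	a[i] = ...;
	a[i+1] = ...;	<-- vectorized store emitted here

   the vector load is emitted at the first scalar load and the vector
   store at the last scalar store, so every load still precedes every
   store and the scalar order is preserved.  (Illustrative example.)  */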

/* A subroutine of vect_analyze_data_ref_dependence.  Handle
   DDR_COULD_BE_INDEPENDENT_P ddr DDR that has a known set of dependence
   distances.  These distances are conservatively correct but they don't
   reflect a guaranteed dependence.

   Return true if this function does all the work necessary to avoid
   an alias or false if the caller should use the dependence distances
   to limit the vectorization factor in the usual way.  LOOP_DEPTH is
   the depth of the loop described by LOOP_VINFO and the other arguments
   are as for vect_analyze_data_ref_dependence.  */

static bool
vect_analyze_possibly_independent_ddr (data_dependence_relation *ddr,
				       loop_vec_info loop_vinfo,
				       int loop_depth, unsigned int *max_vf)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  for (lambda_vector &dist_v : DDR_DIST_VECTS (ddr))
    {
      int dist = dist_v[loop_depth];
      if (dist != 0 && !(dist > 0 && DDR_REVERSED_P (ddr)))
	{
	  /* If the user asserted safelen >= DIST consecutive iterations
	     can be executed concurrently, assume independence.

	     ??? An alternative would be to add the alias check even
	     in this case, and vectorize the fallback loop with the
	     maximum VF set to safelen.  However, if the user has
	     explicitly given a length, it's less likely that that
	     would be a win.  */
	  if (loop->safelen >= 2 && abs_hwi (dist) <= loop->safelen)
	    {
	      if ((unsigned int) loop->safelen < *max_vf)
		*max_vf = loop->safelen;
	      LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
	      continue;
	    }

	  /* For dependence distances of 2 or more, we have the option
	     of limiting VF or checking for an alias at runtime.
	     Prefer to check at runtime if we can, to avoid limiting
	     the VF unnecessarily when the bases are in fact independent.

	     Note that the alias checks will be removed if the VF ends up
	     being small enough.  */
	  dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (DDR_A (ddr));
	  dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr));
	  return (!STMT_VINFO_GATHER_SCATTER_P (dr_info_a->stmt)
		  && !STMT_VINFO_GATHER_SCATTER_P (dr_info_b->stmt)
		  && vect_mark_for_runtime_alias_test (ddr, loop_vinfo));
	}
    }
  return true;
}
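
/* Illustrative example: a loop annotated with

	#pragma omp simd safelen(8)
	for (i = 0; i < n; i++)
	  a[i + 3] = a[i] + 1;

   has a conservative dependence distance of 3, but the safelen clause
   asserts that any 8 consecutive iterations may execute concurrently,
   so the loop can still be vectorized with *MAX_VF capped at 8.  */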


/* Function vect_analyze_data_ref_dependence.

   FIXME: I needed to change the sense of the returned flag.

   Return FALSE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  When versioning for alias may check a
   dependence at run-time, return TRUE.  Adjust *MAX_VF according to
   the data dependence.  */

static opt_result
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
				  loop_vec_info loop_vinfo,
				  unsigned int *max_vf)
{
  unsigned int i;
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (dra);
  dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (drb);
  stmt_vec_info stmtinfo_a = dr_info_a->stmt;
  stmt_vec_info stmtinfo_b = dr_info_b->stmt;
  lambda_vector dist_v;
  unsigned int loop_depth;

  /* If user asserted safelen consecutive iterations can be
     executed concurrently, assume independence.  */
  auto apply_safelen = [&]()
    {
      if (loop->safelen >= 2)
	{
	  if ((unsigned int) loop->safelen < *max_vf)
	    *max_vf = loop->safelen;
	  LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
	  return true;
	}
      return false;
    };

  /* In loop analysis all data references should be vectorizable.  */
  if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
      || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
    gcc_unreachable ();

  /* Independent data accesses.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return opt_result::success ();

  if (dra == drb
      || (DR_IS_READ (dra) && DR_IS_READ (drb)))
    return opt_result::success ();

  /* We do not have to consider dependences between accesses that belong
     to the same group, unless the stride could be smaller than the
     group size.  */
  if (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)
      && (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)
	  == DR_GROUP_FIRST_ELEMENT (stmtinfo_b))
      && !STMT_VINFO_STRIDED_P (stmtinfo_a))
    return opt_result::success ();

  /* Even if we have an anti-dependence then, as the vectorized loop covers at
     least two scalar iterations, there is always also a true dependence.
     As the vectorizer does not re-order loads and stores we can ignore
     the anti-dependence if TBAA can disambiguate both DRs similar to the
     case with known negative distance anti-dependences (positive
     distance anti-dependences would violate TBAA constraints).  */
  if (((DR_IS_READ (dra) && DR_IS_WRITE (drb))
       || (DR_IS_WRITE (dra) && DR_IS_READ (drb)))
      && !alias_sets_conflict_p (get_alias_set (DR_REF (dra)),
				 get_alias_set (DR_REF (drb))))
    return opt_result::success ();

  if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
      || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
    {
      if (apply_safelen ())
	return opt_result::success ();

      return opt_result::failure_at
	(stmtinfo_a->stmt,
	 "possible alias involving gather/scatter between %T and %T\n",
	 DR_REF (dra), DR_REF (drb));
    }

  /* Unknown data dependence.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (apply_safelen ())
	return opt_result::success ();

      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmtinfo_a->stmt,
			 "versioning for alias required: "
			 "can't determine dependence between %T and %T\n",
			 DR_REF (dra), DR_REF (drb));

      /* Add to list of ddrs that need to be tested at run-time.  */
      return vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  /* Known data dependence.  */
  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (apply_safelen ())
	return opt_result::success ();

      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmtinfo_a->stmt,
			 "versioning for alias required: "
			 "bad dist vector for %T and %T\n",
			 DR_REF (dra), DR_REF (drb));
      /* Add to list of ddrs that need to be tested at run-time.  */
      return vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));

  if (DDR_COULD_BE_INDEPENDENT_P (ddr)
      && vect_analyze_possibly_independent_ddr (ddr, loop_vinfo,
						loop_depth, max_vf))
    return opt_result::success ();

  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
    {
      int dist = dist_v[loop_depth];

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "dependence distance  = %d.\n", dist);

      if (dist == 0)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "dependence distance == 0 between %T and %T\n",
			     DR_REF (dra), DR_REF (drb));

	  /* When we perform grouped accesses and perform implicit CSE
	     by detecting equal accesses and doing disambiguation with
	     runtime alias tests like for
	        .. = a[i];
		.. = a[i+1];
		a[i] = ..;
		a[i+1] = ..;
		*p = ..;
		.. = a[i];
		.. = a[i+1];
	     where we will end up loading { a[i], a[i+1] } once, make
	     sure that inserting group loads before the first load and
	     stores after the last store will do the right thing.
	     Similar for groups like
	        a[i] = ...;
		... = a[i];
		a[i+1] = ...;
	     where loads from the group interleave with the store.  */
	  if (!vect_preserves_scalar_order_p (dr_info_a, dr_info_b))
	    return opt_result::failure_at (stmtinfo_a->stmt,
					   "READ_WRITE dependence"
					   " in interleaving.\n");

	  if (loop->safelen < 2)
	    {
	      tree indicator = dr_zero_step_indicator (dra);
	      if (!indicator || integer_zerop (indicator))
		return opt_result::failure_at (stmtinfo_a->stmt,
					       "access also has a zero step\n");
	      else if (TREE_CODE (indicator) != INTEGER_CST)
		vect_check_nonzero_value (loop_vinfo, indicator);
	    }
	  continue;
	}

      if (dist > 0 && DDR_REVERSED_P (ddr))
	{
	  /* If DDR_REVERSED_P the order of the data-refs in DDR was
	     reversed (to make distance vector positive), and the actual
	     distance is negative.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "dependence distance negative.\n");
	  /* When doing outer loop vectorization, we need to check if there is
	     a backward dependence at the inner loop level if the dependence
	     at the outer loop is reversed.  See PR81740.  */
	  if (nested_in_vect_loop_p (loop, stmtinfo_a)
	      || nested_in_vect_loop_p (loop, stmtinfo_b))
	    {
	      unsigned inner_depth = index_in_loop_nest (loop->inner->num,
							 DDR_LOOP_NEST (ddr));
	      if (dist_v[inner_depth] < 0)
		return opt_result::failure_at (stmtinfo_a->stmt,
					       "not vectorized, dependence "
					       "between data-refs %T and %T\n",
					       DR_REF (dra), DR_REF (drb));
	    }
	  /* Record a negative dependence distance to later limit the
	     amount of stmt copying / unrolling we can perform.
	     Only need to handle read-after-write dependence.  */
	  if (DR_IS_READ (drb)
	      && (STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) == 0
		  || STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) > (unsigned)dist))
	    STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) = dist;
	  continue;
	}

      unsigned int abs_dist = abs (dist);
      if (abs_dist >= 2 && abs_dist < *max_vf)
	{
	  /* The dependence distance requires reduction of the maximal
	     vectorization factor.  */
	  *max_vf = abs_dist;
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "adjusting maximal vectorization factor to %i\n",
			     *max_vf);
	}

      if (abs_dist >= *max_vf)
	{
	  /* Dependence distance does not create dependence, as far as
	     vectorization is concerned, in this case.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "dependence distance >= VF.\n");
	  continue;
	}

      return opt_result::failure_at (stmtinfo_a->stmt,
				     "not vectorized, possible dependence "
				     "between data-refs %T and %T\n",
				     DR_REF (dra), DR_REF (drb));
    }

  return opt_result::success ();
}
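
/* E.g., for

	for (i = 0; i < n; i++)
	  a[i + 3] = a[i] + 1;

   the dependence distance is 3, so *MAX_VF is reduced to 3: at most
   three consecutive iterations can execute as one vector iteration
   without a read observing a not-yet-executed write.  A distance that
   is already >= *MAX_VF imposes no constraint.  (Illustrative
   example.)  */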

/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  Set *MAX_VF according to
   the maximum vectorization factor the data dependences allow.  */

opt_result
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
				   unsigned int *max_vf)
{
  unsigned int i;
  struct data_dependence_relation *ddr;

  DUMP_VECT_SCOPE ("vect_analyze_data_ref_dependences");

  if (!LOOP_VINFO_DDRS (loop_vinfo).exists ())
    {
      LOOP_VINFO_DDRS (loop_vinfo)
	.create (LOOP_VINFO_DATAREFS (loop_vinfo).length ()
		 * LOOP_VINFO_DATAREFS (loop_vinfo).length ());
      /* We do not need read-read dependences.  */
      bool res = compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo),
					  &LOOP_VINFO_DDRS (loop_vinfo),
					  LOOP_VINFO_LOOP_NEST (loop_vinfo),
					  false);
      gcc_assert (res);
    }

  LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = true;

  /* For epilogues we either have no aliases or alias versioning
     was applied to original loop.  Therefore we may just get max_vf
     using VF of original loop.  */
  if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
    *max_vf = LOOP_VINFO_ORIG_MAX_VECT_FACTOR (loop_vinfo);
  else
    FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr)
      {
	opt_result res
	  = vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf);
	if (!res)
	  return res;
      }

  return opt_result::success ();
}


/* Function vect_slp_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB for VINFO.  When versioning for alias
   may check a dependence at run-time, return FALSE.  */

static bool
vect_slp_analyze_data_ref_dependence (vec_info *vinfo,
				      struct data_dependence_relation *ddr)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  dr_vec_info *dr_info_a = vinfo->lookup_dr (dra);
  dr_vec_info *dr_info_b = vinfo->lookup_dr (drb);

  /* We need to check dependences of statements marked as unvectorizable
     as well, they still can prohibit vectorization.  */

  /* Independent data accesses.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return false;

  if (dra == drb)
    return false;

  /* Read-read is OK.  */
  if (DR_IS_READ (dra) && DR_IS_READ (drb))
    return false;

  /* If dra and drb are part of the same interleaving chain consider
     them independent.  */
  if (STMT_VINFO_GROUPED_ACCESS (dr_info_a->stmt)
      && (DR_GROUP_FIRST_ELEMENT (dr_info_a->stmt)
	  == DR_GROUP_FIRST_ELEMENT (dr_info_b->stmt)))
    return false;

  /* Unknown data dependence.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't determine dependence between %T and %T\n",
			 DR_REF (dra), DR_REF (drb));
    }
  else if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "determined dependence between %T and %T\n",
		     DR_REF (dra), DR_REF (drb));

  return true;
}


/* Analyze dependences involved in the transform of a store SLP NODE.  */

static bool
vect_slp_analyze_store_dependences (vec_info *vinfo, slp_tree node)
{
  /* This walks over all stmts involved in the SLP store done
     in NODE verifying we can sink them up to the last stmt in the
     group.  */
  stmt_vec_info last_access_info = vect_find_last_scalar_stmt_in_slp (node);
  gcc_assert (DR_IS_WRITE (STMT_VINFO_DATA_REF (last_access_info)));

  for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k)
    {
      stmt_vec_info access_info
	= vect_orig_stmt (SLP_TREE_SCALAR_STMTS (node)[k]);
      if (access_info == last_access_info)
	continue;
      data_reference *dr_a = STMT_VINFO_DATA_REF (access_info);
      ao_ref ref;
      bool ref_initialized_p = false;
      for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt);
	   gsi_stmt (gsi) != last_access_info->stmt; gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  if (! gimple_vuse (stmt))
	    continue;

	  /* If we couldn't record a (single) data reference for this
	     stmt we have to resort to the alias oracle.  */
	  stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
	  data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info);
	  if (!dr_b)
	    {
	      /* We are moving a store - this means
		 we cannot use TBAA for disambiguation.  */
	      if (!ref_initialized_p)
		ao_ref_init (&ref, DR_REF (dr_a));
	      if (stmt_may_clobber_ref_p_1 (stmt, &ref, false)
		  || ref_maybe_used_by_stmt_p (stmt, &ref, false))
		return false;
	      continue;
	    }

	  gcc_assert (!gimple_visited_p (stmt));

	  ddr_p ddr = initialize_data_dependence_relation (dr_a,
							   dr_b, vNULL);
	  bool dependent = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
	  free_dependence_relation (ddr);
	  if (dependent)
	    return false;
	}
    }
  return true;
}
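
/* Illustrative example: for an SLP store group such as

	a[i] = x;
	*p = ...;	<-- intervening store
	a[i+1] = y;

   the vectorized store is emitted at the position of 'a[i+1] = y', so
   'a[i] = x' must be sunk across '*p = ...'; that is valid only if the
   intervening statement can be shown not to read or clobber a[i].
   Because a store is being moved, TBAA cannot be used for this
   disambiguation.  */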

/* Analyze dependences involved in the transform of a load SLP NODE.  STORES
   contains the scalar stores of this instance if we are disambiguating
   the loads.  */

static bool
vect_slp_analyze_load_dependences (vec_info *vinfo, slp_tree node,
				   vec<stmt_vec_info> stores,
				   stmt_vec_info last_store_info)
{
  /* This walks over all stmts involved in the SLP load done
     in NODE verifying we can hoist them up to the first stmt in the
     group.  */
  stmt_vec_info first_access_info = vect_find_first_scalar_stmt_in_slp (node);
  gcc_assert (DR_IS_READ (STMT_VINFO_DATA_REF (first_access_info)));

  for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k)
    {
      stmt_vec_info access_info
	= vect_orig_stmt (SLP_TREE_SCALAR_STMTS (node)[k]);
      if (access_info == first_access_info)
	continue;
      data_reference *dr_a = STMT_VINFO_DATA_REF (access_info);
      ao_ref ref;
      bool ref_initialized_p = false;
      hash_set<stmt_vec_info> grp_visited;
      for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt);
	   gsi_stmt (gsi) != first_access_info->stmt; gsi_prev (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  if (! gimple_vdef (stmt))
	    continue;

	  stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);

	  /* If we run into a store of this same instance (we've just
	     marked those) then delay dependence checking until we run
	     into the last store because this is where it will have
	     been sunk to (and we verified that we can do that already).  */
	  if (gimple_visited_p (stmt))
	    {
	      if (stmt_info != last_store_info)
		continue;

	      for (stmt_vec_info &store_info : stores)
		{
		  data_reference *store_dr = STMT_VINFO_DATA_REF (store_info);
		  ddr_p ddr = initialize_data_dependence_relation
				(dr_a, store_dr, vNULL);
		  bool dependent
		    = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
		  free_dependence_relation (ddr);
		  if (dependent)
		    return false;
		}
	      continue;
	    }

	  auto check_hoist = [&] (stmt_vec_info stmt_info) -> bool
	    {
	      /* We are hoisting a load - this means we can use TBAA for
		 disambiguation.  */
	      if (!ref_initialized_p)
		ao_ref_init (&ref, DR_REF (dr_a));
	      if (stmt_may_clobber_ref_p_1 (stmt_info->stmt, &ref, true))
		{
		  /* If we couldn't record a (single) data reference for this
		     stmt we have to give up now.  */
		  data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info);
		  if (!dr_b)
		    return false;
		  ddr_p ddr = initialize_data_dependence_relation (dr_a,
								   dr_b, vNULL);
		  bool dependent
		    = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
		  free_dependence_relation (ddr);
		  if (dependent)
		    return false;
		}
	      /* No dependence.  */
	      return true;
	    };
	  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
	    {
	      /* When we run into a store group we have to honor
		 that earlier stores might be moved here.  We don't
		 know exactly which and where to since we lack a
		 back-mapping from DR to SLP node, so assume all
		 earlier stores are sunk here.  It's enough to
		 consider the last stmt of a group for this.
		 ???  Both this and the fact that we disregard that
		 the conflicting instance might be removed later
		 is overly conservative.  */
	      if (!grp_visited.add (DR_GROUP_FIRST_ELEMENT (stmt_info)))
		for (auto store_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
		     store_info != NULL;
		     store_info = DR_GROUP_NEXT_ELEMENT (store_info))
		  if ((store_info == stmt_info
		       || get_later_stmt (store_info, stmt_info) == stmt_info)
		      && !check_hoist (store_info))
		    return false;
	    }
	  else
	    {
	      if (!check_hoist (stmt_info))
		return false;
	    }
	}
    }
  return true;
}
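
/* Illustrative example: for an SLP load group such as

	... = a[i+1];
	*p = ...;	<-- intervening store
	... = a[i];

   the vectorized load is emitted at the position of '... = a[i+1]', so
   the load of a[i] is hoisted across '*p = ...'.  Unlike the store
   case, TBAA may be used here, since a load is being moved.  */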


/* Function vect_slp_analyze_instance_dependence.

   Examine all the data references in the SLP instance, and make sure
   there do not exist any data dependences between them that would
   prevent the transform.  */

bool
vect_slp_analyze_instance_dependence (vec_info *vinfo, slp_instance instance)
{
  DUMP_VECT_SCOPE ("vect_slp_analyze_instance_dependence");

  /* The stores of this instance are at the root of the SLP tree.  */
  slp_tree store = NULL;
  if (SLP_INSTANCE_KIND (instance) == slp_inst_kind_store)
    store = SLP_INSTANCE_TREE (instance);

  /* Verify we can sink stores to the vectorized stmt insert location.  */
  stmt_vec_info last_store_info = NULL;
  if (store)
    {
      if (! vect_slp_analyze_store_dependences (vinfo, store))
	return false;

      /* Mark stores in this instance and remember the last one.  */
      last_store_info = vect_find_last_scalar_stmt_in_slp (store);
      for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (store).length (); ++k)
	gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k]->stmt, true);
    }

  bool res = true;

  /* Verify we can hoist loads to the vectorized stmt insert location,
     special-casing stores of this instance.  */
  for (slp_tree &load : SLP_INSTANCE_LOADS (instance))
    if (! vect_slp_analyze_load_dependences (vinfo, load,
					     store
					     ? SLP_TREE_SCALAR_STMTS (store)
					     : vNULL, last_store_info))
      {
	res = false;
	break;
      }

  /* Unset the visited flag.  */
  if (store)
    for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (store).length (); ++k)
      gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k]->stmt, false);

  return res;
}

/* Return the misalignment of DR_INFO accessed in VECTYPE with OFFSET
   applied.  */

int
dr_misalignment (dr_vec_info *dr_info, tree vectype, poly_int64 offset)
{
  HOST_WIDE_INT diff = 0;
  /* Alignment is only analyzed for the first element of a DR group,
     use that but adjust misalignment by the offset of the access.  */
  if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
    {
      dr_vec_info *first_dr
	= STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
      /* vect_analyze_data_ref_accesses guarantees that DR_INIT are
	 INTEGER_CSTs and the first element in the group has the lowest
	 address.  */
      diff = (TREE_INT_CST_LOW (DR_INIT (dr_info->dr))
	      - TREE_INT_CST_LOW (DR_INIT (first_dr->dr)));
      gcc_assert (diff >= 0);
      dr_info = first_dr;
    }

  int misalign = dr_info->misalignment;
  gcc_assert (misalign != DR_MISALIGNMENT_UNINITIALIZED);
  if (misalign == DR_MISALIGNMENT_UNKNOWN)
    return misalign;

  /* If the access is only aligned for a vector type with smaller alignment
     requirement the access has unknown misalignment.  */
  if (maybe_lt (dr_info->target_alignment * BITS_PER_UNIT,
		targetm.vectorize.preferred_vector_alignment (vectype)))
    return DR_MISALIGNMENT_UNKNOWN;

  /* Apply the offset from the DR group start and the externally supplied
     offset which can for example result from a negative stride access.  */
  poly_int64 misalignment = misalign + diff + offset;

  /* vect_compute_data_ref_alignment will have ensured that target_alignment
     is constant and otherwise set misalign to DR_MISALIGNMENT_UNKNOWN.  */
  unsigned HOST_WIDE_INT target_alignment_c
    = dr_info->target_alignment.to_constant ();
  if (!known_misalignment (misalignment, target_alignment_c, &misalign))
    return DR_MISALIGNMENT_UNKNOWN;
  return misalign;
}
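
/* Illustrative arithmetic: with a 16-byte target alignment, a group
   whose leader is misaligned by 4 bytes and an element 8 bytes into
   the group (with no external OFFSET) has

	misalignment = 4 (leader) + 8 (DR_INIT diff) + 0 (OFFSET) = 12

   i.e. the element's address is 12 bytes past a 16-byte boundary.  */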

/* Record the base alignment guarantee given by DRB, which occurs
   in STMT_INFO.  */

static void
vect_record_base_alignment (vec_info *vinfo, stmt_vec_info stmt_info,
			    innermost_loop_behavior *drb)
{
  bool existed;
  std::pair<stmt_vec_info, innermost_loop_behavior *> &entry
    = vinfo->base_alignments.get_or_insert (drb->base_address, &existed);
  if (!existed || entry.second->base_alignment < drb->base_alignment)
    {
      entry = std::make_pair (stmt_info, drb);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "recording new base alignment for %T\n"
			 "  alignment:    %d\n"
			 "  misalignment: %d\n"
			 "  based on:     %G",
			 drb->base_address,
			 drb->base_alignment,
			 drb->base_misalignment,
			 stmt_info->stmt);
    }
}

/* If the region we're going to vectorize is reached, all unconditional
   data references occur at least once.  We can therefore pool the base
   alignment guarantees from each unconditional reference.  Do this by
   going through all the data references in VINFO and checking whether
   the containing statement makes the reference unconditionally.  If so,
   record the alignment of the base address in VINFO so that it can be
   used for all other references with the same base.  */

void
vect_record_base_alignments (vec_info *vinfo)
{
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  for (data_reference *dr : vinfo->shared->datarefs)
    {
      dr_vec_info *dr_info = vinfo->lookup_dr (dr);
      stmt_vec_info stmt_info = dr_info->stmt;
      if (!DR_IS_CONDITIONAL_IN_STMT (dr)
	  && STMT_VINFO_VECTORIZABLE (stmt_info)
	  && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
	{
	  vect_record_base_alignment (vinfo, stmt_info, &DR_INNERMOST (dr));

	  /* If DR is nested in the loop that is being vectorized, we can also
	     record the alignment of the base wrt the outer loop.  */
	  if (loop && nested_in_vect_loop_p (loop, stmt_info))
	    vect_record_base_alignment
	      (vinfo, stmt_info, &STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info));
	}
    }
}
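
/* Illustrative example: if the vectorized region unconditionally
   dereferences 'p' twice,

	... = *p;		<-- establishes the base alignment of p
	... = *(p + 4);		<-- may reuse the pooled guarantee

   the alignment guarantee derived from the first reference can be
   reused by every other reference sharing the same base address.  */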
    998  1.1  mrg 
    999  1.1  mrg /* Function vect_compute_data_ref_alignment
   1000  1.1  mrg 
   1001  1.1  mrg    Compute the misalignment of the data reference DR_INFO when vectorizing
   1002  1.1  mrg    with VECTYPE.
   1003  1.1  mrg 
   1004  1.1  mrg    Output:
   1005  1.1  mrg    1. initialized misalignment info for DR_INFO
   1006  1.1  mrg 
   1007  1.1  mrg    FOR NOW: No analysis is actually performed. Misalignment is calculated
   1008  1.1  mrg    only for trivial cases. TODO.  */
   1009  1.1  mrg 
   1010  1.1  mrg static void
   1011  1.1  mrg vect_compute_data_ref_alignment (vec_info *vinfo, dr_vec_info *dr_info,
   1012  1.1  mrg 				 tree vectype)
   1013  1.1  mrg {
   1014  1.1  mrg   stmt_vec_info stmt_info = dr_info->stmt;
   1015  1.1  mrg   vec_base_alignments *base_alignments = &vinfo->base_alignments;
   1016  1.1  mrg   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   1017  1.1  mrg   class loop *loop = NULL;
   1018  1.1  mrg   tree ref = DR_REF (dr_info->dr);
   1019  1.1  mrg 
   1020  1.1  mrg   if (dump_enabled_p ())
   1021  1.1  mrg     dump_printf_loc (MSG_NOTE, vect_location,
   1022  1.1  mrg                      "vect_compute_data_ref_alignment:\n");
   1023  1.1  mrg 
   1024  1.1  mrg   if (loop_vinfo)
   1025  1.1  mrg     loop = LOOP_VINFO_LOOP (loop_vinfo);
   1026  1.1  mrg 
   1027  1.1  mrg   /* Initialize misalignment to unknown.  */
   1028  1.1  mrg   SET_DR_MISALIGNMENT (dr_info, DR_MISALIGNMENT_UNKNOWN);
   1029  1.1  mrg 
   1030  1.1  mrg   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
   1031  1.1  mrg     return;
   1032  1.1  mrg 
   1033  1.1  mrg   innermost_loop_behavior *drb = vect_dr_behavior (vinfo, dr_info);
   1034  1.1  mrg   bool step_preserves_misalignment_p;
   1035  1.1  mrg 
   1036  1.1  mrg   poly_uint64 vector_alignment
   1037  1.1  mrg     = exact_div (targetm.vectorize.preferred_vector_alignment (vectype),
   1038  1.1  mrg 		 BITS_PER_UNIT);
   1039  1.1  mrg   SET_DR_TARGET_ALIGNMENT (dr_info, vector_alignment);
   1040  1.1  mrg 
   1041  1.1  mrg   /* If the main loop has peeled for alignment we have no way of knowing
   1042  1.1  mrg      whether the data accesses in the epilogues are aligned.  We can't at
   1043  1.1  mrg      compile time answer the question whether we have entered the main loop or
   1044  1.1  mrg      not.  Fixes PR 92351.  */
   1045  1.1  mrg   if (loop_vinfo)
   1046  1.1  mrg     {
   1047  1.1  mrg       loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
   1048  1.1  mrg       if (orig_loop_vinfo
   1049  1.1  mrg 	  && LOOP_VINFO_PEELING_FOR_ALIGNMENT (orig_loop_vinfo) != 0)
   1050  1.1  mrg 	return;
   1051  1.1  mrg     }
   1052  1.1  mrg 
   1053  1.1  mrg   unsigned HOST_WIDE_INT vect_align_c;
   1054  1.1  mrg   if (!vector_alignment.is_constant (&vect_align_c))
   1055  1.1  mrg     return;
   1056  1.1  mrg 
   1057  1.1  mrg   /* No step for BB vectorization.  */
   1058  1.1  mrg   if (!loop)
   1059  1.1  mrg     {
   1060  1.1  mrg       gcc_assert (integer_zerop (drb->step));
   1061  1.1  mrg       step_preserves_misalignment_p = true;
   1062  1.1  mrg     }
   1063  1.1  mrg 
   1064  1.1  mrg   /* In case the dataref is in an inner-loop of the loop that is being
   1065  1.1  mrg      vectorized (LOOP), we use the base and misalignment information
   1066  1.1  mrg      relative to the outer-loop (LOOP).  This is ok only if the misalignment
   1067  1.1  mrg      stays the same throughout the execution of the inner-loop, which is why
   1068  1.1  mrg      we have to check that the stride of the dataref in the inner-loop evenly
   1069  1.1  mrg      divides by the vector alignment.  */
   1070  1.1  mrg   else if (nested_in_vect_loop_p (loop, stmt_info))
   1071  1.1  mrg     {
   1072  1.1  mrg       step_preserves_misalignment_p
   1073  1.1  mrg 	= (DR_STEP_ALIGNMENT (dr_info->dr) % vect_align_c) == 0;
   1074  1.1  mrg 
   1075  1.1  mrg       if (dump_enabled_p ())
   1076  1.1  mrg 	{
   1077  1.1  mrg 	  if (step_preserves_misalignment_p)
   1078  1.1  mrg 	    dump_printf_loc (MSG_NOTE, vect_location,
   1079  1.1  mrg 			     "inner step divides the vector alignment.\n");
   1080  1.1  mrg 	  else
   1081  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   1082  1.1  mrg 			     "inner step doesn't divide the vector"
   1083  1.1  mrg 			     " alignment.\n");
   1084  1.1  mrg 	}
   1085  1.1  mrg     }
   1086  1.1  mrg 
    1087  1.1  mrg   /* Similarly we can only use base and misalignment information relative to
    1088  1.1  mrg      an innermost loop if the misalignment stays the same throughout the
    1089  1.1  mrg      execution of the loop.  As above, this is the case if the step of the
    1090  1.1  mrg      dataref times the vectorization factor is a multiple of the alignment.  */
   1091  1.1  mrg   else
   1092  1.1  mrg     {
   1093  1.1  mrg       poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
   1094  1.1  mrg       step_preserves_misalignment_p
   1095  1.1  mrg 	= multiple_p (DR_STEP_ALIGNMENT (dr_info->dr) * vf, vect_align_c);
   1096  1.1  mrg 
   1097  1.1  mrg       if (!step_preserves_misalignment_p && dump_enabled_p ())
   1098  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   1099  1.1  mrg 			 "step doesn't divide the vector alignment.\n");
   1100  1.1  mrg     }
   1101  1.1  mrg 
   1102  1.1  mrg   unsigned int base_alignment = drb->base_alignment;
   1103  1.1  mrg   unsigned int base_misalignment = drb->base_misalignment;
   1104  1.1  mrg 
   1105  1.1  mrg   /* Calculate the maximum of the pooled base address alignment and the
   1106  1.1  mrg      alignment that we can compute for DR itself.  */
   1107  1.1  mrg   std::pair<stmt_vec_info, innermost_loop_behavior *> *entry
   1108  1.1  mrg     = base_alignments->get (drb->base_address);
   1109  1.1  mrg   if (entry
   1110  1.1  mrg       && base_alignment < (*entry).second->base_alignment
   1111  1.1  mrg       && (loop_vinfo
   1112  1.1  mrg 	  || (dominated_by_p (CDI_DOMINATORS, gimple_bb (stmt_info->stmt),
   1113  1.1  mrg 			      gimple_bb (entry->first->stmt))
   1114  1.1  mrg 	      && (gimple_bb (stmt_info->stmt) != gimple_bb (entry->first->stmt)
   1115  1.1  mrg 		  || (entry->first->dr_aux.group <= dr_info->group)))))
   1116  1.1  mrg     {
   1117  1.1  mrg       base_alignment = entry->second->base_alignment;
   1118  1.1  mrg       base_misalignment = entry->second->base_misalignment;
   1119  1.1  mrg     }
   1120  1.1  mrg 
   1121  1.1  mrg   if (drb->offset_alignment < vect_align_c
   1122  1.1  mrg       || !step_preserves_misalignment_p
   1123  1.1  mrg       /* We need to know whether the step wrt the vectorized loop is
   1124  1.1  mrg 	 negative when computing the starting misalignment below.  */
   1125  1.1  mrg       || TREE_CODE (drb->step) != INTEGER_CST)
   1126  1.1  mrg     {
   1127  1.1  mrg       if (dump_enabled_p ())
   1128  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   1129  1.1  mrg 			 "Unknown alignment for access: %T\n", ref);
   1130  1.1  mrg       return;
   1131  1.1  mrg     }
   1132  1.1  mrg 
   1133  1.1  mrg   if (base_alignment < vect_align_c)
   1134  1.1  mrg     {
   1135  1.1  mrg       unsigned int max_alignment;
   1136  1.1  mrg       tree base = get_base_for_alignment (drb->base_address, &max_alignment);
   1137  1.1  mrg       if (max_alignment < vect_align_c
   1138  1.1  mrg 	  || !vect_can_force_dr_alignment_p (base,
   1139  1.1  mrg 					     vect_align_c * BITS_PER_UNIT))
   1140  1.1  mrg 	{
   1141  1.1  mrg 	  if (dump_enabled_p ())
   1142  1.1  mrg 	    dump_printf_loc (MSG_NOTE, vect_location,
   1143  1.1  mrg 			     "can't force alignment of ref: %T\n", ref);
   1144  1.1  mrg 	  return;
   1145  1.1  mrg 	}
   1146  1.1  mrg 
   1147  1.1  mrg       /* Force the alignment of the decl.
   1148  1.1  mrg 	 NOTE: This is the only change to the code we make during
   1149  1.1  mrg 	 the analysis phase, before deciding to vectorize the loop.  */
   1150  1.1  mrg       if (dump_enabled_p ())
   1151  1.1  mrg 	dump_printf_loc (MSG_NOTE, vect_location,
   1152  1.1  mrg 			 "force alignment of %T\n", ref);
   1153  1.1  mrg 
   1154  1.1  mrg       dr_info->base_decl = base;
   1155  1.1  mrg       dr_info->base_misaligned = true;
   1156  1.1  mrg       base_misalignment = 0;
   1157  1.1  mrg     }
   1158  1.1  mrg   poly_int64 misalignment
   1159  1.1  mrg     = base_misalignment + wi::to_poly_offset (drb->init).force_shwi ();
   1160  1.1  mrg 
   1161  1.1  mrg   unsigned int const_misalignment;
   1162  1.1  mrg   if (!known_misalignment (misalignment, vect_align_c, &const_misalignment))
   1163  1.1  mrg     {
   1164  1.1  mrg       if (dump_enabled_p ())
   1165  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   1166  1.1  mrg 			 "Non-constant misalignment for access: %T\n", ref);
   1167  1.1  mrg       return;
   1168  1.1  mrg     }
   1169  1.1  mrg 
   1170  1.1  mrg   SET_DR_MISALIGNMENT (dr_info, const_misalignment);
   1171  1.1  mrg 
   1172  1.1  mrg   if (dump_enabled_p ())
   1173  1.1  mrg     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   1174  1.1  mrg 		     "misalign = %d bytes of ref %T\n",
   1175  1.1  mrg 		     const_misalignment, ref);
   1176  1.1  mrg 
   1177  1.1  mrg   return;
   1178  1.1  mrg }
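
                   /* Illustrative sketch (not part of this file): once the early-outs
                      above are passed, the misalignment recorded here reduces to modular
                      arithmetic on byte offsets.  The helper and numbers below are
                      hypothetical and for exposition only.  */
                   #if 0
                   static int
                   example_dr_misalignment (int base_misalignment, int init, int align)
                   {
                     /* ALIGN is the power-of-two target alignment in bytes; the result
                        is the constant value stored via SET_DR_MISALIGNMENT.  */
                     return (base_misalignment + init) & (align - 1);
                   }
                   /* E.g. a base misaligned by 8 bytes, a constant DR_INIT of 12 and a
                      16-byte target alignment give (8 + 12) & 15 == 4.  */
                   #endif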
   1179  1.1  mrg 
   1180  1.1  mrg /* Return whether DR_INFO, which is related to DR_PEEL_INFO in
   1181  1.1  mrg    that it only differs in DR_INIT, is aligned if DR_PEEL_INFO
   1182  1.1  mrg    is made aligned via peeling.  */
   1183  1.1  mrg 
   1184  1.1  mrg static bool
   1185  1.1  mrg vect_dr_aligned_if_related_peeled_dr_is (dr_vec_info *dr_info,
   1186  1.1  mrg 					 dr_vec_info *dr_peel_info)
   1187  1.1  mrg {
   1188  1.1  mrg   if (multiple_p (DR_TARGET_ALIGNMENT (dr_peel_info),
   1189  1.1  mrg 		  DR_TARGET_ALIGNMENT (dr_info)))
   1190  1.1  mrg     {
   1191  1.1  mrg       poly_offset_int diff
   1192  1.1  mrg 	= (wi::to_poly_offset (DR_INIT (dr_peel_info->dr))
   1193  1.1  mrg 	   - wi::to_poly_offset (DR_INIT (dr_info->dr)));
   1194  1.1  mrg       if (known_eq (diff, 0)
   1195  1.1  mrg 	  || multiple_p (diff, DR_TARGET_ALIGNMENT (dr_info)))
   1196  1.1  mrg 	return true;
   1197  1.1  mrg     }
   1198  1.1  mrg   return false;
   1199  1.1  mrg }
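
                   /* For example (illustrative numbers): with a 16-byte target alignment
                      on both refs, DR_INITs of 0 and 32 differ by a multiple of 16, so a
                      peel that aligns one ref aligns the other as well; DR_INITs of 0 and
                      20 do not, and the second ref's misalignment stays unrelated.  */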
   1200  1.1  mrg 
   1201  1.1  mrg /* Return whether DR_INFO is aligned if DR_PEEL_INFO is made
   1202  1.1  mrg    aligned via peeling.  */
   1203  1.1  mrg 
   1204  1.1  mrg static bool
   1205  1.1  mrg vect_dr_aligned_if_peeled_dr_is (dr_vec_info *dr_info,
   1206  1.1  mrg 				 dr_vec_info *dr_peel_info)
   1207  1.1  mrg {
   1208  1.1  mrg   if (!operand_equal_p (DR_BASE_ADDRESS (dr_info->dr),
   1209  1.1  mrg 			DR_BASE_ADDRESS (dr_peel_info->dr), 0)
   1210  1.1  mrg       || !operand_equal_p (DR_OFFSET (dr_info->dr),
   1211  1.1  mrg 			   DR_OFFSET (dr_peel_info->dr), 0)
   1212  1.1  mrg       || !operand_equal_p (DR_STEP (dr_info->dr),
   1213  1.1  mrg 			   DR_STEP (dr_peel_info->dr), 0))
   1214  1.1  mrg     return false;
   1215  1.1  mrg 
   1216  1.1  mrg   return vect_dr_aligned_if_related_peeled_dr_is (dr_info, dr_peel_info);
   1217  1.1  mrg }
   1218  1.1  mrg 
    1219  1.1  mrg /* Compute the value for dr_info->misalign so that the access appears
    1220  1.1  mrg    aligned.  This is used by peeling to compensate for the negative-step
    1221  1.1  mrg    offset that dr_misalignment applies.  */
   1222  1.1  mrg 
   1223  1.1  mrg int
   1224  1.1  mrg vect_dr_misalign_for_aligned_access (dr_vec_info *dr_info)
   1225  1.1  mrg {
   1226  1.1  mrg   if (tree_int_cst_sgn (DR_STEP (dr_info->dr)) >= 0)
   1227  1.1  mrg     return 0;
   1228  1.1  mrg 
   1229  1.1  mrg   tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
   1230  1.1  mrg   poly_int64 misalignment
   1231  1.1  mrg     = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
   1232  1.1  mrg        * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
   1233  1.1  mrg 
   1234  1.1  mrg   unsigned HOST_WIDE_INT target_alignment_c;
   1235  1.1  mrg   int misalign;
   1236  1.1  mrg   if (!dr_info->target_alignment.is_constant (&target_alignment_c)
   1237  1.1  mrg       || !known_misalignment (misalignment, target_alignment_c, &misalign))
   1238  1.1  mrg     return DR_MISALIGNMENT_UNKNOWN;
   1239  1.1  mrg   return misalign;
   1240  1.1  mrg }
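
                   /* Worked example (illustrative): for a V4SI access with a negative
                      step, the vector load starts (4 - 1) * 4 == 12 bytes below the
                      scalar address, so with a 16-byte target alignment the access
                      appears aligned when dr_info->misalign is 12 rather than 0.  */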
   1241  1.1  mrg 
   1242  1.1  mrg /* Function vect_update_misalignment_for_peel.
   1243  1.1  mrg    Sets DR_INFO's misalignment
   1244  1.1  mrg    - to 0 if it has the same alignment as DR_PEEL_INFO,
    1245  1.1  mrg    - to the misalignment computed using NPEEL if DR_INFO's misalignment is known,
   1246  1.1  mrg    - to -1 (unknown) otherwise.
   1247  1.1  mrg 
   1248  1.1  mrg    DR_INFO - the data reference whose misalignment is to be adjusted.
   1249  1.1  mrg    DR_PEEL_INFO - the data reference whose misalignment is being made
   1250  1.1  mrg 		  zero in the vector loop by the peel.
   1251  1.1  mrg    NPEEL - the number of iterations in the peel loop if the misalignment
   1252  1.1  mrg            of DR_PEEL_INFO is known at compile time.  */
   1253  1.1  mrg 
   1254  1.1  mrg static void
   1255  1.1  mrg vect_update_misalignment_for_peel (dr_vec_info *dr_info,
   1256  1.1  mrg 				   dr_vec_info *dr_peel_info, int npeel)
   1257  1.1  mrg {
    1258  1.1  mrg   /* If dr_info is aligned whenever dr_peel_info is, mark it so.  */
   1259  1.1  mrg   if (vect_dr_aligned_if_peeled_dr_is (dr_info, dr_peel_info))
   1260  1.1  mrg     {
   1261  1.1  mrg       SET_DR_MISALIGNMENT (dr_info,
   1262  1.1  mrg 			   vect_dr_misalign_for_aligned_access (dr_peel_info));
   1263  1.1  mrg       return;
   1264  1.1  mrg     }
   1265  1.1  mrg 
   1266  1.1  mrg   unsigned HOST_WIDE_INT alignment;
   1267  1.1  mrg   if (DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment)
   1268  1.1  mrg       && known_alignment_for_access_p (dr_info,
   1269  1.1  mrg 				       STMT_VINFO_VECTYPE (dr_info->stmt))
   1270  1.1  mrg       && known_alignment_for_access_p (dr_peel_info,
   1271  1.1  mrg 				       STMT_VINFO_VECTYPE (dr_peel_info->stmt)))
   1272  1.1  mrg     {
   1273  1.1  mrg       int misal = dr_info->misalignment;
   1274  1.1  mrg       misal += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
   1275  1.1  mrg       misal &= alignment - 1;
   1276  1.1  mrg       set_dr_misalignment (dr_info, misal);
   1277  1.1  mrg       return;
   1278  1.1  mrg     }
   1279  1.1  mrg 
   1280  1.1  mrg   if (dump_enabled_p ())
    1281  1.1  mrg     dump_printf_loc (MSG_NOTE, vect_location, "Setting misalignment "
    1282  1.1  mrg 		     "to unknown (-1).\n");
   1283  1.1  mrg   SET_DR_MISALIGNMENT (dr_info, DR_MISALIGNMENT_UNKNOWN);
   1284  1.1  mrg }
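
                   /* Illustrative sketch (not part of this file): in the known-alignment
                      case above the update is plain modular arithmetic on byte counts.
                      The helper below is hypothetical and for exposition only.  */
                   #if 0
                   static int
                   example_misalignment_after_peel (int misal, int npeel, int step,
                                                    int align)
                   {
                     /* Peeling NPEEL scalar iterations advances the access by
                        NPEEL * STEP bytes; reduce modulo the power-of-two alignment.  */
                     return (misal + npeel * step) & (align - 1);
                   }
                   /* E.g. misal == 4, npeel == 3, step == 4, align == 16:
                      (4 + 3 * 4) & 15 == 0, i.e. the ref becomes aligned.  */
                   #endif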
   1285  1.1  mrg 
   1286  1.1  mrg /* Return true if alignment is relevant for DR_INFO.  */
   1287  1.1  mrg 
   1288  1.1  mrg static bool
   1289  1.1  mrg vect_relevant_for_alignment_p (dr_vec_info *dr_info)
   1290  1.1  mrg {
   1291  1.1  mrg   stmt_vec_info stmt_info = dr_info->stmt;
   1292  1.1  mrg 
   1293  1.1  mrg   if (!STMT_VINFO_RELEVANT_P (stmt_info))
   1294  1.1  mrg     return false;
   1295  1.1  mrg 
   1296  1.1  mrg   /* For interleaving, only the alignment of the first access matters.  */
   1297  1.1  mrg   if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
   1298  1.1  mrg       && DR_GROUP_FIRST_ELEMENT (stmt_info) != stmt_info)
   1299  1.1  mrg     return false;
   1300  1.1  mrg 
   1301  1.1  mrg   /* Scatter-gather and invariant accesses continue to address individual
   1302  1.1  mrg      scalars, so vector-level alignment is irrelevant.  */
   1303  1.1  mrg   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
   1304  1.1  mrg       || integer_zerop (DR_STEP (dr_info->dr)))
   1305  1.1  mrg     return false;
   1306  1.1  mrg 
   1307  1.1  mrg   /* Strided accesses perform only component accesses, alignment is
   1308  1.1  mrg      irrelevant for them.  */
   1309  1.1  mrg   if (STMT_VINFO_STRIDED_P (stmt_info)
   1310  1.1  mrg       && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
   1311  1.1  mrg     return false;
   1312  1.1  mrg 
   1313  1.1  mrg   return true;
   1314  1.1  mrg }
   1315  1.1  mrg 
    1316  1.1  mrg /* Given a memory reference EXP, return whether its alignment is less
    1317  1.1  mrg    than its size.  */
   1318  1.1  mrg 
   1319  1.1  mrg static bool
   1320  1.1  mrg not_size_aligned (tree exp)
   1321  1.1  mrg {
   1322  1.1  mrg   if (!tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (exp))))
   1323  1.1  mrg     return true;
   1324  1.1  mrg 
   1325  1.1  mrg   return (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (exp)))
   1326  1.1  mrg 	  > get_object_alignment (exp));
   1327  1.1  mrg }
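
                   /* E.g. (illustrative): a double field known to be only 4-byte aligned
                      has a TYPE_SIZE of 64 bits but a get_object_alignment of 32 bits,
                      so it is not size-aligned and is treated as packed below.  */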
   1328  1.1  mrg 
   1329  1.1  mrg /* Function vector_alignment_reachable_p
   1330  1.1  mrg 
   1331  1.1  mrg    Return true if vector alignment for DR_INFO is reachable by peeling
   1332  1.1  mrg    a few loop iterations.  Return false otherwise.  */
   1333  1.1  mrg 
   1334  1.1  mrg static bool
   1335  1.1  mrg vector_alignment_reachable_p (dr_vec_info *dr_info)
   1336  1.1  mrg {
   1337  1.1  mrg   stmt_vec_info stmt_info = dr_info->stmt;
   1338  1.1  mrg   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   1339  1.1  mrg 
   1340  1.1  mrg   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
   1341  1.1  mrg     {
    1342  1.1  mrg       /* For interleaved accesses we peel only if the number of iterations
    1343  1.1  mrg 	 in the prolog loop (VF - misalignment) is a multiple of the
    1344  1.1  mrg 	 number of interleaved accesses.  */
   1345  1.1  mrg       int elem_size, mis_in_elements;
   1346  1.1  mrg 
   1347  1.1  mrg       /* FORNOW: handle only known alignment.  */
   1348  1.1  mrg       if (!known_alignment_for_access_p (dr_info, vectype))
   1349  1.1  mrg 	return false;
   1350  1.1  mrg 
   1351  1.1  mrg       poly_uint64 nelements = TYPE_VECTOR_SUBPARTS (vectype);
   1352  1.1  mrg       poly_uint64 vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
   1353  1.1  mrg       elem_size = vector_element_size (vector_size, nelements);
   1354  1.1  mrg       mis_in_elements = dr_misalignment (dr_info, vectype) / elem_size;
   1355  1.1  mrg 
   1356  1.1  mrg       if (!multiple_p (nelements - mis_in_elements, DR_GROUP_SIZE (stmt_info)))
   1357  1.1  mrg 	return false;
   1358  1.1  mrg     }
   1359  1.1  mrg 
    1360  1.1  mrg   /* If the misalignment is known at compile time then allow peeling
    1361  1.1  mrg      only if natural alignment is reachable through peeling.  */
   1362  1.1  mrg   if (known_alignment_for_access_p (dr_info, vectype)
   1363  1.1  mrg       && !aligned_access_p (dr_info, vectype))
   1364  1.1  mrg     {
   1365  1.1  mrg       HOST_WIDE_INT elmsize =
   1366  1.1  mrg 		int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
   1367  1.1  mrg       if (dump_enabled_p ())
   1368  1.1  mrg 	{
   1369  1.1  mrg 	  dump_printf_loc (MSG_NOTE, vect_location,
   1370  1.1  mrg 	                   "data size = %wd. misalignment = %d.\n", elmsize,
   1371  1.1  mrg 			   dr_misalignment (dr_info, vectype));
   1372  1.1  mrg 	}
   1373  1.1  mrg       if (dr_misalignment (dr_info, vectype) % elmsize)
   1374  1.1  mrg 	{
   1375  1.1  mrg 	  if (dump_enabled_p ())
   1376  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   1377  1.1  mrg 	                     "data size does not divide the misalignment.\n");
   1378  1.1  mrg 	  return false;
   1379  1.1  mrg 	}
   1380  1.1  mrg     }
   1381  1.1  mrg 
   1382  1.1  mrg   if (!known_alignment_for_access_p (dr_info, vectype))
   1383  1.1  mrg     {
   1384  1.1  mrg       tree type = TREE_TYPE (DR_REF (dr_info->dr));
   1385  1.1  mrg       bool is_packed = not_size_aligned (DR_REF (dr_info->dr));
   1386  1.1  mrg       if (dump_enabled_p ())
   1387  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   1388  1.1  mrg 	                 "Unknown misalignment, %snaturally aligned\n",
   1389  1.1  mrg 			 is_packed ? "not " : "");
   1390  1.1  mrg       return targetm.vectorize.vector_alignment_reachable (type, is_packed);
   1391  1.1  mrg     }
   1392  1.1  mrg 
   1393  1.1  mrg   return true;
   1394  1.1  mrg }
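
                   /* Worked example (illustrative): peeling advances the start address
                      in whole elements, so an access to 4-byte elements misaligned by
                      3 bytes can never be made aligned (3 % 4 != 0), while a
                      misalignment of 8 bytes is removed by peeling two iterations.  */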
   1395  1.1  mrg 
   1396  1.1  mrg 
   1397  1.1  mrg /* Calculate the cost of the memory access represented by DR_INFO.  */
   1398  1.1  mrg 
   1399  1.1  mrg static void
   1400  1.1  mrg vect_get_data_access_cost (vec_info *vinfo, dr_vec_info *dr_info,
   1401  1.1  mrg 			   dr_alignment_support alignment_support_scheme,
   1402  1.1  mrg 			   int misalignment,
   1403  1.1  mrg 			   unsigned int *inside_cost,
   1404  1.1  mrg                            unsigned int *outside_cost,
   1405  1.1  mrg 			   stmt_vector_for_cost *body_cost_vec,
   1406  1.1  mrg 			   stmt_vector_for_cost *prologue_cost_vec)
   1407  1.1  mrg {
   1408  1.1  mrg   stmt_vec_info stmt_info = dr_info->stmt;
   1409  1.1  mrg   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   1410  1.1  mrg   int ncopies;
   1411  1.1  mrg 
   1412  1.1  mrg   if (PURE_SLP_STMT (stmt_info))
   1413  1.1  mrg     ncopies = 1;
   1414  1.1  mrg   else
   1415  1.1  mrg     ncopies = vect_get_num_copies (loop_vinfo, STMT_VINFO_VECTYPE (stmt_info));
   1416  1.1  mrg 
   1417  1.1  mrg   if (DR_IS_READ (dr_info->dr))
   1418  1.1  mrg     vect_get_load_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
   1419  1.1  mrg 			misalignment, true, inside_cost,
   1420  1.1  mrg 			outside_cost, prologue_cost_vec, body_cost_vec, false);
   1421  1.1  mrg   else
    1422  1.1  mrg     vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
   1423  1.1  mrg 			 misalignment, inside_cost, body_cost_vec);
   1424  1.1  mrg 
   1425  1.1  mrg   if (dump_enabled_p ())
   1426  1.1  mrg     dump_printf_loc (MSG_NOTE, vect_location,
   1427  1.1  mrg                      "vect_get_data_access_cost: inside_cost = %d, "
   1428  1.1  mrg                      "outside_cost = %d.\n", *inside_cost, *outside_cost);
   1429  1.1  mrg }
   1430  1.1  mrg 
   1431  1.1  mrg 
   1432  1.1  mrg typedef struct _vect_peel_info
   1433  1.1  mrg {
   1434  1.1  mrg   dr_vec_info *dr_info;
   1435  1.1  mrg   int npeel;
   1436  1.1  mrg   unsigned int count;
   1437  1.1  mrg } *vect_peel_info;
   1438  1.1  mrg 
   1439  1.1  mrg typedef struct _vect_peel_extended_info
   1440  1.1  mrg {
   1441  1.1  mrg   vec_info *vinfo;
   1442  1.1  mrg   struct _vect_peel_info peel_info;
   1443  1.1  mrg   unsigned int inside_cost;
   1444  1.1  mrg   unsigned int outside_cost;
   1445  1.1  mrg } *vect_peel_extended_info;
   1446  1.1  mrg 
   1447  1.1  mrg 
   1448  1.1  mrg /* Peeling hashtable helpers.  */
   1449  1.1  mrg 
   1450  1.1  mrg struct peel_info_hasher : free_ptr_hash <_vect_peel_info>
   1451  1.1  mrg {
   1452  1.1  mrg   static inline hashval_t hash (const _vect_peel_info *);
   1453  1.1  mrg   static inline bool equal (const _vect_peel_info *, const _vect_peel_info *);
   1454  1.1  mrg };
   1455  1.1  mrg 
   1456  1.1  mrg inline hashval_t
   1457  1.1  mrg peel_info_hasher::hash (const _vect_peel_info *peel_info)
   1458  1.1  mrg {
   1459  1.1  mrg   return (hashval_t) peel_info->npeel;
   1460  1.1  mrg }
   1461  1.1  mrg 
   1462  1.1  mrg inline bool
   1463  1.1  mrg peel_info_hasher::equal (const _vect_peel_info *a, const _vect_peel_info *b)
   1464  1.1  mrg {
   1465  1.1  mrg   return (a->npeel == b->npeel);
   1466  1.1  mrg }
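
                   /* Illustrative sketch (not part of this file): the table behaves
                      like a map keyed purely on the peel amount, counting how many DRs
                      ask for each NPEEL.  The snippet uses the standard library and
                      made-up names for exposition only.  */
                   #if 0
                   #include <unordered_map>

                   struct example_peel_entry { unsigned count = 0; };

                   static void
                   example_record_peeling (std::unordered_map<int, example_peel_entry>
                                             &htab, int npeel)
                   {
                     /* The first insertion value-initializes the entry; later calls
                        with the same NPEEL only bump the count, mirroring
                        vect_peeling_hash_insert.  */
                     htab[npeel].count++;
                   }
                   #endif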
   1467  1.1  mrg 
   1468  1.1  mrg 
   1469  1.1  mrg /* Insert DR_INFO into peeling hash table with NPEEL as key.  */
   1470  1.1  mrg 
   1471  1.1  mrg static void
   1472  1.1  mrg vect_peeling_hash_insert (hash_table<peel_info_hasher> *peeling_htab,
   1473  1.1  mrg 			  loop_vec_info loop_vinfo, dr_vec_info *dr_info,
   1474  1.1  mrg 			  int npeel, bool supportable_if_not_aligned)
   1475  1.1  mrg {
   1476  1.1  mrg   struct _vect_peel_info elem, *slot;
   1477  1.1  mrg   _vect_peel_info **new_slot;
   1478  1.1  mrg 
   1479  1.1  mrg   elem.npeel = npeel;
   1480  1.1  mrg   slot = peeling_htab->find (&elem);
   1481  1.1  mrg   if (slot)
   1482  1.1  mrg     slot->count++;
   1483  1.1  mrg   else
   1484  1.1  mrg     {
   1485  1.1  mrg       slot = XNEW (struct _vect_peel_info);
   1486  1.1  mrg       slot->npeel = npeel;
   1487  1.1  mrg       slot->dr_info = dr_info;
   1488  1.1  mrg       slot->count = 1;
   1489  1.1  mrg       new_slot = peeling_htab->find_slot (slot, INSERT);
   1490  1.1  mrg       *new_slot = slot;
   1491  1.1  mrg     }
   1492  1.1  mrg 
   1493  1.1  mrg   /* If this DR is not supported with unknown misalignment then bias
   1494  1.1  mrg      this slot when the cost model is disabled.  */
   1495  1.1  mrg   if (!supportable_if_not_aligned
   1496  1.1  mrg       && unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
   1497  1.1  mrg     slot->count += VECT_MAX_COST;
   1498  1.1  mrg }
   1499  1.1  mrg 
   1500  1.1  mrg 
    1501  1.1  mrg /* Traverse the peeling hash table to find the peeling option that aligns
    1502  1.1  mrg    the maximum number of data accesses.  */
   1503  1.1  mrg 
   1504  1.1  mrg int
   1505  1.1  mrg vect_peeling_hash_get_most_frequent (_vect_peel_info **slot,
   1506  1.1  mrg 				     _vect_peel_extended_info *max)
   1507  1.1  mrg {
   1508  1.1  mrg   vect_peel_info elem = *slot;
   1509  1.1  mrg 
   1510  1.1  mrg   if (elem->count > max->peel_info.count
   1511  1.1  mrg       || (elem->count == max->peel_info.count
   1512  1.1  mrg           && max->peel_info.npeel > elem->npeel))
   1513  1.1  mrg     {
   1514  1.1  mrg       max->peel_info.npeel = elem->npeel;
   1515  1.1  mrg       max->peel_info.count = elem->count;
   1516  1.1  mrg       max->peel_info.dr_info = elem->dr_info;
   1517  1.1  mrg     }
   1518  1.1  mrg 
   1519  1.1  mrg   return 1;
   1520  1.1  mrg }
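
                   /* Illustrative sketch (hypothetical helper, for exposition only):
                      the traversal above keeps the entry with the highest count and
                      breaks ties in favour of fewer peeled iterations.  */
                   #if 0
                   static bool
                   example_better_peeling (unsigned count, int npeel,
                                           unsigned best_count, int best_npeel)
                   {
                     return count > best_count
                            || (count == best_count && npeel < best_npeel);
                   }
                   #endif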
   1521  1.1  mrg 
    1522  1.1  mrg /* Get the costs of peeling NPEEL iterations for LOOP_VINFO, checking
    1523  1.1  mrg    data access costs for all data refs.  DR0_INFO, if non-NULL, is the
    1524  1.1  mrg    data ref that is made aligned by the peel; its misalignment (and that
    1525  1.1  mrg    of refs whose alignment follows from it) is costed as zero even when
    1526  1.1  mrg    NPEEL is only a compile-time estimate.  */
   1526  1.1  mrg 
   1527  1.1  mrg static void
   1528  1.1  mrg vect_get_peeling_costs_all_drs (loop_vec_info loop_vinfo,
   1529  1.1  mrg 				dr_vec_info *dr0_info,
   1530  1.1  mrg 				unsigned int *inside_cost,
   1531  1.1  mrg 				unsigned int *outside_cost,
   1532  1.1  mrg 				stmt_vector_for_cost *body_cost_vec,
   1533  1.1  mrg 				stmt_vector_for_cost *prologue_cost_vec,
   1534  1.1  mrg 				unsigned int npeel)
   1535  1.1  mrg {
   1536  1.1  mrg   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
   1537  1.1  mrg 
   1538  1.1  mrg   bool dr0_alignment_known_p
   1539  1.1  mrg     = (dr0_info
   1540  1.1  mrg        && known_alignment_for_access_p (dr0_info,
   1541  1.1  mrg 					STMT_VINFO_VECTYPE (dr0_info->stmt)));
   1542  1.1  mrg 
   1543  1.1  mrg   for (data_reference *dr : datarefs)
   1544  1.1  mrg     {
   1545  1.1  mrg       dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
   1546  1.1  mrg       if (!vect_relevant_for_alignment_p (dr_info))
   1547  1.1  mrg 	continue;
   1548  1.1  mrg 
   1549  1.1  mrg       tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
   1550  1.1  mrg       dr_alignment_support alignment_support_scheme;
   1551  1.1  mrg       int misalignment;
   1552  1.1  mrg       unsigned HOST_WIDE_INT alignment;
   1553  1.1  mrg 
   1554  1.1  mrg       bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
   1555  1.1  mrg 					    size_zero_node) < 0;
   1556  1.1  mrg       poly_int64 off = 0;
   1557  1.1  mrg       if (negative)
   1558  1.1  mrg 	off = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
   1559  1.1  mrg 	       * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
   1560  1.1  mrg 
   1561  1.1  mrg       if (npeel == 0)
   1562  1.1  mrg 	misalignment = dr_misalignment (dr_info, vectype, off);
   1563  1.1  mrg       else if (dr_info == dr0_info
   1564  1.1  mrg 	       || vect_dr_aligned_if_peeled_dr_is (dr_info, dr0_info))
   1565  1.1  mrg 	misalignment = 0;
   1566  1.1  mrg       else if (!dr0_alignment_known_p
   1567  1.1  mrg 	       || !known_alignment_for_access_p (dr_info, vectype)
   1568  1.1  mrg 	       || !DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment))
   1569  1.1  mrg 	misalignment = DR_MISALIGNMENT_UNKNOWN;
   1570  1.1  mrg       else
   1571  1.1  mrg 	{
   1572  1.1  mrg 	  misalignment = dr_misalignment (dr_info, vectype, off);
   1573  1.1  mrg 	  misalignment += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
   1574  1.1  mrg 	  misalignment &= alignment - 1;
   1575  1.1  mrg 	}
   1576  1.1  mrg       alignment_support_scheme
   1577  1.1  mrg 	= vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
   1578  1.1  mrg 					 misalignment);
   1579  1.1  mrg 
   1580  1.1  mrg       vect_get_data_access_cost (loop_vinfo, dr_info,
   1581  1.1  mrg 				 alignment_support_scheme, misalignment,
   1582  1.1  mrg 				 inside_cost, outside_cost,
   1583  1.1  mrg 				 body_cost_vec, prologue_cost_vec);
   1584  1.1  mrg     }
   1585  1.1  mrg }
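
                   /* For instance (illustrative): with a 16-byte target alignment, a
                      ref misaligned by 4 bytes whose step is 4 bytes is costed with
                      misalignment (4 + npeel * 4) & 15 -- aligned when npeel % 4 == 3 --
                      while refs whose alignment cannot be tracked across the peel are
                      costed with DR_MISALIGNMENT_UNKNOWN.  */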
   1586  1.1  mrg 
    1587  1.1  mrg /* Traverse the peeling hash table and calculate the cost of each peeling
    1588  1.1  mrg    option.  Find the one with the lowest cost.  */
   1589  1.1  mrg 
   1590  1.1  mrg int
   1591  1.1  mrg vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
   1592  1.1  mrg 				   _vect_peel_extended_info *min)
   1593  1.1  mrg {
   1594  1.1  mrg   vect_peel_info elem = *slot;
   1595  1.1  mrg   int dummy;
   1596  1.1  mrg   unsigned int inside_cost = 0, outside_cost = 0;
   1597  1.1  mrg   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (min->vinfo);
   1598  1.1  mrg   stmt_vector_for_cost prologue_cost_vec, body_cost_vec,
   1599  1.1  mrg 		       epilogue_cost_vec;
   1600  1.1  mrg 
   1601  1.1  mrg   prologue_cost_vec.create (2);
   1602  1.1  mrg   body_cost_vec.create (2);
   1603  1.1  mrg   epilogue_cost_vec.create (2);
   1604  1.1  mrg 
   1605  1.1  mrg   vect_get_peeling_costs_all_drs (loop_vinfo, elem->dr_info, &inside_cost,
   1606  1.1  mrg 				  &outside_cost, &body_cost_vec,
   1607  1.1  mrg 				  &prologue_cost_vec, elem->npeel);
   1608  1.1  mrg 
   1609  1.1  mrg   body_cost_vec.release ();
   1610  1.1  mrg 
   1611  1.1  mrg   outside_cost += vect_get_known_peeling_cost
   1612  1.1  mrg     (loop_vinfo, elem->npeel, &dummy,
   1613  1.1  mrg      &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
   1614  1.1  mrg      &prologue_cost_vec, &epilogue_cost_vec);
   1615  1.1  mrg 
   1616  1.1  mrg   /* Prologue and epilogue costs are added to the target model later.
   1617  1.1  mrg      These costs depend only on the scalar iteration cost, the
   1618  1.1  mrg      number of peeling iterations finally chosen, and the number of
   1619  1.1  mrg      misaligned statements.  So discard the information found here.  */
   1620  1.1  mrg   prologue_cost_vec.release ();
   1621  1.1  mrg   epilogue_cost_vec.release ();
   1622  1.1  mrg 
   1623  1.1  mrg   if (inside_cost < min->inside_cost
   1624  1.1  mrg       || (inside_cost == min->inside_cost
   1625  1.1  mrg 	  && outside_cost < min->outside_cost))
   1626  1.1  mrg     {
   1627  1.1  mrg       min->inside_cost = inside_cost;
   1628  1.1  mrg       min->outside_cost = outside_cost;
   1629  1.1  mrg       min->peel_info.dr_info = elem->dr_info;
   1630  1.1  mrg       min->peel_info.npeel = elem->npeel;
   1631  1.1  mrg       min->peel_info.count = elem->count;
   1632  1.1  mrg     }
   1633  1.1  mrg 
   1634  1.1  mrg   return 1;
   1635  1.1  mrg }
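
                   /* The comparison above is lexicographic on
                      (inside_cost, outside_cost): the per-iteration inside cost
                      dominates, and the one-off outside cost only breaks ties.  */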
   1636  1.1  mrg 
   1637  1.1  mrg 
   1638  1.1  mrg /* Choose best peeling option by traversing peeling hash table and either
   1639  1.1  mrg    choosing an option with the lowest cost (if cost model is enabled) or the
   1640  1.1  mrg    option that aligns as many accesses as possible.  */
   1641  1.1  mrg 
   1642  1.1  mrg static struct _vect_peel_extended_info
   1643  1.1  mrg vect_peeling_hash_choose_best_peeling (hash_table<peel_info_hasher> *peeling_htab,
   1644  1.1  mrg 				       loop_vec_info loop_vinfo)
   1645  1.1  mrg {
   1646  1.1  mrg    struct _vect_peel_extended_info res;
   1647  1.1  mrg 
   1648  1.1  mrg    res.peel_info.dr_info = NULL;
   1649  1.1  mrg    res.vinfo = loop_vinfo;
   1650  1.1  mrg 
   1651  1.1  mrg    if (!unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
   1652  1.1  mrg      {
   1653  1.1  mrg        res.inside_cost = INT_MAX;
   1654  1.1  mrg        res.outside_cost = INT_MAX;
   1655  1.1  mrg        peeling_htab->traverse <_vect_peel_extended_info *,
   1656  1.1  mrg 	   		       vect_peeling_hash_get_lowest_cost> (&res);
   1657  1.1  mrg      }
   1658  1.1  mrg    else
   1659  1.1  mrg      {
   1660  1.1  mrg        res.peel_info.count = 0;
   1661  1.1  mrg        peeling_htab->traverse <_vect_peel_extended_info *,
   1662  1.1  mrg 	   		       vect_peeling_hash_get_most_frequent> (&res);
   1663  1.1  mrg        res.inside_cost = 0;
   1664  1.1  mrg        res.outside_cost = 0;
   1665  1.1  mrg      }
   1666  1.1  mrg 
   1667  1.1  mrg    return res;
   1668  1.1  mrg }
   1669  1.1  mrg 
   1670  1.1  mrg /* Return true if the new peeling NPEEL is supported.  */
   1671  1.1  mrg 
   1672  1.1  mrg static bool
   1673  1.1  mrg vect_peeling_supportable (loop_vec_info loop_vinfo, dr_vec_info *dr0_info,
   1674  1.1  mrg 			  unsigned npeel)
   1675  1.1  mrg {
   1676  1.1  mrg   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
   1677  1.1  mrg   enum dr_alignment_support supportable_dr_alignment;
   1678  1.1  mrg 
   1679  1.1  mrg   bool dr0_alignment_known_p
   1680  1.1  mrg     = known_alignment_for_access_p (dr0_info,
   1681  1.1  mrg 				    STMT_VINFO_VECTYPE (dr0_info->stmt));
   1682  1.1  mrg 
   1683  1.1  mrg   /* Ensure that all data refs can be vectorized after the peel.  */
   1684  1.1  mrg   for (data_reference *dr : datarefs)
   1685  1.1  mrg     {
   1686  1.1  mrg       if (dr == dr0_info->dr)
   1687  1.1  mrg 	continue;
   1688  1.1  mrg 
   1689  1.1  mrg       dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
   1690  1.1  mrg       if (!vect_relevant_for_alignment_p (dr_info)
   1691  1.1  mrg 	  || vect_dr_aligned_if_peeled_dr_is (dr_info, dr0_info))
   1692  1.1  mrg 	continue;
   1693  1.1  mrg 
   1694  1.1  mrg       tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
   1695  1.1  mrg       int misalignment;
   1696  1.1  mrg       unsigned HOST_WIDE_INT alignment;
   1697  1.1  mrg       if (!dr0_alignment_known_p
   1698  1.1  mrg 	  || !known_alignment_for_access_p (dr_info, vectype)
   1699  1.1  mrg 	  || !DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment))
   1700  1.1  mrg 	misalignment = DR_MISALIGNMENT_UNKNOWN;
   1701  1.1  mrg       else
   1702  1.1  mrg 	{
   1703  1.1  mrg 	  misalignment = dr_misalignment (dr_info, vectype);
   1704  1.1  mrg 	  misalignment += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
   1705  1.1  mrg 	  misalignment &= alignment - 1;
   1706  1.1  mrg 	}
   1707  1.1  mrg       supportable_dr_alignment
   1708  1.1  mrg 	= vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
   1709  1.1  mrg 					 misalignment);
   1710  1.1  mrg       if (supportable_dr_alignment == dr_unaligned_unsupported)
   1711  1.1  mrg 	return false;
   1712  1.1  mrg     }
   1713  1.1  mrg 
   1714  1.1  mrg   return true;
   1715  1.1  mrg }
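
                   /* The recomputation above applies the same
                      (misal + npeel * step) mod alignment rule sketched earlier; the
                      peel is rejected as soon as one DR would be both misaligned and
                      unsupported in that state.  */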
   1716  1.1  mrg 
   1717  1.1  mrg /* Compare two data-references DRA and DRB to group them into chunks
   1718  1.1  mrg    with related alignment.  */
   1719  1.1  mrg 
   1720  1.1  mrg static int
   1721  1.1  mrg dr_align_group_sort_cmp (const void *dra_, const void *drb_)
   1722  1.1  mrg {
   1723  1.1  mrg   data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_);
   1724  1.1  mrg   data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_);
   1725  1.1  mrg   int cmp;
   1726  1.1  mrg 
   1727  1.1  mrg   /* Stabilize sort.  */
   1728  1.1  mrg   if (dra == drb)
   1729  1.1  mrg     return 0;
   1730  1.1  mrg 
   1731  1.1  mrg   /* Ordering of DRs according to base.  */
   1732  1.1  mrg   cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra),
   1733  1.1  mrg 			       DR_BASE_ADDRESS (drb));
   1734  1.1  mrg   if (cmp != 0)
   1735  1.1  mrg     return cmp;
   1736  1.1  mrg 
   1737  1.1  mrg   /* And according to DR_OFFSET.  */
   1738  1.1  mrg   cmp = data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
   1739  1.1  mrg   if (cmp != 0)
   1740  1.1  mrg     return cmp;
   1741  1.1  mrg 
    1742  1.1  mrg   /* And according to DR_STEP.  */
   1743  1.1  mrg   cmp = data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb));
   1744  1.1  mrg   if (cmp != 0)
   1745  1.1  mrg     return cmp;
   1746  1.1  mrg 
    1747  1.1  mrg   /* Then sort by DR_INIT.  In case of identical DRs sort by stmt UID.  */
   1748  1.1  mrg   cmp = data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb));
   1749  1.1  mrg   if (cmp == 0)
   1750  1.1  mrg     return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
   1751  1.1  mrg   return cmp;
   1752  1.1  mrg }
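
                   /* The net effect is a lexicographic sort on (DR_BASE_ADDRESS,
                      DR_OFFSET, DR_STEP, DR_INIT, stmt UID), so refs that differ only
                      in DR_INIT -- the candidates for related alignment -- end up
                      adjacent in the sorted vector.  */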
   1753  1.1  mrg 
   1754  1.1  mrg /* Function vect_enhance_data_refs_alignment
   1755  1.1  mrg 
   1756  1.1  mrg    This pass will use loop versioning and loop peeling in order to enhance
   1757  1.1  mrg    the alignment of data references in the loop.
   1758  1.1  mrg 
    1759  1.1  mrg    FOR NOW: we assume that whatever versioning/peeling takes place, only the
    1760  1.1  mrg    original loop is to be vectorized.  Any other loops that are created by
    1761  1.1  mrg    the transformations performed in this pass are not supposed to be
    1762  1.1  mrg    vectorized.  This restriction will be relaxed.
   1763  1.1  mrg 
    1764  1.1  mrg    This pass will require a cost model to guide it in deciding whether to
    1765  1.1  mrg    apply peeling, versioning, or a combination of the two.  For example,
    1766  1.1  mrg    the scheme that Intel uses when given a loop with several memory
    1767  1.1  mrg    accesses is as follows: choose one memory access ('p') whose alignment
    1768  1.1  mrg    is to be forced by peeling.  Then, either (1) generate a loop in which
    1769  1.1  mrg    'p' is aligned and all other accesses are not necessarily aligned, or
    1770  1.1  mrg    (2) use loop versioning to generate one loop in which all accesses are
    1771  1.1  mrg    aligned, and another loop in which only 'p' is necessarily aligned.
    1772  1.1  mrg 
    1773  1.1  mrg    ("Automatic Intra-Register Vectorization for the Intel Architecture",
    1774  1.1  mrg    Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
    1775  1.1  mrg    Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
   1776  1.1  mrg 
   1777  1.1  mrg    Devising a cost model is the most critical aspect of this work.  It will
   1778  1.1  mrg    guide us on which access to peel for, whether to use loop versioning, how
   1779  1.1  mrg    many versions to create, etc.  The cost model will probably consist of
   1780  1.1  mrg    generic considerations as well as target specific considerations (on
   1781  1.1  mrg    powerpc for example, misaligned stores are more painful than misaligned
   1782  1.1  mrg    loads).
   1783  1.1  mrg 
   1784  1.1  mrg    Here are the general steps involved in alignment enhancements:
   1785  1.1  mrg 
   1786  1.1  mrg      -- original loop, before alignment analysis:
   1787  1.1  mrg 	for (i=0; i<N; i++){
   1788  1.1  mrg 	  x = q[i];			# DR_MISALIGNMENT(q) = unknown
   1789  1.1  mrg 	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
   1790  1.1  mrg 	}
   1791  1.1  mrg 
   1792  1.1  mrg      -- After vect_compute_data_refs_alignment:
   1793  1.1  mrg 	for (i=0; i<N; i++){
   1794  1.1  mrg 	  x = q[i];			# DR_MISALIGNMENT(q) = 3
   1795  1.1  mrg 	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
   1796  1.1  mrg 	}
   1797  1.1  mrg 
   1798  1.1  mrg      -- Possibility 1: we do loop versioning:
   1799  1.1  mrg      if (p is aligned) {
   1800  1.1  mrg 	for (i=0; i<N; i++){	# loop 1A
   1801  1.1  mrg 	  x = q[i];			# DR_MISALIGNMENT(q) = 3
   1802  1.1  mrg 	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
   1803  1.1  mrg 	}
   1804  1.1  mrg      }
   1805  1.1  mrg      else {
   1806  1.1  mrg 	for (i=0; i<N; i++){	# loop 1B
   1807  1.1  mrg 	  x = q[i];			# DR_MISALIGNMENT(q) = 3
   1808  1.1  mrg 	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
   1809  1.1  mrg 	}
   1810  1.1  mrg      }
   1811  1.1  mrg 
   1812  1.1  mrg      -- Possibility 2: we do loop peeling:
   1813  1.1  mrg      for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
   1814  1.1  mrg 	x = q[i];
   1815  1.1  mrg 	p[i] = y;
   1816  1.1  mrg      }
   1817  1.1  mrg      for (i = 3; i < N; i++){	# loop 2A
   1818  1.1  mrg 	x = q[i];			# DR_MISALIGNMENT(q) = 0
   1819  1.1  mrg 	p[i] = y;			# DR_MISALIGNMENT(p) = unknown
   1820  1.1  mrg      }
   1821  1.1  mrg 
   1822  1.1  mrg      -- Possibility 3: combination of loop peeling and versioning:
   1823  1.1  mrg      for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
   1824  1.1  mrg 	x = q[i];
   1825  1.1  mrg 	p[i] = y;
   1826  1.1  mrg      }
   1827  1.1  mrg      if (p is aligned) {
   1828  1.1  mrg 	for (i = 3; i<N; i++){	# loop 3A
   1829  1.1  mrg 	  x = q[i];			# DR_MISALIGNMENT(q) = 0
   1830  1.1  mrg 	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
   1831  1.1  mrg 	}
   1832  1.1  mrg      }
   1833  1.1  mrg      else {
   1834  1.1  mrg 	for (i = 3; i<N; i++){	# loop 3B
   1835  1.1  mrg 	  x = q[i];			# DR_MISALIGNMENT(q) = 0
   1836  1.1  mrg 	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
   1837  1.1  mrg 	}
   1838  1.1  mrg      }
   1839  1.1  mrg 
   1840  1.1  mrg      These loops are later passed to loop_transform to be vectorized.  The
   1841  1.1  mrg      vectorizer will use the alignment information to guide the transformation
   1842  1.1  mrg      (whether to generate regular loads/stores, or with special handling for
   1843  1.1  mrg      misalignment).  */
   1844  1.1  mrg 
   1845  1.1  mrg opt_result
   1846  1.1  mrg vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
   1847  1.1  mrg {
   1848  1.1  mrg   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   1849  1.1  mrg   dr_vec_info *first_store = NULL;
   1850  1.1  mrg   dr_vec_info *dr0_info = NULL;
   1851  1.1  mrg   struct data_reference *dr;
   1852  1.1  mrg   unsigned int i;
   1853  1.1  mrg   bool do_peeling = false;
   1854  1.1  mrg   bool do_versioning = false;
   1855  1.1  mrg   unsigned int npeel = 0;
   1856  1.1  mrg   bool one_misalignment_known = false;
   1857  1.1  mrg   bool one_misalignment_unknown = false;
   1858  1.1  mrg   bool one_dr_unsupportable = false;
   1859  1.1  mrg   dr_vec_info *unsupportable_dr_info = NULL;
   1860  1.1  mrg   unsigned int dr0_same_align_drs = 0, first_store_same_align_drs = 0;
   1861  1.1  mrg   hash_table<peel_info_hasher> peeling_htab (1);
   1862  1.1  mrg 
   1863  1.1  mrg   DUMP_VECT_SCOPE ("vect_enhance_data_refs_alignment");
   1864  1.1  mrg 
   1865  1.1  mrg   /* Reset data so we can safely be called multiple times.  */
   1866  1.1  mrg   LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
   1867  1.1  mrg   LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = 0;
   1868  1.1  mrg 
   1869  1.1  mrg   if (LOOP_VINFO_DATAREFS (loop_vinfo).is_empty ())
   1870  1.1  mrg     return opt_result::success ();
   1871  1.1  mrg 
   1872  1.1  mrg   /* Sort the vector of datarefs so DRs that have the same or dependent
   1873  1.1  mrg      alignment are next to each other.  */
   1874  1.1  mrg   auto_vec<data_reference_p> datarefs
   1875  1.1  mrg     = LOOP_VINFO_DATAREFS (loop_vinfo).copy ();
   1876  1.1  mrg   datarefs.qsort (dr_align_group_sort_cmp);
   1877  1.1  mrg 
    1878  1.1  mrg   /* For each dataref, compute how many other DRs become aligned when
    1879  1.1  mrg      we peel so that it becomes aligned.  */
   1880  1.1  mrg   auto_vec<unsigned> n_same_align_refs (datarefs.length ());
   1881  1.1  mrg   n_same_align_refs.quick_grow_cleared (datarefs.length ());
   1882  1.1  mrg   unsigned i0;
   1883  1.1  mrg   for (i0 = 0; i0 < datarefs.length (); ++i0)
   1884  1.1  mrg     if (DR_BASE_ADDRESS (datarefs[i0]))
   1885  1.1  mrg       break;
   1886  1.1  mrg   for (i = i0 + 1; i <= datarefs.length (); ++i)
   1887  1.1  mrg     {
   1888  1.1  mrg       if (i == datarefs.length ()
   1889  1.1  mrg 	  || !operand_equal_p (DR_BASE_ADDRESS (datarefs[i0]),
   1890  1.1  mrg 			       DR_BASE_ADDRESS (datarefs[i]), 0)
   1891  1.1  mrg 	  || !operand_equal_p (DR_OFFSET (datarefs[i0]),
   1892  1.1  mrg 			       DR_OFFSET (datarefs[i]), 0)
   1893  1.1  mrg 	  || !operand_equal_p (DR_STEP (datarefs[i0]),
   1894  1.1  mrg 			       DR_STEP (datarefs[i]), 0))
   1895  1.1  mrg 	{
    1896  1.1  mrg 	  /* The refs in the subgroup [i0, i-1] now differ only in DR_INIT
    1897  1.1  mrg 	     and possibly DR_TARGET_ALIGNMENT.  Still the whole subgroup will
    1898  1.1  mrg 	     get known misalignment if we align one of the refs with the
    1899  1.1  mrg 	     largest DR_TARGET_ALIGNMENT.  */
   1900  1.1  mrg 	  for (unsigned j = i0; j < i; ++j)
   1901  1.1  mrg 	    {
   1902  1.1  mrg 	      dr_vec_info *dr_infoj = loop_vinfo->lookup_dr (datarefs[j]);
   1903  1.1  mrg 	      for (unsigned k = i0; k < i; ++k)
   1904  1.1  mrg 		{
   1905  1.1  mrg 		  if (k == j)
   1906  1.1  mrg 		    continue;
   1907  1.1  mrg 		  dr_vec_info *dr_infok = loop_vinfo->lookup_dr (datarefs[k]);
   1908  1.1  mrg 		  if (vect_dr_aligned_if_related_peeled_dr_is (dr_infok,
   1909  1.1  mrg 							       dr_infoj))
   1910  1.1  mrg 		    n_same_align_refs[j]++;
   1911  1.1  mrg 		}
   1912  1.1  mrg 	    }
   1913  1.1  mrg 	  i0 = i;
   1914  1.1  mrg 	}
   1915  1.1  mrg     }
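
                     /* E.g. (illustrative): for 4-byte accesses a[i], a[i+1] and a[i+4]
                        with a 16-byte target alignment, the DR_INITs are 0, 4 and 16;
                        a peel that aligns a[i] also aligns a[i+4] (difference 16) but
                        not a[i+1] (difference 4), so a[i] and a[i+4] each have one
                        same-alignment ref counted here and a[i+1] has none.  */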
   1916  1.1  mrg 
   1917  1.1  mrg   /* While cost model enhancements are expected in the future, the high level
   1918  1.1  mrg      view of the code at this time is as follows:
   1919  1.1  mrg 
   1920  1.1  mrg      A) If there is a misaligned access then see if peeling to align
   1921  1.1  mrg         this access can make all data references satisfy
   1922  1.1  mrg         vect_supportable_dr_alignment.  If so, update data structures
   1923  1.1  mrg         as needed and return true.
   1924  1.1  mrg 
   1925  1.1  mrg      B) If peeling wasn't possible and there is a data reference with an
   1926  1.1  mrg         unknown misalignment that does not satisfy vect_supportable_dr_alignment
   1927  1.1  mrg         then see if loop versioning checks can be used to make all data
   1928  1.1  mrg         references satisfy vect_supportable_dr_alignment.  If so, update
   1929  1.1  mrg         data structures as needed and return true.
   1930  1.1  mrg 
    1931  1.1  mrg      C) If neither peeling nor versioning was successful then return false if
   1932  1.1  mrg         any data reference does not satisfy vect_supportable_dr_alignment.
   1933  1.1  mrg 
   1934  1.1  mrg      D) Return true (all data references satisfy vect_supportable_dr_alignment).
   1935  1.1  mrg 
   1936  1.1  mrg      Note, Possibility 3 above (which is peeling and versioning together) is not
   1937  1.1  mrg      being done at this time.  */
   1938  1.1  mrg 
   1939  1.1  mrg   /* (1) Peeling to force alignment.  */
   1940  1.1  mrg 
   1941  1.1  mrg   /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
   1942  1.1  mrg      Considerations:
   1943  1.1  mrg      + How many accesses will become aligned due to the peeling
   1944  1.1  mrg      - How many accesses will become unaligned due to the peeling,
   1945  1.1  mrg        and the cost of misaligned accesses.
   1946  1.1  mrg      - The cost of peeling (the extra runtime checks, the increase
   1947  1.1  mrg        in code size).  */
   1948  1.1  mrg 
   1949  1.1  mrg   FOR_EACH_VEC_ELT (datarefs, i, dr)
   1950  1.1  mrg     {
   1951  1.1  mrg       dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
   1952  1.1  mrg       if (!vect_relevant_for_alignment_p (dr_info))
   1953  1.1  mrg 	continue;
   1954  1.1  mrg 
   1955  1.1  mrg       stmt_vec_info stmt_info = dr_info->stmt;
   1956  1.1  mrg       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   1957  1.1  mrg       do_peeling = vector_alignment_reachable_p (dr_info);
   1958  1.1  mrg       if (do_peeling)
   1959  1.1  mrg         {
   1960  1.1  mrg 	  if (known_alignment_for_access_p (dr_info, vectype))
   1961  1.1  mrg             {
   1962  1.1  mrg 	      unsigned int npeel_tmp = 0;
   1963  1.1  mrg 	      bool negative = tree_int_cst_compare (DR_STEP (dr),
   1964  1.1  mrg 						    size_zero_node) < 0;
   1965  1.1  mrg 
    1966  1.1  mrg 	      /* If known_alignment_for_access_p is true then we have set
    1967  1.1  mrg 	         DR_MISALIGNMENT, which is only done if we know the misalignment
    1968  1.1  mrg 	         at compile time, so it is safe to assume the target alignment
    1969  1.1  mrg 	         is constant.  */
   1970  1.1  mrg 	      unsigned int target_align =
   1971  1.1  mrg 		DR_TARGET_ALIGNMENT (dr_info).to_constant ();
   1972  1.1  mrg 	      unsigned HOST_WIDE_INT dr_size = vect_get_scalar_dr_size (dr_info);
   1973  1.1  mrg 	      poly_int64 off = 0;
   1974  1.1  mrg 	      if (negative)
   1975  1.1  mrg 		off = (TYPE_VECTOR_SUBPARTS (vectype) - 1) * -dr_size;
   1976  1.1  mrg 	      unsigned int mis = dr_misalignment (dr_info, vectype, off);
   1977  1.1  mrg 	      mis = negative ? mis : -mis;
   1978  1.1  mrg 	      if (mis != 0)
   1979  1.1  mrg 		npeel_tmp = (mis & (target_align - 1)) / dr_size;
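
                   	      /* Worked example (illustrative): with target_align == 16,
                   		 dr_size == 4 and a positive-step ref misaligned by 12
                   		 bytes, mis == -12 and npeel_tmp == (-12 & 15) / 4 == 1:
                   		 one peeled iteration advances the ref by 4 bytes and
                   		 12 + 4 == 16 is aligned.  */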
   1980  1.1  mrg 
    1981  1.1  mrg               /* For multiple types, it is possible that the bigger type access
    1982  1.1  mrg                  will have more than one peeling option.  E.g., a loop with two
    1983  1.1  mrg                  types: one of size (vector size / 4), and the other one of
    1984  1.1  mrg                  size (vector size / 8).  The vectorization factor will be 8.
    1985  1.1  mrg                  If both accesses are misaligned by 3, the first one needs one
    1986  1.1  mrg                  scalar iteration to be aligned, and the second one needs 5.
    1987  1.1  mrg                  But the first one will also be aligned by peeling 5 scalar
    1988  1.1  mrg                  iterations, and in that case both accesses will be aligned.
    1989  1.1  mrg                  Hence, besides the immediate peeling amount, we also want to
    1990  1.1  mrg                  try adding a full vector size, as long as we don't exceed the
    1991  1.1  mrg                  vectorization factor.
    1992  1.1  mrg                  We do this automatically for the cost model, since we
    1993  1.1  mrg                  calculate the cost for every peeling option.  */
   1994  1.1  mrg 	      poly_uint64 nscalars = npeel_tmp;
   1995  1.1  mrg               if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
   1996  1.1  mrg 		{
   1997  1.1  mrg 		  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
   1998  1.1  mrg 		  nscalars = (STMT_SLP_TYPE (stmt_info)
   1999  1.1  mrg 			      ? vf * DR_GROUP_SIZE (stmt_info) : vf);
   2000  1.1  mrg 		}
   2001  1.1  mrg 
    2002  1.1  mrg 	      /* Save info about DR in the hash table.  Also include peeling
    2003  1.1  mrg 		 amounts according to the explanation above.  Indicate
    2004  1.1  mrg 		 the alignment status when the ref is not aligned.
    2005  1.1  mrg 		 ???  Rather than using unknown alignment here we should
    2006  1.1  mrg 		 prune all entries from the peeling hashtable that cause
    2007  1.1  mrg 		 DRs to be unsupported.  */
   2008  1.1  mrg 	      bool supportable_if_not_aligned
   2009  1.1  mrg 		= vect_supportable_dr_alignment
   2010  1.1  mrg 		    (loop_vinfo, dr_info, vectype, DR_MISALIGNMENT_UNKNOWN);
   2011  1.1  mrg 	      while (known_le (npeel_tmp, nscalars))
   2012  1.1  mrg                 {
   2013  1.1  mrg                   vect_peeling_hash_insert (&peeling_htab, loop_vinfo,
   2014  1.1  mrg 					    dr_info, npeel_tmp,
   2015  1.1  mrg 					    supportable_if_not_aligned);
   2016  1.1  mrg 		  npeel_tmp += MAX (1, target_align / dr_size);
   2017  1.1  mrg                 }
   2018  1.1  mrg 
   2019  1.1  mrg 	      one_misalignment_known = true;
   2020  1.1  mrg             }
   2021  1.1  mrg           else
   2022  1.1  mrg             {
    2023  1.1  mrg               /* If we don't know any misalignment values, we prefer
    2024  1.1  mrg                  peeling for the data-ref that has the maximum number of
    2025  1.1  mrg                  data-refs with the same alignment, unless the target
    2026  1.1  mrg                  prefers to align stores over loads.  */
   2027  1.1  mrg 	      unsigned same_align_drs = n_same_align_refs[i];
   2028  1.1  mrg 	      if (!dr0_info
   2029  1.1  mrg 		  || dr0_same_align_drs < same_align_drs)
   2030  1.1  mrg 		{
   2031  1.1  mrg 		  dr0_same_align_drs = same_align_drs;
   2032  1.1  mrg 		  dr0_info = dr_info;
   2033  1.1  mrg 		}
   2034  1.1  mrg 	      /* For data-refs with the same number of related
   2035  1.1  mrg 		 accesses prefer the one where the misalign
   2036  1.1  mrg 		 computation will be invariant in the outermost loop.  */
   2037  1.1  mrg 	      else if (dr0_same_align_drs == same_align_drs)
   2038  1.1  mrg 		{
   2039  1.1  mrg 		  class loop *ivloop0, *ivloop;
   2040  1.1  mrg 		  ivloop0 = outermost_invariant_loop_for_expr
   2041  1.1  mrg 		    (loop, DR_BASE_ADDRESS (dr0_info->dr));
   2042  1.1  mrg 		  ivloop = outermost_invariant_loop_for_expr
   2043  1.1  mrg 		    (loop, DR_BASE_ADDRESS (dr));
   2044  1.1  mrg 		  if ((ivloop && !ivloop0)
   2045  1.1  mrg 		      || (ivloop && ivloop0
   2046  1.1  mrg 			  && flow_loop_nested_p (ivloop, ivloop0)))
   2047  1.1  mrg 		    dr0_info = dr_info;
   2048  1.1  mrg 		}
   2049  1.1  mrg 
   2050  1.1  mrg 	      one_misalignment_unknown = true;
   2051  1.1  mrg 
   2052  1.1  mrg 	      /* Check for data refs with unsupportable alignment that
   2053  1.1  mrg 	         can be peeled.  */
   2054  1.1  mrg 	      enum dr_alignment_support supportable_dr_alignment
   2055  1.1  mrg 		= vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
   2056  1.1  mrg 						 DR_MISALIGNMENT_UNKNOWN);
   2057  1.1  mrg 	      if (supportable_dr_alignment == dr_unaligned_unsupported)
   2058  1.1  mrg 		{
   2059  1.1  mrg 		  one_dr_unsupportable = true;
   2060  1.1  mrg 		  unsupportable_dr_info = dr_info;
   2061  1.1  mrg 		}
   2062  1.1  mrg 
   2063  1.1  mrg 	      if (!first_store && DR_IS_WRITE (dr))
   2064  1.1  mrg 		{
   2065  1.1  mrg 		  first_store = dr_info;
   2066  1.1  mrg 		  first_store_same_align_drs = same_align_drs;
   2067  1.1  mrg 		}
   2068  1.1  mrg             }
   2069  1.1  mrg         }
   2070  1.1  mrg       else
   2071  1.1  mrg         {
   2072  1.1  mrg 	  if (!aligned_access_p (dr_info, vectype))
   2073  1.1  mrg             {
   2074  1.1  mrg               if (dump_enabled_p ())
   2075  1.1  mrg                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   2076  1.1  mrg                                  "vector alignment may not be reachable\n");
   2077  1.1  mrg               break;
   2078  1.1  mrg             }
   2079  1.1  mrg         }
   2080  1.1  mrg     }
   2081  1.1  mrg 
   2082  1.1  mrg   /* Check if we can possibly peel the loop.  */
   2083  1.1  mrg   if (!vect_can_advance_ivs_p (loop_vinfo)
   2084  1.1  mrg       || !slpeel_can_duplicate_loop_p (loop, single_exit (loop))
   2085  1.1  mrg       || loop->inner)
   2086  1.1  mrg     do_peeling = false;
   2087  1.1  mrg 
   2088  1.1  mrg   struct _vect_peel_extended_info peel_for_known_alignment;
   2089  1.1  mrg   struct _vect_peel_extended_info peel_for_unknown_alignment;
   2090  1.1  mrg   struct _vect_peel_extended_info best_peel;
   2091  1.1  mrg 
   2092  1.1  mrg   peel_for_unknown_alignment.inside_cost = INT_MAX;
   2093  1.1  mrg   peel_for_unknown_alignment.outside_cost = INT_MAX;
   2094  1.1  mrg   peel_for_unknown_alignment.peel_info.count = 0;
   2095  1.1  mrg 
   2096  1.1  mrg   if (do_peeling
   2097  1.1  mrg       && one_misalignment_unknown)
   2098  1.1  mrg     {
    2099  1.1  mrg       /* Check if the target prefers aligning stores over loads, i.e., if
    2100  1.1  mrg          misaligned stores are more expensive than misaligned loads (taking
    2101  1.1  mrg          DRs with the same alignment into account).  */
   2102  1.1  mrg       unsigned int load_inside_cost = 0;
   2103  1.1  mrg       unsigned int load_outside_cost = 0;
   2104  1.1  mrg       unsigned int store_inside_cost = 0;
   2105  1.1  mrg       unsigned int store_outside_cost = 0;
   2106  1.1  mrg       unsigned int estimated_npeels = vect_vf_for_cost (loop_vinfo) / 2;
   2107  1.1  mrg 
   2108  1.1  mrg       stmt_vector_for_cost dummy;
   2109  1.1  mrg       dummy.create (2);
   2110  1.1  mrg       vect_get_peeling_costs_all_drs (loop_vinfo, dr0_info,
   2111  1.1  mrg 				      &load_inside_cost,
   2112  1.1  mrg 				      &load_outside_cost,
   2113  1.1  mrg 				      &dummy, &dummy, estimated_npeels);
   2114  1.1  mrg       dummy.release ();
   2115  1.1  mrg 
   2116  1.1  mrg       if (first_store)
   2117  1.1  mrg 	{
   2118  1.1  mrg 	  dummy.create (2);
   2119  1.1  mrg 	  vect_get_peeling_costs_all_drs (loop_vinfo, first_store,
   2120  1.1  mrg 					  &store_inside_cost,
   2121  1.1  mrg 					  &store_outside_cost,
   2122  1.1  mrg 					  &dummy, &dummy,
   2123  1.1  mrg 					  estimated_npeels);
   2124  1.1  mrg 	  dummy.release ();
   2125  1.1  mrg 	}
   2126  1.1  mrg       else
   2127  1.1  mrg 	{
   2128  1.1  mrg 	  store_inside_cost = INT_MAX;
   2129  1.1  mrg 	  store_outside_cost = INT_MAX;
   2130  1.1  mrg 	}
   2131  1.1  mrg 
   2132  1.1  mrg       if (load_inside_cost > store_inside_cost
   2133  1.1  mrg 	  || (load_inside_cost == store_inside_cost
   2134  1.1  mrg 	      && load_outside_cost > store_outside_cost))
   2135  1.1  mrg 	{
   2136  1.1  mrg 	  dr0_info = first_store;
   2137  1.1  mrg 	  dr0_same_align_drs = first_store_same_align_drs;
   2138  1.1  mrg 	  peel_for_unknown_alignment.inside_cost = store_inside_cost;
   2139  1.1  mrg 	  peel_for_unknown_alignment.outside_cost = store_outside_cost;
   2140  1.1  mrg 	}
   2141  1.1  mrg       else
   2142  1.1  mrg 	{
   2143  1.1  mrg 	  peel_for_unknown_alignment.inside_cost = load_inside_cost;
   2144  1.1  mrg 	  peel_for_unknown_alignment.outside_cost = load_outside_cost;
   2145  1.1  mrg 	}
   2146  1.1  mrg 
   2147  1.1  mrg       stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
   2148  1.1  mrg       prologue_cost_vec.create (2);
   2149  1.1  mrg       epilogue_cost_vec.create (2);
   2150  1.1  mrg 
   2151  1.1  mrg       int dummy2;
   2152  1.1  mrg       peel_for_unknown_alignment.outside_cost += vect_get_known_peeling_cost
   2153  1.1  mrg 	(loop_vinfo, estimated_npeels, &dummy2,
   2154  1.1  mrg 	 &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
   2155  1.1  mrg 	 &prologue_cost_vec, &epilogue_cost_vec);
   2156  1.1  mrg 
   2157  1.1  mrg       prologue_cost_vec.release ();
   2158  1.1  mrg       epilogue_cost_vec.release ();
   2159  1.1  mrg 
   2160  1.1  mrg       peel_for_unknown_alignment.peel_info.count = dr0_same_align_drs + 1;
   2161  1.1  mrg     }
   2162  1.1  mrg 
   2163  1.1  mrg   peel_for_unknown_alignment.peel_info.npeel = 0;
   2164  1.1  mrg   peel_for_unknown_alignment.peel_info.dr_info = dr0_info;
   2165  1.1  mrg 
   2166  1.1  mrg   best_peel = peel_for_unknown_alignment;
   2167  1.1  mrg 
   2168  1.1  mrg   peel_for_known_alignment.inside_cost = INT_MAX;
   2169  1.1  mrg   peel_for_known_alignment.outside_cost = INT_MAX;
   2170  1.1  mrg   peel_for_known_alignment.peel_info.count = 0;
   2171  1.1  mrg   peel_for_known_alignment.peel_info.dr_info = NULL;
   2172  1.1  mrg 
   2173  1.1  mrg   if (do_peeling && one_misalignment_known)
   2174  1.1  mrg     {
    2175  1.1  mrg       /* Peeling is possible and no data access requires alignment in
    2176  1.1  mrg          order to be supported, so we try to choose the best possible
    2177  1.1  mrg          peeling from the hash table.  */
   2178  1.1  mrg       peel_for_known_alignment = vect_peeling_hash_choose_best_peeling
   2179  1.1  mrg 	(&peeling_htab, loop_vinfo);
   2180  1.1  mrg     }
   2181  1.1  mrg 
   2182  1.1  mrg   /* Compare costs of peeling for known and unknown alignment. */
   2183  1.1  mrg   if (peel_for_known_alignment.peel_info.dr_info != NULL
   2184  1.1  mrg       && peel_for_unknown_alignment.inside_cost
   2185  1.1  mrg       >= peel_for_known_alignment.inside_cost)
   2186  1.1  mrg     {
   2187  1.1  mrg       best_peel = peel_for_known_alignment;
   2188  1.1  mrg 
   2189  1.1  mrg       /* If the best peeling for known alignment has NPEEL == 0, perform no
   2190  1.1  mrg          peeling at all except if there is an unsupportable dr that we can
   2191  1.1  mrg          align.  */
   2192  1.1  mrg       if (best_peel.peel_info.npeel == 0 && !one_dr_unsupportable)
   2193  1.1  mrg 	do_peeling = false;
   2194  1.1  mrg     }
   2195  1.1  mrg 
   2196  1.1  mrg   /* If there is an unsupportable data ref, prefer this over all choices so far
   2197  1.1  mrg      since we'd have to discard a chosen peeling except when it accidentally
   2198  1.1  mrg      aligned the unsupportable data ref.  */
   2199  1.1  mrg   if (one_dr_unsupportable)
   2200  1.1  mrg     dr0_info = unsupportable_dr_info;
   2201  1.1  mrg   else if (do_peeling)
   2202  1.1  mrg     {
   2203  1.1  mrg       /* Calculate the penalty for no peeling, i.e. leaving everything as-is.
   2204  1.1  mrg 	 TODO: Use nopeel_outside_cost or get rid of it?  */
   2205  1.1  mrg       unsigned nopeel_inside_cost = 0;
   2206  1.1  mrg       unsigned nopeel_outside_cost = 0;
   2207  1.1  mrg 
   2208  1.1  mrg       stmt_vector_for_cost dummy;
   2209  1.1  mrg       dummy.create (2);
   2210  1.1  mrg       vect_get_peeling_costs_all_drs (loop_vinfo, NULL, &nopeel_inside_cost,
   2211  1.1  mrg 				      &nopeel_outside_cost, &dummy, &dummy, 0);
   2212  1.1  mrg       dummy.release ();
   2213  1.1  mrg 
   2214  1.1  mrg       /* Add epilogue costs.  As we do not peel for alignment here, no prologue
   2215  1.1  mrg 	 costs will be recorded.  */
   2216  1.1  mrg       stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
   2217  1.1  mrg       prologue_cost_vec.create (2);
   2218  1.1  mrg       epilogue_cost_vec.create (2);
   2219  1.1  mrg 
   2220  1.1  mrg       int dummy2;
   2221  1.1  mrg       nopeel_outside_cost += vect_get_known_peeling_cost
   2222  1.1  mrg 	(loop_vinfo, 0, &dummy2,
   2223  1.1  mrg 	 &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
   2224  1.1  mrg 	 &prologue_cost_vec, &epilogue_cost_vec);
   2225  1.1  mrg 
   2226  1.1  mrg       prologue_cost_vec.release ();
   2227  1.1  mrg       epilogue_cost_vec.release ();
   2228  1.1  mrg 
   2229  1.1  mrg       npeel = best_peel.peel_info.npeel;
   2230  1.1  mrg       dr0_info = best_peel.peel_info.dr_info;
   2231  1.1  mrg 
    2232  1.1  mrg       /* If not peeling is no more expensive than the best peeling
    2233  1.1  mrg 	 found so far, don't perform any peeling.  */
   2234  1.1  mrg       if (nopeel_inside_cost <= best_peel.inside_cost)
   2235  1.1  mrg 	do_peeling = false;
   2236  1.1  mrg     }
   2237  1.1  mrg 
   2238  1.1  mrg   if (do_peeling)
   2239  1.1  mrg     {
   2240  1.1  mrg       stmt_vec_info stmt_info = dr0_info->stmt;
   2241  1.1  mrg       if (known_alignment_for_access_p (dr0_info,
   2242  1.1  mrg 					STMT_VINFO_VECTYPE (stmt_info)))
   2243  1.1  mrg         {
   2244  1.1  mrg 	  bool negative = tree_int_cst_compare (DR_STEP (dr0_info->dr),
   2245  1.1  mrg 						size_zero_node) < 0;
   2246  1.1  mrg           if (!npeel)
   2247  1.1  mrg             {
   2248  1.1  mrg               /* Since it's known at compile time, compute the number of
   2249  1.1  mrg                  iterations in the peeled loop (the peeling factor) for use in
   2250  1.1  mrg                  updating DR_MISALIGNMENT values.  The peeling factor is the
   2251  1.1  mrg                  vectorization factor minus the misalignment as an element
   2252  1.1  mrg                  count.  */
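	      /* Illustrative example (not from the original comments): with
		 4-byte elements, a 16-byte target alignment and a known
		 misalignment of 8 bytes, the code below computes
		 npeel = ((-8) & 15) / 4 = 2, i.e. peeling two scalar
		 iterations advances the access by 8 bytes to the next
		 16-byte boundary.  */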
   2253  1.1  mrg 	      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   2254  1.1  mrg 	      poly_int64 off = 0;
   2255  1.1  mrg 	      if (negative)
   2256  1.1  mrg 		off = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
   2257  1.1  mrg 		       * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
   2258  1.1  mrg 	      unsigned int mis
   2259  1.1  mrg 		= dr_misalignment (dr0_info, vectype, off);
   2260  1.1  mrg 	      mis = negative ? mis : -mis;
    2261  1.1  mrg 	      /* If known_alignment_for_access_p then we have set
    2262  1.1  mrg 	         DR_MISALIGNMENT, which is only done if we know it at compile
    2263  1.1  mrg 	         time, so it is safe to assume the target alignment is
    2264  1.1  mrg 	         constant.  */
   2265  1.1  mrg 	      unsigned int target_align =
   2266  1.1  mrg 		DR_TARGET_ALIGNMENT (dr0_info).to_constant ();
   2267  1.1  mrg 	      npeel = ((mis & (target_align - 1))
   2268  1.1  mrg 		       / vect_get_scalar_dr_size (dr0_info));
   2269  1.1  mrg             }
   2270  1.1  mrg 
   2271  1.1  mrg 	  /* For interleaved data access every iteration accesses all the
   2272  1.1  mrg 	     members of the group, therefore we divide the number of iterations
   2273  1.1  mrg 	     by the group size.  */
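	  /* E.g. if NPEEL as computed above is 8 elements and the group has
	     4 members, 8 / 4 = 2 peeled loop iterations suffice.  */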
   2274  1.1  mrg 	  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
   2275  1.1  mrg 	    npeel /= DR_GROUP_SIZE (stmt_info);
   2276  1.1  mrg 
   2277  1.1  mrg           if (dump_enabled_p ())
   2278  1.1  mrg             dump_printf_loc (MSG_NOTE, vect_location,
   2279  1.1  mrg                              "Try peeling by %d\n", npeel);
   2280  1.1  mrg         }
   2281  1.1  mrg 
   2282  1.1  mrg       /* Ensure that all datarefs can be vectorized after the peel.  */
   2283  1.1  mrg       if (!vect_peeling_supportable (loop_vinfo, dr0_info, npeel))
   2284  1.1  mrg 	do_peeling = false;
   2285  1.1  mrg 
   2286  1.1  mrg       /* Check if all datarefs are supportable and log.  */
   2287  1.1  mrg       if (do_peeling
   2288  1.1  mrg 	  && npeel == 0
   2289  1.1  mrg 	  && known_alignment_for_access_p (dr0_info,
   2290  1.1  mrg 					   STMT_VINFO_VECTYPE (stmt_info)))
   2291  1.1  mrg 	return opt_result::success ();
   2292  1.1  mrg 
   2293  1.1  mrg       /* Cost model #1 - honor --param vect-max-peeling-for-alignment.  */
   2294  1.1  mrg       if (do_peeling)
   2295  1.1  mrg         {
   2296  1.1  mrg           unsigned max_allowed_peel
   2297  1.1  mrg 	    = param_vect_max_peeling_for_alignment;
   2298  1.1  mrg 	  if (loop_cost_model (loop) <= VECT_COST_MODEL_CHEAP)
   2299  1.1  mrg 	    max_allowed_peel = 0;
   2300  1.1  mrg           if (max_allowed_peel != (unsigned)-1)
   2301  1.1  mrg             {
   2302  1.1  mrg               unsigned max_peel = npeel;
   2303  1.1  mrg               if (max_peel == 0)
   2304  1.1  mrg                 {
   2305  1.1  mrg 		  poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr0_info);
   2306  1.1  mrg 		  unsigned HOST_WIDE_INT target_align_c;
   2307  1.1  mrg 		  if (target_align.is_constant (&target_align_c))
   2308  1.1  mrg 		    max_peel =
   2309  1.1  mrg 		      target_align_c / vect_get_scalar_dr_size (dr0_info) - 1;
   2310  1.1  mrg 		  else
   2311  1.1  mrg 		    {
   2312  1.1  mrg 		      do_peeling = false;
   2313  1.1  mrg 		      if (dump_enabled_p ())
   2314  1.1  mrg 			dump_printf_loc (MSG_NOTE, vect_location,
   2315  1.1  mrg 			  "Disable peeling, max peels set and vector"
   2316  1.1  mrg 			  " alignment unknown\n");
   2317  1.1  mrg 		    }
   2318  1.1  mrg                 }
   2319  1.1  mrg               if (max_peel > max_allowed_peel)
   2320  1.1  mrg                 {
   2321  1.1  mrg                   do_peeling = false;
   2322  1.1  mrg                   if (dump_enabled_p ())
   2323  1.1  mrg                     dump_printf_loc (MSG_NOTE, vect_location,
   2324  1.1  mrg                         "Disable peeling, max peels reached: %d\n", max_peel);
   2325  1.1  mrg                 }
   2326  1.1  mrg             }
   2327  1.1  mrg         }
   2328  1.1  mrg 
   2329  1.1  mrg       /* Cost model #2 - if peeling may result in a remaining loop not
   2330  1.1  mrg 	 iterating enough to be vectorized then do not peel.  Since this
   2331  1.1  mrg 	 is a cost heuristic rather than a correctness decision, use the
   2332  1.1  mrg 	 most likely runtime value for variable vectorization factors.  */
   2333  1.1  mrg       if (do_peeling
   2334  1.1  mrg 	  && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
   2335  1.1  mrg 	{
   2336  1.1  mrg 	  unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
   2337  1.1  mrg 	  unsigned int max_peel = npeel == 0 ? assumed_vf - 1 : npeel;
   2338  1.1  mrg 	  if ((unsigned HOST_WIDE_INT) LOOP_VINFO_INT_NITERS (loop_vinfo)
   2339  1.1  mrg 	      < assumed_vf + max_peel)
   2340  1.1  mrg 	    do_peeling = false;
   2341  1.1  mrg 	}
   2342  1.1  mrg 
   2343  1.1  mrg       if (do_peeling)
   2344  1.1  mrg         {
   2345  1.1  mrg           /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
   2346  1.1  mrg              If the misalignment of DR_i is identical to that of dr0 then set
   2347  1.1  mrg              DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
   2348  1.1  mrg              dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
   2349  1.1  mrg              by the peeling factor times the element size of DR_i (MOD the
   2350  1.1  mrg              vectorization factor times the size).  Otherwise, the
   2351  1.1  mrg              misalignment of DR_i must be set to unknown.  */
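	  /* For instance (illustration only), with NPEEL 2 and a DR_i of
	     4-byte elements whose misalignment is known, DR_MISALIGNMENT
	     (DR_i) grows by 2 * 4 = 8, modulo the vectorization factor
	     times the element size.  */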
   2352  1.1  mrg 	  FOR_EACH_VEC_ELT (datarefs, i, dr)
   2353  1.1  mrg 	    if (dr != dr0_info->dr)
   2354  1.1  mrg 	      {
   2355  1.1  mrg 		dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
   2356  1.1  mrg 		if (!vect_relevant_for_alignment_p (dr_info))
   2357  1.1  mrg 		  continue;
   2358  1.1  mrg 
   2359  1.1  mrg 		vect_update_misalignment_for_peel (dr_info, dr0_info, npeel);
   2360  1.1  mrg 	      }
   2361  1.1  mrg 
   2362  1.1  mrg           LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0_info;
   2363  1.1  mrg           if (npeel)
   2364  1.1  mrg             LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
   2365  1.1  mrg           else
   2366  1.1  mrg 	    LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = -1;
   2367  1.1  mrg 	  SET_DR_MISALIGNMENT (dr0_info,
   2368  1.1  mrg 			       vect_dr_misalign_for_aligned_access (dr0_info));
   2369  1.1  mrg 	  if (dump_enabled_p ())
   2370  1.1  mrg             {
   2371  1.1  mrg               dump_printf_loc (MSG_NOTE, vect_location,
   2372  1.1  mrg                                "Alignment of access forced using peeling.\n");
   2373  1.1  mrg               dump_printf_loc (MSG_NOTE, vect_location,
   2374  1.1  mrg                                "Peeling for alignment will be applied.\n");
   2375  1.1  mrg             }
   2376  1.1  mrg 
   2377  1.1  mrg 	  /* The inside-loop cost will be accounted for in vectorizable_load
   2378  1.1  mrg 	     and vectorizable_store correctly with adjusted alignments.
	     Drop the body cost vector on the floor here.  */
   2380  1.1  mrg 	  return opt_result::success ();
   2381  1.1  mrg         }
   2382  1.1  mrg     }
   2383  1.1  mrg 
   2384  1.1  mrg   /* (2) Versioning to force alignment.  */
   2385  1.1  mrg 
   2386  1.1  mrg   /* Try versioning if:
   2387  1.1  mrg      1) optimize loop for speed and the cost-model is not cheap
   2388  1.1  mrg      2) there is at least one unsupported misaligned data ref with an unknown
   2389  1.1  mrg         misalignment, and
   2390  1.1  mrg      3) all misaligned data refs with a known misalignment are supported, and
   2391  1.1  mrg      4) the number of runtime alignment checks is within reason.  */
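  /* Illustration: the versioning transform guards the vectorized loop with
     a runtime test roughly of the form
	((addr_1 | ... | addr_n) & LOOP_VINFO_PTR_MASK) == 0
     and falls back to the scalar loop version when the test fails.  */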
   2392  1.1  mrg 
   2393  1.1  mrg   do_versioning
   2394  1.1  mrg     = (optimize_loop_nest_for_speed_p (loop)
   2395  1.1  mrg        && !loop->inner /* FORNOW */
   2396  1.1  mrg        && loop_cost_model (loop) > VECT_COST_MODEL_CHEAP);
   2397  1.1  mrg 
   2398  1.1  mrg   if (do_versioning)
   2399  1.1  mrg     {
   2400  1.1  mrg       FOR_EACH_VEC_ELT (datarefs, i, dr)
   2401  1.1  mrg         {
   2402  1.1  mrg 	  dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
   2403  1.1  mrg 	  if (!vect_relevant_for_alignment_p (dr_info))
   2404  1.1  mrg 	    continue;
   2405  1.1  mrg 
   2406  1.1  mrg 	  stmt_vec_info stmt_info = dr_info->stmt;
   2407  1.1  mrg 	  if (STMT_VINFO_STRIDED_P (stmt_info))
   2408  1.1  mrg 	    {
   2409  1.1  mrg 	      do_versioning = false;
   2410  1.1  mrg 	      break;
   2411  1.1  mrg 	    }
   2412  1.1  mrg 
   2413  1.1  mrg 	  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   2414  1.1  mrg 	  bool negative = tree_int_cst_compare (DR_STEP (dr),
   2415  1.1  mrg 						size_zero_node) < 0;
   2416  1.1  mrg 	  poly_int64 off = 0;
   2417  1.1  mrg 	  if (negative)
   2418  1.1  mrg 	    off = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
   2419  1.1  mrg 		   * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
   2420  1.1  mrg 	  int misalignment;
   2421  1.1  mrg 	  if ((misalignment = dr_misalignment (dr_info, vectype, off)) == 0)
   2422  1.1  mrg 	    continue;
   2423  1.1  mrg 
   2424  1.1  mrg 	  enum dr_alignment_support supportable_dr_alignment
   2425  1.1  mrg 	    = vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
   2426  1.1  mrg 					     misalignment);
   2427  1.1  mrg 	  if (supportable_dr_alignment == dr_unaligned_unsupported)
   2428  1.1  mrg             {
   2429  1.1  mrg 	      if (misalignment != DR_MISALIGNMENT_UNKNOWN
   2430  1.1  mrg 		  || (LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ()
   2431  1.1  mrg 		      >= (unsigned) param_vect_max_version_for_alignment_checks))
   2432  1.1  mrg                 {
   2433  1.1  mrg                   do_versioning = false;
   2434  1.1  mrg                   break;
   2435  1.1  mrg                 }
   2436  1.1  mrg 
   2437  1.1  mrg 	      /* At present we don't support versioning for alignment
   2438  1.1  mrg 		 with variable VF, since there's no guarantee that the
   2439  1.1  mrg 		 VF is a power of two.  We could relax this if we added
   2440  1.1  mrg 		 a way of enforcing a power-of-two size.  */
   2441  1.1  mrg 	      unsigned HOST_WIDE_INT size;
   2442  1.1  mrg 	      if (!GET_MODE_SIZE (TYPE_MODE (vectype)).is_constant (&size))
   2443  1.1  mrg 		{
   2444  1.1  mrg 		  do_versioning = false;
   2445  1.1  mrg 		  break;
   2446  1.1  mrg 		}
   2447  1.1  mrg 
   2448  1.1  mrg 	      /* Forcing alignment in the first iteration is no good if
   2449  1.1  mrg 		 we don't keep it across iterations.  For now, just disable
   2450  1.1  mrg 		 versioning in this case.
   2451  1.1  mrg 		 ?? We could actually unroll the loop to achieve the required
   2452  1.1  mrg 		 overall step alignment, and forcing the alignment could be
   2453  1.1  mrg 		 done by doing some iterations of the non-vectorized loop.  */
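	      /* E.g. with DR_STEP_ALIGNMENT 8, a target alignment of 16 and
		 VF 4, 4 * 8 = 32 is a multiple of 16, so alignment forced
		 for the first iteration is kept by all later ones.  */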
   2454  1.1  mrg 	      if (!multiple_p (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
   2455  1.1  mrg 			       * DR_STEP_ALIGNMENT (dr),
   2456  1.1  mrg 			       DR_TARGET_ALIGNMENT (dr_info)))
   2457  1.1  mrg 		{
   2458  1.1  mrg 		  do_versioning = false;
   2459  1.1  mrg 		  break;
   2460  1.1  mrg 		}
   2461  1.1  mrg 
   2462  1.1  mrg               /* The rightmost bits of an aligned address must be zeros.
   2463  1.1  mrg                  Construct the mask needed for this test.  For example,
   2464  1.1  mrg                  GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
   2465  1.1  mrg                  mask must be 15 = 0xf. */
   2466  1.1  mrg 	      int mask = size - 1;
   2467  1.1  mrg 
   2468  1.1  mrg 	      /* FORNOW: use the same mask to test all potentially unaligned
   2469  1.1  mrg 		 references in the loop.  */
   2470  1.1  mrg 	      if (LOOP_VINFO_PTR_MASK (loop_vinfo)
   2471  1.1  mrg 		  && LOOP_VINFO_PTR_MASK (loop_vinfo) != mask)
   2472  1.1  mrg 		{
   2473  1.1  mrg 		  do_versioning = false;
   2474  1.1  mrg 		  break;
   2475  1.1  mrg 		}
   2476  1.1  mrg 
   2477  1.1  mrg               LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
   2478  1.1  mrg 	      LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (stmt_info);
   2479  1.1  mrg             }
   2480  1.1  mrg         }
   2481  1.1  mrg 
   2482  1.1  mrg       /* Versioning requires at least one misaligned data reference.  */
   2483  1.1  mrg       if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
   2484  1.1  mrg         do_versioning = false;
   2485  1.1  mrg       else if (!do_versioning)
   2486  1.1  mrg         LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
   2487  1.1  mrg     }
   2488  1.1  mrg 
   2489  1.1  mrg   if (do_versioning)
   2490  1.1  mrg     {
   2491  1.1  mrg       const vec<stmt_vec_info> &may_misalign_stmts
   2492  1.1  mrg 	= LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
   2493  1.1  mrg       stmt_vec_info stmt_info;
   2494  1.1  mrg 
   2495  1.1  mrg       /* It can now be assumed that the data references in the statements
   2496  1.1  mrg          in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
   2497  1.1  mrg          of the loop being vectorized.  */
   2498  1.1  mrg       FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt_info)
   2499  1.1  mrg         {
   2500  1.1  mrg 	  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
   2501  1.1  mrg 	  SET_DR_MISALIGNMENT (dr_info,
   2502  1.1  mrg 			       vect_dr_misalign_for_aligned_access (dr_info));
   2503  1.1  mrg 	  if (dump_enabled_p ())
   2504  1.1  mrg             dump_printf_loc (MSG_NOTE, vect_location,
   2505  1.1  mrg                              "Alignment of access forced using versioning.\n");
   2506  1.1  mrg         }
   2507  1.1  mrg 
   2508  1.1  mrg       if (dump_enabled_p ())
   2509  1.1  mrg         dump_printf_loc (MSG_NOTE, vect_location,
   2510  1.1  mrg                          "Versioning for alignment will be applied.\n");
   2511  1.1  mrg 
   2512  1.1  mrg       /* Peeling and versioning can't be done together at this time.  */
   2513  1.1  mrg       gcc_assert (! (do_peeling && do_versioning));
   2514  1.1  mrg 
   2515  1.1  mrg       return opt_result::success ();
   2516  1.1  mrg     }
   2517  1.1  mrg 
   2518  1.1  mrg   /* This point is reached if neither peeling nor versioning is being done.  */
   2519  1.1  mrg   gcc_assert (! (do_peeling || do_versioning));
   2520  1.1  mrg 
   2521  1.1  mrg   return opt_result::success ();
   2522  1.1  mrg }
   2523  1.1  mrg 
   2524  1.1  mrg 
   2525  1.1  mrg /* Function vect_analyze_data_refs_alignment
   2526  1.1  mrg 
   2527  1.1  mrg    Analyze the alignment of the data-references in the loop.
   2528  1.1  mrg    Return FALSE if a data reference is found that cannot be vectorized.  */
   2529  1.1  mrg 
   2530  1.1  mrg opt_result
   2531  1.1  mrg vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
   2532  1.1  mrg {
   2533  1.1  mrg   DUMP_VECT_SCOPE ("vect_analyze_data_refs_alignment");
   2534  1.1  mrg 
   2535  1.1  mrg   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
   2536  1.1  mrg   struct data_reference *dr;
   2537  1.1  mrg   unsigned int i;
   2538  1.1  mrg 
   2539  1.1  mrg   vect_record_base_alignments (loop_vinfo);
   2540  1.1  mrg   FOR_EACH_VEC_ELT (datarefs, i, dr)
   2541  1.1  mrg     {
   2542  1.1  mrg       dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
   2543  1.1  mrg       if (STMT_VINFO_VECTORIZABLE (dr_info->stmt))
   2544  1.1  mrg 	{
   2545  1.1  mrg 	  if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt)
   2546  1.1  mrg 	      && DR_GROUP_FIRST_ELEMENT (dr_info->stmt) != dr_info->stmt)
   2547  1.1  mrg 	    continue;
   2548  1.1  mrg 	  vect_compute_data_ref_alignment (loop_vinfo, dr_info,
   2549  1.1  mrg 					   STMT_VINFO_VECTYPE (dr_info->stmt));
   2550  1.1  mrg 	}
   2551  1.1  mrg     }
   2552  1.1  mrg 
   2553  1.1  mrg   return opt_result::success ();
   2554  1.1  mrg }
   2555  1.1  mrg 
   2556  1.1  mrg 
   2557  1.1  mrg /* Analyze alignment of DRs of stmts in NODE.  */
   2558  1.1  mrg 
   2559  1.1  mrg static bool
   2560  1.1  mrg vect_slp_analyze_node_alignment (vec_info *vinfo, slp_tree node)
   2561  1.1  mrg {
   2562  1.1  mrg   /* Alignment is maintained in the first element of the group.  */
   2563  1.1  mrg   stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
   2564  1.1  mrg   first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
   2565  1.1  mrg   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
   2566  1.1  mrg   tree vectype = SLP_TREE_VECTYPE (node);
   2567  1.1  mrg   poly_uint64 vector_alignment
   2568  1.1  mrg     = exact_div (targetm.vectorize.preferred_vector_alignment (vectype),
   2569  1.1  mrg 		 BITS_PER_UNIT);
   2570  1.1  mrg   if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
   2571  1.1  mrg     vect_compute_data_ref_alignment (vinfo, dr_info, SLP_TREE_VECTYPE (node));
   2572  1.1  mrg   /* Re-analyze alignment when we're facing a vectorization with a bigger
   2573  1.1  mrg      alignment requirement.  */
   2574  1.1  mrg   else if (known_lt (dr_info->target_alignment, vector_alignment))
   2575  1.1  mrg     {
   2576  1.1  mrg       poly_uint64 old_target_alignment = dr_info->target_alignment;
   2577  1.1  mrg       int old_misalignment = dr_info->misalignment;
   2578  1.1  mrg       vect_compute_data_ref_alignment (vinfo, dr_info, SLP_TREE_VECTYPE (node));
   2579  1.1  mrg       /* But keep knowledge about a smaller alignment.  */
   2580  1.1  mrg       if (old_misalignment != DR_MISALIGNMENT_UNKNOWN
   2581  1.1  mrg 	  && dr_info->misalignment == DR_MISALIGNMENT_UNKNOWN)
   2582  1.1  mrg 	{
   2583  1.1  mrg 	  dr_info->target_alignment = old_target_alignment;
   2584  1.1  mrg 	  dr_info->misalignment = old_misalignment;
   2585  1.1  mrg 	}
   2586  1.1  mrg     }
    2587  1.1  mrg   /* If we ever face unordered target alignments the first one analyzed
    2588  1.1  mrg      wins and the other will become unknown in dr_misalignment.  */
   2589  1.1  mrg   return true;
   2590  1.1  mrg }
   2591  1.1  mrg 
   2592  1.1  mrg /* Function vect_slp_analyze_instance_alignment
   2593  1.1  mrg 
   2594  1.1  mrg    Analyze the alignment of the data-references in the SLP instance.
   2595  1.1  mrg    Return FALSE if a data reference is found that cannot be vectorized.  */
   2596  1.1  mrg 
   2597  1.1  mrg bool
   2598  1.1  mrg vect_slp_analyze_instance_alignment (vec_info *vinfo,
   2599  1.1  mrg 						slp_instance instance)
   2600  1.1  mrg {
   2601  1.1  mrg   DUMP_VECT_SCOPE ("vect_slp_analyze_instance_alignment");
   2602  1.1  mrg 
   2603  1.1  mrg   slp_tree node;
   2604  1.1  mrg   unsigned i;
   2605  1.1  mrg   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, node)
   2606  1.1  mrg     if (! vect_slp_analyze_node_alignment (vinfo, node))
   2607  1.1  mrg       return false;
   2608  1.1  mrg 
   2609  1.1  mrg   if (SLP_INSTANCE_KIND (instance) == slp_inst_kind_store
   2610  1.1  mrg       && ! vect_slp_analyze_node_alignment
   2611  1.1  mrg 	     (vinfo, SLP_INSTANCE_TREE (instance)))
   2612  1.1  mrg     return false;
   2613  1.1  mrg 
   2614  1.1  mrg   return true;
   2615  1.1  mrg }
   2616  1.1  mrg 
   2617  1.1  mrg 
   2618  1.1  mrg /* Analyze groups of accesses: check that DR_INFO belongs to a group of
   2619  1.1  mrg    accesses of legal size, step, etc.  Detect gaps, single element
   2620  1.1  mrg    interleaving, and other special cases. Set grouped access info.
   2621  1.1  mrg    Collect groups of strided stores for further use in SLP analysis.
   2622  1.1  mrg    Worker for vect_analyze_group_access.  */
   2623  1.1  mrg 
   2624  1.1  mrg static bool
   2625  1.1  mrg vect_analyze_group_access_1 (vec_info *vinfo, dr_vec_info *dr_info)
   2626  1.1  mrg {
   2627  1.1  mrg   data_reference *dr = dr_info->dr;
   2628  1.1  mrg   tree step = DR_STEP (dr);
   2629  1.1  mrg   tree scalar_type = TREE_TYPE (DR_REF (dr));
   2630  1.1  mrg   HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
   2631  1.1  mrg   stmt_vec_info stmt_info = dr_info->stmt;
   2632  1.1  mrg   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   2633  1.1  mrg   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
   2634  1.1  mrg   HOST_WIDE_INT dr_step = -1;
   2635  1.1  mrg   HOST_WIDE_INT groupsize, last_accessed_element = 1;
   2636  1.1  mrg   bool slp_impossible = false;
   2637  1.1  mrg 
   2638  1.1  mrg   /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
   2639  1.1  mrg      size of the interleaving group (including gaps).  */
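  /* E.g. for an access a[3*i] with 4-byte elements DR_STEP is 12 and
     GROUPSIZE below becomes 12 / 4 = 3, counting the two gap elements.  */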
   2640  1.1  mrg   if (tree_fits_shwi_p (step))
   2641  1.1  mrg     {
   2642  1.1  mrg       dr_step = tree_to_shwi (step);
   2643  1.1  mrg       /* Check that STEP is a multiple of type size.  Otherwise there is
   2644  1.1  mrg          a non-element-sized gap at the end of the group which we
   2645  1.1  mrg 	 cannot represent in DR_GROUP_GAP or DR_GROUP_SIZE.
   2646  1.1  mrg 	 ???  As we can handle non-constant step fine here we should
   2647  1.1  mrg 	 simply remove uses of DR_GROUP_GAP between the last and first
   2648  1.1  mrg 	 element and instead rely on DR_STEP.  DR_GROUP_SIZE then would
   2649  1.1  mrg 	 simply not include that gap.  */
   2650  1.1  mrg       if ((dr_step % type_size) != 0)
   2651  1.1  mrg 	{
   2652  1.1  mrg 	  if (dump_enabled_p ())
   2653  1.1  mrg 	    dump_printf_loc (MSG_NOTE, vect_location,
   2654  1.1  mrg 			     "Step %T is not a multiple of the element size"
   2655  1.1  mrg 			     " for %T\n",
   2656  1.1  mrg 			     step, DR_REF (dr));
   2657  1.1  mrg 	  return false;
   2658  1.1  mrg 	}
   2659  1.1  mrg       groupsize = absu_hwi (dr_step) / type_size;
   2660  1.1  mrg     }
   2661  1.1  mrg   else
   2662  1.1  mrg     groupsize = 0;
   2663  1.1  mrg 
    2664  1.1  mrg   /* A non-consecutive access is possible only if it is part of an interleaving group.  */
   2665  1.1  mrg   if (!DR_GROUP_FIRST_ELEMENT (stmt_info))
   2666  1.1  mrg     {
    2667  1.1  mrg       /* Check if this DR is a part of interleaving, and is a single
    2668  1.1  mrg 	 element of the group that is accessed in the loop.  */
   2669  1.1  mrg 
   2670  1.1  mrg       /* Gaps are supported only for loads. STEP must be a multiple of the type
   2671  1.1  mrg 	 size.  */
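      /* Illustration: a lone read of a[4*i] with 4-byte elements gets
	 DR_GROUP_SIZE 4 and DR_GROUP_GAP 3, accounting for the three
	 elements that are skipped in every iteration.  */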
   2672  1.1  mrg       if (DR_IS_READ (dr)
   2673  1.1  mrg 	  && (dr_step % type_size) == 0
   2674  1.1  mrg 	  && groupsize > 0
   2675  1.1  mrg 	  /* This could be UINT_MAX but as we are generating code in a very
   2676  1.1  mrg 	     inefficient way we have to cap earlier.
   2677  1.1  mrg 	     See PR91403 for example.  */
   2678  1.1  mrg 	  && groupsize <= 4096)
   2679  1.1  mrg 	{
   2680  1.1  mrg 	  DR_GROUP_FIRST_ELEMENT (stmt_info) = stmt_info;
   2681  1.1  mrg 	  DR_GROUP_SIZE (stmt_info) = groupsize;
   2682  1.1  mrg 	  DR_GROUP_GAP (stmt_info) = groupsize - 1;
   2683  1.1  mrg 	  if (dump_enabled_p ())
   2684  1.1  mrg 	    dump_printf_loc (MSG_NOTE, vect_location,
   2685  1.1  mrg 			     "Detected single element interleaving %T"
   2686  1.1  mrg 			     " step %T\n",
   2687  1.1  mrg 			     DR_REF (dr), step);
   2688  1.1  mrg 
   2689  1.1  mrg 	  return true;
   2690  1.1  mrg 	}
   2691  1.1  mrg 
   2692  1.1  mrg       if (dump_enabled_p ())
   2693  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   2694  1.1  mrg 			 "not consecutive access %G", stmt_info->stmt);
   2695  1.1  mrg 
   2696  1.1  mrg       if (bb_vinfo)
   2697  1.1  mrg 	{
   2698  1.1  mrg 	  /* Mark the statement as unvectorizable.  */
   2699  1.1  mrg 	  STMT_VINFO_VECTORIZABLE (stmt_info) = false;
   2700  1.1  mrg 	  return true;
   2701  1.1  mrg 	}
   2702  1.1  mrg 
   2703  1.1  mrg       if (dump_enabled_p ())
   2704  1.1  mrg 	dump_printf_loc (MSG_NOTE, vect_location, "using strided accesses\n");
   2705  1.1  mrg       STMT_VINFO_STRIDED_P (stmt_info) = true;
   2706  1.1  mrg       return true;
   2707  1.1  mrg     }
   2708  1.1  mrg 
   2709  1.1  mrg   if (DR_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info)
   2710  1.1  mrg     {
   2711  1.1  mrg       /* First stmt in the interleaving chain. Check the chain.  */
   2712  1.1  mrg       stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (stmt_info);
   2713  1.1  mrg       struct data_reference *data_ref = dr;
   2714  1.1  mrg       unsigned int count = 1;
   2715  1.1  mrg       tree prev_init = DR_INIT (data_ref);
   2716  1.1  mrg       HOST_WIDE_INT diff, gaps = 0;
   2717  1.1  mrg 
   2718  1.1  mrg       /* By construction, all group members have INTEGER_CST DR_INITs.  */
   2719  1.1  mrg       while (next)
   2720  1.1  mrg         {
   2721  1.1  mrg           /* We never have the same DR multiple times.  */
   2722  1.1  mrg           gcc_assert (tree_int_cst_compare (DR_INIT (data_ref),
   2723  1.1  mrg 				DR_INIT (STMT_VINFO_DATA_REF (next))) != 0);
   2724  1.1  mrg 
   2725  1.1  mrg 	  data_ref = STMT_VINFO_DATA_REF (next);
   2726  1.1  mrg 
   2727  1.1  mrg 	  /* All group members have the same STEP by construction.  */
   2728  1.1  mrg 	  gcc_checking_assert (operand_equal_p (DR_STEP (data_ref), step, 0));
   2729  1.1  mrg 
   2730  1.1  mrg           /* Check that the distance between two accesses is equal to the type
   2731  1.1  mrg              size. Otherwise, we have gaps.  */
   2732  1.1  mrg           diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
   2733  1.1  mrg 		  - TREE_INT_CST_LOW (prev_init)) / type_size;
   2734  1.1  mrg 	  if (diff < 1 || diff > UINT_MAX)
   2735  1.1  mrg 	    {
   2736  1.1  mrg 	      /* For artificial testcases with array accesses with large
   2737  1.1  mrg 		 constant indices we can run into overflow issues which
   2738  1.1  mrg 		 can end up fooling the groupsize constraint below so
   2739  1.1  mrg 		 check the individual gaps (which are represented as
   2740  1.1  mrg 		 unsigned int) as well.  */
   2741  1.1  mrg 	      if (dump_enabled_p ())
   2742  1.1  mrg 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   2743  1.1  mrg 				 "interleaved access with gap larger "
   2744  1.1  mrg 				 "than representable\n");
   2745  1.1  mrg 	      return false;
   2746  1.1  mrg 	    }
   2747  1.1  mrg 	  if (diff != 1)
   2748  1.1  mrg 	    {
   2749  1.1  mrg 	      /* FORNOW: SLP of accesses with gaps is not supported.  */
   2750  1.1  mrg 	      slp_impossible = true;
   2751  1.1  mrg 	      if (DR_IS_WRITE (data_ref))
   2752  1.1  mrg 		{
   2753  1.1  mrg                   if (dump_enabled_p ())
   2754  1.1  mrg                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   2755  1.1  mrg                                      "interleaved store with gaps\n");
   2756  1.1  mrg 		  return false;
   2757  1.1  mrg 		}
   2758  1.1  mrg 
   2759  1.1  mrg               gaps += diff - 1;
   2760  1.1  mrg 	    }
   2761  1.1  mrg 
   2762  1.1  mrg 	  last_accessed_element += diff;
   2763  1.1  mrg 
   2764  1.1  mrg           /* Store the gap from the previous member of the group. If there is no
   2765  1.1  mrg              gap in the access, DR_GROUP_GAP is always 1.  */
   2766  1.1  mrg 	  DR_GROUP_GAP (next) = diff;
   2767  1.1  mrg 
   2768  1.1  mrg 	  prev_init = DR_INIT (data_ref);
   2769  1.1  mrg 	  next = DR_GROUP_NEXT_ELEMENT (next);
   2770  1.1  mrg 	  /* Count the number of data-refs in the chain.  */
   2771  1.1  mrg 	  count++;
   2772  1.1  mrg         }
   2773  1.1  mrg 
   2774  1.1  mrg       if (groupsize == 0)
   2775  1.1  mrg         groupsize = count + gaps;
   2776  1.1  mrg 
   2777  1.1  mrg       /* This could be UINT_MAX but as we are generating code in a very
   2778  1.1  mrg          inefficient way we have to cap earlier.  See PR78699 for example.  */
   2779  1.1  mrg       if (groupsize > 4096)
   2780  1.1  mrg 	{
   2781  1.1  mrg 	  if (dump_enabled_p ())
   2782  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   2783  1.1  mrg 			     "group is too large\n");
   2784  1.1  mrg 	  return false;
   2785  1.1  mrg 	}
   2786  1.1  mrg 
   2787  1.1  mrg       /* Check that the size of the interleaving is equal to count for stores,
   2788  1.1  mrg          i.e., that there are no gaps.  */
   2789  1.1  mrg       if (groupsize != count
   2790  1.1  mrg 	  && !DR_IS_READ (dr))
   2791  1.1  mrg         {
   2792  1.1  mrg 	  groupsize = count;
   2793  1.1  mrg 	  STMT_VINFO_STRIDED_P (stmt_info) = true;
   2794  1.1  mrg 	}
   2795  1.1  mrg 
   2796  1.1  mrg       /* If there is a gap after the last load in the group it is the
   2797  1.1  mrg 	 difference between the groupsize and the last accessed
   2798  1.1  mrg 	 element.
   2799  1.1  mrg 	 When there is no gap, this difference should be 0.  */
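      /* E.g. loads of a[4*i] and a[4*i + 1] give GROUPSIZE 4 and a last
	 accessed element of 2, so DR_GROUP_GAP ends up as 4 - 2 = 2.  */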
   2800  1.1  mrg       DR_GROUP_GAP (stmt_info) = groupsize - last_accessed_element;
   2801  1.1  mrg 
   2802  1.1  mrg       DR_GROUP_SIZE (stmt_info) = groupsize;
   2803  1.1  mrg       if (dump_enabled_p ())
   2804  1.1  mrg 	{
   2805  1.1  mrg 	  dump_printf_loc (MSG_NOTE, vect_location,
   2806  1.1  mrg 			   "Detected interleaving ");
   2807  1.1  mrg 	  if (DR_IS_READ (dr))
   2808  1.1  mrg 	    dump_printf (MSG_NOTE, "load ");
   2809  1.1  mrg 	  else if (STMT_VINFO_STRIDED_P (stmt_info))
   2810  1.1  mrg 	    dump_printf (MSG_NOTE, "strided store ");
   2811  1.1  mrg 	  else
   2812  1.1  mrg 	    dump_printf (MSG_NOTE, "store ");
   2813  1.1  mrg 	  dump_printf (MSG_NOTE, "of size %u\n",
   2814  1.1  mrg 		       (unsigned)groupsize);
   2815  1.1  mrg 	  dump_printf_loc (MSG_NOTE, vect_location, "\t%G", stmt_info->stmt);
   2816  1.1  mrg 	  next = DR_GROUP_NEXT_ELEMENT (stmt_info);
   2817  1.1  mrg 	  while (next)
   2818  1.1  mrg 	    {
   2819  1.1  mrg 	      if (DR_GROUP_GAP (next) != 1)
   2820  1.1  mrg 		dump_printf_loc (MSG_NOTE, vect_location,
   2821  1.1  mrg 				 "\t<gap of %d elements>\n",
   2822  1.1  mrg 				 DR_GROUP_GAP (next) - 1);
   2823  1.1  mrg 	      dump_printf_loc (MSG_NOTE, vect_location, "\t%G", next->stmt);
   2824  1.1  mrg 	      next = DR_GROUP_NEXT_ELEMENT (next);
   2825  1.1  mrg 	    }
   2826  1.1  mrg 	  if (DR_GROUP_GAP (stmt_info) != 0)
   2827  1.1  mrg 	    dump_printf_loc (MSG_NOTE, vect_location,
   2828  1.1  mrg 			     "\t<gap of %d elements>\n",
   2829  1.1  mrg 			     DR_GROUP_GAP (stmt_info));
   2830  1.1  mrg 	}
   2831  1.1  mrg 
   2832  1.1  mrg       /* SLP: create an SLP data structure for every interleaving group of
	 stores for further analysis in vect_analyze_slp.  */
   2834  1.1  mrg       if (DR_IS_WRITE (dr) && !slp_impossible)
   2835  1.1  mrg 	{
   2836  1.1  mrg 	  if (loop_vinfo)
   2837  1.1  mrg 	    LOOP_VINFO_GROUPED_STORES (loop_vinfo).safe_push (stmt_info);
   2838  1.1  mrg 	  if (bb_vinfo)
   2839  1.1  mrg 	    BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
   2840  1.1  mrg 	}
   2841  1.1  mrg     }
   2842  1.1  mrg 
   2843  1.1  mrg   return true;
   2844  1.1  mrg }
   2845  1.1  mrg 
   2846  1.1  mrg /* Analyze groups of accesses: check that DR_INFO belongs to a group of
   2847  1.1  mrg    accesses of legal size, step, etc.  Detect gaps, single element
   2848  1.1  mrg    interleaving, and other special cases. Set grouped access info.
   2849  1.1  mrg    Collect groups of strided stores for further use in SLP analysis.  */
   2850  1.1  mrg 
   2851  1.1  mrg static bool
   2852  1.1  mrg vect_analyze_group_access (vec_info *vinfo, dr_vec_info *dr_info)
   2853  1.1  mrg {
   2854  1.1  mrg   if (!vect_analyze_group_access_1 (vinfo, dr_info))
   2855  1.1  mrg     {
   2856  1.1  mrg       /* Dissolve the group if present.  */
   2857  1.1  mrg       stmt_vec_info stmt_info = DR_GROUP_FIRST_ELEMENT (dr_info->stmt);
   2858  1.1  mrg       while (stmt_info)
   2859  1.1  mrg 	{
   2860  1.1  mrg 	  stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (stmt_info);
   2861  1.1  mrg 	  DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
   2862  1.1  mrg 	  DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL;
   2863  1.1  mrg 	  stmt_info = next;
   2864  1.1  mrg 	}
   2865  1.1  mrg       return false;
   2866  1.1  mrg     }
   2867  1.1  mrg   return true;
   2868  1.1  mrg }
   2869  1.1  mrg 
   2870  1.1  mrg /* Analyze the access pattern of the data-reference DR_INFO.
   2871  1.1  mrg    In case of non-consecutive accesses call vect_analyze_group_access() to
   2872  1.1  mrg    analyze groups of accesses.  */
   2873  1.1  mrg 
   2874  1.1  mrg static bool
   2875  1.1  mrg vect_analyze_data_ref_access (vec_info *vinfo, dr_vec_info *dr_info)
   2876  1.1  mrg {
   2877  1.1  mrg   data_reference *dr = dr_info->dr;
   2878  1.1  mrg   tree step = DR_STEP (dr);
   2879  1.1  mrg   tree scalar_type = TREE_TYPE (DR_REF (dr));
   2880  1.1  mrg   stmt_vec_info stmt_info = dr_info->stmt;
   2881  1.1  mrg   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   2882  1.1  mrg   class loop *loop = NULL;
   2883  1.1  mrg 
   2884  1.1  mrg   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
   2885  1.1  mrg     return true;
   2886  1.1  mrg 
   2887  1.1  mrg   if (loop_vinfo)
   2888  1.1  mrg     loop = LOOP_VINFO_LOOP (loop_vinfo);
   2889  1.1  mrg 
   2890  1.1  mrg   if (loop_vinfo && !step)
   2891  1.1  mrg     {
   2892  1.1  mrg       if (dump_enabled_p ())
   2893  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   2894  1.1  mrg 	                 "bad data-ref access in loop\n");
   2895  1.1  mrg       return false;
   2896  1.1  mrg     }
   2897  1.1  mrg 
   2898  1.1  mrg   /* Allow loads with zero step in inner-loop vectorization.  */
   2899  1.1  mrg   if (loop_vinfo && integer_zerop (step))
   2900  1.1  mrg     {
   2901  1.1  mrg       DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
   2902  1.1  mrg       if (!nested_in_vect_loop_p (loop, stmt_info))
   2903  1.1  mrg 	return DR_IS_READ (dr);
   2904  1.1  mrg       /* Allow references with zero step for outer loops marked
   2905  1.1  mrg 	 with pragma omp simd only - it guarantees absence of
   2906  1.1  mrg 	 loop-carried dependencies between inner loop iterations.  */
   2907  1.1  mrg       if (loop->safelen < 2)
   2908  1.1  mrg 	{
   2909  1.1  mrg 	  if (dump_enabled_p ())
   2910  1.1  mrg 	    dump_printf_loc (MSG_NOTE, vect_location,
   2911  1.1  mrg 			     "zero step in inner loop of nest\n");
   2912  1.1  mrg 	  return false;
   2913  1.1  mrg 	}
   2914  1.1  mrg     }
   2915  1.1  mrg 
   2916  1.1  mrg   if (loop && nested_in_vect_loop_p (loop, stmt_info))
   2917  1.1  mrg     {
   2918  1.1  mrg       /* Interleaved accesses are not yet supported within outer-loop
   2919  1.1  mrg         vectorization for references in the inner-loop.  */
   2920  1.1  mrg       DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
   2921  1.1  mrg 
   2922  1.1  mrg       /* For the rest of the analysis we use the outer-loop step.  */
   2923  1.1  mrg       step = STMT_VINFO_DR_STEP (stmt_info);
   2924  1.1  mrg       if (integer_zerop (step))
   2925  1.1  mrg 	{
   2926  1.1  mrg 	  if (dump_enabled_p ())
   2927  1.1  mrg 	    dump_printf_loc (MSG_NOTE, vect_location,
   2928  1.1  mrg 	                     "zero step in outer loop.\n");
   2929  1.1  mrg 	  return DR_IS_READ (dr);
   2930  1.1  mrg 	}
   2931  1.1  mrg     }
   2932  1.1  mrg 
   2933  1.1  mrg   /* Consecutive?  */
   2934  1.1  mrg   if (TREE_CODE (step) == INTEGER_CST)
   2935  1.1  mrg     {
   2936  1.1  mrg       HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
   2937  1.1  mrg       if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
   2938  1.1  mrg 	  || (dr_step < 0
   2939  1.1  mrg 	      && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
   2940  1.1  mrg 	{
   2941  1.1  mrg 	  /* Mark that it is not interleaving.  */
   2942  1.1  mrg 	  DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
   2943  1.1  mrg 	  return true;
   2944  1.1  mrg 	}
   2945  1.1  mrg     }
   2946  1.1  mrg 
   2947  1.1  mrg   if (loop && nested_in_vect_loop_p (loop, stmt_info))
   2948  1.1  mrg     {
   2949  1.1  mrg       if (dump_enabled_p ())
   2950  1.1  mrg 	dump_printf_loc (MSG_NOTE, vect_location,
   2951  1.1  mrg 	                 "grouped access in outer loop.\n");
   2952  1.1  mrg       return false;
   2953  1.1  mrg     }
   2954  1.1  mrg 
   2955  1.1  mrg 
    2956  1.1  mrg   /* Assume this is a DR handled by the non-constant strided load case.  */
   2957  1.1  mrg   if (TREE_CODE (step) != INTEGER_CST)
   2958  1.1  mrg     return (STMT_VINFO_STRIDED_P (stmt_info)
   2959  1.1  mrg 	    && (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
   2960  1.1  mrg 		|| vect_analyze_group_access (vinfo, dr_info)));
   2961  1.1  mrg 
   2962  1.1  mrg   /* Not consecutive access - check if it's a part of interleaving group.  */
   2963  1.1  mrg   return vect_analyze_group_access (vinfo, dr_info);
   2964  1.1  mrg }
   2965  1.1  mrg 
   2966  1.1  mrg /* Compare two data-references DRA and DRB to group them into chunks
   2967  1.1  mrg    suitable for grouping.  */
   2968  1.1  mrg 
   2969  1.1  mrg static int
   2970  1.1  mrg dr_group_sort_cmp (const void *dra_, const void *drb_)
   2971  1.1  mrg {
   2972  1.1  mrg   dr_vec_info *dra_info = *(dr_vec_info **)const_cast<void *>(dra_);
   2973  1.1  mrg   dr_vec_info *drb_info = *(dr_vec_info **)const_cast<void *>(drb_);
   2974  1.1  mrg   data_reference_p dra = dra_info->dr;
   2975  1.1  mrg   data_reference_p drb = drb_info->dr;
   2976  1.1  mrg   int cmp;
   2977  1.1  mrg 
   2978  1.1  mrg   /* Stabilize sort.  */
   2979  1.1  mrg   if (dra == drb)
   2980  1.1  mrg     return 0;
   2981  1.1  mrg 
    2982  1.1  mrg   /* DRs with different group IDs never belong to the same group.  */
   2983  1.1  mrg   if (dra_info->group != drb_info->group)
   2984  1.1  mrg     return dra_info->group < drb_info->group ? -1 : 1;
   2985  1.1  mrg 
   2986  1.1  mrg   /* Ordering of DRs according to base.  */
   2987  1.1  mrg   cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra),
   2988  1.1  mrg 			       DR_BASE_ADDRESS (drb));
   2989  1.1  mrg   if (cmp != 0)
   2990  1.1  mrg     return cmp;
   2991  1.1  mrg 
   2992  1.1  mrg   /* And according to DR_OFFSET.  */
   2993  1.1  mrg   cmp = data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
   2994  1.1  mrg   if (cmp != 0)
   2995  1.1  mrg     return cmp;
   2996  1.1  mrg 
   2997  1.1  mrg   /* Put reads before writes.  */
   2998  1.1  mrg   if (DR_IS_READ (dra) != DR_IS_READ (drb))
   2999  1.1  mrg     return DR_IS_READ (dra) ? -1 : 1;
   3000  1.1  mrg 
    3001  1.1  mrg   /* Then sort by access size.  */
   3002  1.1  mrg   cmp = data_ref_compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
   3003  1.1  mrg 			       TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
   3004  1.1  mrg   if (cmp != 0)
   3005  1.1  mrg     return cmp;
   3006  1.1  mrg 
    3007  1.1  mrg   /* And by step.  */
   3008  1.1  mrg   cmp = data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb));
   3009  1.1  mrg   if (cmp != 0)
   3010  1.1  mrg     return cmp;
   3011  1.1  mrg 
    3012  1.1  mrg   /* Then sort by DR_INIT.  In case of identical DRs sort by stmt UID.  */
   3013  1.1  mrg   cmp = data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb));
   3014  1.1  mrg   if (cmp == 0)
   3015  1.1  mrg     return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
   3016  1.1  mrg   return cmp;
   3017  1.1  mrg }
   3018  1.1  mrg 
   3019  1.1  mrg /* If OP is the result of a conversion, return the unconverted value,
   3020  1.1  mrg    otherwise return null.  */
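/* E.g. for OP defined by _1 = (unsigned int) x_2 this returns x_2, while
   for a non-conversion definition it returns NULL_TREE.  */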
   3021  1.1  mrg 
   3022  1.1  mrg static tree
   3023  1.1  mrg strip_conversion (tree op)
   3024  1.1  mrg {
   3025  1.1  mrg   if (TREE_CODE (op) != SSA_NAME)
   3026  1.1  mrg     return NULL_TREE;
   3027  1.1  mrg   gimple *stmt = SSA_NAME_DEF_STMT (op);
   3028  1.1  mrg   if (!is_gimple_assign (stmt)
   3029  1.1  mrg       || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)))
   3030  1.1  mrg     return NULL_TREE;
   3031  1.1  mrg   return gimple_assign_rhs1 (stmt);
   3032  1.1  mrg }
   3033  1.1  mrg 
   3034  1.1  mrg /* Return true if vectorizable_* routines can handle statements STMT1_INFO
   3035  1.1  mrg    and STMT2_INFO being in a single group.  When ALLOW_SLP_P, masked loads can
   3036  1.1  mrg    be grouped in SLP mode.  */
   3037  1.1  mrg 
   3038  1.1  mrg static bool
   3039  1.1  mrg can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info,
   3040  1.1  mrg 		   bool allow_slp_p)
   3041  1.1  mrg {
   3042  1.1  mrg   if (gimple_assign_single_p (stmt1_info->stmt))
   3043  1.1  mrg     return gimple_assign_single_p (stmt2_info->stmt);
   3044  1.1  mrg 
   3045  1.1  mrg   gcall *call1 = dyn_cast <gcall *> (stmt1_info->stmt);
   3046  1.1  mrg   if (call1 && gimple_call_internal_p (call1))
   3047  1.1  mrg     {
   3048  1.1  mrg       /* Check for two masked loads or two masked stores.  */
   3049  1.1  mrg       gcall *call2 = dyn_cast <gcall *> (stmt2_info->stmt);
   3050  1.1  mrg       if (!call2 || !gimple_call_internal_p (call2))
   3051  1.1  mrg 	return false;
   3052  1.1  mrg       internal_fn ifn = gimple_call_internal_fn (call1);
   3053  1.1  mrg       if (ifn != IFN_MASK_LOAD && ifn != IFN_MASK_STORE)
   3054  1.1  mrg 	return false;
   3055  1.1  mrg       if (ifn != gimple_call_internal_fn (call2))
   3056  1.1  mrg 	return false;
   3057  1.1  mrg 
   3058  1.1  mrg       /* Check that the masks are the same.  Cope with casts of masks,
   3059  1.1  mrg 	 like those created by build_mask_conversion.  */
   3060  1.1  mrg       tree mask1 = gimple_call_arg (call1, 2);
   3061  1.1  mrg       tree mask2 = gimple_call_arg (call2, 2);
   3062  1.1  mrg       if (!operand_equal_p (mask1, mask2, 0)
   3063  1.1  mrg           && (ifn == IFN_MASK_STORE || !allow_slp_p))
   3064  1.1  mrg 	{
   3065  1.1  mrg 	  mask1 = strip_conversion (mask1);
   3066  1.1  mrg 	  if (!mask1)
   3067  1.1  mrg 	    return false;
   3068  1.1  mrg 	  mask2 = strip_conversion (mask2);
   3069  1.1  mrg 	  if (!mask2)
   3070  1.1  mrg 	    return false;
   3071  1.1  mrg 	  if (!operand_equal_p (mask1, mask2, 0))
   3072  1.1  mrg 	    return false;
   3073  1.1  mrg 	}
   3074  1.1  mrg       return true;
   3075  1.1  mrg     }
   3076  1.1  mrg 
   3077  1.1  mrg   return false;
   3078  1.1  mrg }
   3079  1.1  mrg 
   3080  1.1  mrg /* Function vect_analyze_data_ref_accesses.
   3081  1.1  mrg 
   3082  1.1  mrg    Analyze the access pattern of all the data references in the loop.
   3083  1.1  mrg 
   3084  1.1  mrg    FORNOW: the only access pattern that is considered vectorizable is a
   3085  1.1  mrg 	   simple step 1 (consecutive) access.
   3086  1.1  mrg 
   3087  1.1  mrg    FORNOW: handle only arrays and pointer accesses.  */
   3088  1.1  mrg 
   3089  1.1  mrg opt_result
   3090  1.1  mrg vect_analyze_data_ref_accesses (vec_info *vinfo,
   3091  1.1  mrg 				vec<int> *dataref_groups)
   3092  1.1  mrg {
   3093  1.1  mrg   unsigned int i;
   3094  1.1  mrg   vec<data_reference_p> datarefs = vinfo->shared->datarefs;
   3095  1.1  mrg 
   3096  1.1  mrg   DUMP_VECT_SCOPE ("vect_analyze_data_ref_accesses");
   3097  1.1  mrg 
   3098  1.1  mrg   if (datarefs.is_empty ())
   3099  1.1  mrg     return opt_result::success ();
   3100  1.1  mrg 
    3101  1.1  mrg   /* Sort the array of datarefs to make building the interleaving chains
    3102  1.1  mrg      linear.  Don't modify the original vector's order; it is needed for
    3103  1.1  mrg      determining what dependencies are reversed.  */
   3104  1.1  mrg   vec<dr_vec_info *> datarefs_copy;
   3105  1.1  mrg   datarefs_copy.create (datarefs.length ());
   3106  1.1  mrg   for (unsigned i = 0; i < datarefs.length (); i++)
   3107  1.1  mrg     {
   3108  1.1  mrg       dr_vec_info *dr_info = vinfo->lookup_dr (datarefs[i]);
   3109  1.1  mrg       /* If the caller computed DR grouping use that, otherwise group by
   3110  1.1  mrg 	 basic blocks.  */
   3111  1.1  mrg       if (dataref_groups)
   3112  1.1  mrg 	dr_info->group = (*dataref_groups)[i];
   3113  1.1  mrg       else
   3114  1.1  mrg 	dr_info->group = gimple_bb (DR_STMT (datarefs[i]))->index;
   3115  1.1  mrg       datarefs_copy.quick_push (dr_info);
   3116  1.1  mrg     }
   3117  1.1  mrg   datarefs_copy.qsort (dr_group_sort_cmp);
   3118  1.1  mrg   hash_set<stmt_vec_info> to_fixup;
   3119  1.1  mrg 
   3120  1.1  mrg   /* Build the interleaving chains.  */
   3121  1.1  mrg   for (i = 0; i < datarefs_copy.length () - 1;)
   3122  1.1  mrg     {
   3123  1.1  mrg       dr_vec_info *dr_info_a = datarefs_copy[i];
   3124  1.1  mrg       data_reference_p dra = dr_info_a->dr;
   3125  1.1  mrg       int dra_group_id = dr_info_a->group;
   3126  1.1  mrg       stmt_vec_info stmtinfo_a = dr_info_a->stmt;
   3127  1.1  mrg       stmt_vec_info lastinfo = NULL;
   3128  1.1  mrg       if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
   3129  1.1  mrg 	  || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a))
   3130  1.1  mrg 	{
   3131  1.1  mrg 	  ++i;
   3132  1.1  mrg 	  continue;
   3133  1.1  mrg 	}
   3134  1.1  mrg       for (i = i + 1; i < datarefs_copy.length (); ++i)
   3135  1.1  mrg 	{
   3136  1.1  mrg 	  dr_vec_info *dr_info_b = datarefs_copy[i];
   3137  1.1  mrg 	  data_reference_p drb = dr_info_b->dr;
   3138  1.1  mrg 	  int drb_group_id = dr_info_b->group;
   3139  1.1  mrg 	  stmt_vec_info stmtinfo_b = dr_info_b->stmt;
   3140  1.1  mrg 	  if (!STMT_VINFO_VECTORIZABLE (stmtinfo_b)
   3141  1.1  mrg 	      || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
   3142  1.1  mrg 	    break;
   3143  1.1  mrg 
	  /* ???  Imperfect sorting (non-compatible types, non-modulo
	     accesses, same accesses) can lead to a group being artificially
	     split here as we don't just skip over those.  If it really
	     matters we can push those to a worklist and re-iterate
	     over them.  Then we can just skip ahead to the next DR here.  */
   3149  1.1  mrg 
   3150  1.1  mrg 	  /* DRs in a different DR group should not be put into the same
   3151  1.1  mrg 	     interleaving group.  */
   3152  1.1  mrg 	  if (dra_group_id != drb_group_id)
   3153  1.1  mrg 	    break;
   3154  1.1  mrg 
   3155  1.1  mrg 	  /* Check that the data-refs have same first location (except init)
   3156  1.1  mrg 	     and they are both either store or load (not load and store,
   3157  1.1  mrg 	     not masked loads or stores).  */
   3158  1.1  mrg 	  if (DR_IS_READ (dra) != DR_IS_READ (drb)
   3159  1.1  mrg 	      || data_ref_compare_tree (DR_BASE_ADDRESS (dra),
   3160  1.1  mrg 					DR_BASE_ADDRESS (drb)) != 0
   3161  1.1  mrg 	      || data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)) != 0
   3162  1.1  mrg 	      || !can_group_stmts_p (stmtinfo_a, stmtinfo_b, true))
   3163  1.1  mrg 	    break;
   3164  1.1  mrg 
   3165  1.1  mrg 	  /* Check that the data-refs have the same constant size.  */
   3166  1.1  mrg 	  tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
   3167  1.1  mrg 	  tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
   3168  1.1  mrg 	  if (!tree_fits_uhwi_p (sza)
   3169  1.1  mrg 	      || !tree_fits_uhwi_p (szb)
   3170  1.1  mrg 	      || !tree_int_cst_equal (sza, szb))
   3171  1.1  mrg 	    break;
   3172  1.1  mrg 
   3173  1.1  mrg 	  /* Check that the data-refs have the same step.  */
   3174  1.1  mrg 	  if (data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb)) != 0)
   3175  1.1  mrg 	    break;
   3176  1.1  mrg 
   3177  1.1  mrg 	  /* Check the types are compatible.
   3178  1.1  mrg 	     ???  We don't distinguish this during sorting.  */
   3179  1.1  mrg 	  if (!types_compatible_p (TREE_TYPE (DR_REF (dra)),
   3180  1.1  mrg 				   TREE_TYPE (DR_REF (drb))))
   3181  1.1  mrg 	    break;
   3182  1.1  mrg 
   3183  1.1  mrg 	  /* Check that the DR_INITs are compile-time constants.  */
   3184  1.1  mrg 	  if (!tree_fits_shwi_p (DR_INIT (dra))
   3185  1.1  mrg 	      || !tree_fits_shwi_p (DR_INIT (drb)))
   3186  1.1  mrg 	    break;
   3187  1.1  mrg 
   3188  1.1  mrg 	  /* Different .GOMP_SIMD_LANE calls still give the same lane,
   3189  1.1  mrg 	     just hold extra information.  */
   3190  1.1  mrg 	  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_a)
   3191  1.1  mrg 	      && STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_b)
   3192  1.1  mrg 	      && data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb)) == 0)
   3193  1.1  mrg 	    break;
   3194  1.1  mrg 
   3195  1.1  mrg 	  /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb).  */
   3196  1.1  mrg 	  HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
   3197  1.1  mrg 	  HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
   3198  1.1  mrg 	  HOST_WIDE_INT init_prev
   3199  1.1  mrg 	    = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1]->dr));
   3200  1.1  mrg 	  gcc_assert (init_a <= init_b
   3201  1.1  mrg 		      && init_a <= init_prev
   3202  1.1  mrg 		      && init_prev <= init_b);
   3203  1.1  mrg 
   3204  1.1  mrg 	  /* Do not place the same access in the interleaving chain twice.  */
   3205  1.1  mrg 	  if (init_b == init_prev)
   3206  1.1  mrg 	    {
   3207  1.1  mrg 	      gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1]->dr))
   3208  1.1  mrg 			  < gimple_uid (DR_STMT (drb)));
   3209  1.1  mrg 	      /* Simply link in duplicates and fix up the chain below.  */
   3210  1.1  mrg 	    }
   3211  1.1  mrg 	  else
   3212  1.1  mrg 	    {
   3213  1.1  mrg 	      /* If init_b == init_a + the size of the type * k, we have an
   3214  1.1  mrg 		 interleaving, and DRA is accessed before DRB.  */
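	      /* E.g. with 4-byte elements, inits 0 and 8 satisfy this with
		 k = 2, while inits 0 and 6 do not and end the group here.  */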
   3215  1.1  mrg 	      unsigned HOST_WIDE_INT type_size_a = tree_to_uhwi (sza);
   3216  1.1  mrg 	      if (type_size_a == 0
   3217  1.1  mrg 		  || (((unsigned HOST_WIDE_INT)init_b - init_a)
   3218  1.1  mrg 		      % type_size_a != 0))
   3219  1.1  mrg 		break;
   3220  1.1  mrg 
   3221  1.1  mrg 	      /* If we have a store, the accesses are adjacent.  This splits
   3222  1.1  mrg 		 groups into chunks we support (we don't support vectorization
   3223  1.1  mrg 		 of stores with gaps).  */
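	      /* E.g. stores with inits 0 and 8 and 4-byte elements leave a
		 4-byte gap and therefore start a new group here.  */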
   3224  1.1  mrg 	      if (!DR_IS_READ (dra)
   3225  1.1  mrg 		  && (((unsigned HOST_WIDE_INT)init_b - init_prev)
   3226  1.1  mrg 		      != type_size_a))
   3227  1.1  mrg 		break;
   3228  1.1  mrg 
	      /* If the step (when constant and nonzero) is smaller than the
		 difference between the data-refs' inits, this splits groups
		 into suitable sizes.  */
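	      /* E.g. with step 8 and inits 0 and 8, DRB merely repeats
		 DRA's access from the next iteration instead of
		 interleaving within one step, so the group is split.  */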
   3232  1.1  mrg 	      if (tree_fits_shwi_p (DR_STEP (dra)))
   3233  1.1  mrg 		{
   3234  1.1  mrg 		  unsigned HOST_WIDE_INT step
   3235  1.1  mrg 		    = absu_hwi (tree_to_shwi (DR_STEP (dra)));
   3236  1.1  mrg 		  if (step != 0
   3237  1.1  mrg 		      && step <= ((unsigned HOST_WIDE_INT)init_b - init_a))
   3238  1.1  mrg 		    break;
   3239  1.1  mrg 		}
   3240  1.1  mrg 	    }
   3241  1.1  mrg 
   3242  1.1  mrg 	  if (dump_enabled_p ())
   3243  1.1  mrg 	    dump_printf_loc (MSG_NOTE, vect_location,
   3244  1.1  mrg 			     DR_IS_READ (dra)
   3245  1.1  mrg 			     ? "Detected interleaving load %T and %T\n"
   3246  1.1  mrg 			     : "Detected interleaving store %T and %T\n",
   3247  1.1  mrg 			     DR_REF (dra), DR_REF (drb));
   3248  1.1  mrg 
   3249  1.1  mrg 	  /* Link the found element into the group list.  */
   3250  1.1  mrg 	  if (!DR_GROUP_FIRST_ELEMENT (stmtinfo_a))
   3251  1.1  mrg 	    {
   3252  1.1  mrg 	      DR_GROUP_FIRST_ELEMENT (stmtinfo_a) = stmtinfo_a;
   3253  1.1  mrg 	      lastinfo = stmtinfo_a;
   3254  1.1  mrg 	    }
   3255  1.1  mrg 	  DR_GROUP_FIRST_ELEMENT (stmtinfo_b) = stmtinfo_a;
   3256  1.1  mrg 	  DR_GROUP_NEXT_ELEMENT (lastinfo) = stmtinfo_b;
   3257  1.1  mrg 	  lastinfo = stmtinfo_b;
   3258  1.1  mrg 
   3259  1.1  mrg 	  if (! STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a))
   3260  1.1  mrg 	    {
   3261  1.1  mrg 	      STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a)
   3262  1.1  mrg 		= !can_group_stmts_p (stmtinfo_a, stmtinfo_b, false);
   3263  1.1  mrg 
   3264  1.1  mrg 	      if (dump_enabled_p () && STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a))
   3265  1.1  mrg 		dump_printf_loc (MSG_NOTE, vect_location,
   3266  1.1  mrg 				 "Load suitable for SLP vectorization only.\n");
   3267  1.1  mrg 	    }
   3268  1.1  mrg 
   3269  1.1  mrg 	  if (init_b == init_prev
   3270  1.1  mrg 	      && !to_fixup.add (DR_GROUP_FIRST_ELEMENT (stmtinfo_a))
   3271  1.1  mrg 	      && dump_enabled_p ())
   3272  1.1  mrg 	    dump_printf_loc (MSG_NOTE, vect_location,
   3273  1.1  mrg 			     "Queuing group with duplicate access for fixup\n");
   3274  1.1  mrg 	}
   3275  1.1  mrg     }
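
  /* As a concrete illustration of the grouping above: the stores
     a[2*i] = x; a[2*i+1] = y; in one loop iteration share base and
     step and have inits 0 and 4 for 4-byte elements, so they are
     linked into a single interleaving group with the a[2*i] store as
     DR_GROUP_FIRST_ELEMENT.  */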
   3276  1.1  mrg 
    3277  1.1  mrg   /* Fix up groups with duplicate entries by splitting them.  */
   3278  1.1  mrg   while (1)
   3279  1.1  mrg     {
   3280  1.1  mrg       hash_set<stmt_vec_info>::iterator it = to_fixup.begin ();
   3281  1.1  mrg       if (!(it != to_fixup.end ()))
   3282  1.1  mrg 	break;
   3283  1.1  mrg       stmt_vec_info grp = *it;
   3284  1.1  mrg       to_fixup.remove (grp);
   3285  1.1  mrg 
   3286  1.1  mrg       /* Find the earliest duplicate group member.  */
   3287  1.1  mrg       unsigned first_duplicate = -1u;
   3288  1.1  mrg       stmt_vec_info next, g = grp;
   3289  1.1  mrg       while ((next = DR_GROUP_NEXT_ELEMENT (g)))
   3290  1.1  mrg 	{
   3291  1.1  mrg 	  if (tree_int_cst_equal (DR_INIT (STMT_VINFO_DR_INFO (next)->dr),
   3292  1.1  mrg 				  DR_INIT (STMT_VINFO_DR_INFO (g)->dr))
   3293  1.1  mrg 	      && gimple_uid (STMT_VINFO_STMT (next)) < first_duplicate)
   3294  1.1  mrg 	    first_duplicate = gimple_uid (STMT_VINFO_STMT (next));
   3295  1.1  mrg 	  g = next;
   3296  1.1  mrg 	}
   3297  1.1  mrg       if (first_duplicate == -1U)
   3298  1.1  mrg 	continue;
   3299  1.1  mrg 
    3300  1.1  mrg       /* Then move all stmts after the first duplicate to a new group.
    3301  1.1  mrg          Note this is a heuristic, but one with the property that the
    3302  1.1  mrg 	 group GRP is fixed up completely.  */
   3303  1.1  mrg       g = grp;
   3304  1.1  mrg       stmt_vec_info newgroup = NULL, ng = grp;
   3305  1.1  mrg       while ((next = DR_GROUP_NEXT_ELEMENT (g)))
   3306  1.1  mrg 	{
   3307  1.1  mrg 	  if (gimple_uid (STMT_VINFO_STMT (next)) >= first_duplicate)
   3308  1.1  mrg 	    {
   3309  1.1  mrg 	      DR_GROUP_NEXT_ELEMENT (g) = DR_GROUP_NEXT_ELEMENT (next);
   3310  1.1  mrg 	      if (!newgroup)
   3311  1.1  mrg 		{
   3312  1.1  mrg 		  newgroup = next;
   3313  1.1  mrg 		  STMT_VINFO_SLP_VECT_ONLY (newgroup)
   3314  1.1  mrg 		    = STMT_VINFO_SLP_VECT_ONLY (grp);
   3315  1.1  mrg 		}
   3316  1.1  mrg 	      else
   3317  1.1  mrg 		DR_GROUP_NEXT_ELEMENT (ng) = next;
   3318  1.1  mrg 	      ng = next;
   3319  1.1  mrg 	      DR_GROUP_FIRST_ELEMENT (ng) = newgroup;
   3320  1.1  mrg 	    }
   3321  1.1  mrg 	  else
   3322  1.1  mrg 	    g = DR_GROUP_NEXT_ELEMENT (g);
   3323  1.1  mrg 	}
   3324  1.1  mrg       DR_GROUP_NEXT_ELEMENT (ng) = NULL;
   3325  1.1  mrg 
    3326  1.1  mrg       /* Fix up the new group, which may still contain duplicates.  */
   3327  1.1  mrg       to_fixup.add (newgroup);
   3328  1.1  mrg     }
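
  /* For example, a group whose members have DR_INITs 0, 4, 4, 8 in
     program order contains a duplicate at the second "4"; that member
     and everything after it are moved to a new group, giving {0, 4}
     and {4, 8}, and the new group is re-queued in case it still
     contains duplicates of its own.  */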
   3329  1.1  mrg 
   3330  1.1  mrg   dr_vec_info *dr_info;
   3331  1.1  mrg   FOR_EACH_VEC_ELT (datarefs_copy, i, dr_info)
   3332  1.1  mrg     {
   3333  1.1  mrg       if (STMT_VINFO_VECTORIZABLE (dr_info->stmt)
   3334  1.1  mrg 	  && !vect_analyze_data_ref_access (vinfo, dr_info))
   3335  1.1  mrg 	{
   3336  1.1  mrg 	  if (dump_enabled_p ())
   3337  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   3338  1.1  mrg 			     "not vectorized: complicated access pattern.\n");
   3339  1.1  mrg 
   3340  1.1  mrg 	  if (is_a <bb_vec_info> (vinfo))
   3341  1.1  mrg 	    {
   3342  1.1  mrg 	      /* Mark the statement as not vectorizable.  */
   3343  1.1  mrg 	      STMT_VINFO_VECTORIZABLE (dr_info->stmt) = false;
   3344  1.1  mrg 	      continue;
   3345  1.1  mrg 	    }
   3346  1.1  mrg 	  else
   3347  1.1  mrg 	    {
   3348  1.1  mrg 	      datarefs_copy.release ();
   3349  1.1  mrg 	      return opt_result::failure_at (dr_info->stmt->stmt,
   3350  1.1  mrg 					     "not vectorized:"
   3351  1.1  mrg 					     " complicated access pattern.\n");
   3352  1.1  mrg 	    }
   3353  1.1  mrg 	}
   3354  1.1  mrg     }
   3355  1.1  mrg 
   3356  1.1  mrg   datarefs_copy.release ();
   3357  1.1  mrg   return opt_result::success ();
   3358  1.1  mrg }
   3359  1.1  mrg 
   3360  1.1  mrg /* Function vect_vfa_segment_size.
   3361  1.1  mrg 
   3362  1.1  mrg    Input:
   3363  1.1  mrg      DR_INFO: The data reference.
   3364  1.1  mrg      LENGTH_FACTOR: segment length to consider.
   3365  1.1  mrg 
   3366  1.1  mrg    Return a value suitable for the dr_with_seg_len::seg_len field.
   3367  1.1  mrg    This is the "distance travelled" by the pointer from the first
   3368  1.1  mrg    iteration in the segment to the last.  Note that it does not include
   3369  1.1  mrg    the size of the access; in effect it only describes the first byte.  */
   3370  1.1  mrg 
   3371  1.1  mrg static tree
   3372  1.1  mrg vect_vfa_segment_size (dr_vec_info *dr_info, tree length_factor)
   3373  1.1  mrg {
   3374  1.1  mrg   length_factor = size_binop (MINUS_EXPR,
   3375  1.1  mrg 			      fold_convert (sizetype, length_factor),
   3376  1.1  mrg 			      size_one_node);
   3377  1.1  mrg   return size_binop (MULT_EXPR, fold_convert (sizetype, DR_STEP (dr_info->dr)),
   3378  1.1  mrg 		     length_factor);
   3379  1.1  mrg }
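
/* For instance, with a constant DR_STEP of 4 bytes and a LENGTH_FACTOR
   of 8 iterations the function yields 4 * (8 - 1) = 28: the pointer
   travels 28 bytes from the first iteration of the segment to the
   last, with the size of the access itself accounted for separately
   by vect_vfa_access_size.  */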
   3380  1.1  mrg 
   3381  1.1  mrg /* Return a value that, when added to abs (vect_vfa_segment_size (DR_INFO)),
   3382  1.1  mrg    gives the worst-case number of bytes covered by the segment.  */
   3383  1.1  mrg 
   3384  1.1  mrg static unsigned HOST_WIDE_INT
   3385  1.1  mrg vect_vfa_access_size (vec_info *vinfo, dr_vec_info *dr_info)
   3386  1.1  mrg {
   3387  1.1  mrg   stmt_vec_info stmt_vinfo = dr_info->stmt;
   3388  1.1  mrg   tree ref_type = TREE_TYPE (DR_REF (dr_info->dr));
   3389  1.1  mrg   unsigned HOST_WIDE_INT ref_size = tree_to_uhwi (TYPE_SIZE_UNIT (ref_type));
   3390  1.1  mrg   unsigned HOST_WIDE_INT access_size = ref_size;
   3391  1.1  mrg   if (DR_GROUP_FIRST_ELEMENT (stmt_vinfo))
   3392  1.1  mrg     {
   3393  1.1  mrg       gcc_assert (DR_GROUP_FIRST_ELEMENT (stmt_vinfo) == stmt_vinfo);
   3394  1.1  mrg       access_size *= DR_GROUP_SIZE (stmt_vinfo) - DR_GROUP_GAP (stmt_vinfo);
   3395  1.1  mrg     }
   3396  1.1  mrg   tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
   3397  1.1  mrg   int misalignment;
   3398  1.1  mrg   if (STMT_VINFO_VEC_STMTS (stmt_vinfo).exists ()
   3399  1.1  mrg       && ((misalignment = dr_misalignment (dr_info, vectype)), true)
   3400  1.1  mrg       && (vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment)
   3401  1.1  mrg 	  == dr_explicit_realign_optimized))
   3402  1.1  mrg     {
   3403  1.1  mrg       /* We might access a full vector's worth.  */
   3404  1.1  mrg       access_size += tree_to_uhwi (TYPE_SIZE_UNIT (vectype)) - ref_size;
   3405  1.1  mrg     }
   3406  1.1  mrg   return access_size;
   3407  1.1  mrg }
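
/* E.g. for a group of four contiguous 4-byte accesses with a trailing
   gap of one element, the above starts from a REF_SIZE of 4 and scales
   it to 4 * (4 - 1) = 12 bytes; for dr_explicit_realign_optimized
   accesses another TYPE_SIZE_UNIT (vectype) - ref_size bytes are added
   on top of that.  */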
   3408  1.1  mrg 
   3409  1.1  mrg /* Get the minimum alignment for all the scalar accesses that DR_INFO
   3410  1.1  mrg    describes.  */
   3411  1.1  mrg 
   3412  1.1  mrg static unsigned int
   3413  1.1  mrg vect_vfa_align (dr_vec_info *dr_info)
   3414  1.1  mrg {
   3415  1.1  mrg   return dr_alignment (dr_info->dr);
   3416  1.1  mrg }
   3417  1.1  mrg 
    3418  1.1  mrg /* Function vect_compile_time_alias.
   3419  1.1  mrg 
   3420  1.1  mrg    Given data references A and B with equal base and offset, see whether
   3421  1.1  mrg    the alias relation can be decided at compilation time.  Return 1 if
   3422  1.1  mrg    it can and the references alias, 0 if it can and the references do
   3423  1.1  mrg    not alias, and -1 if we cannot decide at compile time.  SEGMENT_LENGTH_A,
   3424  1.1  mrg    SEGMENT_LENGTH_B, ACCESS_SIZE_A and ACCESS_SIZE_B are the equivalent
   3425  1.1  mrg    of dr_with_seg_len::{seg_len,access_size} for A and B.  */
   3426  1.1  mrg 
   3427  1.1  mrg static int
   3428  1.1  mrg vect_compile_time_alias (dr_vec_info *a, dr_vec_info *b,
   3429  1.1  mrg 			 tree segment_length_a, tree segment_length_b,
   3430  1.1  mrg 			 unsigned HOST_WIDE_INT access_size_a,
   3431  1.1  mrg 			 unsigned HOST_WIDE_INT access_size_b)
   3432  1.1  mrg {
   3433  1.1  mrg   poly_offset_int offset_a = wi::to_poly_offset (DR_INIT (a->dr));
   3434  1.1  mrg   poly_offset_int offset_b = wi::to_poly_offset (DR_INIT (b->dr));
   3435  1.1  mrg   poly_uint64 const_length_a;
   3436  1.1  mrg   poly_uint64 const_length_b;
   3437  1.1  mrg 
    3438  1.1  mrg   /* For a negative step, we need to adjust the address range by
    3439  1.1  mrg      TYPE_SIZE_UNIT bytes; e.g., an int access running from a[3] down
    3440  1.1  mrg      to a[1] covers [a+4, a+16) instead of [a, a+12).  */
   3441  1.1  mrg   if (tree_int_cst_compare (DR_STEP (a->dr), size_zero_node) < 0)
   3442  1.1  mrg     {
   3443  1.1  mrg       const_length_a = (-wi::to_poly_wide (segment_length_a)).force_uhwi ();
   3444  1.1  mrg       offset_a -= const_length_a;
   3445  1.1  mrg     }
   3446  1.1  mrg   else
   3447  1.1  mrg     const_length_a = tree_to_poly_uint64 (segment_length_a);
   3448  1.1  mrg   if (tree_int_cst_compare (DR_STEP (b->dr), size_zero_node) < 0)
   3449  1.1  mrg     {
   3450  1.1  mrg       const_length_b = (-wi::to_poly_wide (segment_length_b)).force_uhwi ();
   3451  1.1  mrg       offset_b -= const_length_b;
   3452  1.1  mrg     }
   3453  1.1  mrg   else
   3454  1.1  mrg     const_length_b = tree_to_poly_uint64 (segment_length_b);
   3455  1.1  mrg 
   3456  1.1  mrg   const_length_a += access_size_a;
   3457  1.1  mrg   const_length_b += access_size_b;
   3458  1.1  mrg 
   3459  1.1  mrg   if (ranges_known_overlap_p (offset_a, const_length_a,
   3460  1.1  mrg 			      offset_b, const_length_b))
   3461  1.1  mrg     return 1;
   3462  1.1  mrg 
   3463  1.1  mrg   if (!ranges_maybe_overlap_p (offset_a, const_length_a,
   3464  1.1  mrg 			       offset_b, const_length_b))
   3465  1.1  mrg     return 0;
   3466  1.1  mrg 
   3467  1.1  mrg   return -1;
   3468  1.1  mrg }
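
/* A worked example: two data-refs with forward steps, DR_INITs 0 and
   16, segment lengths of 12 and access sizes of 4 cover the byte
   ranges [0, 16) and [16, 32), so the function returns 0 (no alias);
   lowering the second init to 8 would make the ranges overlap at
   compile time and yield 1 instead.  */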
   3469  1.1  mrg 
   3470  1.1  mrg /* Return true if the minimum nonzero dependence distance for loop LOOP_DEPTH
   3471  1.1  mrg    in DDR is >= VF.  */
   3472  1.1  mrg 
   3473  1.1  mrg static bool
   3474  1.1  mrg dependence_distance_ge_vf (data_dependence_relation *ddr,
   3475  1.1  mrg 			   unsigned int loop_depth, poly_uint64 vf)
   3476  1.1  mrg {
   3477  1.1  mrg   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE
   3478  1.1  mrg       || DDR_NUM_DIST_VECTS (ddr) == 0)
   3479  1.1  mrg     return false;
   3480  1.1  mrg 
   3481  1.1  mrg   /* If the dependence is exact, we should have limited the VF instead.  */
   3482  1.1  mrg   gcc_checking_assert (DDR_COULD_BE_INDEPENDENT_P (ddr));
   3483  1.1  mrg 
   3484  1.1  mrg   unsigned int i;
   3485  1.1  mrg   lambda_vector dist_v;
   3486  1.1  mrg   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
   3487  1.1  mrg     {
   3488  1.1  mrg       HOST_WIDE_INT dist = dist_v[loop_depth];
   3489  1.1  mrg       if (dist != 0
   3490  1.1  mrg 	  && !(dist > 0 && DDR_REVERSED_P (ddr))
   3491  1.1  mrg 	  && maybe_lt ((unsigned HOST_WIDE_INT) abs_hwi (dist), vf))
   3492  1.1  mrg 	return false;
   3493  1.1  mrg     }
   3494  1.1  mrg 
   3495  1.1  mrg   if (dump_enabled_p ())
   3496  1.1  mrg     dump_printf_loc (MSG_NOTE, vect_location,
   3497  1.1  mrg 		     "dependence distance between %T and %T is >= VF\n",
   3498  1.1  mrg 		     DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
   3499  1.1  mrg 
   3500  1.1  mrg   return true;
   3501  1.1  mrg }
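
/* For instance, if every distance vector entry at LOOP_DEPTH is 8 and
   the chosen VF is 4, all dependent accesses stay at least 8 scalar
   iterations apart, so the caller can drop the runtime alias check
   for this DDR.  */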
   3502  1.1  mrg 
   3503  1.1  mrg /* Dump LOWER_BOUND using flags DUMP_KIND.  Dumps are known to be enabled.  */
   3504  1.1  mrg 
   3505  1.1  mrg static void
   3506  1.1  mrg dump_lower_bound (dump_flags_t dump_kind, const vec_lower_bound &lower_bound)
   3507  1.1  mrg {
   3508  1.1  mrg   dump_printf (dump_kind, "%s (%T) >= ",
   3509  1.1  mrg 	       lower_bound.unsigned_p ? "unsigned" : "abs",
   3510  1.1  mrg 	       lower_bound.expr);
   3511  1.1  mrg   dump_dec (dump_kind, lower_bound.min_value);
   3512  1.1  mrg }
   3513  1.1  mrg 
   3514  1.1  mrg /* Record that the vectorized loop requires the vec_lower_bound described
   3515  1.1  mrg    by EXPR, UNSIGNED_P and MIN_VALUE.  */
   3516  1.1  mrg 
   3517  1.1  mrg static void
   3518  1.1  mrg vect_check_lower_bound (loop_vec_info loop_vinfo, tree expr, bool unsigned_p,
   3519  1.1  mrg 			poly_uint64 min_value)
   3520  1.1  mrg {
   3521  1.1  mrg   vec<vec_lower_bound> &lower_bounds
   3522  1.1  mrg     = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo);
   3523  1.1  mrg   for (unsigned int i = 0; i < lower_bounds.length (); ++i)
   3524  1.1  mrg     if (operand_equal_p (lower_bounds[i].expr, expr, 0))
   3525  1.1  mrg       {
   3526  1.1  mrg 	unsigned_p &= lower_bounds[i].unsigned_p;
   3527  1.1  mrg 	min_value = upper_bound (lower_bounds[i].min_value, min_value);
   3528  1.1  mrg 	if (lower_bounds[i].unsigned_p != unsigned_p
   3529  1.1  mrg 	    || maybe_lt (lower_bounds[i].min_value, min_value))
   3530  1.1  mrg 	  {
   3531  1.1  mrg 	    lower_bounds[i].unsigned_p = unsigned_p;
   3532  1.1  mrg 	    lower_bounds[i].min_value = min_value;
   3533  1.1  mrg 	    if (dump_enabled_p ())
   3534  1.1  mrg 	      {
   3535  1.1  mrg 		dump_printf_loc (MSG_NOTE, vect_location,
   3536  1.1  mrg 				 "updating run-time check to ");
   3537  1.1  mrg 		dump_lower_bound (MSG_NOTE, lower_bounds[i]);
   3538  1.1  mrg 		dump_printf (MSG_NOTE, "\n");
   3539  1.1  mrg 	      }
   3540  1.1  mrg 	  }
   3541  1.1  mrg 	return;
   3542  1.1  mrg       }
   3543  1.1  mrg 
   3544  1.1  mrg   vec_lower_bound lower_bound (expr, unsigned_p, min_value);
   3545  1.1  mrg   if (dump_enabled_p ())
   3546  1.1  mrg     {
   3547  1.1  mrg       dump_printf_loc (MSG_NOTE, vect_location, "need a run-time check that ");
   3548  1.1  mrg       dump_lower_bound (MSG_NOTE, lower_bound);
   3549  1.1  mrg       dump_printf (MSG_NOTE, "\n");
   3550  1.1  mrg     }
   3551  1.1  mrg   LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).safe_push (lower_bound);
   3552  1.1  mrg }
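
/* E.g. if "abs (x) >= 4" was recorded earlier and "unsigned (x) >= 8"
   is requested for the same X, the existing entry is merged to the
   weaker signedness and the stronger bound, becoming "abs (x) >= 8";
   only one run-time test per distinct EXPR is kept.  */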
   3553  1.1  mrg 
   3554  1.1  mrg /* Return true if it's unlikely that the step of the vectorized form of DR_INFO
   3555  1.1  mrg    will span fewer than GAP bytes.  */
   3556  1.1  mrg 
   3557  1.1  mrg static bool
   3558  1.1  mrg vect_small_gap_p (loop_vec_info loop_vinfo, dr_vec_info *dr_info,
   3559  1.1  mrg 		  poly_int64 gap)
   3560  1.1  mrg {
   3561  1.1  mrg   stmt_vec_info stmt_info = dr_info->stmt;
   3562  1.1  mrg   HOST_WIDE_INT count
   3563  1.1  mrg     = estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
   3564  1.1  mrg   if (DR_GROUP_FIRST_ELEMENT (stmt_info))
   3565  1.1  mrg     count *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
   3566  1.1  mrg   return (estimated_poly_value (gap)
   3567  1.1  mrg 	  <= count * vect_get_scalar_dr_size (dr_info));
   3568  1.1  mrg }
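
/* As a rough illustration: with an estimated VF of 4, no grouping and
   4-byte scalar accesses, any gap of up to 16 bytes counts as small,
   i.e. the vectorized step is unlikely to span fewer bytes than the
   gap.  */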
   3569  1.1  mrg 
   3570  1.1  mrg /* Return true if we know that there is no alias between DR_INFO_A and
   3571  1.1  mrg    DR_INFO_B when abs (DR_STEP (DR_INFO_A->dr)) >= N for some N.
   3572  1.1  mrg    When returning true, set *LOWER_BOUND_OUT to this N.  */
   3573  1.1  mrg 
   3574  1.1  mrg static bool
   3575  1.1  mrg vectorizable_with_step_bound_p (dr_vec_info *dr_info_a, dr_vec_info *dr_info_b,
   3576  1.1  mrg 				poly_uint64 *lower_bound_out)
   3577  1.1  mrg {
   3578  1.1  mrg   /* Check that there is a constant gap of known sign between DR_A
   3579  1.1  mrg      and DR_B.  */
   3580  1.1  mrg   data_reference *dr_a = dr_info_a->dr;
   3581  1.1  mrg   data_reference *dr_b = dr_info_b->dr;
   3582  1.1  mrg   poly_int64 init_a, init_b;
   3583  1.1  mrg   if (!operand_equal_p (DR_BASE_ADDRESS (dr_a), DR_BASE_ADDRESS (dr_b), 0)
   3584  1.1  mrg       || !operand_equal_p (DR_OFFSET (dr_a), DR_OFFSET (dr_b), 0)
   3585  1.1  mrg       || !operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0)
   3586  1.1  mrg       || !poly_int_tree_p (DR_INIT (dr_a), &init_a)
   3587  1.1  mrg       || !poly_int_tree_p (DR_INIT (dr_b), &init_b)
   3588  1.1  mrg       || !ordered_p (init_a, init_b))
   3589  1.1  mrg     return false;
   3590  1.1  mrg 
   3591  1.1  mrg   /* Sort DR_A and DR_B by the address they access.  */
   3592  1.1  mrg   if (maybe_lt (init_b, init_a))
   3593  1.1  mrg     {
   3594  1.1  mrg       std::swap (init_a, init_b);
   3595  1.1  mrg       std::swap (dr_info_a, dr_info_b);
   3596  1.1  mrg       std::swap (dr_a, dr_b);
   3597  1.1  mrg     }
   3598  1.1  mrg 
   3599  1.1  mrg   /* If the two accesses could be dependent within a scalar iteration,
   3600  1.1  mrg      make sure that we'd retain their order.  */
   3601  1.1  mrg   if (maybe_gt (init_a + vect_get_scalar_dr_size (dr_info_a), init_b)
   3602  1.1  mrg       && !vect_preserves_scalar_order_p (dr_info_a, dr_info_b))
   3603  1.1  mrg     return false;
   3604  1.1  mrg 
   3605  1.1  mrg   /* There is no alias if abs (DR_STEP) is greater than or equal to
   3606  1.1  mrg      the bytes spanned by the combination of the two accesses.  */
   3607  1.1  mrg   *lower_bound_out = init_b + vect_get_scalar_dr_size (dr_info_b) - init_a;
   3608  1.1  mrg   return true;
   3609  1.1  mrg }
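
/* Consider accesses a[i] and a[i+1] with 4-byte elements: base, offset
   and step all match, INIT_A is 0 and INIT_B is 4, so *LOWER_BOUND_OUT
   becomes 4 + 4 - 0 = 8 and the accesses cannot alias once
   abs (DR_STEP) >= 8.  */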
   3610  1.1  mrg 
   3611  1.1  mrg /* Function vect_prune_runtime_alias_test_list.
   3612  1.1  mrg 
   3613  1.1  mrg    Prune a list of ddrs to be tested at run-time by versioning for alias.
   3614  1.1  mrg    Merge several alias checks into one if possible.
    3615  1.1  mrg    Return FALSE if the resulting list of ddrs is longer than allowed by
    3616  1.1  mrg    PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE.  */
   3617  1.1  mrg 
   3618  1.1  mrg opt_result
   3619  1.1  mrg vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
   3620  1.1  mrg {
   3621  1.1  mrg   typedef pair_hash <tree_operand_hash, tree_operand_hash> tree_pair_hash;
   3622  1.1  mrg   hash_set <tree_pair_hash> compared_objects;
   3623  1.1  mrg 
   3624  1.1  mrg   const vec<ddr_p> &may_alias_ddrs = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
   3625  1.1  mrg   vec<dr_with_seg_len_pair_t> &comp_alias_ddrs
   3626  1.1  mrg     = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
   3627  1.1  mrg   const vec<vec_object_pair> &check_unequal_addrs
   3628  1.1  mrg     = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
   3629  1.1  mrg   poly_uint64 vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
   3630  1.1  mrg   tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
   3631  1.1  mrg 
   3632  1.1  mrg   ddr_p ddr;
   3633  1.1  mrg   unsigned int i;
   3634  1.1  mrg   tree length_factor;
   3635  1.1  mrg 
   3636  1.1  mrg   DUMP_VECT_SCOPE ("vect_prune_runtime_alias_test_list");
   3637  1.1  mrg 
   3638  1.1  mrg   /* Step values are irrelevant for aliasing if the number of vector
   3639  1.1  mrg      iterations is equal to the number of scalar iterations (which can
   3640  1.1  mrg      happen for fully-SLP loops).  */
   3641  1.1  mrg   bool vf_one_p = known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 1U);
   3642  1.1  mrg 
   3643  1.1  mrg   if (!vf_one_p)
   3644  1.1  mrg     {
   3645  1.1  mrg       /* Convert the checks for nonzero steps into bound tests.  */
   3646  1.1  mrg       tree value;
   3647  1.1  mrg       FOR_EACH_VEC_ELT (LOOP_VINFO_CHECK_NONZERO (loop_vinfo), i, value)
   3648  1.1  mrg 	vect_check_lower_bound (loop_vinfo, value, true, 1);
   3649  1.1  mrg     }
   3650  1.1  mrg 
   3651  1.1  mrg   if (may_alias_ddrs.is_empty ())
   3652  1.1  mrg     return opt_result::success ();
   3653  1.1  mrg 
   3654  1.1  mrg   comp_alias_ddrs.create (may_alias_ddrs.length ());
   3655  1.1  mrg 
   3656  1.1  mrg   unsigned int loop_depth
   3657  1.1  mrg     = index_in_loop_nest (LOOP_VINFO_LOOP (loop_vinfo)->num,
   3658  1.1  mrg 			  LOOP_VINFO_LOOP_NEST (loop_vinfo));
   3659  1.1  mrg 
   3660  1.1  mrg   /* First, we collect all data ref pairs for aliasing checks.  */
   3661  1.1  mrg   FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
   3662  1.1  mrg     {
   3663  1.1  mrg       poly_uint64 lower_bound;
   3664  1.1  mrg       tree segment_length_a, segment_length_b;
   3665  1.1  mrg       unsigned HOST_WIDE_INT access_size_a, access_size_b;
   3666  1.1  mrg       unsigned HOST_WIDE_INT align_a, align_b;
   3667  1.1  mrg 
   3668  1.1  mrg       /* Ignore the alias if the VF we chose ended up being no greater
   3669  1.1  mrg 	 than the dependence distance.  */
   3670  1.1  mrg       if (dependence_distance_ge_vf (ddr, loop_depth, vect_factor))
   3671  1.1  mrg 	continue;
   3672  1.1  mrg 
   3673  1.1  mrg       if (DDR_OBJECT_A (ddr))
   3674  1.1  mrg 	{
   3675  1.1  mrg 	  vec_object_pair new_pair (DDR_OBJECT_A (ddr), DDR_OBJECT_B (ddr));
   3676  1.1  mrg 	  if (!compared_objects.add (new_pair))
   3677  1.1  mrg 	    {
   3678  1.1  mrg 	      if (dump_enabled_p ())
   3679  1.1  mrg 		dump_printf_loc (MSG_NOTE, vect_location,
   3680  1.1  mrg 				 "checking that %T and %T"
   3681  1.1  mrg 				 " have different addresses\n",
   3682  1.1  mrg 				 new_pair.first, new_pair.second);
   3683  1.1  mrg 	      LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).safe_push (new_pair);
   3684  1.1  mrg 	    }
   3685  1.1  mrg 	  continue;
   3686  1.1  mrg 	}
   3687  1.1  mrg 
   3688  1.1  mrg       dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (DDR_A (ddr));
   3689  1.1  mrg       stmt_vec_info stmt_info_a = dr_info_a->stmt;
   3690  1.1  mrg 
   3691  1.1  mrg       dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr));
   3692  1.1  mrg       stmt_vec_info stmt_info_b = dr_info_b->stmt;
   3693  1.1  mrg 
   3694  1.1  mrg       bool preserves_scalar_order_p
   3695  1.1  mrg 	= vect_preserves_scalar_order_p (dr_info_a, dr_info_b);
   3696  1.1  mrg       bool ignore_step_p
   3697  1.1  mrg 	  = (vf_one_p
   3698  1.1  mrg 	     && (preserves_scalar_order_p
   3699  1.1  mrg 		 || operand_equal_p (DR_STEP (dr_info_a->dr),
   3700  1.1  mrg 				     DR_STEP (dr_info_b->dr))));
   3701  1.1  mrg 
   3702  1.1  mrg       /* Skip the pair if inter-iteration dependencies are irrelevant
   3703  1.1  mrg 	 and intra-iteration dependencies are guaranteed to be honored.  */
   3704  1.1  mrg       if (ignore_step_p
   3705  1.1  mrg 	  && (preserves_scalar_order_p
   3706  1.1  mrg 	      || vectorizable_with_step_bound_p (dr_info_a, dr_info_b,
   3707  1.1  mrg 						 &lower_bound)))
   3708  1.1  mrg 	{
   3709  1.1  mrg 	  if (dump_enabled_p ())
   3710  1.1  mrg 	    dump_printf_loc (MSG_NOTE, vect_location,
   3711  1.1  mrg 			     "no need for alias check between "
   3712  1.1  mrg 			     "%T and %T when VF is 1\n",
   3713  1.1  mrg 			     DR_REF (dr_info_a->dr), DR_REF (dr_info_b->dr));
   3714  1.1  mrg 	  continue;
   3715  1.1  mrg 	}
   3716  1.1  mrg 
   3717  1.1  mrg       /* See whether we can handle the alias using a bounds check on
   3718  1.1  mrg 	 the step, and whether that's likely to be the best approach.
   3719  1.1  mrg 	 (It might not be, for example, if the minimum step is much larger
   3720  1.1  mrg 	 than the number of bytes handled by one vector iteration.)  */
   3721  1.1  mrg       if (!ignore_step_p
   3722  1.1  mrg 	  && TREE_CODE (DR_STEP (dr_info_a->dr)) != INTEGER_CST
   3723  1.1  mrg 	  && vectorizable_with_step_bound_p (dr_info_a, dr_info_b,
   3724  1.1  mrg 					     &lower_bound)
   3725  1.1  mrg 	  && (vect_small_gap_p (loop_vinfo, dr_info_a, lower_bound)
   3726  1.1  mrg 	      || vect_small_gap_p (loop_vinfo, dr_info_b, lower_bound)))
   3727  1.1  mrg 	{
   3728  1.1  mrg 	  bool unsigned_p = dr_known_forward_stride_p (dr_info_a->dr);
   3729  1.1  mrg 	  if (dump_enabled_p ())
   3730  1.1  mrg 	    {
   3731  1.1  mrg 	      dump_printf_loc (MSG_NOTE, vect_location, "no alias between "
   3732  1.1  mrg 			       "%T and %T when the step %T is outside ",
   3733  1.1  mrg 			       DR_REF (dr_info_a->dr),
   3734  1.1  mrg 			       DR_REF (dr_info_b->dr),
   3735  1.1  mrg 			       DR_STEP (dr_info_a->dr));
   3736  1.1  mrg 	      if (unsigned_p)
   3737  1.1  mrg 		dump_printf (MSG_NOTE, "[0");
   3738  1.1  mrg 	      else
   3739  1.1  mrg 		{
   3740  1.1  mrg 		  dump_printf (MSG_NOTE, "(");
   3741  1.1  mrg 		  dump_dec (MSG_NOTE, poly_int64 (-lower_bound));
   3742  1.1  mrg 		}
   3743  1.1  mrg 	      dump_printf (MSG_NOTE, ", ");
   3744  1.1  mrg 	      dump_dec (MSG_NOTE, lower_bound);
   3745  1.1  mrg 	      dump_printf (MSG_NOTE, ")\n");
   3746  1.1  mrg 	    }
   3747  1.1  mrg 	  vect_check_lower_bound (loop_vinfo, DR_STEP (dr_info_a->dr),
   3748  1.1  mrg 				  unsigned_p, lower_bound);
   3749  1.1  mrg 	  continue;
   3750  1.1  mrg 	}
   3751  1.1  mrg 
   3752  1.1  mrg       stmt_vec_info dr_group_first_a = DR_GROUP_FIRST_ELEMENT (stmt_info_a);
   3753  1.1  mrg       if (dr_group_first_a)
   3754  1.1  mrg 	{
   3755  1.1  mrg 	  stmt_info_a = dr_group_first_a;
   3756  1.1  mrg 	  dr_info_a = STMT_VINFO_DR_INFO (stmt_info_a);
   3757  1.1  mrg 	}
   3758  1.1  mrg 
   3759  1.1  mrg       stmt_vec_info dr_group_first_b = DR_GROUP_FIRST_ELEMENT (stmt_info_b);
   3760  1.1  mrg       if (dr_group_first_b)
   3761  1.1  mrg 	{
   3762  1.1  mrg 	  stmt_info_b = dr_group_first_b;
   3763  1.1  mrg 	  dr_info_b = STMT_VINFO_DR_INFO (stmt_info_b);
   3764  1.1  mrg 	}
   3765  1.1  mrg 
   3766  1.1  mrg       if (ignore_step_p)
   3767  1.1  mrg 	{
   3768  1.1  mrg 	  segment_length_a = size_zero_node;
   3769  1.1  mrg 	  segment_length_b = size_zero_node;
   3770  1.1  mrg 	}
   3771  1.1  mrg       else
   3772  1.1  mrg 	{
   3773  1.1  mrg 	  if (!operand_equal_p (DR_STEP (dr_info_a->dr),
   3774  1.1  mrg 				DR_STEP (dr_info_b->dr), 0))
   3775  1.1  mrg 	    length_factor = scalar_loop_iters;
   3776  1.1  mrg 	  else
   3777  1.1  mrg 	    length_factor = size_int (vect_factor);
   3778  1.1  mrg 	  segment_length_a = vect_vfa_segment_size (dr_info_a, length_factor);
   3779  1.1  mrg 	  segment_length_b = vect_vfa_segment_size (dr_info_b, length_factor);
   3780  1.1  mrg 	}
   3781  1.1  mrg       access_size_a = vect_vfa_access_size (loop_vinfo, dr_info_a);
   3782  1.1  mrg       access_size_b = vect_vfa_access_size (loop_vinfo, dr_info_b);
   3783  1.1  mrg       align_a = vect_vfa_align (dr_info_a);
   3784  1.1  mrg       align_b = vect_vfa_align (dr_info_b);
   3785  1.1  mrg 
   3786  1.1  mrg       /* See whether the alias is known at compilation time.  */
   3787  1.1  mrg       if (operand_equal_p (DR_BASE_ADDRESS (dr_info_a->dr),
   3788  1.1  mrg 			   DR_BASE_ADDRESS (dr_info_b->dr), 0)
   3789  1.1  mrg 	  && operand_equal_p (DR_OFFSET (dr_info_a->dr),
   3790  1.1  mrg 			      DR_OFFSET (dr_info_b->dr), 0)
   3791  1.1  mrg 	  && TREE_CODE (DR_STEP (dr_info_a->dr)) == INTEGER_CST
   3792  1.1  mrg 	  && TREE_CODE (DR_STEP (dr_info_b->dr)) == INTEGER_CST
   3793  1.1  mrg 	  && poly_int_tree_p (segment_length_a)
   3794  1.1  mrg 	  && poly_int_tree_p (segment_length_b))
   3795  1.1  mrg 	{
   3796  1.1  mrg 	  int res = vect_compile_time_alias (dr_info_a, dr_info_b,
   3797  1.1  mrg 					     segment_length_a,
   3798  1.1  mrg 					     segment_length_b,
   3799  1.1  mrg 					     access_size_a,
   3800  1.1  mrg 					     access_size_b);
   3801  1.1  mrg 	  if (res >= 0 && dump_enabled_p ())
   3802  1.1  mrg 	    {
   3803  1.1  mrg 	      dump_printf_loc (MSG_NOTE, vect_location,
   3804  1.1  mrg 			       "can tell at compile time that %T and %T",
   3805  1.1  mrg 			       DR_REF (dr_info_a->dr), DR_REF (dr_info_b->dr));
   3806  1.1  mrg 	      if (res == 0)
   3807  1.1  mrg 		dump_printf (MSG_NOTE, " do not alias\n");
   3808  1.1  mrg 	      else
   3809  1.1  mrg 		dump_printf (MSG_NOTE, " alias\n");
   3810  1.1  mrg 	    }
   3811  1.1  mrg 
   3812  1.1  mrg 	  if (res == 0)
   3813  1.1  mrg 	    continue;
   3814  1.1  mrg 
   3815  1.1  mrg 	  if (res == 1)
   3816  1.1  mrg 	    return opt_result::failure_at (stmt_info_b->stmt,
   3817  1.1  mrg 					   "not vectorized:"
   3818  1.1  mrg 					   " compilation time alias: %G%G",
   3819  1.1  mrg 					   stmt_info_a->stmt,
   3820  1.1  mrg 					   stmt_info_b->stmt);
   3821  1.1  mrg 	}
   3822  1.1  mrg 
   3823  1.1  mrg       /* dr_with_seg_len requires the alignment to apply to the segment length
   3824  1.1  mrg 	 and access size, not just the start address.  The access size can be
   3825  1.1  mrg 	 smaller than the pointer alignment for grouped accesses and bitfield
   3826  1.1  mrg 	 references; see PR115192 and PR116125 respectively.  */
   3827  1.1  mrg       align_a = std::min (align_a, least_bit_hwi (access_size_a));
   3828  1.1  mrg       align_b = std::min (align_b, least_bit_hwi (access_size_b));
   3829  1.1  mrg 
   3830  1.1  mrg       dr_with_seg_len dr_a (dr_info_a->dr, segment_length_a,
   3831  1.1  mrg 			    access_size_a, align_a);
   3832  1.1  mrg       dr_with_seg_len dr_b (dr_info_b->dr, segment_length_b,
   3833  1.1  mrg 			    access_size_b, align_b);
   3834  1.1  mrg       /* Canonicalize the order to be the one that's needed for accurate
   3835  1.1  mrg 	 RAW, WAR and WAW flags, in cases where the data references are
   3836  1.1  mrg 	 well-ordered.  The order doesn't really matter otherwise,
   3837  1.1  mrg 	 but we might as well be consistent.  */
   3838  1.1  mrg       if (get_later_stmt (stmt_info_a, stmt_info_b) == stmt_info_a)
   3839  1.1  mrg 	std::swap (dr_a, dr_b);
   3840  1.1  mrg 
   3841  1.1  mrg       dr_with_seg_len_pair_t dr_with_seg_len_pair
   3842  1.1  mrg 	(dr_a, dr_b, (preserves_scalar_order_p
   3843  1.1  mrg 		      ? dr_with_seg_len_pair_t::WELL_ORDERED
   3844  1.1  mrg 		      : dr_with_seg_len_pair_t::REORDERED));
   3845  1.1  mrg 
   3846  1.1  mrg       comp_alias_ddrs.safe_push (dr_with_seg_len_pair);
   3847  1.1  mrg     }
   3848  1.1  mrg 
   3849  1.1  mrg   prune_runtime_alias_test_list (&comp_alias_ddrs, vect_factor);
   3850  1.1  mrg 
   3851  1.1  mrg   unsigned int count = (comp_alias_ddrs.length ()
   3852  1.1  mrg 			+ check_unequal_addrs.length ());
   3853  1.1  mrg 
   3854  1.1  mrg   if (count
   3855  1.1  mrg       && (loop_cost_model (LOOP_VINFO_LOOP (loop_vinfo))
   3856  1.1  mrg 	  == VECT_COST_MODEL_VERY_CHEAP))
   3857  1.1  mrg     return opt_result::failure_at
   3858  1.1  mrg       (vect_location, "would need a runtime alias check\n");
   3859  1.1  mrg 
   3860  1.1  mrg   if (dump_enabled_p ())
   3861  1.1  mrg     dump_printf_loc (MSG_NOTE, vect_location,
   3862  1.1  mrg 		     "improved number of alias checks from %d to %d\n",
   3863  1.1  mrg 		     may_alias_ddrs.length (), count);
   3864  1.1  mrg   unsigned limit = param_vect_max_version_for_alias_checks;
   3865  1.1  mrg   if (loop_cost_model (LOOP_VINFO_LOOP (loop_vinfo)) == VECT_COST_MODEL_CHEAP)
   3866  1.1  mrg     limit = param_vect_max_version_for_alias_checks * 6 / 10;
   3867  1.1  mrg   if (count > limit)
   3868  1.1  mrg     return opt_result::failure_at
   3869  1.1  mrg       (vect_location,
   3870  1.1  mrg        "number of versioning for alias run-time tests exceeds %d "
   3871  1.1  mrg        "(--param vect-max-version-for-alias-checks)\n", limit);
   3872  1.1  mrg 
   3873  1.1  mrg   return opt_result::success ();
   3874  1.1  mrg }
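
/* A typical pair that survives into COMP_ALIAS_DDRS comes from a loop
   such as

     void
     f (int *a, int *b, int n)
     {
       for (int i = 0; i < n; i++)
	 a[i] = b[i] + 1;
     }

   where nothing relates A and B at compile time; the vectorized loop
   is then only entered after a run-time test that the two accessed
   segments do not overlap.  */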
   3875  1.1  mrg 
   3876  1.1  mrg /* Check whether we can use an internal function for a gather load
   3877  1.1  mrg    or scatter store.  READ_P is true for loads and false for stores.
   3878  1.1  mrg    MASKED_P is true if the load or store is conditional.  MEMORY_TYPE is
   3879  1.1  mrg    the type of the memory elements being loaded or stored.  OFFSET_TYPE
   3880  1.1  mrg    is the type of the offset that is being applied to the invariant
   3881  1.1  mrg    base address.  SCALE is the amount by which the offset should
   3882  1.1  mrg    be multiplied *after* it has been converted to address width.
   3883  1.1  mrg 
   3884  1.1  mrg    Return true if the function is supported, storing the function id in
   3885  1.1  mrg    *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT.  */
   3886  1.1  mrg 
   3887  1.1  mrg bool
   3888  1.1  mrg vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
   3889  1.1  mrg 			  tree vectype, tree memory_type, tree offset_type,
   3890  1.1  mrg 			  int scale, internal_fn *ifn_out,
   3891  1.1  mrg 			  tree *offset_vectype_out)
   3892  1.1  mrg {
   3893  1.1  mrg   unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
   3894  1.1  mrg   unsigned int element_bits = vector_element_bits (vectype);
   3895  1.1  mrg   if (element_bits != memory_bits)
   3896  1.1  mrg     /* For now the vector elements must be the same width as the
   3897  1.1  mrg        memory elements.  */
   3898  1.1  mrg     return false;
   3899  1.1  mrg 
   3900  1.1  mrg   /* Work out which function we need.  */
   3901  1.1  mrg   internal_fn ifn, alt_ifn;
   3902  1.1  mrg   if (read_p)
   3903  1.1  mrg     {
   3904  1.1  mrg       ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
   3905  1.1  mrg       alt_ifn = IFN_MASK_GATHER_LOAD;
   3906  1.1  mrg     }
   3907  1.1  mrg   else
   3908  1.1  mrg     {
   3909  1.1  mrg       ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
   3910  1.1  mrg       alt_ifn = IFN_MASK_SCATTER_STORE;
   3911  1.1  mrg     }
   3912  1.1  mrg 
   3913  1.1  mrg   for (;;)
   3914  1.1  mrg     {
   3915  1.1  mrg       tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type);
   3916  1.1  mrg       if (!offset_vectype)
   3917  1.1  mrg 	return false;
   3918  1.1  mrg 
   3919  1.1  mrg       /* Test whether the target supports this combination.  */
   3920  1.1  mrg       if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
   3921  1.1  mrg 						  offset_vectype, scale))
   3922  1.1  mrg 	{
   3923  1.1  mrg 	  *ifn_out = ifn;
   3924  1.1  mrg 	  *offset_vectype_out = offset_vectype;
   3925  1.1  mrg 	  return true;
   3926  1.1  mrg 	}
   3927  1.1  mrg       else if (!masked_p
   3928  1.1  mrg 	       && internal_gather_scatter_fn_supported_p (alt_ifn, vectype,
   3929  1.1  mrg 							  memory_type,
   3930  1.1  mrg 							  offset_vectype,
   3931  1.1  mrg 							  scale))
   3932  1.1  mrg 	{
   3933  1.1  mrg 	  *ifn_out = alt_ifn;
   3934  1.1  mrg 	  *offset_vectype_out = offset_vectype;
   3935  1.1  mrg 	  return true;
   3936  1.1  mrg 	}
   3937  1.1  mrg 
   3938  1.1  mrg       if (TYPE_PRECISION (offset_type) >= POINTER_SIZE
   3939  1.1  mrg 	  && TYPE_PRECISION (offset_type) >= element_bits)
   3940  1.1  mrg 	return false;
   3941  1.1  mrg 
   3942  1.1  mrg       offset_type = build_nonstandard_integer_type
   3943  1.1  mrg 	(TYPE_PRECISION (offset_type) * 2, TYPE_UNSIGNED (offset_type));
   3944  1.1  mrg     }
   3945  1.1  mrg }
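
/* The retry loop above widens the offset type until the target is
   happy: an unsupported 16-bit offset is retried as a 32-bit and then
   a 64-bit one, giving up once the precision reaches both POINTER_SIZE
   and the element width without a supported combination.  */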
   3946  1.1  mrg 
   3947  1.1  mrg /* STMT_INFO is a call to an internal gather load or scatter store function.
   3948  1.1  mrg    Describe the operation in INFO.  */
   3949  1.1  mrg 
   3950  1.1  mrg static void
   3951  1.1  mrg vect_describe_gather_scatter_call (stmt_vec_info stmt_info,
   3952  1.1  mrg 				   gather_scatter_info *info)
   3953  1.1  mrg {
   3954  1.1  mrg   gcall *call = as_a <gcall *> (stmt_info->stmt);
   3955  1.1  mrg   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   3956  1.1  mrg   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
   3957  1.1  mrg 
   3958  1.1  mrg   info->ifn = gimple_call_internal_fn (call);
   3959  1.1  mrg   info->decl = NULL_TREE;
   3960  1.1  mrg   info->base = gimple_call_arg (call, 0);
   3961  1.1  mrg   info->offset = gimple_call_arg (call, 1);
   3962  1.1  mrg   info->offset_dt = vect_unknown_def_type;
   3963  1.1  mrg   info->offset_vectype = NULL_TREE;
   3964  1.1  mrg   info->scale = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
   3965  1.1  mrg   info->element_type = TREE_TYPE (vectype);
   3966  1.1  mrg   info->memory_type = TREE_TYPE (DR_REF (dr));
   3967  1.1  mrg }
   3968  1.1  mrg 
   3969  1.1  mrg /* Return true if a non-affine read or write in STMT_INFO is suitable for a
   3970  1.1  mrg    gather load or scatter store.  Describe the operation in *INFO if so.  */
   3971  1.1  mrg 
   3972  1.1  mrg bool
   3973  1.1  mrg vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
   3974  1.1  mrg 			   gather_scatter_info *info)
   3975  1.1  mrg {
   3976  1.1  mrg   HOST_WIDE_INT scale = 1;
   3977  1.1  mrg   poly_int64 pbitpos, pbitsize;
   3978  1.1  mrg   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   3979  1.1  mrg   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
   3980  1.1  mrg   tree offtype = NULL_TREE;
   3981  1.1  mrg   tree decl = NULL_TREE, base, off;
   3982  1.1  mrg   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   3983  1.1  mrg   tree memory_type = TREE_TYPE (DR_REF (dr));
   3984  1.1  mrg   machine_mode pmode;
   3985  1.1  mrg   int punsignedp, reversep, pvolatilep = 0;
   3986  1.1  mrg   internal_fn ifn;
   3987  1.1  mrg   tree offset_vectype;
   3988  1.1  mrg   bool masked_p = false;
   3989  1.1  mrg 
   3990  1.1  mrg   /* See whether this is already a call to a gather/scatter internal function.
   3991  1.1  mrg      If not, see whether it's a masked load or store.  */
   3992  1.1  mrg   gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
   3993  1.1  mrg   if (call && gimple_call_internal_p (call))
   3994  1.1  mrg     {
   3995  1.1  mrg       ifn = gimple_call_internal_fn (call);
   3996  1.1  mrg       if (internal_gather_scatter_fn_p (ifn))
   3997  1.1  mrg 	{
   3998  1.1  mrg 	  vect_describe_gather_scatter_call (stmt_info, info);
   3999  1.1  mrg 	  return true;
   4000  1.1  mrg 	}
   4001  1.1  mrg       masked_p = (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE);
   4002  1.1  mrg     }
   4003  1.1  mrg 
   4004  1.1  mrg   /* True if we should aim to use internal functions rather than
   4005  1.1  mrg      built-in functions.  */
   4006  1.1  mrg   bool use_ifn_p = (DR_IS_READ (dr)
   4007  1.1  mrg 		    ? supports_vec_gather_load_p (TYPE_MODE (vectype))
   4008  1.1  mrg 		    : supports_vec_scatter_store_p (TYPE_MODE (vectype)));
   4009  1.1  mrg 
   4010  1.1  mrg   base = DR_REF (dr);
    4011  1.1  mrg   /* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF;
    4012  1.1  mrg      see if we can use the def stmt of the address.  */
   4013  1.1  mrg   if (masked_p
   4014  1.1  mrg       && TREE_CODE (base) == MEM_REF
   4015  1.1  mrg       && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
   4016  1.1  mrg       && integer_zerop (TREE_OPERAND (base, 1))
   4017  1.1  mrg       && !expr_invariant_in_loop_p (loop, TREE_OPERAND (base, 0)))
   4018  1.1  mrg     {
   4019  1.1  mrg       gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base, 0));
   4020  1.1  mrg       if (is_gimple_assign (def_stmt)
   4021  1.1  mrg 	  && gimple_assign_rhs_code (def_stmt) == ADDR_EXPR)
   4022  1.1  mrg 	base = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
   4023  1.1  mrg     }
   4024  1.1  mrg 
    4025  1.1  mrg   /* The gather and scatter builtins need an address of the form
   4026  1.1  mrg      loop_invariant + vector * {1, 2, 4, 8}
   4027  1.1  mrg      or
   4028  1.1  mrg      loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
   4029  1.1  mrg      Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
   4030  1.1  mrg      of loop invariants/SSA_NAMEs defined in the loop, with casts,
   4031  1.1  mrg      multiplications and additions in it.  To get a vector, we need
   4032  1.1  mrg      a single SSA_NAME that will be defined in the loop and will
   4033  1.1  mrg      contain everything that is not loop invariant and that can be
   4034  1.1  mrg      vectorized.  The following code attempts to find such a preexistng
    4035  1.1  mrg      vectorized.  The following code attempts to find such a preexisting
   4036  1.1  mrg      that can be gimplified before the loop.  */
   4037  1.1  mrg   base = get_inner_reference (base, &pbitsize, &pbitpos, &off, &pmode,
   4038  1.1  mrg 			      &punsignedp, &reversep, &pvolatilep);
   4039  1.1  mrg   if (reversep)
   4040  1.1  mrg     return false;
   4041  1.1  mrg 
   4042  1.1  mrg   poly_int64 pbytepos = exact_div (pbitpos, BITS_PER_UNIT);
   4043  1.1  mrg 
   4044  1.1  mrg   if (TREE_CODE (base) == MEM_REF)
   4045  1.1  mrg     {
   4046  1.1  mrg       if (!integer_zerop (TREE_OPERAND (base, 1)))
   4047  1.1  mrg 	{
   4048  1.1  mrg 	  if (off == NULL_TREE)
   4049  1.1  mrg 	    off = wide_int_to_tree (sizetype, mem_ref_offset (base));
   4050  1.1  mrg 	  else
   4051  1.1  mrg 	    off = size_binop (PLUS_EXPR, off,
   4052  1.1  mrg 			      fold_convert (sizetype, TREE_OPERAND (base, 1)));
   4053  1.1  mrg 	}
   4054  1.1  mrg       base = TREE_OPERAND (base, 0);
   4055  1.1  mrg     }
   4056  1.1  mrg   else
   4057  1.1  mrg     base = build_fold_addr_expr (base);
   4058  1.1  mrg 
   4059  1.1  mrg   if (off == NULL_TREE)
   4060  1.1  mrg     off = size_zero_node;
   4061  1.1  mrg 
    4062  1.1  mrg   /* If base is not loop invariant, then if off is 0 we start with just
    4063  1.1  mrg      the constant offset in the loop invariant BASE and continue with
    4064  1.1  mrg      base as OFF; otherwise give up.
    4065  1.1  mrg      We could handle that case by gimplifying the addition of base + off
    4066  1.1  mrg      into some SSA_NAME and using that as off, but for now punt.  */
   4067  1.1  mrg   if (!expr_invariant_in_loop_p (loop, base))
   4068  1.1  mrg     {
   4069  1.1  mrg       if (!integer_zerop (off))
   4070  1.1  mrg 	return false;
   4071  1.1  mrg       off = base;
   4072  1.1  mrg       base = size_int (pbytepos);
   4073  1.1  mrg     }
   4074  1.1  mrg   /* Otherwise put base + constant offset into the loop invariant BASE
   4075  1.1  mrg      and continue with OFF.  */
   4076  1.1  mrg   else
   4077  1.1  mrg     {
   4078  1.1  mrg       base = fold_convert (sizetype, base);
   4079  1.1  mrg       base = size_binop (PLUS_EXPR, base, size_int (pbytepos));
   4080  1.1  mrg     }
   4081  1.1  mrg 
   4082  1.1  mrg   /* OFF at this point may be either a SSA_NAME or some tree expression
   4083  1.1  mrg      from get_inner_reference.  Try to peel off loop invariants from it
   4084  1.1  mrg      into BASE as long as possible.  */
   4085  1.1  mrg   STRIP_NOPS (off);
   4086  1.1  mrg   while (offtype == NULL_TREE)
   4087  1.1  mrg     {
   4088  1.1  mrg       enum tree_code code;
   4089  1.1  mrg       tree op0, op1, add = NULL_TREE;
   4090  1.1  mrg 
   4091  1.1  mrg       if (TREE_CODE (off) == SSA_NAME)
   4092  1.1  mrg 	{
   4093  1.1  mrg 	  gimple *def_stmt = SSA_NAME_DEF_STMT (off);
   4094  1.1  mrg 
   4095  1.1  mrg 	  if (expr_invariant_in_loop_p (loop, off))
   4096  1.1  mrg 	    return false;
   4097  1.1  mrg 
   4098  1.1  mrg 	  if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
   4099  1.1  mrg 	    break;
   4100  1.1  mrg 
   4101  1.1  mrg 	  op0 = gimple_assign_rhs1 (def_stmt);
   4102  1.1  mrg 	  code = gimple_assign_rhs_code (def_stmt);
   4103  1.1  mrg 	  op1 = gimple_assign_rhs2 (def_stmt);
   4104  1.1  mrg 	}
   4105  1.1  mrg       else
   4106  1.1  mrg 	{
   4107  1.1  mrg 	  if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
   4108  1.1  mrg 	    return false;
   4109  1.1  mrg 	  code = TREE_CODE (off);
   4110  1.1  mrg 	  extract_ops_from_tree (off, &code, &op0, &op1);
   4111  1.1  mrg 	}
   4112  1.1  mrg       switch (code)
   4113  1.1  mrg 	{
   4114  1.1  mrg 	case POINTER_PLUS_EXPR:
   4115  1.1  mrg 	case PLUS_EXPR:
   4116  1.1  mrg 	  if (expr_invariant_in_loop_p (loop, op0))
   4117  1.1  mrg 	    {
   4118  1.1  mrg 	      add = op0;
   4119  1.1  mrg 	      off = op1;
   4120  1.1  mrg 	    do_add:
   4121  1.1  mrg 	      add = fold_convert (sizetype, add);
   4122  1.1  mrg 	      if (scale != 1)
   4123  1.1  mrg 		add = size_binop (MULT_EXPR, add, size_int (scale));
   4124  1.1  mrg 	      base = size_binop (PLUS_EXPR, base, add);
   4125  1.1  mrg 	      continue;
   4126  1.1  mrg 	    }
   4127  1.1  mrg 	  if (expr_invariant_in_loop_p (loop, op1))
   4128  1.1  mrg 	    {
   4129  1.1  mrg 	      add = op1;
   4130  1.1  mrg 	      off = op0;
   4131  1.1  mrg 	      goto do_add;
   4132  1.1  mrg 	    }
   4133  1.1  mrg 	  break;
   4134  1.1  mrg 	case MINUS_EXPR:
   4135  1.1  mrg 	  if (expr_invariant_in_loop_p (loop, op1))
   4136  1.1  mrg 	    {
   4137  1.1  mrg 	      add = fold_convert (sizetype, op1);
   4138  1.1  mrg 	      add = size_binop (MINUS_EXPR, size_zero_node, add);
   4139  1.1  mrg 	      off = op0;
   4140  1.1  mrg 	      goto do_add;
   4141  1.1  mrg 	    }
   4142  1.1  mrg 	  break;
   4143  1.1  mrg 	case MULT_EXPR:
   4144  1.1  mrg 	  if (scale == 1 && tree_fits_shwi_p (op1))
   4145  1.1  mrg 	    {
   4146  1.1  mrg 	      int new_scale = tree_to_shwi (op1);
   4147  1.1  mrg 	      /* Only treat this as a scaling operation if the target
   4148  1.1  mrg 		 supports it for at least some offset type.  */
   4149  1.1  mrg 	      if (use_ifn_p
   4150  1.1  mrg 		  && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
   4151  1.1  mrg 						masked_p, vectype, memory_type,
   4152  1.1  mrg 						signed_char_type_node,
   4153  1.1  mrg 						new_scale, &ifn,
   4154  1.1  mrg 						&offset_vectype)
   4155  1.1  mrg 		  && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
   4156  1.1  mrg 						masked_p, vectype, memory_type,
   4157  1.1  mrg 						unsigned_char_type_node,
   4158  1.1  mrg 						new_scale, &ifn,
   4159  1.1  mrg 						&offset_vectype))
   4160  1.1  mrg 		break;
   4161  1.1  mrg 	      scale = new_scale;
   4162  1.1  mrg 	      off = op0;
   4163  1.1  mrg 	      continue;
   4164  1.1  mrg 	    }
   4165  1.1  mrg 	  break;
   4166  1.1  mrg 	case SSA_NAME:
   4167  1.1  mrg 	  off = op0;
   4168  1.1  mrg 	  continue;
   4169  1.1  mrg 	CASE_CONVERT:
   4170  1.1  mrg 	  if (!POINTER_TYPE_P (TREE_TYPE (op0))
   4171  1.1  mrg 	      && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
   4172  1.1  mrg 	    break;
   4173  1.1  mrg 
   4174  1.1  mrg 	  /* Don't include the conversion if the target is happy with
   4175  1.1  mrg 	     the current offset type.  */
   4176  1.1  mrg 	  if (use_ifn_p
   4177  1.1  mrg 	      && !POINTER_TYPE_P (TREE_TYPE (off))
   4178  1.1  mrg 	      && vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
   4179  1.1  mrg 					   masked_p, vectype, memory_type,
   4180  1.1  mrg 					   TREE_TYPE (off), scale, &ifn,
   4181  1.1  mrg 					   &offset_vectype))
   4182  1.1  mrg 	    break;
   4183  1.1  mrg 
   4184  1.1  mrg 	  if (TYPE_PRECISION (TREE_TYPE (op0))
   4185  1.1  mrg 	      == TYPE_PRECISION (TREE_TYPE (off)))
   4186  1.1  mrg 	    {
   4187  1.1  mrg 	      off = op0;
   4188  1.1  mrg 	      continue;
   4189  1.1  mrg 	    }
   4190  1.1  mrg 
    4191  1.1  mrg 	  /* Include the conversion if it is widening and either we're
    4192  1.1  mrg 	     using the IFN path, the target can handle the converted-from
    4193  1.1  mrg 	     offset, or the current size is not already the same as the
    4194  1.1  mrg 	     data vector element size.  */
   4195  1.1  mrg 	  if ((TYPE_PRECISION (TREE_TYPE (op0))
   4196  1.1  mrg 	       < TYPE_PRECISION (TREE_TYPE (off)))
   4197  1.1  mrg 	      && (use_ifn_p
   4198  1.1  mrg 		  || (DR_IS_READ (dr)
   4199  1.1  mrg 		      ? (targetm.vectorize.builtin_gather
   4200  1.1  mrg 			 && targetm.vectorize.builtin_gather (vectype,
   4201  1.1  mrg 							      TREE_TYPE (op0),
   4202  1.1  mrg 							      scale))
   4203  1.1  mrg 		      : (targetm.vectorize.builtin_scatter
   4204  1.1  mrg 			 && targetm.vectorize.builtin_scatter (vectype,
   4205  1.1  mrg 							       TREE_TYPE (op0),
   4206  1.1  mrg 							       scale)))
   4207  1.1  mrg 		  || !operand_equal_p (TYPE_SIZE (TREE_TYPE (off)),
   4208  1.1  mrg 				       TYPE_SIZE (TREE_TYPE (vectype)), 0)))
   4209  1.1  mrg 	    {
   4210  1.1  mrg 	      off = op0;
   4211  1.1  mrg 	      offtype = TREE_TYPE (off);
   4212  1.1  mrg 	      STRIP_NOPS (off);
   4213  1.1  mrg 	      continue;
   4214  1.1  mrg 	    }
   4215  1.1  mrg 	  break;
   4216  1.1  mrg 	default:
   4217  1.1  mrg 	  break;
   4218  1.1  mrg 	}
   4219  1.1  mrg       break;
   4220  1.1  mrg     }
   4221  1.1  mrg 
   4222  1.1  mrg   /* If at the end OFF still isn't a SSA_NAME or isn't
   4223  1.1  mrg      defined in the loop, punt.  */
   4224  1.1  mrg   if (TREE_CODE (off) != SSA_NAME
   4225  1.1  mrg       || expr_invariant_in_loop_p (loop, off))
   4226  1.1  mrg     return false;
   4227  1.1  mrg 
   4228  1.1  mrg   if (offtype == NULL_TREE)
   4229  1.1  mrg     offtype = TREE_TYPE (off);
   4230  1.1  mrg 
   4231  1.1  mrg   if (use_ifn_p)
   4232  1.1  mrg     {
   4233  1.1  mrg       if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
   4234  1.1  mrg 				     vectype, memory_type, offtype, scale,
   4235  1.1  mrg 				     &ifn, &offset_vectype))
   4236  1.1  mrg 	ifn = IFN_LAST;
   4237  1.1  mrg       decl = NULL_TREE;
   4238  1.1  mrg     }
   4239  1.1  mrg   else
   4240  1.1  mrg     {
   4241  1.1  mrg       if (DR_IS_READ (dr))
   4242  1.1  mrg 	{
   4243  1.1  mrg 	  if (targetm.vectorize.builtin_gather)
   4244  1.1  mrg 	    decl = targetm.vectorize.builtin_gather (vectype, offtype, scale);
   4245  1.1  mrg 	}
   4246  1.1  mrg       else
   4247  1.1  mrg 	{
   4248  1.1  mrg 	  if (targetm.vectorize.builtin_scatter)
   4249  1.1  mrg 	    decl = targetm.vectorize.builtin_scatter (vectype, offtype, scale);
   4250  1.1  mrg 	}
   4251  1.1  mrg       ifn = IFN_LAST;
   4252  1.1  mrg       /* The offset vector type will be read from DECL when needed.  */
   4253  1.1  mrg       offset_vectype = NULL_TREE;
   4254  1.1  mrg     }
   4255  1.1  mrg 
   4256  1.1  mrg   info->ifn = ifn;
   4257  1.1  mrg   info->decl = decl;
   4258  1.1  mrg   info->base = base;
   4259  1.1  mrg   info->offset = off;
   4260  1.1  mrg   info->offset_dt = vect_unknown_def_type;
   4261  1.1  mrg   info->offset_vectype = offset_vectype;
   4262  1.1  mrg   info->scale = scale;
   4263  1.1  mrg   info->element_type = TREE_TYPE (vectype);
   4264  1.1  mrg   info->memory_type = memory_type;
   4265  1.1  mrg   return true;
   4266  1.1  mrg }
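
/* For a gather such as

     for (i = 0; i < n; i++)
       sum += a[idx[i]];

   the decomposition above would typically leave BASE as the
   loop-invariant address of A (plus any constant offset), OFF as the
   SSA name holding idx[i] and SCALE as sizeof (a[0]), i.e. the
   loop_invariant + vector * {1, 2, 4, 8} form described earlier.  */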
   4267  1.1  mrg 
   4268  1.1  mrg /* Find the data references in STMT, analyze them with respect to LOOP and
   4269  1.1  mrg    append them to DATAREFS.  Return false if datarefs in this stmt cannot
   4270  1.1  mrg    be handled.  */
   4271  1.1  mrg 
   4272  1.1  mrg opt_result
   4273  1.1  mrg vect_find_stmt_data_reference (loop_p loop, gimple *stmt,
   4274  1.1  mrg 			       vec<data_reference_p> *datarefs,
   4275  1.1  mrg 			       vec<int> *dataref_groups, int group_id)
   4276  1.1  mrg {
   4277  1.1  mrg   /* We can ignore clobbers for dataref analysis - they are removed during
   4278  1.1  mrg      loop vectorization and BB vectorization checks dependences with a
   4279  1.1  mrg      stmt walk.  */
   4280  1.1  mrg   if (gimple_clobber_p (stmt))
   4281  1.1  mrg     return opt_result::success ();
   4282  1.1  mrg 
   4283  1.1  mrg   if (gimple_has_volatile_ops (stmt))
   4284  1.1  mrg     return opt_result::failure_at (stmt, "not vectorized: volatile type: %G",
   4285  1.1  mrg 				   stmt);
   4286  1.1  mrg 
   4287  1.1  mrg   if (stmt_can_throw_internal (cfun, stmt))
   4288  1.1  mrg     return opt_result::failure_at (stmt,
   4289  1.1  mrg 				   "not vectorized:"
   4290  1.1  mrg 				   " statement can throw an exception: %G",
   4291  1.1  mrg 				   stmt);
   4292  1.1  mrg 
   4293  1.1  mrg   auto_vec<data_reference_p, 2> refs;
   4294  1.1  mrg   opt_result res = find_data_references_in_stmt (loop, stmt, &refs);
   4295  1.1  mrg   if (!res)
   4296  1.1  mrg     return res;
   4297  1.1  mrg 
   4298  1.1  mrg   if (refs.is_empty ())
   4299  1.1  mrg     return opt_result::success ();
   4300  1.1  mrg 
   4301  1.1  mrg   if (refs.length () > 1)
   4302  1.1  mrg     {
   4303  1.1  mrg       while (!refs.is_empty ())
   4304  1.1  mrg 	free_data_ref (refs.pop ());
   4305  1.1  mrg       return opt_result::failure_at (stmt,
   4306  1.1  mrg 				     "not vectorized: more than one "
   4307  1.1  mrg 				     "data ref in stmt: %G", stmt);
   4308  1.1  mrg     }
   4309  1.1  mrg 
   4310  1.1  mrg   data_reference_p dr = refs.pop ();
   4311  1.1  mrg   if (gcall *call = dyn_cast <gcall *> (stmt))
   4312  1.1  mrg     if (!gimple_call_internal_p (call)
   4313  1.1  mrg 	|| (gimple_call_internal_fn (call) != IFN_MASK_LOAD
   4314  1.1  mrg 	    && gimple_call_internal_fn (call) != IFN_MASK_STORE))
   4315  1.1  mrg       {
   4316  1.1  mrg 	free_data_ref (dr);
   4317  1.1  mrg 	return opt_result::failure_at (stmt,
   4318  1.1  mrg 				       "not vectorized: dr in a call %G", stmt);
   4319  1.1  mrg       }
   4320  1.1  mrg 
   4321  1.1  mrg   if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
   4322  1.1  mrg       && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
   4323  1.1  mrg     {
   4324  1.1  mrg       free_data_ref (dr);
   4325  1.1  mrg       return opt_result::failure_at (stmt,
   4326  1.1  mrg 				     "not vectorized:"
   4327  1.1  mrg 				     " statement is bitfield access %G", stmt);
   4328  1.1  mrg     }
   4329  1.1  mrg 
   4330  1.1  mrg   if (DR_BASE_ADDRESS (dr)
   4331  1.1  mrg       && TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
   4332  1.1  mrg     {
   4333  1.1  mrg       free_data_ref (dr);
   4334  1.1  mrg       return opt_result::failure_at (stmt,
   4335  1.1  mrg 				     "not vectorized:"
   4336  1.1  mrg 				     " base addr of dr is a constant\n");
   4337  1.1  mrg     }
   4338  1.1  mrg 
   4339  1.1  mrg   /* Check whether this may be a SIMD lane access and adjust the
   4340  1.1  mrg      DR to make it easier for us to handle it.  */
   4341  1.1  mrg   if (loop
   4342  1.1  mrg       && loop->simduid
   4343  1.1  mrg       && (!DR_BASE_ADDRESS (dr)
   4344  1.1  mrg 	  || !DR_OFFSET (dr)
   4345  1.1  mrg 	  || !DR_INIT (dr)
   4346  1.1  mrg 	  || !DR_STEP (dr)))
   4347  1.1  mrg     {
   4348  1.1  mrg       struct data_reference *newdr
   4349  1.1  mrg 	= create_data_ref (NULL, loop_containing_stmt (stmt), DR_REF (dr), stmt,
   4350  1.1  mrg 			   DR_IS_READ (dr), DR_IS_CONDITIONAL_IN_STMT (dr));
   4351  1.1  mrg       if (DR_BASE_ADDRESS (newdr)
   4352  1.1  mrg 	  && DR_OFFSET (newdr)
   4353  1.1  mrg 	  && DR_INIT (newdr)
   4354  1.1  mrg 	  && DR_STEP (newdr)
   4355  1.1  mrg 	  && TREE_CODE (DR_INIT (newdr)) == INTEGER_CST
   4356  1.1  mrg 	  && integer_zerop (DR_STEP (newdr)))
   4357  1.1  mrg 	{
   4358  1.1  mrg 	  tree base_address = DR_BASE_ADDRESS (newdr);
   4359  1.1  mrg 	  tree off = DR_OFFSET (newdr);
   4360  1.1  mrg 	  tree step = ssize_int (1);
   4361  1.1  mrg 	  if (integer_zerop (off)
   4362  1.1  mrg 	      && TREE_CODE (base_address) == POINTER_PLUS_EXPR)
   4363  1.1  mrg 	    {
   4364  1.1  mrg 	      off = TREE_OPERAND (base_address, 1);
   4365  1.1  mrg 	      base_address = TREE_OPERAND (base_address, 0);
   4366  1.1  mrg 	    }
   4367  1.1  mrg 	  STRIP_NOPS (off);
   4368  1.1  mrg 	  if (TREE_CODE (off) == MULT_EXPR
   4369  1.1  mrg 	      && tree_fits_uhwi_p (TREE_OPERAND (off, 1)))
   4370  1.1  mrg 	    {
   4371  1.1  mrg 	      step = TREE_OPERAND (off, 1);
   4372  1.1  mrg 	      off = TREE_OPERAND (off, 0);
   4373  1.1  mrg 	      STRIP_NOPS (off);
   4374  1.1  mrg 	    }
   4375  1.1  mrg 	  if (CONVERT_EXPR_P (off)
   4376  1.1  mrg 	      && (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (off, 0)))
   4377  1.1  mrg 		  < TYPE_PRECISION (TREE_TYPE (off))))
   4378  1.1  mrg 	    off = TREE_OPERAND (off, 0);
   4379  1.1  mrg 	  if (TREE_CODE (off) == SSA_NAME)
   4380  1.1  mrg 	    {
   4381  1.1  mrg 	      gimple *def = SSA_NAME_DEF_STMT (off);
   4382  1.1  mrg 	      /* Look through widening conversion.  */
   4383  1.1  mrg 	      if (is_gimple_assign (def)
   4384  1.1  mrg 		  && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
   4385  1.1  mrg 		{
   4386  1.1  mrg 		  tree rhs1 = gimple_assign_rhs1 (def);
   4387  1.1  mrg 		  if (TREE_CODE (rhs1) == SSA_NAME
   4388  1.1  mrg 		      && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
   4389  1.1  mrg 		      && (TYPE_PRECISION (TREE_TYPE (off))
   4390  1.1  mrg 			  > TYPE_PRECISION (TREE_TYPE (rhs1))))
   4391  1.1  mrg 		    def = SSA_NAME_DEF_STMT (rhs1);
   4392  1.1  mrg 		}
   4393  1.1  mrg 	      if (is_gimple_call (def)
   4394  1.1  mrg 		  && gimple_call_internal_p (def)
   4395  1.1  mrg 		  && (gimple_call_internal_fn (def) == IFN_GOMP_SIMD_LANE))
   4396  1.1  mrg 		{
   4397  1.1  mrg 		  tree arg = gimple_call_arg (def, 0);
   4398  1.1  mrg 		  tree reft = TREE_TYPE (DR_REF (newdr));
   4399  1.1  mrg 		  gcc_assert (TREE_CODE (arg) == SSA_NAME);
   4400  1.1  mrg 		  arg = SSA_NAME_VAR (arg);
   4401  1.1  mrg 		  if (arg == loop->simduid
   4402  1.1  mrg 		      /* For now.  */
   4403  1.1  mrg 		      && tree_int_cst_equal (TYPE_SIZE_UNIT (reft), step))
   4404  1.1  mrg 		    {
   4405  1.1  mrg 		      DR_BASE_ADDRESS (newdr) = base_address;
   4406  1.1  mrg 		      DR_OFFSET (newdr) = ssize_int (0);
   4407  1.1  mrg 		      DR_STEP (newdr) = step;
   4408  1.1  mrg 		      DR_OFFSET_ALIGNMENT (newdr) = BIGGEST_ALIGNMENT;
   4409  1.1  mrg 		      DR_STEP_ALIGNMENT (newdr) = highest_pow2_factor (step);
   4410  1.1  mrg 		      /* Mark as simd-lane access.  */
   4411  1.1  mrg 		      tree arg2 = gimple_call_arg (def, 1);
   4412  1.1  mrg 		      newdr->aux = (void *) (-1 - tree_to_uhwi (arg2));
   4413  1.1  mrg 		      free_data_ref (dr);
   4414  1.1  mrg 		      datarefs->safe_push (newdr);
   4415  1.1  mrg 		      if (dataref_groups)
   4416  1.1  mrg 			dataref_groups->safe_push (group_id);
   4417  1.1  mrg 		      return opt_result::success ();
   4418  1.1  mrg 		    }
   4419  1.1  mrg 		}
   4420  1.1  mrg 	    }
   4421  1.1  mrg 	}
   4422  1.1  mrg       free_data_ref (newdr);
   4423  1.1  mrg     }
   4424  1.1  mrg 
   4425  1.1  mrg   datarefs->safe_push (dr);
   4426  1.1  mrg   if (dataref_groups)
   4427  1.1  mrg     dataref_groups->safe_push (group_id);
   4428  1.1  mrg   return opt_result::success ();
   4429  1.1  mrg }
   4430  1.1  mrg 
   4431  1.1  mrg /* Function vect_analyze_data_refs.
   4432  1.1  mrg 
    4433  1.1  mrg    Analyze all the data references in the loop or basic block.
   4434  1.1  mrg 
   4435  1.1  mrg    The general structure of the analysis of data refs in the vectorizer is as
   4436  1.1  mrg    follows:
    4437  1.1  mrg    1- vect_analyze_data_refs(loop/bb): go over all the data refs found
    4438  1.1  mrg       in the loop/bb, check that their analysis succeeded and record the
    4439  1.1  mrg       vector type of each reference.
    4440  1.1  mrg    2- vect_analyze_data_ref_dependences(): apply dependence testing
    4441  1.1  mrg       using the ddrs.
    4442  1.1  mrg    3- vect_analyze_data_refs_alignment(): check that ref_stmt.alignment is ok.
    4443  1.1  mrg    4- vect_analyze_data_ref_accesses(): check that ref_stmt.step is ok.  */
   4445  1.1  mrg 
   4446  1.1  mrg opt_result
   4447  1.1  mrg vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf, bool *fatal)
   4448  1.1  mrg {
   4449  1.1  mrg   class loop *loop = NULL;
   4450  1.1  mrg   unsigned int i;
   4451  1.1  mrg   struct data_reference *dr;
   4452  1.1  mrg   tree scalar_type;
   4453  1.1  mrg 
   4454  1.1  mrg   DUMP_VECT_SCOPE ("vect_analyze_data_refs");
   4455  1.1  mrg 
   4456  1.1  mrg   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
   4457  1.1  mrg     loop = LOOP_VINFO_LOOP (loop_vinfo);
   4458  1.1  mrg 
   4459  1.1  mrg   /* Go through the data-refs, check that the analysis succeeded.  Update
   4460  1.1  mrg      pointer from stmt_vec_info struct to DR and vectype.  */
   4461  1.1  mrg 
   4462  1.1  mrg   vec<data_reference_p> datarefs = vinfo->shared->datarefs;
   4463  1.1  mrg   FOR_EACH_VEC_ELT (datarefs, i, dr)
   4464  1.1  mrg     {
   4465  1.1  mrg       enum { SG_NONE, GATHER, SCATTER } gatherscatter = SG_NONE;
   4466  1.1  mrg       poly_uint64 vf;
   4467  1.1  mrg 
   4468  1.1  mrg       gcc_assert (DR_REF (dr));
   4469  1.1  mrg       stmt_vec_info stmt_info = vinfo->lookup_stmt (DR_STMT (dr));
   4470  1.1  mrg       gcc_assert (!stmt_info->dr_aux.dr);
   4471  1.1  mrg       stmt_info->dr_aux.dr = dr;
   4472  1.1  mrg       stmt_info->dr_aux.stmt = stmt_info;
   4473  1.1  mrg 
   4474  1.1  mrg       /* Check that analysis of the data-ref succeeded.  */
   4475  1.1  mrg       if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
   4476  1.1  mrg 	  || !DR_STEP (dr))
   4477  1.1  mrg         {
   4478  1.1  mrg 	  bool maybe_gather
   4479  1.1  mrg 	    = DR_IS_READ (dr)
   4480  1.1  mrg 	      && !TREE_THIS_VOLATILE (DR_REF (dr));
   4481  1.1  mrg 	  bool maybe_scatter
   4482  1.1  mrg 	    = DR_IS_WRITE (dr)
   4483  1.1  mrg 	      && !TREE_THIS_VOLATILE (DR_REF (dr))
   4484  1.1  mrg 	      && (targetm.vectorize.builtin_scatter != NULL
   4485  1.1  mrg 		  || supports_vec_scatter_store_p ());
   4486  1.1  mrg 
   4487  1.1  mrg 	  /* If target supports vector gather loads or scatter stores,
    4488  1.1  mrg 	     see if they can be used.  */
   4489  1.1  mrg 	  if (is_a <loop_vec_info> (vinfo)
   4490  1.1  mrg 	      && !nested_in_vect_loop_p (loop, stmt_info))
   4491  1.1  mrg 	    {
   4492  1.1  mrg 	      if (maybe_gather || maybe_scatter)
   4493  1.1  mrg 		{
   4494  1.1  mrg 		  if (maybe_gather)
   4495  1.1  mrg 		    gatherscatter = GATHER;
   4496  1.1  mrg 		  else
   4497  1.1  mrg 		    gatherscatter = SCATTER;
   4498  1.1  mrg 		}
   4499  1.1  mrg 	    }
   4500  1.1  mrg 
   4501  1.1  mrg 	  if (gatherscatter == SG_NONE)
   4502  1.1  mrg 	    {
   4503  1.1  mrg 	      if (dump_enabled_p ())
   4504  1.1  mrg 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   4505  1.1  mrg 				 "not vectorized: data ref analysis "
   4506  1.1  mrg 				 "failed %G", stmt_info->stmt);
   4507  1.1  mrg 	      if (is_a <bb_vec_info> (vinfo))
   4508  1.1  mrg 		{
   4509  1.1  mrg 		  /* In BB vectorization the ref can still participate
   4510  1.1  mrg 		     in dependence analysis, we just can't vectorize it.  */
   4511  1.1  mrg 		  STMT_VINFO_VECTORIZABLE (stmt_info) = false;
   4512  1.1  mrg 		  continue;
   4513  1.1  mrg 		}
   4514  1.1  mrg 	      return opt_result::failure_at (stmt_info->stmt,
   4515  1.1  mrg 					     "not vectorized:"
   4516  1.1  mrg 					     " data ref analysis failed: %G",
   4517  1.1  mrg 					     stmt_info->stmt);
   4518  1.1  mrg 	    }
   4519  1.1  mrg         }
   4520  1.1  mrg 
   4521  1.1  mrg       /* See if this was detected as SIMD lane access.  */
   4522  1.1  mrg       if (dr->aux == (void *)-1
   4523  1.1  mrg 	  || dr->aux == (void *)-2
   4524  1.1  mrg 	  || dr->aux == (void *)-3
   4525  1.1  mrg 	  || dr->aux == (void *)-4)
   4526  1.1  mrg 	{
   4527  1.1  mrg 	  if (nested_in_vect_loop_p (loop, stmt_info))
   4528  1.1  mrg 	    return opt_result::failure_at (stmt_info->stmt,
   4529  1.1  mrg 					   "not vectorized:"
   4530  1.1  mrg 					   " data ref analysis failed: %G",
   4531  1.1  mrg 					   stmt_info->stmt);
   4532  1.1  mrg 	  STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)
   4533  1.1  mrg 	    = -(uintptr_t) dr->aux;
   4534  1.1  mrg 	}
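      /* Note on the encoding: the SIMD-lane detection earlier in this
	 file stored -1 - ARG2 in dr->aux, ARG2 being the second argument
	 of the GOMP_SIMD_LANE call, so negating the value here yields
	 ARG2 + 1 (hence the -1 .. -4 values tested above).  */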
   4535  1.1  mrg 
   4536  1.1  mrg       tree base = get_base_address (DR_REF (dr));
   4537  1.1  mrg       if (base && VAR_P (base) && DECL_NONALIASED (base))
   4538  1.1  mrg 	{
   4539  1.1  mrg           if (dump_enabled_p ())
   4540  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   4541  1.1  mrg 			     "not vectorized: base object not addressable "
   4542  1.1  mrg 			     "for stmt: %G", stmt_info->stmt);
   4543  1.1  mrg           if (is_a <bb_vec_info> (vinfo))
   4544  1.1  mrg 	    {
   4545  1.1  mrg 	      /* In BB vectorization the ref can still participate
   4546  1.1  mrg 	         in dependence analysis, we just can't vectorize it.  */
   4547  1.1  mrg 	      STMT_VINFO_VECTORIZABLE (stmt_info) = false;
   4548  1.1  mrg 	      continue;
   4549  1.1  mrg 	    }
   4550  1.1  mrg 	  return opt_result::failure_at (stmt_info->stmt,
   4551  1.1  mrg 					 "not vectorized: base object not"
   4552  1.1  mrg 					 " addressable for stmt: %G",
   4553  1.1  mrg 					 stmt_info->stmt);
   4554  1.1  mrg 	}
   4555  1.1  mrg 
   4556  1.1  mrg       if (is_a <loop_vec_info> (vinfo)
   4557  1.1  mrg 	  && DR_STEP (dr)
   4558  1.1  mrg 	  && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
   4559  1.1  mrg 	{
   4560  1.1  mrg 	  if (nested_in_vect_loop_p (loop, stmt_info))
   4561  1.1  mrg 	    return opt_result::failure_at (stmt_info->stmt,
   4562  1.1  mrg 					   "not vectorized: "
   4563  1.1  mrg 					   "not suitable for strided load %G",
   4564  1.1  mrg 					   stmt_info->stmt);
   4565  1.1  mrg 	  STMT_VINFO_STRIDED_P (stmt_info) = true;
   4566  1.1  mrg 	}
   4567  1.1  mrg 
   4568  1.1  mrg       /* Update DR field in stmt_vec_info struct.  */
   4569  1.1  mrg 
    4570  1.1  mrg       /* If the dataref is in an inner-loop of the loop that is considered
   4571  1.1  mrg 	 for vectorization, we also want to analyze the access relative to
   4572  1.1  mrg 	 the outer-loop (DR contains information only relative to the
   4573  1.1  mrg 	 inner-most enclosing loop).  We do that by building a reference to the
   4574  1.1  mrg 	 first location accessed by the inner-loop, and analyze it relative to
   4575  1.1  mrg 	 the outer-loop.  */
   4576  1.1  mrg       if (loop && nested_in_vect_loop_p (loop, stmt_info))
   4577  1.1  mrg 	{
   4578  1.1  mrg 	  /* Build a reference to the first location accessed by the
   4579  1.1  mrg 	     inner loop: *(BASE + INIT + OFFSET).  By construction,
   4580  1.1  mrg 	     this address must be invariant in the inner loop, so we
   4581  1.1  mrg 	     can consider it as being used in the outer loop.  */
   4582  1.1  mrg 	  tree base = unshare_expr (DR_BASE_ADDRESS (dr));
   4583  1.1  mrg 	  tree offset = unshare_expr (DR_OFFSET (dr));
   4584  1.1  mrg 	  tree init = unshare_expr (DR_INIT (dr));
   4585  1.1  mrg 	  tree init_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset),
   4586  1.1  mrg 					  init, offset);
   4587  1.1  mrg 	  tree init_addr = fold_build_pointer_plus (base, init_offset);
   4588  1.1  mrg 	  tree init_ref = build_fold_indirect_ref (init_addr);
   4589  1.1  mrg 
   4590  1.1  mrg 	  if (dump_enabled_p ())
   4591  1.1  mrg 	    dump_printf_loc (MSG_NOTE, vect_location,
   4592  1.1  mrg 			     "analyze in outer loop: %T\n", init_ref);
   4593  1.1  mrg 
   4594  1.1  mrg 	  opt_result res
   4595  1.1  mrg 	    = dr_analyze_innermost (&STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info),
   4596  1.1  mrg 				    init_ref, loop, stmt_info->stmt);
   4597  1.1  mrg 	  if (!res)
   4598  1.1  mrg 	    /* dr_analyze_innermost already explained the failure.  */
   4599  1.1  mrg 	    return res;
   4600  1.1  mrg 
   4601  1.1  mrg           if (dump_enabled_p ())
   4602  1.1  mrg 	    dump_printf_loc (MSG_NOTE, vect_location,
   4603  1.1  mrg 			     "\touter base_address: %T\n"
   4604  1.1  mrg 			     "\touter offset from base address: %T\n"
   4605  1.1  mrg 			     "\touter constant offset from base address: %T\n"
   4606  1.1  mrg 			     "\touter step: %T\n"
   4607  1.1  mrg 			     "\touter base alignment: %d\n\n"
   4608  1.1  mrg 			     "\touter base misalignment: %d\n"
   4609  1.1  mrg 			     "\touter offset alignment: %d\n"
   4610  1.1  mrg 			     "\touter step alignment: %d\n",
   4611  1.1  mrg 			     STMT_VINFO_DR_BASE_ADDRESS (stmt_info),
   4612  1.1  mrg 			     STMT_VINFO_DR_OFFSET (stmt_info),
   4613  1.1  mrg 			     STMT_VINFO_DR_INIT (stmt_info),
   4614  1.1  mrg 			     STMT_VINFO_DR_STEP (stmt_info),
   4615  1.1  mrg 			     STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info),
   4616  1.1  mrg 			     STMT_VINFO_DR_BASE_MISALIGNMENT (stmt_info),
   4617  1.1  mrg 			     STMT_VINFO_DR_OFFSET_ALIGNMENT (stmt_info),
   4618  1.1  mrg 			     STMT_VINFO_DR_STEP_ALIGNMENT (stmt_info));
   4619  1.1  mrg 	}
   4620  1.1  mrg 
   4621  1.1  mrg       /* Set vectype for STMT.  */
   4622  1.1  mrg       scalar_type = TREE_TYPE (DR_REF (dr));
   4623  1.1  mrg       tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
   4624  1.1  mrg       if (!vectype)
   4625  1.1  mrg         {
   4626  1.1  mrg           if (dump_enabled_p ())
   4627  1.1  mrg             {
   4628  1.1  mrg               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   4629  1.1  mrg                                "not vectorized: no vectype for stmt: %G",
   4630  1.1  mrg 			       stmt_info->stmt);
   4631  1.1  mrg               dump_printf (MSG_MISSED_OPTIMIZATION, " scalar_type: ");
   4632  1.1  mrg               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_DETAILS,
   4633  1.1  mrg                                  scalar_type);
   4634  1.1  mrg               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
   4635  1.1  mrg             }
   4636  1.1  mrg 
   4637  1.1  mrg           if (is_a <bb_vec_info> (vinfo))
   4638  1.1  mrg 	    {
   4639  1.1  mrg 	      /* No vector type is fine, the ref can still participate
   4640  1.1  mrg 	         in dependence analysis, we just can't vectorize it.  */
   4641  1.1  mrg 	      STMT_VINFO_VECTORIZABLE (stmt_info) = false;
   4642  1.1  mrg 	      continue;
   4643  1.1  mrg 	    }
   4644  1.1  mrg 	  if (fatal)
   4645  1.1  mrg 	    *fatal = false;
   4646  1.1  mrg 	  return opt_result::failure_at (stmt_info->stmt,
   4647  1.1  mrg 					 "not vectorized:"
   4648  1.1  mrg 					 " no vectype for stmt: %G"
   4649  1.1  mrg 					 " scalar_type: %T\n",
   4650  1.1  mrg 					 stmt_info->stmt, scalar_type);
   4651  1.1  mrg         }
   4652  1.1  mrg       else
   4653  1.1  mrg 	{
   4654  1.1  mrg 	  if (dump_enabled_p ())
   4655  1.1  mrg 	    dump_printf_loc (MSG_NOTE, vect_location,
   4656  1.1  mrg 			     "got vectype for stmt: %G%T\n",
   4657  1.1  mrg 			     stmt_info->stmt, vectype);
   4658  1.1  mrg 	}
   4659  1.1  mrg 
   4660  1.1  mrg       /* Adjust the minimal vectorization factor according to the
   4661  1.1  mrg 	 vector type.  */
   4662  1.1  mrg       vf = TYPE_VECTOR_SUBPARTS (vectype);
   4663  1.1  mrg       *min_vf = upper_bound (*min_vf, vf);
   4664  1.1  mrg 
   4665  1.1  mrg       /* Leave the BB vectorizer to pick the vector type later, based on
   4666  1.1  mrg 	 the final dataref group size and SLP node size.  */
   4667  1.1  mrg       if (is_a <loop_vec_info> (vinfo))
   4668  1.1  mrg 	STMT_VINFO_VECTYPE (stmt_info) = vectype;
   4669  1.1  mrg 
   4670  1.1  mrg       if (gatherscatter != SG_NONE)
   4671  1.1  mrg 	{
   4672  1.1  mrg 	  gather_scatter_info gs_info;
   4673  1.1  mrg 	  if (!vect_check_gather_scatter (stmt_info,
   4674  1.1  mrg 					  as_a <loop_vec_info> (vinfo),
   4675  1.1  mrg 					  &gs_info)
   4676  1.1  mrg 	      || !get_vectype_for_scalar_type (vinfo,
   4677  1.1  mrg 					       TREE_TYPE (gs_info.offset)))
   4678  1.1  mrg 	    {
   4679  1.1  mrg 	      if (fatal)
   4680  1.1  mrg 		*fatal = false;
   4681  1.1  mrg 	      return opt_result::failure_at
   4682  1.1  mrg 			(stmt_info->stmt,
   4683  1.1  mrg 			 (gatherscatter == GATHER)
   4684  1.1  mrg 			 ? "not vectorized: not suitable for gather load %G"
   4685  1.1  mrg 			 : "not vectorized: not suitable for scatter store %G",
   4686  1.1  mrg 			 stmt_info->stmt);
   4687  1.1  mrg 	    }
   4688  1.1  mrg 	  STMT_VINFO_GATHER_SCATTER_P (stmt_info) = gatherscatter;
   4689  1.1  mrg 	}
   4690  1.1  mrg     }
   4691  1.1  mrg 
   4692  1.1  mrg   /* We used to stop processing and prune the list here.  Verify we no
   4693  1.1  mrg      longer need to.  */
   4694  1.1  mrg   gcc_assert (i == datarefs.length ());
   4695  1.1  mrg 
   4696  1.1  mrg   return opt_result::success ();
   4697  1.1  mrg }
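/* Hedged usage sketch (simplified; not an exact quote of the loop
   analysis driver):

     poly_uint64 min_vf = 2;
     bool fatal = true;
     opt_result res = vect_analyze_data_refs (loop_vinfo, &min_vf, &fatal);
     if (!res)
       return res;

   On success *MIN_VF has been raised to the largest number of vector
   subparts among the data refs' vector types, a lower bound for the
   vectorization factor; on failure *FATAL is cleared when retrying
   with a different vector size might succeed.  */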
   4698  1.1  mrg 
   4699  1.1  mrg 
   4700  1.1  mrg /* Function vect_get_new_vect_var.
   4701  1.1  mrg 
   4702  1.1  mrg    Returns a name for a new variable.  The current naming scheme appends the
   4703  1.1  mrg    prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
   4704  1.1  mrg    the name of vectorizer generated variables, and appends that to NAME if
   4705  1.1  mrg    provided.  */
   4706  1.1  mrg 
   4707  1.1  mrg tree
   4708  1.1  mrg vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
   4709  1.1  mrg {
   4710  1.1  mrg   const char *prefix;
   4711  1.1  mrg   tree new_vect_var;
   4712  1.1  mrg 
   4713  1.1  mrg   switch (var_kind)
   4714  1.1  mrg   {
   4715  1.1  mrg   case vect_simple_var:
   4716  1.1  mrg     prefix = "vect";
   4717  1.1  mrg     break;
   4718  1.1  mrg   case vect_scalar_var:
   4719  1.1  mrg     prefix = "stmp";
   4720  1.1  mrg     break;
   4721  1.1  mrg   case vect_mask_var:
   4722  1.1  mrg     prefix = "mask";
   4723  1.1  mrg     break;
   4724  1.1  mrg   case vect_pointer_var:
   4725  1.1  mrg     prefix = "vectp";
   4726  1.1  mrg     break;
   4727  1.1  mrg   default:
   4728  1.1  mrg     gcc_unreachable ();
   4729  1.1  mrg   }
   4730  1.1  mrg 
   4731  1.1  mrg   if (name)
   4732  1.1  mrg     {
   4733  1.1  mrg       char* tmp = concat (prefix, "_", name, NULL);
   4734  1.1  mrg       new_vect_var = create_tmp_reg (type, tmp);
   4735  1.1  mrg       free (tmp);
   4736  1.1  mrg     }
   4737  1.1  mrg   else
   4738  1.1  mrg     new_vect_var = create_tmp_reg (type, prefix);
   4739  1.1  mrg 
   4740  1.1  mrg   return new_vect_var;
   4741  1.1  mrg }
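/* Hypothetical usage sketch: with VECTYPE being e.g. a V4SI type,

     tree var = vect_get_new_vect_var (vectype, vect_simple_var, "sum");

   creates a temporary register named along the lines of "vect_sum";
   passing NULL for NAME yields just the "vect" prefix.  */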
   4742  1.1  mrg 
   4743  1.1  mrg /* Like vect_get_new_vect_var but return an SSA name.  */
   4744  1.1  mrg 
   4745  1.1  mrg tree
   4746  1.1  mrg vect_get_new_ssa_name (tree type, enum vect_var_kind var_kind, const char *name)
   4747  1.1  mrg {
   4748  1.1  mrg   const char *prefix;
   4749  1.1  mrg   tree new_vect_var;
   4750  1.1  mrg 
   4751  1.1  mrg   switch (var_kind)
   4752  1.1  mrg   {
   4753  1.1  mrg   case vect_simple_var:
   4754  1.1  mrg     prefix = "vect";
   4755  1.1  mrg     break;
   4756  1.1  mrg   case vect_scalar_var:
   4757  1.1  mrg     prefix = "stmp";
   4758  1.1  mrg     break;
   4759  1.1  mrg   case vect_pointer_var:
   4760  1.1  mrg     prefix = "vectp";
   4761  1.1  mrg     break;
   4762  1.1  mrg   default:
   4763  1.1  mrg     gcc_unreachable ();
   4764  1.1  mrg   }
   4765  1.1  mrg 
   4766  1.1  mrg   if (name)
   4767  1.1  mrg     {
   4768  1.1  mrg       char* tmp = concat (prefix, "_", name, NULL);
   4769  1.1  mrg       new_vect_var = make_temp_ssa_name (type, NULL, tmp);
   4770  1.1  mrg       free (tmp);
   4771  1.1  mrg     }
   4772  1.1  mrg   else
   4773  1.1  mrg     new_vect_var = make_temp_ssa_name (type, NULL, prefix);
   4774  1.1  mrg 
   4775  1.1  mrg   return new_vect_var;
   4776  1.1  mrg }
   4777  1.1  mrg 
   4778  1.1  mrg /* Duplicate points-to info on NAME from DR_INFO.  */
   4779  1.1  mrg 
   4780  1.1  mrg static void
   4781  1.1  mrg vect_duplicate_ssa_name_ptr_info (tree name, dr_vec_info *dr_info)
   4782  1.1  mrg {
   4783  1.1  mrg   duplicate_ssa_name_ptr_info (name, DR_PTR_INFO (dr_info->dr));
   4784  1.1  mrg   /* DR_PTR_INFO is for a base SSA name, not including constant or
   4785  1.1  mrg      variable offsets in the ref so its alignment info does not apply.  */
   4786  1.1  mrg   mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (name));
   4787  1.1  mrg }
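/* Example of why the alignment is reset: for a ref like p->c[i] the
   DR_PTR_INFO comes from the base pointer p; even if p is known to be
   8-byte aligned, p plus the field offset of c and a variable index
   need not be, so only the points-to solution is carried over.  */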
   4788  1.1  mrg 
   4789  1.1  mrg /* Function vect_create_addr_base_for_vector_ref.
   4790  1.1  mrg 
   4791  1.1  mrg    Create an expression that computes the address of the first memory location
   4792  1.1  mrg    that will be accessed for a data reference.
   4793  1.1  mrg 
   4794  1.1  mrg    Input:
   4795  1.1  mrg    STMT_INFO: The statement containing the data reference.
   4796  1.1  mrg    NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
    4797  1.1  mrg    OFFSET: Optional.  If supplied, it is added to the initial address.
    4798  1.1  mrg    LOOP:    Specify relative to which loop-nest the address should be computed.
   4799  1.1  mrg             For example, when the dataref is in an inner-loop nested in an
   4800  1.1  mrg 	    outer-loop that is now being vectorized, LOOP can be either the
   4801  1.1  mrg 	    outer-loop, or the inner-loop.  The first memory location accessed
   4802  1.1  mrg 	    by the following dataref ('in' points to short):
   4803  1.1  mrg 
   4804  1.1  mrg 		for (i=0; i<N; i++)
   4805  1.1  mrg 		   for (j=0; j<M; j++)
   4806  1.1  mrg 		     s += in[i+j]
   4807  1.1  mrg 
   4808  1.1  mrg 	    is as follows:
   4809  1.1  mrg 	    if LOOP=i_loop:	&in		(relative to i_loop)
   4810  1.1  mrg 	    if LOOP=j_loop: 	&in+i*2B	(relative to j_loop)
   4811  1.1  mrg 
   4812  1.1  mrg    Output:
   4813  1.1  mrg    1. Return an SSA_NAME whose value is the address of the memory location of
   4814  1.1  mrg       the first vector of the data reference.
   4815  1.1  mrg    2. If new_stmt_list is not NULL_TREE after return then the caller must insert
   4816  1.1  mrg       these statement(s) which define the returned SSA_NAME.
   4817  1.1  mrg 
   4818  1.1  mrg    FORNOW: We are only handling array accesses with step 1.  */
   4819  1.1  mrg 
   4820  1.1  mrg tree
   4821  1.1  mrg vect_create_addr_base_for_vector_ref (vec_info *vinfo, stmt_vec_info stmt_info,
   4822  1.1  mrg 				      gimple_seq *new_stmt_list,
   4823  1.1  mrg 				      tree offset)
   4824  1.1  mrg {
   4825  1.1  mrg   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
   4826  1.1  mrg   struct data_reference *dr = dr_info->dr;
   4827  1.1  mrg   const char *base_name;
   4828  1.1  mrg   tree addr_base;
   4829  1.1  mrg   tree dest;
   4830  1.1  mrg   gimple_seq seq = NULL;
   4831  1.1  mrg   tree vect_ptr_type;
   4832  1.1  mrg   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   4833  1.1  mrg   innermost_loop_behavior *drb = vect_dr_behavior (vinfo, dr_info);
   4834  1.1  mrg 
   4835  1.1  mrg   tree data_ref_base = unshare_expr (drb->base_address);
   4836  1.1  mrg   tree base_offset = unshare_expr (get_dr_vinfo_offset (vinfo, dr_info, true));
   4837  1.1  mrg   tree init = unshare_expr (drb->init);
   4838  1.1  mrg 
   4839  1.1  mrg   if (loop_vinfo)
   4840  1.1  mrg     base_name = get_name (data_ref_base);
   4841  1.1  mrg   else
   4842  1.1  mrg     {
   4843  1.1  mrg       base_offset = ssize_int (0);
   4844  1.1  mrg       init = ssize_int (0);
   4845  1.1  mrg       base_name = get_name (DR_REF (dr));
   4846  1.1  mrg     }
   4847  1.1  mrg 
   4848  1.1  mrg   /* Create base_offset */
   4849  1.1  mrg   base_offset = size_binop (PLUS_EXPR,
   4850  1.1  mrg 			    fold_convert (sizetype, base_offset),
   4851  1.1  mrg 			    fold_convert (sizetype, init));
   4852  1.1  mrg 
   4853  1.1  mrg   if (offset)
   4854  1.1  mrg     {
   4855  1.1  mrg       offset = fold_convert (sizetype, offset);
   4856  1.1  mrg       base_offset = fold_build2 (PLUS_EXPR, sizetype,
   4857  1.1  mrg 				 base_offset, offset);
   4858  1.1  mrg     }
   4859  1.1  mrg 
   4860  1.1  mrg   /* base + base_offset */
   4861  1.1  mrg   if (loop_vinfo)
   4862  1.1  mrg     addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
   4863  1.1  mrg   else
   4864  1.1  mrg     addr_base = build1 (ADDR_EXPR,
   4865  1.1  mrg 			build_pointer_type (TREE_TYPE (DR_REF (dr))),
   4866  1.1  mrg 			/* Strip zero offset components since we don't need
   4867  1.1  mrg 			   them and they can confuse late diagnostics if
   4868  1.1  mrg 			   we CSE them wrongly.  See PR106904 for example.  */
   4869  1.1  mrg 			unshare_expr (strip_zero_offset_components
   4870  1.1  mrg 								(DR_REF (dr))));
   4871  1.1  mrg 
   4872  1.1  mrg   vect_ptr_type = build_pointer_type (TREE_TYPE (DR_REF (dr)));
   4873  1.1  mrg   dest = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, base_name);
   4874  1.1  mrg   addr_base = force_gimple_operand (addr_base, &seq, true, dest);
   4875  1.1  mrg   gimple_seq_add_seq (new_stmt_list, seq);
   4876  1.1  mrg 
   4877  1.1  mrg   if (DR_PTR_INFO (dr)
   4878  1.1  mrg       && TREE_CODE (addr_base) == SSA_NAME
   4879  1.1  mrg       /* We should only duplicate pointer info to newly created SSA names.  */
   4880  1.1  mrg       && SSA_NAME_VAR (addr_base) == dest)
   4881  1.1  mrg     {
   4882  1.1  mrg       gcc_assert (!SSA_NAME_PTR_INFO (addr_base));
   4883  1.1  mrg       vect_duplicate_ssa_name_ptr_info (addr_base, dr_info);
   4884  1.1  mrg     }
   4885  1.1  mrg 
   4886  1.1  mrg   if (dump_enabled_p ())
   4887  1.1  mrg     dump_printf_loc (MSG_NOTE, vect_location, "created %T\n", addr_base);
   4888  1.1  mrg 
   4889  1.1  mrg   return addr_base;
   4890  1.1  mrg }
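/* Illustrative (hypothetical) output: for a ref a[i] whose innermost
   behavior has base &a, invariant offset OFF and init I, the gimple
   appended to NEW_STMT_LIST amounts to

     vectp_a.1 = &a + ((sizetype) OFF + (sizetype) I);

   and the returned SSA name is vectp_a.1, with points-to info copied
   from the DR when available.  */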
   4891  1.1  mrg 
   4892  1.1  mrg 
   4893  1.1  mrg /* Function vect_create_data_ref_ptr.
   4894  1.1  mrg 
   4895  1.1  mrg    Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
   4896  1.1  mrg    location accessed in the loop by STMT_INFO, along with the def-use update
   4897  1.1  mrg    chain to appropriately advance the pointer through the loop iterations.
   4898  1.1  mrg    Also set aliasing information for the pointer.  This pointer is used by
   4899  1.1  mrg    the callers to this function to create a memory reference expression for
   4900  1.1  mrg    vector load/store access.
   4901  1.1  mrg 
   4902  1.1  mrg    Input:
   4903  1.1  mrg    1. STMT_INFO: a stmt that references memory. Expected to be of the form
   4904  1.1  mrg          GIMPLE_ASSIGN <name, data-ref> or
   4905  1.1  mrg 	 GIMPLE_ASSIGN <data-ref, name>.
   4906  1.1  mrg    2. AGGR_TYPE: the type of the reference, which should be either a vector
   4907  1.1  mrg         or an array.
   4908  1.1  mrg    3. AT_LOOP: the loop where the vector memref is to be created.
   4909  1.1  mrg    4. OFFSET (optional): a byte offset to be added to the initial address
   4910  1.1  mrg 	accessed by the data-ref in STMT_INFO.
    4911  1.1  mrg    5. GSI: location where the new stmts are to be placed if there is no loop
   4912  1.1  mrg    6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
   4913  1.1  mrg         pointing to the initial address.
    4914  1.1  mrg    7. IV_STEP (optional, defaults to NULL): the amount that should be added
   4915  1.1  mrg 	to the IV during each iteration of the loop.  NULL says to move
   4916  1.1  mrg 	by one copy of AGGR_TYPE up or down, depending on the step of the
   4917  1.1  mrg 	data reference.
   4918  1.1  mrg 
   4919  1.1  mrg    Output:
   4920  1.1  mrg    1. Declare a new ptr to vector_type, and have it point to the base of the
    4921  1.1  mrg       data reference (initial address accessed by the data reference).
   4922  1.1  mrg       For example, for vector of type V8HI, the following code is generated:
   4923  1.1  mrg 
   4924  1.1  mrg       v8hi *ap;
   4925  1.1  mrg       ap = (v8hi *)initial_address;
   4926  1.1  mrg 
   4927  1.1  mrg       if OFFSET is not supplied:
   4928  1.1  mrg          initial_address = &a[init];
   4929  1.1  mrg       if OFFSET is supplied:
   4930  1.1  mrg 	 initial_address = &a[init] + OFFSET;
   4933  1.1  mrg 
   4934  1.1  mrg       Return the initial_address in INITIAL_ADDRESS.
   4935  1.1  mrg 
   4936  1.1  mrg    2. If ONLY_INIT is true, just return the initial pointer.  Otherwise, also
   4937  1.1  mrg       update the pointer in each iteration of the loop.
   4938  1.1  mrg 
   4939  1.1  mrg       Return the increment stmt that updates the pointer in PTR_INCR.
   4940  1.1  mrg 
   4941  1.1  mrg    3. Return the pointer.  */
   4942  1.1  mrg 
   4943  1.1  mrg tree
   4944  1.1  mrg vect_create_data_ref_ptr (vec_info *vinfo, stmt_vec_info stmt_info,
   4945  1.1  mrg 			  tree aggr_type, class loop *at_loop, tree offset,
   4946  1.1  mrg 			  tree *initial_address, gimple_stmt_iterator *gsi,
   4947  1.1  mrg 			  gimple **ptr_incr, bool only_init,
   4948  1.1  mrg 			  tree iv_step)
   4949  1.1  mrg {
   4950  1.1  mrg   const char *base_name;
   4951  1.1  mrg   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   4952  1.1  mrg   class loop *loop = NULL;
   4953  1.1  mrg   bool nested_in_vect_loop = false;
   4954  1.1  mrg   class loop *containing_loop = NULL;
   4955  1.1  mrg   tree aggr_ptr_type;
   4956  1.1  mrg   tree aggr_ptr;
   4957  1.1  mrg   tree new_temp;
   4958  1.1  mrg   gimple_seq new_stmt_list = NULL;
   4959  1.1  mrg   edge pe = NULL;
   4960  1.1  mrg   basic_block new_bb;
   4961  1.1  mrg   tree aggr_ptr_init;
   4962  1.1  mrg   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
   4963  1.1  mrg   struct data_reference *dr = dr_info->dr;
   4964  1.1  mrg   tree aptr;
   4965  1.1  mrg   gimple_stmt_iterator incr_gsi;
   4966  1.1  mrg   bool insert_after;
   4967  1.1  mrg   tree indx_before_incr, indx_after_incr;
   4968  1.1  mrg   gimple *incr;
   4969  1.1  mrg   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
   4970  1.1  mrg 
   4971  1.1  mrg   gcc_assert (iv_step != NULL_TREE
   4972  1.1  mrg 	      || TREE_CODE (aggr_type) == ARRAY_TYPE
   4973  1.1  mrg 	      || TREE_CODE (aggr_type) == VECTOR_TYPE);
   4974  1.1  mrg 
   4975  1.1  mrg   if (loop_vinfo)
   4976  1.1  mrg     {
   4977  1.1  mrg       loop = LOOP_VINFO_LOOP (loop_vinfo);
   4978  1.1  mrg       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
   4979  1.1  mrg       containing_loop = (gimple_bb (stmt_info->stmt))->loop_father;
   4980  1.1  mrg       pe = loop_preheader_edge (loop);
   4981  1.1  mrg     }
   4982  1.1  mrg   else
   4983  1.1  mrg     {
   4984  1.1  mrg       gcc_assert (bb_vinfo);
   4985  1.1  mrg       only_init = true;
   4986  1.1  mrg       *ptr_incr = NULL;
   4987  1.1  mrg     }
   4988  1.1  mrg 
   4989  1.1  mrg   /* Create an expression for the first address accessed by this load
   4990  1.1  mrg      in LOOP.  */
   4991  1.1  mrg   base_name = get_name (DR_BASE_ADDRESS (dr));
   4992  1.1  mrg 
   4993  1.1  mrg   if (dump_enabled_p ())
   4994  1.1  mrg     {
   4995  1.1  mrg       tree dr_base_type = TREE_TYPE (DR_BASE_OBJECT (dr));
   4996  1.1  mrg       dump_printf_loc (MSG_NOTE, vect_location,
   4997  1.1  mrg                        "create %s-pointer variable to type: %T",
   4998  1.1  mrg 		       get_tree_code_name (TREE_CODE (aggr_type)),
   4999  1.1  mrg 		       aggr_type);
   5000  1.1  mrg       if (TREE_CODE (dr_base_type) == ARRAY_TYPE)
   5001  1.1  mrg         dump_printf (MSG_NOTE, "  vectorizing an array ref: ");
   5002  1.1  mrg       else if (TREE_CODE (dr_base_type) == VECTOR_TYPE)
   5003  1.1  mrg         dump_printf (MSG_NOTE, "  vectorizing a vector ref: ");
   5004  1.1  mrg       else if (TREE_CODE (dr_base_type) == RECORD_TYPE)
   5005  1.1  mrg         dump_printf (MSG_NOTE, "  vectorizing a record based array ref: ");
   5006  1.1  mrg       else
   5007  1.1  mrg         dump_printf (MSG_NOTE, "  vectorizing a pointer ref: ");
   5008  1.1  mrg       dump_printf (MSG_NOTE, "%T\n", DR_BASE_OBJECT (dr));
   5009  1.1  mrg     }
   5010  1.1  mrg 
    5011  1.1  mrg   /* (1) Create the new aggregate-pointer variable.
    5012  1.1  mrg      Vector and array types inherit the alias set of their component
    5013  1.1  mrg      type by default, so we need to use a ref-all pointer if the
    5014  1.1  mrg      original data reference does not conflict with the created
    5015  1.1  mrg      aggregate data reference (e.g. because it is not addressable).  */
   5016  1.1  mrg   bool need_ref_all = false;
   5017  1.1  mrg   if (!alias_sets_conflict_p (get_alias_set (aggr_type),
   5018  1.1  mrg 			      get_alias_set (DR_REF (dr))))
   5019  1.1  mrg     need_ref_all = true;
   5020  1.1  mrg   /* Likewise for any of the data references in the stmt group.  */
   5021  1.1  mrg   else if (DR_GROUP_SIZE (stmt_info) > 1)
   5022  1.1  mrg     {
   5023  1.1  mrg       stmt_vec_info sinfo = DR_GROUP_FIRST_ELEMENT (stmt_info);
   5024  1.1  mrg       do
   5025  1.1  mrg 	{
   5026  1.1  mrg 	  struct data_reference *sdr = STMT_VINFO_DATA_REF (sinfo);
   5027  1.1  mrg 	  if (!alias_sets_conflict_p (get_alias_set (aggr_type),
   5028  1.1  mrg 				      get_alias_set (DR_REF (sdr))))
   5029  1.1  mrg 	    {
   5030  1.1  mrg 	      need_ref_all = true;
   5031  1.1  mrg 	      break;
   5032  1.1  mrg 	    }
   5033  1.1  mrg 	  sinfo = DR_GROUP_NEXT_ELEMENT (sinfo);
   5034  1.1  mrg 	}
   5035  1.1  mrg       while (sinfo);
   5036  1.1  mrg     }
   5037  1.1  mrg   aggr_ptr_type = build_pointer_type_for_mode (aggr_type, ptr_mode,
   5038  1.1  mrg 					       need_ref_all);
   5039  1.1  mrg   aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, base_name);
   5040  1.1  mrg 
   5041  1.1  mrg 
   5042  1.1  mrg   /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
   5043  1.1  mrg      vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
   5044  1.1  mrg      def-use update cycles for the pointer: one relative to the outer-loop
   5045  1.1  mrg      (LOOP), which is what steps (3) and (4) below do.  The other is relative
   5046  1.1  mrg      to the inner-loop (which is the inner-most loop containing the dataref),
    5047  1.1  mrg      and this is done by step (5) below.
   5048  1.1  mrg 
   5049  1.1  mrg      When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
   5050  1.1  mrg      inner-most loop, and so steps (3),(4) work the same, and step (5) is
   5051  1.1  mrg      redundant.  Steps (3),(4) create the following:
   5052  1.1  mrg 
   5053  1.1  mrg 	vp0 = &base_addr;
   5054  1.1  mrg 	LOOP:	vp1 = phi(vp0,vp2)
   5055  1.1  mrg 		...
   5056  1.1  mrg 		...
   5057  1.1  mrg 		vp2 = vp1 + step
   5058  1.1  mrg 		goto LOOP
   5059  1.1  mrg 
   5060  1.1  mrg      If there is an inner-loop nested in loop, then step (5) will also be
   5061  1.1  mrg      applied, and an additional update in the inner-loop will be created:
   5062  1.1  mrg 
   5063  1.1  mrg 	vp0 = &base_addr;
   5064  1.1  mrg 	LOOP:   vp1 = phi(vp0,vp2)
   5065  1.1  mrg 		...
   5066  1.1  mrg         inner:     vp3 = phi(vp1,vp4)
   5067  1.1  mrg 	           vp4 = vp3 + inner_step
   5068  1.1  mrg 	           if () goto inner
   5069  1.1  mrg 		...
   5070  1.1  mrg 		vp2 = vp1 + step
   5071  1.1  mrg 		if () goto LOOP   */
   5072  1.1  mrg 
   5073  1.1  mrg   /* (2) Calculate the initial address of the aggregate-pointer, and set
   5074  1.1  mrg      the aggregate-pointer to point to it before the loop.  */
   5075  1.1  mrg 
   5076  1.1  mrg   /* Create: (&(base[init_val]+offset) in the loop preheader.  */
   5077  1.1  mrg 
   5078  1.1  mrg   new_temp = vect_create_addr_base_for_vector_ref (vinfo,
   5079  1.1  mrg 						   stmt_info, &new_stmt_list,
   5080  1.1  mrg 						   offset);
   5081  1.1  mrg   if (new_stmt_list)
   5082  1.1  mrg     {
   5083  1.1  mrg       if (pe)
   5084  1.1  mrg         {
   5085  1.1  mrg           new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
   5086  1.1  mrg           gcc_assert (!new_bb);
   5087  1.1  mrg         }
   5088  1.1  mrg       else
   5089  1.1  mrg         gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
   5090  1.1  mrg     }
   5091  1.1  mrg 
   5092  1.1  mrg   *initial_address = new_temp;
   5093  1.1  mrg   aggr_ptr_init = new_temp;
   5094  1.1  mrg 
   5095  1.1  mrg   /* (3) Handle the updating of the aggregate-pointer inside the loop.
   5096  1.1  mrg      This is needed when ONLY_INIT is false, and also when AT_LOOP is the
   5097  1.1  mrg      inner-loop nested in LOOP (during outer-loop vectorization).  */
   5098  1.1  mrg 
   5099  1.1  mrg   /* No update in loop is required.  */
   5100  1.1  mrg   if (only_init && (!loop_vinfo || at_loop == loop))
   5101  1.1  mrg     aptr = aggr_ptr_init;
   5102  1.1  mrg   else
   5103  1.1  mrg     {
   5104  1.1  mrg       /* Accesses to invariant addresses should be handled specially
   5105  1.1  mrg 	 by the caller.  */
   5106  1.1  mrg       tree step = vect_dr_behavior (vinfo, dr_info)->step;
   5107  1.1  mrg       gcc_assert (!integer_zerop (step));
   5108  1.1  mrg 
   5109  1.1  mrg       if (iv_step == NULL_TREE)
   5110  1.1  mrg 	{
   5111  1.1  mrg 	  /* The step of the aggregate pointer is the type size,
   5112  1.1  mrg 	     negated for downward accesses.  */
   5113  1.1  mrg 	  iv_step = TYPE_SIZE_UNIT (aggr_type);
   5114  1.1  mrg 	  if (tree_int_cst_sgn (step) == -1)
   5115  1.1  mrg 	    iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
   5116  1.1  mrg 	}
   5117  1.1  mrg 
   5118  1.1  mrg       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
   5119  1.1  mrg 
   5120  1.1  mrg       create_iv (aggr_ptr_init,
   5121  1.1  mrg 		 fold_convert (aggr_ptr_type, iv_step),
   5122  1.1  mrg 		 aggr_ptr, loop, &incr_gsi, insert_after,
   5123  1.1  mrg 		 &indx_before_incr, &indx_after_incr);
   5124  1.1  mrg       incr = gsi_stmt (incr_gsi);
   5125  1.1  mrg 
   5126  1.1  mrg       /* Copy the points-to information if it exists. */
   5127  1.1  mrg       if (DR_PTR_INFO (dr))
   5128  1.1  mrg 	{
   5129  1.1  mrg 	  vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr_info);
   5130  1.1  mrg 	  vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr_info);
   5131  1.1  mrg 	}
   5132  1.1  mrg       if (ptr_incr)
   5133  1.1  mrg 	*ptr_incr = incr;
   5134  1.1  mrg 
   5135  1.1  mrg       aptr = indx_before_incr;
   5136  1.1  mrg     }
   5137  1.1  mrg 
   5138  1.1  mrg   if (!nested_in_vect_loop || only_init)
   5139  1.1  mrg     return aptr;
   5140  1.1  mrg 
   5141  1.1  mrg 
   5142  1.1  mrg   /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
    5143  1.1  mrg      nested in LOOP, if it exists.  */
   5144  1.1  mrg 
   5145  1.1  mrg   gcc_assert (nested_in_vect_loop);
   5146  1.1  mrg   if (!only_init)
   5147  1.1  mrg     {
   5148  1.1  mrg       standard_iv_increment_position (containing_loop, &incr_gsi,
   5149  1.1  mrg 				      &insert_after);
   5150  1.1  mrg       create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
   5151  1.1  mrg 		 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
   5152  1.1  mrg 		 &indx_after_incr);
   5153  1.1  mrg       incr = gsi_stmt (incr_gsi);
   5154  1.1  mrg 
   5155  1.1  mrg       /* Copy the points-to information if it exists. */
   5156  1.1  mrg       if (DR_PTR_INFO (dr))
   5157  1.1  mrg 	{
   5158  1.1  mrg 	  vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr_info);
   5159  1.1  mrg 	  vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr_info);
   5160  1.1  mrg 	}
   5161  1.1  mrg       if (ptr_incr)
   5162  1.1  mrg 	*ptr_incr = incr;
   5163  1.1  mrg 
   5164  1.1  mrg       return indx_before_incr;
   5165  1.1  mrg     }
   5166  1.1  mrg   else
   5167  1.1  mrg     gcc_unreachable ();
   5168  1.1  mrg }
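/* Hedged usage sketch (simplified from the load/store emission code):

     tree init_addr;
     gimple *ptr_incr;
     tree dataref_ptr
       = vect_create_data_ref_ptr (vinfo, stmt_info, vectype, loop,
				   NULL_TREE, &init_addr, gsi, &ptr_incr,
				   false, NULL_TREE);
     tree ref_type = reference_alias_ptr_type (DR_REF (dr));
     tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr,
				  build_int_cst (ref_type, 0));

   with bump_vector_ptr (below) used to advance DATAREF_PTR between
   vector copies.  */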
   5169  1.1  mrg 
   5170  1.1  mrg 
   5171  1.1  mrg /* Function bump_vector_ptr
   5172  1.1  mrg 
   5173  1.1  mrg    Increment a pointer (to a vector type) by vector-size. If requested,
    5174  1.1  mrg    i.e. if PTR_INCR is given, then also connect the new increment stmt
   5175  1.1  mrg    to the existing def-use update-chain of the pointer, by modifying
   5176  1.1  mrg    the PTR_INCR as illustrated below:
   5177  1.1  mrg 
   5178  1.1  mrg    The pointer def-use update-chain before this function:
   5179  1.1  mrg                         DATAREF_PTR = phi (p_0, p_2)
   5180  1.1  mrg                         ....
   5181  1.1  mrg         PTR_INCR:       p_2 = DATAREF_PTR + step
   5182  1.1  mrg 
   5183  1.1  mrg    The pointer def-use update-chain after this function:
   5184  1.1  mrg                         DATAREF_PTR = phi (p_0, p_2)
   5185  1.1  mrg                         ....
   5186  1.1  mrg                         NEW_DATAREF_PTR = DATAREF_PTR + BUMP
   5187  1.1  mrg                         ....
   5188  1.1  mrg         PTR_INCR:       p_2 = NEW_DATAREF_PTR + step
   5189  1.1  mrg 
   5190  1.1  mrg    Input:
   5191  1.1  mrg    DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
   5192  1.1  mrg                  in the loop.
   5193  1.1  mrg    PTR_INCR - optional. The stmt that updates the pointer in each iteration of
   5194  1.1  mrg 	      the loop.  The increment amount across iterations is expected
   5195  1.1  mrg 	      to be vector_size.
    5196  1.1  mrg    GSI - location where the new update stmt is to be placed.
   5197  1.1  mrg    STMT_INFO - the original scalar memory-access stmt that is being vectorized.
   5198  1.1  mrg    BUMP - optional. The offset by which to bump the pointer. If not given,
   5199  1.1  mrg 	  the offset is assumed to be vector_size.
   5200  1.1  mrg 
   5201  1.1  mrg    Output: Return NEW_DATAREF_PTR as illustrated above.
   5202  1.1  mrg 
   5203  1.1  mrg */
   5204  1.1  mrg 
   5205  1.1  mrg tree
   5206  1.1  mrg bump_vector_ptr (vec_info *vinfo,
   5207  1.1  mrg 		 tree dataref_ptr, gimple *ptr_incr, gimple_stmt_iterator *gsi,
   5208  1.1  mrg 		 stmt_vec_info stmt_info, tree bump)
   5209  1.1  mrg {
   5210  1.1  mrg   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
   5211  1.1  mrg   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   5212  1.1  mrg   tree update = TYPE_SIZE_UNIT (vectype);
   5213  1.1  mrg   gimple *incr_stmt;
   5214  1.1  mrg   ssa_op_iter iter;
   5215  1.1  mrg   use_operand_p use_p;
   5216  1.1  mrg   tree new_dataref_ptr;
   5217  1.1  mrg 
   5218  1.1  mrg   if (bump)
   5219  1.1  mrg     update = bump;
   5220  1.1  mrg 
   5221  1.1  mrg   if (TREE_CODE (dataref_ptr) == SSA_NAME)
   5222  1.1  mrg     new_dataref_ptr = copy_ssa_name (dataref_ptr);
   5223  1.1  mrg   else
   5224  1.1  mrg     new_dataref_ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
   5225  1.1  mrg   incr_stmt = gimple_build_assign (new_dataref_ptr, POINTER_PLUS_EXPR,
   5226  1.1  mrg 				   dataref_ptr, update);
   5227  1.1  mrg   vect_finish_stmt_generation (vinfo, stmt_info, incr_stmt, gsi);
    5228  1.1  mrg   /* Fold the increment, avoiding excessive use-def chains that would
    5229  1.1  mrg      otherwise cause compile-time issues for passes until the next
    5230  1.1  mrg      forwprop pass, which would do this as well.  */
   5231  1.1  mrg   gimple_stmt_iterator fold_gsi = gsi_for_stmt (incr_stmt);
   5232  1.1  mrg   if (fold_stmt (&fold_gsi, follow_all_ssa_edges))
   5233  1.1  mrg     {
   5234  1.1  mrg       incr_stmt = gsi_stmt (fold_gsi);
   5235  1.1  mrg       update_stmt (incr_stmt);
   5236  1.1  mrg     }
   5237  1.1  mrg 
   5238  1.1  mrg   /* Copy the points-to information if it exists. */
   5239  1.1  mrg   if (DR_PTR_INFO (dr))
   5240  1.1  mrg     {
   5241  1.1  mrg       duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
   5242  1.1  mrg       mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr));
   5243  1.1  mrg     }
   5244  1.1  mrg 
   5245  1.1  mrg   if (!ptr_incr)
   5246  1.1  mrg     return new_dataref_ptr;
   5247  1.1  mrg 
   5248  1.1  mrg   /* Update the vector-pointer's cross-iteration increment.  */
   5249  1.1  mrg   FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
   5250  1.1  mrg     {
   5251  1.1  mrg       tree use = USE_FROM_PTR (use_p);
   5252  1.1  mrg 
   5253  1.1  mrg       if (use == dataref_ptr)
   5254  1.1  mrg         SET_USE (use_p, new_dataref_ptr);
   5255  1.1  mrg       else
   5256  1.1  mrg         gcc_assert (operand_equal_p (use, update, 0));
   5257  1.1  mrg     }
   5258  1.1  mrg 
   5259  1.1  mrg   return new_dataref_ptr;
   5260  1.1  mrg }
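/* Hedged usage sketch: between two vector copies of a grouped access,

     dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
				    stmt_info, bump);

   where passing NULL_TREE for BUMP makes the offset default to the
   vector size.  */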
   5261  1.1  mrg 
   5262  1.1  mrg 
   5263  1.1  mrg /* Copy memory reference info such as base/clique from the SRC reference
   5264  1.1  mrg    to the DEST MEM_REF.  */
   5265  1.1  mrg 
   5266  1.1  mrg void
   5267  1.1  mrg vect_copy_ref_info (tree dest, tree src)
   5268  1.1  mrg {
   5269  1.1  mrg   if (TREE_CODE (dest) != MEM_REF)
   5270  1.1  mrg     return;
   5271  1.1  mrg 
   5272  1.1  mrg   tree src_base = src;
   5273  1.1  mrg   while (handled_component_p (src_base))
   5274  1.1  mrg     src_base = TREE_OPERAND (src_base, 0);
   5275  1.1  mrg   if (TREE_CODE (src_base) != MEM_REF
   5276  1.1  mrg       && TREE_CODE (src_base) != TARGET_MEM_REF)
   5277  1.1  mrg     return;
   5278  1.1  mrg 
   5279  1.1  mrg   MR_DEPENDENCE_CLIQUE (dest) = MR_DEPENDENCE_CLIQUE (src_base);
   5280  1.1  mrg   MR_DEPENDENCE_BASE (dest) = MR_DEPENDENCE_BASE (src_base);
   5281  1.1  mrg }
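/* Example: if the scalar ref carried restrict-derived dependence info
   on its base MEM_REF, then after building the vector MEM_REF in
   DATA_REF a call like

     vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));

   carries the clique/base pair over, so alias analysis keeps the same
   non-dependence guarantees for the vector access.  */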
   5282  1.1  mrg 
   5283  1.1  mrg 
   5284  1.1  mrg /* Function vect_create_destination_var.
   5285  1.1  mrg 
   5286  1.1  mrg    Create a new temporary of type VECTYPE.  */
   5287  1.1  mrg 
   5288  1.1  mrg tree
   5289  1.1  mrg vect_create_destination_var (tree scalar_dest, tree vectype)
   5290  1.1  mrg {
   5291  1.1  mrg   tree vec_dest;
   5292  1.1  mrg   const char *name;
   5293  1.1  mrg   char *new_name;
   5294  1.1  mrg   tree type;
   5295  1.1  mrg   enum vect_var_kind kind;
   5296  1.1  mrg 
   5297  1.1  mrg   kind = vectype
   5298  1.1  mrg     ? VECTOR_BOOLEAN_TYPE_P (vectype)
   5299  1.1  mrg     ? vect_mask_var
   5300  1.1  mrg     : vect_simple_var
   5301  1.1  mrg     : vect_scalar_var;
   5302  1.1  mrg   type = vectype ? vectype : TREE_TYPE (scalar_dest);
   5303  1.1  mrg 
   5304  1.1  mrg   gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
   5305  1.1  mrg 
   5306  1.1  mrg   name = get_name (scalar_dest);
   5307  1.1  mrg   if (name)
   5308  1.1  mrg     new_name = xasprintf ("%s_%u", name, SSA_NAME_VERSION (scalar_dest));
   5309  1.1  mrg   else
   5310  1.1  mrg     new_name = xasprintf ("_%u", SSA_NAME_VERSION (scalar_dest));
   5311  1.1  mrg   vec_dest = vect_get_new_vect_var (type, kind, new_name);
   5312  1.1  mrg   free (new_name);
   5313  1.1  mrg 
   5314  1.1  mrg   return vec_dest;
   5315  1.1  mrg }
   5316  1.1  mrg 
   5317  1.1  mrg /* Function vect_grouped_store_supported.
   5318  1.1  mrg 
   5319  1.1  mrg    Returns TRUE if interleave high and interleave low permutations
   5320  1.1  mrg    are supported, and FALSE otherwise.  */
   5321  1.1  mrg 
   5322  1.1  mrg bool
   5323  1.1  mrg vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
   5324  1.1  mrg {
   5325  1.1  mrg   machine_mode mode = TYPE_MODE (vectype);
   5326  1.1  mrg 
   5327  1.1  mrg   /* vect_permute_store_chain requires the group size to be equal to 3 or
   5328  1.1  mrg      be a power of two.  */
   5329  1.1  mrg   if (count != 3 && exact_log2 (count) == -1)
   5330  1.1  mrg     {
   5331  1.1  mrg       if (dump_enabled_p ())
   5332  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   5333  1.1  mrg 			 "the size of the group of accesses"
    5334  1.1  mrg 			 " is not a power of 2 or not equal to 3\n");
   5335  1.1  mrg       return false;
   5336  1.1  mrg     }
   5337  1.1  mrg 
   5338  1.1  mrg   /* Check that the permutation is supported.  */
   5339  1.1  mrg   if (VECTOR_MODE_P (mode))
   5340  1.1  mrg     {
   5341  1.1  mrg       unsigned int i;
   5342  1.1  mrg       if (count == 3)
   5343  1.1  mrg 	{
   5344  1.1  mrg 	  unsigned int j0 = 0, j1 = 0, j2 = 0;
   5345  1.1  mrg 	  unsigned int i, j;
   5346  1.1  mrg 
   5347  1.1  mrg 	  unsigned int nelt;
   5348  1.1  mrg 	  if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
   5349  1.1  mrg 	    {
   5350  1.1  mrg 	      if (dump_enabled_p ())
   5351  1.1  mrg 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   5352  1.1  mrg 				 "cannot handle groups of 3 stores for"
   5353  1.1  mrg 				 " variable-length vectors\n");
   5354  1.1  mrg 	      return false;
   5355  1.1  mrg 	    }
   5356  1.1  mrg 
   5357  1.1  mrg 	  vec_perm_builder sel (nelt, nelt, 1);
   5358  1.1  mrg 	  sel.quick_grow (nelt);
   5359  1.1  mrg 	  vec_perm_indices indices;
   5360  1.1  mrg 	  for (j = 0; j < 3; j++)
   5361  1.1  mrg 	    {
   5362  1.1  mrg 	      int nelt0 = ((3 - j) * nelt) % 3;
   5363  1.1  mrg 	      int nelt1 = ((3 - j) * nelt + 1) % 3;
   5364  1.1  mrg 	      int nelt2 = ((3 - j) * nelt + 2) % 3;
   5365  1.1  mrg 	      for (i = 0; i < nelt; i++)
   5366  1.1  mrg 		{
   5367  1.1  mrg 		  if (3 * i + nelt0 < nelt)
   5368  1.1  mrg 		    sel[3 * i + nelt0] = j0++;
   5369  1.1  mrg 		  if (3 * i + nelt1 < nelt)
   5370  1.1  mrg 		    sel[3 * i + nelt1] = nelt + j1++;
   5371  1.1  mrg 		  if (3 * i + nelt2 < nelt)
   5372  1.1  mrg 		    sel[3 * i + nelt2] = 0;
   5373  1.1  mrg 		}
   5374  1.1  mrg 	      indices.new_vector (sel, 2, nelt);
   5375  1.1  mrg 	      if (!can_vec_perm_const_p (mode, indices))
   5376  1.1  mrg 		{
   5377  1.1  mrg 		  if (dump_enabled_p ())
   5378  1.1  mrg 		    dump_printf (MSG_MISSED_OPTIMIZATION,
   5379  1.1  mrg 				 "permutation op not supported by target.\n");
   5380  1.1  mrg 		  return false;
   5381  1.1  mrg 		}
   5382  1.1  mrg 
   5383  1.1  mrg 	      for (i = 0; i < nelt; i++)
   5384  1.1  mrg 		{
   5385  1.1  mrg 		  if (3 * i + nelt0 < nelt)
   5386  1.1  mrg 		    sel[3 * i + nelt0] = 3 * i + nelt0;
   5387  1.1  mrg 		  if (3 * i + nelt1 < nelt)
   5388  1.1  mrg 		    sel[3 * i + nelt1] = 3 * i + nelt1;
   5389  1.1  mrg 		  if (3 * i + nelt2 < nelt)
   5390  1.1  mrg 		    sel[3 * i + nelt2] = nelt + j2++;
   5391  1.1  mrg 		}
   5392  1.1  mrg 	      indices.new_vector (sel, 2, nelt);
   5393  1.1  mrg 	      if (!can_vec_perm_const_p (mode, indices))
   5394  1.1  mrg 		{
   5395  1.1  mrg 		  if (dump_enabled_p ())
   5396  1.1  mrg 		    dump_printf (MSG_MISSED_OPTIMIZATION,
   5397  1.1  mrg 				 "permutation op not supported by target.\n");
   5398  1.1  mrg 		  return false;
   5399  1.1  mrg 		}
   5400  1.1  mrg 	    }
   5401  1.1  mrg 	  return true;
   5402  1.1  mrg 	}
   5403  1.1  mrg       else
   5404  1.1  mrg 	{
   5405  1.1  mrg 	  /* If length is not equal to 3 then only power of 2 is supported.  */
   5406  1.1  mrg 	  gcc_assert (pow2p_hwi (count));
   5407  1.1  mrg 	  poly_uint64 nelt = GET_MODE_NUNITS (mode);
   5408  1.1  mrg 
   5409  1.1  mrg 	  /* The encoding has 2 interleaved stepped patterns.  */
   5410  1.1  mrg 	  vec_perm_builder sel (nelt, 2, 3);
   5411  1.1  mrg 	  sel.quick_grow (6);
   5412  1.1  mrg 	  for (i = 0; i < 3; i++)
   5413  1.1  mrg 	    {
   5414  1.1  mrg 	      sel[i * 2] = i;
   5415  1.1  mrg 	      sel[i * 2 + 1] = i + nelt;
   5416  1.1  mrg 	    }
   5417  1.1  mrg 	  vec_perm_indices indices (sel, 2, nelt);
   5418  1.1  mrg 	  if (can_vec_perm_const_p (mode, indices))
   5419  1.1  mrg 	    {
   5420  1.1  mrg 	      for (i = 0; i < 6; i++)
   5421  1.1  mrg 		sel[i] += exact_div (nelt, 2);
   5422  1.1  mrg 	      indices.new_vector (sel, 2, nelt);
   5423  1.1  mrg 	      if (can_vec_perm_const_p (mode, indices))
   5424  1.1  mrg 		return true;
   5425  1.1  mrg 	    }
   5426  1.1  mrg 	}
   5427  1.1  mrg     }
   5428  1.1  mrg 
   5429  1.1  mrg   if (dump_enabled_p ())
   5430  1.1  mrg     dump_printf (MSG_MISSED_OPTIMIZATION,
   5431  1.1  mrg 		 "permutation op not supported by target.\n");
   5432  1.1  mrg   return false;
   5433  1.1  mrg }
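/* Worked example for the power-of-2 branch above: for nelt == 8 the
   stepped encoding {0, nelt, 1, nelt + 1, 2, nelt + 2} expands to the
   full selector {0, 8, 1, 9, 2, 10, 3, 11}, which picks elements
   alternately from the low halves of the two inputs; adding nelt / 2
   to each element gives {4, 12, 5, 13, 6, 14, 7, 15}, the interleave
   of the high halves.  Both permutations must be supported for
   vect_permute_store_chain to use this scheme.  */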
   5434  1.1  mrg 
   5435  1.1  mrg 
   5436  1.1  mrg /* Return TRUE if vec_{mask_}store_lanes is available for COUNT vectors of
   5437  1.1  mrg    type VECTYPE.  MASKED_P says whether the masked form is needed.  */
   5438  1.1  mrg 
   5439  1.1  mrg bool
   5440  1.1  mrg vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
   5441  1.1  mrg 			    bool masked_p)
   5442  1.1  mrg {
   5443  1.1  mrg   if (masked_p)
   5444  1.1  mrg     return vect_lanes_optab_supported_p ("vec_mask_store_lanes",
   5445  1.1  mrg 					 vec_mask_store_lanes_optab,
   5446  1.1  mrg 					 vectype, count);
   5447  1.1  mrg   else
   5448  1.1  mrg     return vect_lanes_optab_supported_p ("vec_store_lanes",
   5449  1.1  mrg 					 vec_store_lanes_optab,
   5450  1.1  mrg 					 vectype, count);
   5451  1.1  mrg }
   5452  1.1  mrg 
   5453  1.1  mrg 
   5454  1.1  mrg /* Function vect_permute_store_chain.
   5455  1.1  mrg 
   5456  1.1  mrg    Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
   5457  1.1  mrg    a power of 2 or equal to 3, generate interleave_high/low stmts to reorder
   5458  1.1  mrg    the data correctly for the stores.  Return the final references for stores
   5459  1.1  mrg    in RESULT_CHAIN.
   5460  1.1  mrg 
   5461  1.1  mrg    E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
   5462  1.1  mrg    The input is 4 vectors each containing 8 elements.  We assign a number to
   5463  1.1  mrg    each element, the input sequence is:
   5464  1.1  mrg 
   5465  1.1  mrg    1st vec:   0  1  2  3  4  5  6  7
   5466  1.1  mrg    2nd vec:   8  9 10 11 12 13 14 15
   5467  1.1  mrg    3rd vec:  16 17 18 19 20 21 22 23
   5468  1.1  mrg    4th vec:  24 25 26 27 28 29 30 31
   5469  1.1  mrg 
   5470  1.1  mrg    The output sequence should be:
   5471  1.1  mrg 
   5472  1.1  mrg    1st vec:  0  8 16 24  1  9 17 25
   5473  1.1  mrg    2nd vec:  2 10 18 26  3 11 19 27
    5474  1.1  mrg    3rd vec:  4 12 20 28  5 13 21 29
   5475  1.1  mrg    4th vec:  6 14 22 30  7 15 23 31
   5476  1.1  mrg 
   5477  1.1  mrg    i.e., we interleave the contents of the four vectors in their order.
   5478  1.1  mrg 
   5479  1.1  mrg    We use interleave_high/low instructions to create such output.  The input of
   5480  1.1  mrg    each interleave_high/low operation is two vectors:
   5481  1.1  mrg    1st vec    2nd vec
   5482  1.1  mrg    0 1 2 3    4 5 6 7
   5483  1.1  mrg    the even elements of the result vector are obtained left-to-right from the
   5484  1.1  mrg    high/low elements of the first vector.  The odd elements of the result are
   5485  1.1  mrg    obtained left-to-right from the high/low elements of the second vector.
   5486  1.1  mrg    The output of interleave_high will be:   0 4 1 5
   5487  1.1  mrg    and of interleave_low:                   2 6 3 7
   5488  1.1  mrg 
   5489  1.1  mrg 
   5490  1.1  mrg    The permutation is done in log LENGTH stages.  In each stage interleave_high
   5491  1.1  mrg    and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
   5492  1.1  mrg    where the first argument is taken from the first half of DR_CHAIN and the
   5493  1.1  mrg    second argument from it's second half.
   5494  1.1  mrg    In our example,
   5495  1.1  mrg 
   5496  1.1  mrg    I1: interleave_high (1st vec, 3rd vec)
   5497  1.1  mrg    I2: interleave_low (1st vec, 3rd vec)
   5498  1.1  mrg    I3: interleave_high (2nd vec, 4th vec)
   5499  1.1  mrg    I4: interleave_low (2nd vec, 4th vec)
   5500  1.1  mrg 
   5501  1.1  mrg    The output for the first stage is:
   5502  1.1  mrg 
   5503  1.1  mrg    I1:  0 16  1 17  2 18  3 19
   5504  1.1  mrg    I2:  4 20  5 21  6 22  7 23
   5505  1.1  mrg    I3:  8 24  9 25 10 26 11 27
   5506  1.1  mrg    I4: 12 28 13 29 14 30 15 31
   5507  1.1  mrg 
   5508  1.1  mrg    The output of the second stage, i.e. the final result is:
   5509  1.1  mrg 
   5510  1.1  mrg    I1:  0  8 16 24  1  9 17 25
   5511  1.1  mrg    I2:  2 10 18 26  3 11 19 27
    5512  1.1  mrg    I3:  4 12 20 28  5 13 21 29
   5513  1.1  mrg    I4:  6 14 22 30  7 15 23 31.  */
   5514  1.1  mrg 
   5515  1.1  mrg void
   5516  1.1  mrg vect_permute_store_chain (vec_info *vinfo, vec<tree> &dr_chain,
   5517  1.1  mrg 			  unsigned int length,
   5518  1.1  mrg 			  stmt_vec_info stmt_info,
   5519  1.1  mrg 			  gimple_stmt_iterator *gsi,
   5520  1.1  mrg 			  vec<tree> *result_chain)
   5521  1.1  mrg {
   5522  1.1  mrg   tree vect1, vect2, high, low;
   5523  1.1  mrg   gimple *perm_stmt;
   5524  1.1  mrg   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   5525  1.1  mrg   tree perm_mask_low, perm_mask_high;
   5526  1.1  mrg   tree data_ref;
   5527  1.1  mrg   tree perm3_mask_low, perm3_mask_high;
   5528  1.1  mrg   unsigned int i, j, n, log_length = exact_log2 (length);
   5529  1.1  mrg 
   5530  1.1  mrg   result_chain->quick_grow (length);
   5531  1.1  mrg   memcpy (result_chain->address (), dr_chain.address (),
   5532  1.1  mrg 	  length * sizeof (tree));
   5533  1.1  mrg 
   5534  1.1  mrg   if (length == 3)
   5535  1.1  mrg     {
   5536  1.1  mrg       /* vect_grouped_store_supported ensures that this is constant.  */
   5537  1.1  mrg       unsigned int nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
   5538  1.1  mrg       unsigned int j0 = 0, j1 = 0, j2 = 0;
   5539  1.1  mrg 
   5540  1.1  mrg       vec_perm_builder sel (nelt, nelt, 1);
   5541  1.1  mrg       sel.quick_grow (nelt);
   5542  1.1  mrg       vec_perm_indices indices;
   5543  1.1  mrg       for (j = 0; j < 3; j++)
   5544  1.1  mrg         {
   5545  1.1  mrg 	  int nelt0 = ((3 - j) * nelt) % 3;
   5546  1.1  mrg 	  int nelt1 = ((3 - j) * nelt + 1) % 3;
   5547  1.1  mrg 	  int nelt2 = ((3 - j) * nelt + 2) % 3;
   5548  1.1  mrg 
   5549  1.1  mrg 	  for (i = 0; i < nelt; i++)
   5550  1.1  mrg 	    {
   5551  1.1  mrg 	      if (3 * i + nelt0 < nelt)
   5552  1.1  mrg 		sel[3 * i + nelt0] = j0++;
   5553  1.1  mrg 	      if (3 * i + nelt1 < nelt)
   5554  1.1  mrg 		sel[3 * i + nelt1] = nelt + j1++;
   5555  1.1  mrg 	      if (3 * i + nelt2 < nelt)
   5556  1.1  mrg 		sel[3 * i + nelt2] = 0;
   5557  1.1  mrg 	    }
   5558  1.1  mrg 	  indices.new_vector (sel, 2, nelt);
   5559  1.1  mrg 	  perm3_mask_low = vect_gen_perm_mask_checked (vectype, indices);
   5560  1.1  mrg 
   5561  1.1  mrg 	  for (i = 0; i < nelt; i++)
   5562  1.1  mrg 	    {
   5563  1.1  mrg 	      if (3 * i + nelt0 < nelt)
   5564  1.1  mrg 		sel[3 * i + nelt0] = 3 * i + nelt0;
   5565  1.1  mrg 	      if (3 * i + nelt1 < nelt)
   5566  1.1  mrg 		sel[3 * i + nelt1] = 3 * i + nelt1;
   5567  1.1  mrg 	      if (3 * i + nelt2 < nelt)
   5568  1.1  mrg 		sel[3 * i + nelt2] = nelt + j2++;
   5569  1.1  mrg 	    }
   5570  1.1  mrg 	  indices.new_vector (sel, 2, nelt);
   5571  1.1  mrg 	  perm3_mask_high = vect_gen_perm_mask_checked (vectype, indices);
   5572  1.1  mrg 
   5573  1.1  mrg 	  vect1 = dr_chain[0];
   5574  1.1  mrg 	  vect2 = dr_chain[1];
   5575  1.1  mrg 
   5576  1.1  mrg 	  /* Create interleaving stmt:
   5577  1.1  mrg 	     low = VEC_PERM_EXPR <vect1, vect2,
    5578  1.1  mrg 				  {j, nelt + j, *, j + 1, nelt + j + 1, *,
   5579  1.1  mrg 				   j + 2, nelt + j + 2, *, ...}>  */
   5580  1.1  mrg 	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
   5581  1.1  mrg 	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
   5582  1.1  mrg 					   vect2, perm3_mask_low);
   5583  1.1  mrg 	  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
   5584  1.1  mrg 
   5585  1.1  mrg 	  vect1 = data_ref;
   5586  1.1  mrg 	  vect2 = dr_chain[2];
   5587  1.1  mrg 	  /* Create interleaving stmt:
    5588  1.1  mrg 	     high = VEC_PERM_EXPR <vect1, vect2,
   5589  1.1  mrg 				  {0, 1, nelt + j, 3, 4, nelt + j + 1,
   5590  1.1  mrg 				   6, 7, nelt + j + 2, ...}>  */
   5591  1.1  mrg 	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
   5592  1.1  mrg 	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
   5593  1.1  mrg 					   vect2, perm3_mask_high);
   5594  1.1  mrg 	  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
   5595  1.1  mrg 	  (*result_chain)[j] = data_ref;
   5596  1.1  mrg 	}
   5597  1.1  mrg     }
   5598  1.1  mrg   else
   5599  1.1  mrg     {
    5600  1.1  mrg       /* If length is not equal to 3 then only a power of 2 is supported.  */
   5601  1.1  mrg       gcc_assert (pow2p_hwi (length));
   5602  1.1  mrg 
   5603  1.1  mrg       /* The encoding has 2 interleaved stepped patterns.  */
   5604  1.1  mrg       poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (vectype);
   5605  1.1  mrg       vec_perm_builder sel (nelt, 2, 3);
   5606  1.1  mrg       sel.quick_grow (6);
   5607  1.1  mrg       for (i = 0; i < 3; i++)
   5608  1.1  mrg 	{
   5609  1.1  mrg 	  sel[i * 2] = i;
   5610  1.1  mrg 	  sel[i * 2 + 1] = i + nelt;
   5611  1.1  mrg 	}
    5612  1.1  mrg       vec_perm_indices indices (sel, 2, nelt);
    5613  1.1  mrg       perm_mask_high = vect_gen_perm_mask_checked (vectype, indices);
    5614  1.1  mrg 
    5615  1.1  mrg       for (i = 0; i < 6; i++)
    5616  1.1  mrg         sel[i] += exact_div (nelt, 2);
    5617  1.1  mrg       indices.new_vector (sel, 2, nelt);
    5618  1.1  mrg       perm_mask_low = vect_gen_perm_mask_checked (vectype, indices);
    5619  1.1  mrg 
    5620  1.1  mrg       for (i = 0, n = log_length; i < n; i++)
    5621  1.1  mrg         {
    5622  1.1  mrg           for (j = 0; j < length/2; j++)
    5623  1.1  mrg             {
    5624  1.1  mrg               vect1 = dr_chain[j];
    5625  1.1  mrg               vect2 = dr_chain[j+length/2];
    5626  1.1  mrg 
    5627  1.1  mrg               /* Create interleaving stmt:
    5628  1.1  mrg                  high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1,
    5629  1.1  mrg                                                       ...}>  */
    5630  1.1  mrg               high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
    5631  1.1  mrg               perm_stmt = gimple_build_assign (high, VEC_PERM_EXPR, vect1,
    5632  1.1  mrg                                                vect2, perm_mask_high);
    5633  1.1  mrg               vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
    5634  1.1  mrg               (*result_chain)[2*j] = high;
    5635  1.1  mrg 
    5636  1.1  mrg               /* Create interleaving stmt:
    5637  1.1  mrg                  low = VEC_PERM_EXPR <vect1, vect2,
    5638  1.1  mrg                                       {nelt/2, nelt*3/2, nelt/2+1, nelt*3/2+1,
    5639  1.1  mrg                                        ...}>  */
    5640  1.1  mrg               low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
    5641  1.1  mrg               perm_stmt = gimple_build_assign (low, VEC_PERM_EXPR, vect1,
    5642  1.1  mrg                                                vect2, perm_mask_low);
    5643  1.1  mrg               vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
    5644  1.1  mrg               (*result_chain)[2*j+1] = low;
    5645  1.1  mrg             }
    5646  1.1  mrg           memcpy (dr_chain.address (), result_chain->address (),
    5647  1.1  mrg                   length * sizeof (tree));
    5648  1.1  mrg         }
    5649  1.1  mrg     }
   5650  1.1  mrg }
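
/* Editor's illustration, not part of GCC and kept under "#if 0" so it is
   never compiled: a scalar model of the interleave network built by
   vect_permute_store_chain for power-of-two LENGTH.  The function name,
   the plain-array view of vectors and the fixed NELT == 8 are ours; the
   pairing of row J with row J + LENGTH/2 and the element order follow the
   code above.  Feeding it the four vectors 0..31 from the comment above
   reproduces I1..I4 after the first stage and the final interleaved
   result after the second.  */
#if 0
static void
model_store_permute (int chain[][8], int length)
{
  const int nelt = 8;
  int tmp[16][8];		/* Scratch copy; assumes length <= 16.  */
  /* One iteration per stage; log2 (LENGTH) stages in total.  */
  for (int stage = 0; (1 << stage) < length; stage++)
    {
      for (int j = 0; j < length / 2; j++)
	for (int i = 0; i < nelt / 2; i++)
	  {
	    /* interleave_high of rows J and J + LENGTH/2 (low halves).  */
	    tmp[2 * j][2 * i] = chain[j][i];
	    tmp[2 * j][2 * i + 1] = chain[j + length / 2][i];
	    /* interleave_low of the same rows (high halves).  */
	    tmp[2 * j + 1][2 * i] = chain[j][nelt / 2 + i];
	    tmp[2 * j + 1][2 * i + 1] = chain[j + length / 2][nelt / 2 + i];
	  }
      /* Mirrors the memcpy of RESULT_CHAIN back into DR_CHAIN.  */
      for (int j = 0; j < length; j++)
	for (int i = 0; i < nelt; i++)
	  chain[j][i] = tmp[j][i];
    }
}
#endif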
   5651  1.1  mrg 
   5652  1.1  mrg /* Function vect_setup_realignment
   5653  1.1  mrg 
   5654  1.1  mrg    This function is called when vectorizing an unaligned load using
   5655  1.1  mrg    the dr_explicit_realign[_optimized] scheme.
   5656  1.1  mrg    This function generates the following code at the loop prolog:
   5657  1.1  mrg 
   5658  1.1  mrg       p = initial_addr;
   5659  1.1  mrg    x  msq_init = *(floor(p));   # prolog load
   5660  1.1  mrg       realignment_token = call target_builtin;
   5661  1.1  mrg     loop:
   5662  1.1  mrg    x  msq = phi (msq_init, ---)
   5663  1.1  mrg 
   5664  1.1  mrg    The stmts marked with x are generated only for the case of
   5665  1.1  mrg    dr_explicit_realign_optimized.
   5666  1.1  mrg 
   5667  1.1  mrg    The code above sets up a new (vector) pointer, pointing to the first
   5668  1.1  mrg    location accessed by STMT_INFO, and a "floor-aligned" load using that
   5669  1.1  mrg    pointer.  It also generates code to compute the "realignment-token"
   5670  1.1  mrg    (if the relevant target hook was defined), and creates a phi-node at the
   5671  1.1  mrg    loop-header bb whose arguments are the result of the prolog-load (created
   5672  1.1  mrg    by this function) and the result of a load that takes place in the loop
   5673  1.1  mrg    (to be created by the caller to this function).
   5674  1.1  mrg 
   5675  1.1  mrg    For the case of dr_explicit_realign_optimized:
   5676  1.1  mrg    The caller to this function uses the phi-result (msq) to create the
   5677  1.1  mrg    realignment code inside the loop, and sets up the missing phi argument,
   5678  1.1  mrg    as follows:
   5679  1.1  mrg     loop:
   5680  1.1  mrg       msq = phi (msq_init, lsq)
   5681  1.1  mrg       lsq = *(floor(p'));        # load in loop
   5682  1.1  mrg       result = realign_load (msq, lsq, realignment_token);
   5683  1.1  mrg 
   5684  1.1  mrg    For the case of dr_explicit_realign:
   5685  1.1  mrg     loop:
   5686  1.1  mrg       msq = *(floor(p)); 	# load in loop
   5687  1.1  mrg       p' = p + (VS-1);
   5688  1.1  mrg       lsq = *(floor(p'));	# load in loop
   5689  1.1  mrg       result = realign_load (msq, lsq, realignment_token);
   5690  1.1  mrg 
   5691  1.1  mrg    Input:
   5692  1.1  mrg    STMT_INFO - (scalar) load stmt to be vectorized. This load accesses
   5693  1.1  mrg 	       a memory location that may be unaligned.
    5694  1.1  mrg    GSI - place where new code is to be inserted.
   5695  1.1  mrg    ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
   5696  1.1  mrg 			      is used.
   5697  1.1  mrg 
   5698  1.1  mrg    Output:
   5699  1.1  mrg    REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
   5700  1.1  mrg                        target hook, if defined.
   5701  1.1  mrg    Return value - the result of the loop-header phi node.  */
   5702  1.1  mrg 
   5703  1.1  mrg tree
   5704  1.1  mrg vect_setup_realignment (vec_info *vinfo, stmt_vec_info stmt_info,
   5705  1.1  mrg 			gimple_stmt_iterator *gsi, tree *realignment_token,
   5706  1.1  mrg 			enum dr_alignment_support alignment_support_scheme,
   5707  1.1  mrg 			tree init_addr,
   5708  1.1  mrg 			class loop **at_loop)
   5709  1.1  mrg {
   5710  1.1  mrg   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   5711  1.1  mrg   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   5712  1.1  mrg   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
   5713  1.1  mrg   struct data_reference *dr = dr_info->dr;
   5714  1.1  mrg   class loop *loop = NULL;
   5715  1.1  mrg   edge pe = NULL;
   5716  1.1  mrg   tree scalar_dest = gimple_assign_lhs (stmt_info->stmt);
   5717  1.1  mrg   tree vec_dest;
   5718  1.1  mrg   gimple *inc;
   5719  1.1  mrg   tree ptr;
   5720  1.1  mrg   tree data_ref;
   5721  1.1  mrg   basic_block new_bb;
   5722  1.1  mrg   tree msq_init = NULL_TREE;
   5723  1.1  mrg   tree new_temp;
   5724  1.1  mrg   gphi *phi_stmt;
   5725  1.1  mrg   tree msq = NULL_TREE;
   5726  1.1  mrg   gimple_seq stmts = NULL;
   5727  1.1  mrg   bool compute_in_loop = false;
   5728  1.1  mrg   bool nested_in_vect_loop = false;
   5729  1.1  mrg   class loop *containing_loop = (gimple_bb (stmt_info->stmt))->loop_father;
   5730  1.1  mrg   class loop *loop_for_initial_load = NULL;
   5731  1.1  mrg 
   5732  1.1  mrg   if (loop_vinfo)
   5733  1.1  mrg     {
   5734  1.1  mrg       loop = LOOP_VINFO_LOOP (loop_vinfo);
   5735  1.1  mrg       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
   5736  1.1  mrg     }
   5737  1.1  mrg 
   5738  1.1  mrg   gcc_assert (alignment_support_scheme == dr_explicit_realign
   5739  1.1  mrg 	      || alignment_support_scheme == dr_explicit_realign_optimized);
   5740  1.1  mrg 
   5741  1.1  mrg   /* We need to generate three things:
    5742  1.1  mrg      1. the misalignment computation.
   5743  1.1  mrg      2. the extra vector load (for the optimized realignment scheme).
   5744  1.1  mrg      3. the phi node for the two vectors from which the realignment is
   5745  1.1  mrg       done (for the optimized realignment scheme).  */
   5746  1.1  mrg 
   5747  1.1  mrg   /* 1. Determine where to generate the misalignment computation.
   5748  1.1  mrg 
   5749  1.1  mrg      If INIT_ADDR is NULL_TREE, this indicates that the misalignment
   5750  1.1  mrg      calculation will be generated by this function, outside the loop (in the
    5751  1.1  mrg      preheader).  Otherwise, INIT_ADDR has already been computed for us by the
   5752  1.1  mrg      caller, inside the loop.
   5753  1.1  mrg 
   5754  1.1  mrg      Background: If the misalignment remains fixed throughout the iterations of
   5755  1.1  mrg      the loop, then both realignment schemes are applicable, and also the
   5756  1.1  mrg      misalignment computation can be done outside LOOP.  This is because we are
   5757  1.1  mrg      vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
   5758  1.1  mrg      are a multiple of VS (the Vector Size), and therefore the misalignment in
   5759  1.1  mrg      different vectorized LOOP iterations is always the same.
   5760  1.1  mrg      The problem arises only if the memory access is in an inner-loop nested
   5761  1.1  mrg      inside LOOP, which is now being vectorized using outer-loop vectorization.
   5762  1.1  mrg      This is the only case when the misalignment of the memory access may not
   5763  1.1  mrg      remain fixed throughout the iterations of the inner-loop (as explained in
   5764  1.1  mrg      detail in vect_supportable_dr_alignment).  In this case, not only is the
   5765  1.1  mrg      optimized realignment scheme not applicable, but also the misalignment
   5766  1.1  mrg      computation (and generation of the realignment token that is passed to
   5767  1.1  mrg      REALIGN_LOAD) have to be done inside the loop.
   5768  1.1  mrg 
   5769  1.1  mrg      In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
   5770  1.1  mrg      or not, which in turn determines if the misalignment is computed inside
   5771  1.1  mrg      the inner-loop, or outside LOOP.  */
   5772  1.1  mrg 
   5773  1.1  mrg   if (init_addr != NULL_TREE || !loop_vinfo)
   5774  1.1  mrg     {
   5775  1.1  mrg       compute_in_loop = true;
   5776  1.1  mrg       gcc_assert (alignment_support_scheme == dr_explicit_realign);
   5777  1.1  mrg     }
   5778  1.1  mrg 
   5779  1.1  mrg 
   5780  1.1  mrg   /* 2. Determine where to generate the extra vector load.
   5781  1.1  mrg 
   5782  1.1  mrg      For the optimized realignment scheme, instead of generating two vector
   5783  1.1  mrg      loads in each iteration, we generate a single extra vector load in the
   5784  1.1  mrg      preheader of the loop, and in each iteration reuse the result of the
   5785  1.1  mrg      vector load from the previous iteration.  In case the memory access is in
   5786  1.1  mrg      an inner-loop nested inside LOOP, which is now being vectorized using
   5787  1.1  mrg      outer-loop vectorization, we need to determine whether this initial vector
   5788  1.1  mrg      load should be generated at the preheader of the inner-loop, or can be
   5789  1.1  mrg      generated at the preheader of LOOP.  If the memory access has no evolution
   5790  1.1  mrg      in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
   5791  1.1  mrg      to be generated inside LOOP (in the preheader of the inner-loop).  */
   5792  1.1  mrg 
   5793  1.1  mrg   if (nested_in_vect_loop)
   5794  1.1  mrg     {
   5795  1.1  mrg       tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
   5796  1.1  mrg       bool invariant_in_outerloop =
   5797  1.1  mrg             (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
   5798  1.1  mrg       loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
   5799  1.1  mrg     }
   5800  1.1  mrg   else
   5801  1.1  mrg     loop_for_initial_load = loop;
   5802  1.1  mrg   if (at_loop)
   5803  1.1  mrg     *at_loop = loop_for_initial_load;
   5804  1.1  mrg 
   5805  1.1  mrg   if (loop_for_initial_load)
   5806  1.1  mrg     pe = loop_preheader_edge (loop_for_initial_load);
   5807  1.1  mrg 
   5808  1.1  mrg   /* 3. For the case of the optimized realignment, create the first vector
   5809  1.1  mrg       load at the loop preheader.  */
   5810  1.1  mrg 
   5811  1.1  mrg   if (alignment_support_scheme == dr_explicit_realign_optimized)
   5812  1.1  mrg     {
   5813  1.1  mrg       /* Create msq_init = *(floor(p1)) in the loop preheader  */
   5814  1.1  mrg       gassign *new_stmt;
   5815  1.1  mrg 
   5816  1.1  mrg       gcc_assert (!compute_in_loop);
   5817  1.1  mrg       vec_dest = vect_create_destination_var (scalar_dest, vectype);
   5818  1.1  mrg       ptr = vect_create_data_ref_ptr (vinfo, stmt_info, vectype,
   5819  1.1  mrg 				      loop_for_initial_load, NULL_TREE,
   5820  1.1  mrg 				      &init_addr, NULL, &inc, true);
   5821  1.1  mrg       if (TREE_CODE (ptr) == SSA_NAME)
   5822  1.1  mrg 	new_temp = copy_ssa_name (ptr);
   5823  1.1  mrg       else
   5824  1.1  mrg 	new_temp = make_ssa_name (TREE_TYPE (ptr));
   5825  1.1  mrg       poly_uint64 align = DR_TARGET_ALIGNMENT (dr_info);
   5826  1.1  mrg       tree type = TREE_TYPE (ptr);
   5827  1.1  mrg       new_stmt = gimple_build_assign
   5828  1.1  mrg 		   (new_temp, BIT_AND_EXPR, ptr,
   5829  1.1  mrg 		    fold_build2 (MINUS_EXPR, type,
   5830  1.1  mrg 				 build_int_cst (type, 0),
   5831  1.1  mrg 				 build_int_cst (type, align)));
   5832  1.1  mrg       new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
   5833  1.1  mrg       gcc_assert (!new_bb);
   5834  1.1  mrg       data_ref
   5835  1.1  mrg 	= build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
   5836  1.1  mrg 		  build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
   5837  1.1  mrg       vect_copy_ref_info (data_ref, DR_REF (dr));
   5838  1.1  mrg       new_stmt = gimple_build_assign (vec_dest, data_ref);
   5839  1.1  mrg       new_temp = make_ssa_name (vec_dest, new_stmt);
   5840  1.1  mrg       gimple_assign_set_lhs (new_stmt, new_temp);
   5841  1.1  mrg       if (pe)
   5842  1.1  mrg         {
   5843  1.1  mrg           new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
   5844  1.1  mrg           gcc_assert (!new_bb);
   5845  1.1  mrg         }
   5846  1.1  mrg       else
   5847  1.1  mrg          gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
   5848  1.1  mrg 
   5849  1.1  mrg       msq_init = gimple_assign_lhs (new_stmt);
   5850  1.1  mrg     }
   5851  1.1  mrg 
   5852  1.1  mrg   /* 4. Create realignment token using a target builtin, if available.
   5853  1.1  mrg       It is done either inside the containing loop, or before LOOP (as
   5854  1.1  mrg       determined above).  */
   5855  1.1  mrg 
   5856  1.1  mrg   if (targetm.vectorize.builtin_mask_for_load)
   5857  1.1  mrg     {
   5858  1.1  mrg       gcall *new_stmt;
   5859  1.1  mrg       tree builtin_decl;
   5860  1.1  mrg 
    5861  1.1  mrg       /* Compute INIT_ADDR - the initial address accessed by this memref.  */
   5862  1.1  mrg       if (!init_addr)
   5863  1.1  mrg 	{
   5864  1.1  mrg 	  /* Generate the INIT_ADDR computation outside LOOP.  */
   5865  1.1  mrg 	  init_addr = vect_create_addr_base_for_vector_ref (vinfo,
   5866  1.1  mrg 							    stmt_info, &stmts,
   5867  1.1  mrg 							    NULL_TREE);
   5868  1.1  mrg           if (loop)
   5869  1.1  mrg             {
   5870  1.1  mrg    	      pe = loop_preheader_edge (loop);
   5871  1.1  mrg 	      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
   5872  1.1  mrg 	      gcc_assert (!new_bb);
   5873  1.1  mrg             }
   5874  1.1  mrg           else
   5875  1.1  mrg              gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
   5876  1.1  mrg 	}
   5877  1.1  mrg 
   5878  1.1  mrg       builtin_decl = targetm.vectorize.builtin_mask_for_load ();
   5879  1.1  mrg       new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
   5880  1.1  mrg       vec_dest =
   5881  1.1  mrg 	vect_create_destination_var (scalar_dest,
   5882  1.1  mrg 				     gimple_call_return_type (new_stmt));
   5883  1.1  mrg       new_temp = make_ssa_name (vec_dest, new_stmt);
   5884  1.1  mrg       gimple_call_set_lhs (new_stmt, new_temp);
   5885  1.1  mrg 
   5886  1.1  mrg       if (compute_in_loop)
   5887  1.1  mrg 	gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
   5888  1.1  mrg       else
   5889  1.1  mrg 	{
   5890  1.1  mrg 	  /* Generate the misalignment computation outside LOOP.  */
   5891  1.1  mrg 	  pe = loop_preheader_edge (loop);
   5892  1.1  mrg 	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
   5893  1.1  mrg 	  gcc_assert (!new_bb);
   5894  1.1  mrg 	}
   5895  1.1  mrg 
   5896  1.1  mrg       *realignment_token = gimple_call_lhs (new_stmt);
   5897  1.1  mrg 
   5898  1.1  mrg       /* The result of the CALL_EXPR to this builtin is determined from
   5899  1.1  mrg          the value of the parameter and no global variables are touched
   5900  1.1  mrg          which makes the builtin a "const" function.  Requiring the
   5901  1.1  mrg          builtin to have the "const" attribute makes it unnecessary
   5902  1.1  mrg          to call mark_call_clobbered.  */
   5903  1.1  mrg       gcc_assert (TREE_READONLY (builtin_decl));
   5904  1.1  mrg     }
   5905  1.1  mrg 
   5906  1.1  mrg   if (alignment_support_scheme == dr_explicit_realign)
   5907  1.1  mrg     return msq;
   5908  1.1  mrg 
   5909  1.1  mrg   gcc_assert (!compute_in_loop);
   5910  1.1  mrg   gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
   5911  1.1  mrg 
   5912  1.1  mrg 
   5913  1.1  mrg   /* 5. Create msq = phi <msq_init, lsq> in loop  */
   5914  1.1  mrg 
   5915  1.1  mrg   pe = loop_preheader_edge (containing_loop);
   5916  1.1  mrg   vec_dest = vect_create_destination_var (scalar_dest, vectype);
   5917  1.1  mrg   msq = make_ssa_name (vec_dest);
   5918  1.1  mrg   phi_stmt = create_phi_node (msq, containing_loop->header);
   5919  1.1  mrg   add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
   5920  1.1  mrg 
   5921  1.1  mrg   return msq;
   5922  1.1  mrg }
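
/* Editor's illustration, not part of GCC and never compiled: what the
   dr_explicit_realign sequence set up above computes, modeled in plain C.
   VS is the vector size in bytes and is assumed to be a power of two; the
   two "vector loads" become reads from addresses rounded down to a VS
   boundary, and the realignment token reduces to the byte offset of P
   within its VS-sized block.  The name and the uintptr_t-based rounding
   (from <stdint.h>) are ours.  */
#if 0
static void
model_realign_load (unsigned char *dst, const unsigned char *p,
		    unsigned int vs)
{
  uintptr_t mask = (uintptr_t) vs - 1;
  const unsigned char *msq = (const unsigned char *) ((uintptr_t) p & ~mask);
  const unsigned char *lsq = msq + vs;	/* floor (p + VS - 1) when P is
					   misaligned; never read otherwise. */
  unsigned int shift = (unsigned int) ((uintptr_t) p & mask);
  /* result = realign_load (msq, lsq, token): the tail of MSQ followed by
     the head of LSQ.  */
  for (unsigned int i = 0; i < vs; i++)
    dst[i] = shift + i < vs ? msq[shift + i] : lsq[shift + i - vs];
}
#endif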
   5923  1.1  mrg 
   5924  1.1  mrg 
   5925  1.1  mrg /* Function vect_grouped_load_supported.
   5926  1.1  mrg 
   5927  1.1  mrg    COUNT is the size of the load group (the number of statements plus the
   5928  1.1  mrg    number of gaps).  SINGLE_ELEMENT_P is true if there is actually
   5929  1.1  mrg    only one statement, with a gap of COUNT - 1.
   5930  1.1  mrg 
   5931  1.1  mrg    Returns true if a suitable permute exists.  */
   5932  1.1  mrg 
   5933  1.1  mrg bool
   5934  1.1  mrg vect_grouped_load_supported (tree vectype, bool single_element_p,
   5935  1.1  mrg 			     unsigned HOST_WIDE_INT count)
   5936  1.1  mrg {
   5937  1.1  mrg   machine_mode mode = TYPE_MODE (vectype);
   5938  1.1  mrg 
   5939  1.1  mrg   /* If this is single-element interleaving with an element distance
   5940  1.1  mrg      that leaves unused vector loads around punt - we at least create
   5941  1.1  mrg      very sub-optimal code in that case (and blow up memory,
   5942  1.1  mrg      see PR65518).  */
   5943  1.1  mrg   if (single_element_p && maybe_gt (count, TYPE_VECTOR_SUBPARTS (vectype)))
   5944  1.1  mrg     {
   5945  1.1  mrg       if (dump_enabled_p ())
   5946  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   5947  1.1  mrg 			 "single-element interleaving not supported "
    5948  1.1  mrg 			 "for non-adjacent vector loads\n");
   5949  1.1  mrg       return false;
   5950  1.1  mrg     }
   5951  1.1  mrg 
   5952  1.1  mrg   /* vect_permute_load_chain requires the group size to be equal to 3 or
   5953  1.1  mrg      be a power of two.  */
   5954  1.1  mrg   if (count != 3 && exact_log2 (count) == -1)
   5955  1.1  mrg     {
   5956  1.1  mrg       if (dump_enabled_p ())
   5957  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   5958  1.1  mrg 			 "the size of the group of accesses"
   5959  1.1  mrg 			 " is not a power of 2 or not equal to 3\n");
   5960  1.1  mrg       return false;
   5961  1.1  mrg     }
   5962  1.1  mrg 
   5963  1.1  mrg   /* Check that the permutation is supported.  */
   5964  1.1  mrg   if (VECTOR_MODE_P (mode))
   5965  1.1  mrg     {
   5966  1.1  mrg       unsigned int i, j;
   5967  1.1  mrg       if (count == 3)
   5968  1.1  mrg 	{
   5969  1.1  mrg 	  unsigned int nelt;
   5970  1.1  mrg 	  if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
   5971  1.1  mrg 	    {
   5972  1.1  mrg 	      if (dump_enabled_p ())
   5973  1.1  mrg 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   5974  1.1  mrg 				 "cannot handle groups of 3 loads for"
   5975  1.1  mrg 				 " variable-length vectors\n");
   5976  1.1  mrg 	      return false;
   5977  1.1  mrg 	    }
   5978  1.1  mrg 
   5979  1.1  mrg 	  vec_perm_builder sel (nelt, nelt, 1);
   5980  1.1  mrg 	  sel.quick_grow (nelt);
   5981  1.1  mrg 	  vec_perm_indices indices;
   5982  1.1  mrg 	  unsigned int k;
   5983  1.1  mrg 	  for (k = 0; k < 3; k++)
   5984  1.1  mrg 	    {
   5985  1.1  mrg 	      for (i = 0; i < nelt; i++)
   5986  1.1  mrg 		if (3 * i + k < 2 * nelt)
   5987  1.1  mrg 		  sel[i] = 3 * i + k;
   5988  1.1  mrg 		else
   5989  1.1  mrg 		  sel[i] = 0;
   5990  1.1  mrg 	      indices.new_vector (sel, 2, nelt);
   5991  1.1  mrg 	      if (!can_vec_perm_const_p (mode, indices))
   5992  1.1  mrg 		{
   5993  1.1  mrg 		  if (dump_enabled_p ())
   5994  1.1  mrg 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   5995  1.1  mrg 				     "shuffle of 3 loads is not supported by"
   5996  1.1  mrg 				     " target\n");
   5997  1.1  mrg 		  return false;
   5998  1.1  mrg 		}
   5999  1.1  mrg 	      for (i = 0, j = 0; i < nelt; i++)
   6000  1.1  mrg 		if (3 * i + k < 2 * nelt)
   6001  1.1  mrg 		  sel[i] = i;
   6002  1.1  mrg 		else
   6003  1.1  mrg 		  sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
   6004  1.1  mrg 	      indices.new_vector (sel, 2, nelt);
   6005  1.1  mrg 	      if (!can_vec_perm_const_p (mode, indices))
   6006  1.1  mrg 		{
   6007  1.1  mrg 		  if (dump_enabled_p ())
   6008  1.1  mrg 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6009  1.1  mrg 				     "shuffle of 3 loads is not supported by"
   6010  1.1  mrg 				     " target\n");
   6011  1.1  mrg 		  return false;
   6012  1.1  mrg 		}
   6013  1.1  mrg 	    }
   6014  1.1  mrg 	  return true;
   6015  1.1  mrg 	}
   6016  1.1  mrg       else
   6017  1.1  mrg 	{
    6018  1.1  mrg 	  /* If length is not equal to 3 then only a power of 2 is supported.  */
   6019  1.1  mrg 	  gcc_assert (pow2p_hwi (count));
   6020  1.1  mrg 	  poly_uint64 nelt = GET_MODE_NUNITS (mode);
   6021  1.1  mrg 
   6022  1.1  mrg 	  /* The encoding has a single stepped pattern.  */
   6023  1.1  mrg 	  vec_perm_builder sel (nelt, 1, 3);
   6024  1.1  mrg 	  sel.quick_grow (3);
   6025  1.1  mrg 	  for (i = 0; i < 3; i++)
   6026  1.1  mrg 	    sel[i] = i * 2;
   6027  1.1  mrg 	  vec_perm_indices indices (sel, 2, nelt);
   6028  1.1  mrg 	  if (can_vec_perm_const_p (mode, indices))
   6029  1.1  mrg 	    {
   6030  1.1  mrg 	      for (i = 0; i < 3; i++)
   6031  1.1  mrg 		sel[i] = i * 2 + 1;
   6032  1.1  mrg 	      indices.new_vector (sel, 2, nelt);
   6033  1.1  mrg 	      if (can_vec_perm_const_p (mode, indices))
   6034  1.1  mrg 		return true;
   6035  1.1  mrg 	    }
   6036  1.1  mrg         }
   6037  1.1  mrg     }
   6038  1.1  mrg 
   6039  1.1  mrg   if (dump_enabled_p ())
   6040  1.1  mrg     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6041  1.1  mrg 		     "extract even/odd not supported by target\n");
   6042  1.1  mrg   return false;
   6043  1.1  mrg }
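
/* Editor's worked instance, not part of GCC and never compiled, of the two
   COUNT == 3 masks tested above for nelt == 4 and k == 0.  The first
   permute of (v0, v1) uses sel = {0, 3, 6, 0}: group lanes 0, 3 and 6 plus
   a placeholder.  The second permute of that result with v2 uses
   sel = {0, 1, 2, 5}: it keeps the three lanes and pulls element 1 of v2,
   i.e. group element 9, giving {0, 3, 6, 9} as required.  The function
   name and array form are ours.  */
#if 0
static void
model_load3_step (const int v0[4], const int v1[4], const int v2[4],
		  int out[4])
{
  static const int sel_low[4] = { 0, 3, 6, 0 };
  static const int sel_high[4] = { 0, 1, 2, 5 };
  int t[4];
  for (int i = 0; i < 4; i++)	/* VEC_PERM_EXPR <v0, v1, sel_low>  */
    t[i] = sel_low[i] < 4 ? v0[sel_low[i]] : v1[sel_low[i] - 4];
  for (int i = 0; i < 4; i++)	/* VEC_PERM_EXPR <t, v2, sel_high>  */
    out[i] = sel_high[i] < 4 ? t[sel_high[i]] : v2[sel_high[i] - 4];
}
#endif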
   6044  1.1  mrg 
   6045  1.1  mrg /* Return TRUE if vec_{masked_}load_lanes is available for COUNT vectors of
   6046  1.1  mrg    type VECTYPE.  MASKED_P says whether the masked form is needed.  */
   6047  1.1  mrg 
   6048  1.1  mrg bool
   6049  1.1  mrg vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
   6050  1.1  mrg 			   bool masked_p)
   6051  1.1  mrg {
   6052  1.1  mrg   if (masked_p)
   6053  1.1  mrg     return vect_lanes_optab_supported_p ("vec_mask_load_lanes",
   6054  1.1  mrg 					 vec_mask_load_lanes_optab,
   6055  1.1  mrg 					 vectype, count);
   6056  1.1  mrg   else
   6057  1.1  mrg     return vect_lanes_optab_supported_p ("vec_load_lanes",
   6058  1.1  mrg 					 vec_load_lanes_optab,
   6059  1.1  mrg 					 vectype, count);
   6060  1.1  mrg }
   6061  1.1  mrg 
   6062  1.1  mrg /* Function vect_permute_load_chain.
   6063  1.1  mrg 
   6064  1.1  mrg    Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
   6065  1.1  mrg    a power of 2 or equal to 3, generate extract_even/odd stmts to reorder
   6066  1.1  mrg    the input data correctly.  Return the final references for loads in
   6067  1.1  mrg    RESULT_CHAIN.
   6068  1.1  mrg 
   6069  1.1  mrg    E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
   6070  1.1  mrg    The input is 4 vectors each containing 8 elements. We assign a number to each
    6071  1.1  mrg    element; the input sequence is:
   6072  1.1  mrg 
   6073  1.1  mrg    1st vec:   0  1  2  3  4  5  6  7
   6074  1.1  mrg    2nd vec:   8  9 10 11 12 13 14 15
   6075  1.1  mrg    3rd vec:  16 17 18 19 20 21 22 23
   6076  1.1  mrg    4th vec:  24 25 26 27 28 29 30 31
   6077  1.1  mrg 
   6078  1.1  mrg    The output sequence should be:
   6079  1.1  mrg 
   6080  1.1  mrg    1st vec:  0 4  8 12 16 20 24 28
   6081  1.1  mrg    2nd vec:  1 5  9 13 17 21 25 29
   6082  1.1  mrg    3rd vec:  2 6 10 14 18 22 26 30
   6083  1.1  mrg    4th vec:  3 7 11 15 19 23 27 31
   6084  1.1  mrg 
   6085  1.1  mrg    i.e., the first output vector should contain the first elements of each
   6086  1.1  mrg    interleaving group, etc.
   6087  1.1  mrg 
   6088  1.1  mrg    We use extract_even/odd instructions to create such output.  The input of
   6089  1.1  mrg    each extract_even/odd operation is two vectors
   6090  1.1  mrg    1st vec    2nd vec
   6091  1.1  mrg    0 1 2 3    4 5 6 7
   6092  1.1  mrg 
   6093  1.1  mrg    and the output is the vector of extracted even/odd elements.  The output of
   6094  1.1  mrg    extract_even will be:   0 2 4 6
   6095  1.1  mrg    and of extract_odd:     1 3 5 7
   6096  1.1  mrg 
   6097  1.1  mrg 
   6098  1.1  mrg    The permutation is done in log LENGTH stages.  In each stage extract_even
   6099  1.1  mrg    and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
   6100  1.1  mrg    their order.  In our example,
   6101  1.1  mrg 
   6102  1.1  mrg    E1: extract_even (1st vec, 2nd vec)
   6103  1.1  mrg    E2: extract_odd (1st vec, 2nd vec)
   6104  1.1  mrg    E3: extract_even (3rd vec, 4th vec)
   6105  1.1  mrg    E4: extract_odd (3rd vec, 4th vec)
   6106  1.1  mrg 
   6107  1.1  mrg    The output for the first stage will be:
   6108  1.1  mrg 
   6109  1.1  mrg    E1:  0  2  4  6  8 10 12 14
   6110  1.1  mrg    E2:  1  3  5  7  9 11 13 15
   6111  1.1  mrg    E3: 16 18 20 22 24 26 28 30
   6112  1.1  mrg    E4: 17 19 21 23 25 27 29 31
   6113  1.1  mrg 
   6114  1.1  mrg    In order to proceed and create the correct sequence for the next stage (or
   6115  1.1  mrg    for the correct output, if the second stage is the last one, as in our
    6116  1.1  mrg    example), we first put the output of the extract_even operation and then the
   6117  1.1  mrg    output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
   6118  1.1  mrg    The input for the second stage is:
   6119  1.1  mrg 
   6120  1.1  mrg    1st vec (E1):  0  2  4  6  8 10 12 14
   6121  1.1  mrg    2nd vec (E3): 16 18 20 22 24 26 28 30
   6122  1.1  mrg    3rd vec (E2):  1  3  5  7  9 11 13 15
   6123  1.1  mrg    4th vec (E4): 17 19 21 23 25 27 29 31
   6124  1.1  mrg 
   6125  1.1  mrg    The output of the second stage:
   6126  1.1  mrg 
   6127  1.1  mrg    E1: 0 4  8 12 16 20 24 28
   6128  1.1  mrg    E2: 2 6 10 14 18 22 26 30
   6129  1.1  mrg    E3: 1 5  9 13 17 21 25 29
   6130  1.1  mrg    E4: 3 7 11 15 19 23 27 31
   6131  1.1  mrg 
   6132  1.1  mrg    And RESULT_CHAIN after reordering:
   6133  1.1  mrg 
   6134  1.1  mrg    1st vec (E1):  0 4  8 12 16 20 24 28
   6135  1.1  mrg    2nd vec (E3):  1 5  9 13 17 21 25 29
   6136  1.1  mrg    3rd vec (E2):  2 6 10 14 18 22 26 30
   6137  1.1  mrg    4th vec (E4):  3 7 11 15 19 23 27 31.  */
   6138  1.1  mrg 
   6139  1.1  mrg static void
   6140  1.1  mrg vect_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain,
   6141  1.1  mrg 			 unsigned int length,
   6142  1.1  mrg 			 stmt_vec_info stmt_info,
   6143  1.1  mrg 			 gimple_stmt_iterator *gsi,
   6144  1.1  mrg 			 vec<tree> *result_chain)
   6145  1.1  mrg {
   6146  1.1  mrg   tree data_ref, first_vect, second_vect;
   6147  1.1  mrg   tree perm_mask_even, perm_mask_odd;
   6148  1.1  mrg   tree perm3_mask_low, perm3_mask_high;
   6149  1.1  mrg   gimple *perm_stmt;
   6150  1.1  mrg   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   6151  1.1  mrg   unsigned int i, j, log_length = exact_log2 (length);
   6152  1.1  mrg 
   6153  1.1  mrg   result_chain->quick_grow (length);
   6154  1.1  mrg   memcpy (result_chain->address (), dr_chain.address (),
   6155  1.1  mrg 	  length * sizeof (tree));
   6156  1.1  mrg 
   6157  1.1  mrg   if (length == 3)
   6158  1.1  mrg     {
   6159  1.1  mrg       /* vect_grouped_load_supported ensures that this is constant.  */
   6160  1.1  mrg       unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
   6161  1.1  mrg       unsigned int k;
   6162  1.1  mrg 
   6163  1.1  mrg       vec_perm_builder sel (nelt, nelt, 1);
   6164  1.1  mrg       sel.quick_grow (nelt);
   6165  1.1  mrg       vec_perm_indices indices;
   6166  1.1  mrg       for (k = 0; k < 3; k++)
   6167  1.1  mrg 	{
   6168  1.1  mrg 	  for (i = 0; i < nelt; i++)
   6169  1.1  mrg 	    if (3 * i + k < 2 * nelt)
   6170  1.1  mrg 	      sel[i] = 3 * i + k;
   6171  1.1  mrg 	    else
   6172  1.1  mrg 	      sel[i] = 0;
   6173  1.1  mrg 	  indices.new_vector (sel, 2, nelt);
   6174  1.1  mrg 	  perm3_mask_low = vect_gen_perm_mask_checked (vectype, indices);
   6175  1.1  mrg 
   6176  1.1  mrg 	  for (i = 0, j = 0; i < nelt; i++)
   6177  1.1  mrg 	    if (3 * i + k < 2 * nelt)
   6178  1.1  mrg 	      sel[i] = i;
   6179  1.1  mrg 	    else
   6180  1.1  mrg 	      sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
   6181  1.1  mrg 	  indices.new_vector (sel, 2, nelt);
   6182  1.1  mrg 	  perm3_mask_high = vect_gen_perm_mask_checked (vectype, indices);
   6183  1.1  mrg 
   6184  1.1  mrg 	  first_vect = dr_chain[0];
   6185  1.1  mrg 	  second_vect = dr_chain[1];
   6186  1.1  mrg 
   6187  1.1  mrg 	  /* Create interleaving stmt (low part of):
    6188  1.1  mrg 	     low = VEC_PERM_EXPR <first_vect, second_vect, {k, 3 + k, 6 + k,
   6189  1.1  mrg 							     ...}>  */
   6190  1.1  mrg 	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
   6191  1.1  mrg 	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, first_vect,
   6192  1.1  mrg 					   second_vect, perm3_mask_low);
   6193  1.1  mrg 	  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
   6194  1.1  mrg 
   6195  1.1  mrg 	  /* Create interleaving stmt (high part of):
    6196  1.1  mrg 	     high = VEC_PERM_EXPR <first_vect, second_vect,
    6197  1.1  mrg 				   {0, 1, ..., nelt + ((nelt + k) % 3), ...}>  */
   6198  1.1  mrg 	  first_vect = data_ref;
   6199  1.1  mrg 	  second_vect = dr_chain[2];
   6200  1.1  mrg 	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
   6201  1.1  mrg 	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, first_vect,
   6202  1.1  mrg 					   second_vect, perm3_mask_high);
   6203  1.1  mrg 	  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
   6204  1.1  mrg 	  (*result_chain)[k] = data_ref;
   6205  1.1  mrg 	}
   6206  1.1  mrg     }
   6207  1.1  mrg   else
   6208  1.1  mrg     {
    6209  1.1  mrg       /* If length is not equal to 3 then only a power of 2 is supported.  */
   6210  1.1  mrg       gcc_assert (pow2p_hwi (length));
   6211  1.1  mrg 
   6212  1.1  mrg       /* The encoding has a single stepped pattern.  */
   6213  1.1  mrg       poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (vectype);
   6214  1.1  mrg       vec_perm_builder sel (nelt, 1, 3);
   6215  1.1  mrg       sel.quick_grow (3);
   6216  1.1  mrg       for (i = 0; i < 3; ++i)
   6217  1.1  mrg 	sel[i] = i * 2;
   6218  1.1  mrg       vec_perm_indices indices (sel, 2, nelt);
   6219  1.1  mrg       perm_mask_even = vect_gen_perm_mask_checked (vectype, indices);
   6220  1.1  mrg 
   6221  1.1  mrg       for (i = 0; i < 3; ++i)
   6222  1.1  mrg 	sel[i] = i * 2 + 1;
   6223  1.1  mrg       indices.new_vector (sel, 2, nelt);
   6224  1.1  mrg       perm_mask_odd = vect_gen_perm_mask_checked (vectype, indices);
   6225  1.1  mrg 
   6226  1.1  mrg       for (i = 0; i < log_length; i++)
   6227  1.1  mrg 	{
   6228  1.1  mrg 	  for (j = 0; j < length; j += 2)
   6229  1.1  mrg 	    {
   6230  1.1  mrg 	      first_vect = dr_chain[j];
   6231  1.1  mrg 	      second_vect = dr_chain[j+1];
   6232  1.1  mrg 
   6233  1.1  mrg 	      /* data_ref = permute_even (first_data_ref, second_data_ref);  */
   6234  1.1  mrg 	      data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_even");
   6235  1.1  mrg 	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
   6236  1.1  mrg 					       first_vect, second_vect,
   6237  1.1  mrg 					       perm_mask_even);
   6238  1.1  mrg 	      vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
   6239  1.1  mrg 	      (*result_chain)[j/2] = data_ref;
   6240  1.1  mrg 
   6241  1.1  mrg 	      /* data_ref = permute_odd (first_data_ref, second_data_ref);  */
   6242  1.1  mrg 	      data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_odd");
   6243  1.1  mrg 	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
   6244  1.1  mrg 					       first_vect, second_vect,
   6245  1.1  mrg 					       perm_mask_odd);
   6246  1.1  mrg 	      vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
   6247  1.1  mrg 	      (*result_chain)[j/2+length/2] = data_ref;
   6248  1.1  mrg 	    }
   6249  1.1  mrg 	  memcpy (dr_chain.address (), result_chain->address (),
   6250  1.1  mrg 		  length * sizeof (tree));
   6251  1.1  mrg 	}
   6252  1.1  mrg     }
   6253  1.1  mrg }
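
/* Editor's illustration, not part of GCC and never compiled: a scalar
   model of the extract_even/odd network above for power-of-two LENGTH.
   The name and the plain-array view with fixed NELT == 8 are ours; the
   placement of the even result at J/2 and the odd result at
   J/2 + LENGTH/2 follows the code.  Feeding it the four vectors 0..31
   from the comment reproduces E1..E4 and then the final even/odd
   separation.  */
#if 0
static void
model_load_permute (int chain[][8], int length)
{
  const int nelt = 8;
  int tmp[16][8];		/* Scratch copy; assumes length <= 16.  */
  for (int stage = 0; (1 << stage) < length; stage++)
    {
      for (int j = 0; j < length; j += 2)
	for (int i = 0; i < nelt; i++)
	  {
	    /* Lane indices into the concatenation of rows J and J + 1.  */
	    int e = 2 * i, o = 2 * i + 1;
	    tmp[j / 2][i] = e < nelt ? chain[j][e] : chain[j + 1][e - nelt];
	    tmp[j / 2 + length / 2][i]
	      = o < nelt ? chain[j][o] : chain[j + 1][o - nelt];
	  }
      for (int j = 0; j < length; j++)
	for (int i = 0; i < nelt; i++)
	  chain[j][i] = tmp[j][i];
    }
}
#endif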
   6254  1.1  mrg 
   6255  1.1  mrg /* Function vect_shift_permute_load_chain.
   6256  1.1  mrg 
    6257  1.1  mrg    Given a chain of loads in DR_CHAIN of LENGTH 2 or 3, generate a
    6258  1.1  mrg    sequence of stmts to reorder the input data accordingly.
    6259  1.1  mrg    Return the final references for loads in RESULT_CHAIN.
    6260  1.1  mrg    Return true if successful, false otherwise.
   6261  1.1  mrg 
   6262  1.1  mrg    E.g., LENGTH is 3 and the scalar type is short, i.e., VF is 8.
   6263  1.1  mrg    The input is 3 vectors each containing 8 elements.  We assign a
    6264  1.1  mrg    number to each element; the input sequence is:
   6265  1.1  mrg 
   6266  1.1  mrg    1st vec:   0  1  2  3  4  5  6  7
   6267  1.1  mrg    2nd vec:   8  9 10 11 12 13 14 15
   6268  1.1  mrg    3rd vec:  16 17 18 19 20 21 22 23
   6269  1.1  mrg 
   6270  1.1  mrg    The output sequence should be:
   6271  1.1  mrg 
   6272  1.1  mrg    1st vec:  0 3 6  9 12 15 18 21
   6273  1.1  mrg    2nd vec:  1 4 7 10 13 16 19 22
   6274  1.1  mrg    3rd vec:  2 5 8 11 14 17 20 23
   6275  1.1  mrg 
   6276  1.1  mrg    We use 3 shuffle instructions and 3 * 3 - 1 shifts to create such output.
   6277  1.1  mrg 
   6278  1.1  mrg    First we shuffle all 3 vectors to get correct elements order:
   6279  1.1  mrg 
   6280  1.1  mrg    1st vec:  ( 0  3  6) ( 1  4  7) ( 2  5)
   6281  1.1  mrg    2nd vec:  ( 8 11 14) ( 9 12 15) (10 13)
   6282  1.1  mrg    3rd vec:  (16 19 22) (17 20 23) (18 21)
   6283  1.1  mrg 
   6284  1.1  mrg    Next we unite and shift vector 3 times:
   6285  1.1  mrg 
   6286  1.1  mrg    1st step:
   6287  1.1  mrg      shift right by 6 the concatenation of:
   6288  1.1  mrg      "1st vec" and  "2nd vec"
   6289  1.1  mrg        ( 0  3  6) ( 1  4  7) |( 2  5) _ ( 8 11 14) ( 9 12 15)| (10 13)
   6290  1.1  mrg      "2nd vec" and  "3rd vec"
   6291  1.1  mrg        ( 8 11 14) ( 9 12 15) |(10 13) _ (16 19 22) (17 20 23)| (18 21)
   6292  1.1  mrg      "3rd vec" and  "1st vec"
   6293  1.1  mrg        (16 19 22) (17 20 23) |(18 21) _ ( 0  3  6) ( 1  4  7)| ( 2  5)
   6294  1.1  mrg 			     | New vectors                   |
   6295  1.1  mrg 
   6296  1.1  mrg      So that now new vectors are:
   6297  1.1  mrg 
   6298  1.1  mrg      1st vec:  ( 2  5) ( 8 11 14) ( 9 12 15)
   6299  1.1  mrg      2nd vec:  (10 13) (16 19 22) (17 20 23)
   6300  1.1  mrg      3rd vec:  (18 21) ( 0  3  6) ( 1  4  7)
   6301  1.1  mrg 
   6302  1.1  mrg    2nd step:
   6303  1.1  mrg      shift right by 5 the concatenation of:
   6304  1.1  mrg      "1st vec" and  "3rd vec"
   6305  1.1  mrg        ( 2  5) ( 8 11 14) |( 9 12 15) _ (18 21) ( 0  3  6)| ( 1  4  7)
   6306  1.1  mrg      "2nd vec" and  "1st vec"
   6307  1.1  mrg        (10 13) (16 19 22) |(17 20 23) _ ( 2  5) ( 8 11 14)| ( 9 12 15)
   6308  1.1  mrg      "3rd vec" and  "2nd vec"
   6309  1.1  mrg        (18 21) ( 0  3  6) |( 1  4  7) _ (10 13) (16 19 22)| (17 20 23)
   6310  1.1  mrg 			  | New vectors                   |
   6311  1.1  mrg 
   6312  1.1  mrg      So that now new vectors are:
   6313  1.1  mrg 
   6314  1.1  mrg      1st vec:  ( 9 12 15) (18 21) ( 0  3  6)
   6315  1.1  mrg      2nd vec:  (17 20 23) ( 2  5) ( 8 11 14)
   6316  1.1  mrg      3rd vec:  ( 1  4  7) (10 13) (16 19 22) READY
   6317  1.1  mrg 
   6318  1.1  mrg    3rd step:
   6319  1.1  mrg      shift right by 5 the concatenation of:
   6320  1.1  mrg      "1st vec" and  "1st vec"
   6321  1.1  mrg        ( 9 12 15) (18 21) |( 0  3  6) _ ( 9 12 15) (18 21)| ( 0  3  6)
   6322  1.1  mrg      shift right by 3 the concatenation of:
   6323  1.1  mrg      "2nd vec" and  "2nd vec"
   6324  1.1  mrg                (17 20 23) |( 2  5) ( 8 11 14) _ (17 20 23)| ( 2  5) ( 8 11 14)
   6325  1.1  mrg 			  | New vectors                   |
   6326  1.1  mrg 
   6327  1.1  mrg      So that now all vectors are READY:
   6328  1.1  mrg      1st vec:  ( 0  3  6) ( 9 12 15) (18 21)
   6329  1.1  mrg      2nd vec:  ( 2  5) ( 8 11 14) (17 20 23)
   6330  1.1  mrg      3rd vec:  ( 1  4  7) (10 13) (16 19 22)
   6331  1.1  mrg 
    6332  1.1  mrg    This algorithm is faster than the one in vect_permute_load_chain if:
    6333  1.1  mrg      1.  "shift of a concatenation" is faster than general permutation.
    6334  1.1  mrg 	 This is usually so.
    6335  1.1  mrg      2.  The TARGET machine can't execute vector instructions in parallel.
    6336  1.1  mrg 	 This is because each step of the algorithm depends on the previous
    6337  1.1  mrg 	 one.  The algorithm in vect_permute_load_chain is much more parallel.
   6338  1.1  mrg 
   6339  1.1  mrg    The algorithm is applicable only for LOAD CHAIN LENGTH less than VF.
   6340  1.1  mrg */
   6341  1.1  mrg 
   6342  1.1  mrg static bool
   6343  1.1  mrg vect_shift_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain,
   6344  1.1  mrg 			       unsigned int length,
   6345  1.1  mrg 			       stmt_vec_info stmt_info,
   6346  1.1  mrg 			       gimple_stmt_iterator *gsi,
   6347  1.1  mrg 			       vec<tree> *result_chain)
   6348  1.1  mrg {
   6349  1.1  mrg   tree vect[3], vect_shift[3], data_ref, first_vect, second_vect;
   6350  1.1  mrg   tree perm2_mask1, perm2_mask2, perm3_mask;
   6351  1.1  mrg   tree select_mask, shift1_mask, shift2_mask, shift3_mask, shift4_mask;
   6352  1.1  mrg   gimple *perm_stmt;
   6353  1.1  mrg 
   6354  1.1  mrg   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   6355  1.1  mrg   unsigned int i;
   6356  1.1  mrg   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   6357  1.1  mrg 
   6358  1.1  mrg   unsigned HOST_WIDE_INT nelt, vf;
   6359  1.1  mrg   if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nelt)
   6360  1.1  mrg       || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
   6361  1.1  mrg     /* Not supported for variable-length vectors.  */
   6362  1.1  mrg     return false;
   6363  1.1  mrg 
   6364  1.1  mrg   vec_perm_builder sel (nelt, nelt, 1);
   6365  1.1  mrg   sel.quick_grow (nelt);
   6366  1.1  mrg 
   6367  1.1  mrg   result_chain->quick_grow (length);
   6368  1.1  mrg   memcpy (result_chain->address (), dr_chain.address (),
   6369  1.1  mrg 	  length * sizeof (tree));
   6370  1.1  mrg 
   6371  1.1  mrg   if (pow2p_hwi (length) && vf > 4)
   6372  1.1  mrg     {
   6373  1.1  mrg       unsigned int j, log_length = exact_log2 (length);
   6374  1.1  mrg       for (i = 0; i < nelt / 2; ++i)
   6375  1.1  mrg 	sel[i] = i * 2;
   6376  1.1  mrg       for (i = 0; i < nelt / 2; ++i)
   6377  1.1  mrg 	sel[nelt / 2 + i] = i * 2 + 1;
   6378  1.1  mrg       vec_perm_indices indices (sel, 2, nelt);
   6379  1.1  mrg       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
   6380  1.1  mrg 	{
   6381  1.1  mrg 	  if (dump_enabled_p ())
   6382  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6383  1.1  mrg 			     "shuffle of 2 fields structure is not"
    6384  1.1  mrg 			     " supported by target\n");
   6385  1.1  mrg 	  return false;
   6386  1.1  mrg 	}
   6387  1.1  mrg       perm2_mask1 = vect_gen_perm_mask_checked (vectype, indices);
   6388  1.1  mrg 
   6389  1.1  mrg       for (i = 0; i < nelt / 2; ++i)
   6390  1.1  mrg 	sel[i] = i * 2 + 1;
   6391  1.1  mrg       for (i = 0; i < nelt / 2; ++i)
   6392  1.1  mrg 	sel[nelt / 2 + i] = i * 2;
   6393  1.1  mrg       indices.new_vector (sel, 2, nelt);
   6394  1.1  mrg       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
   6395  1.1  mrg 	{
   6396  1.1  mrg 	  if (dump_enabled_p ())
   6397  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6398  1.1  mrg 			     "shuffle of 2 fields structure is not"
    6399  1.1  mrg 			     " supported by target\n");
   6400  1.1  mrg 	  return false;
   6401  1.1  mrg 	}
   6402  1.1  mrg       perm2_mask2 = vect_gen_perm_mask_checked (vectype, indices);
   6403  1.1  mrg 
   6404  1.1  mrg       /* Generating permutation constant to shift all elements.
   6405  1.1  mrg 	 For vector length 8 it is {4 5 6 7 8 9 10 11}.  */
   6406  1.1  mrg       for (i = 0; i < nelt; i++)
   6407  1.1  mrg 	sel[i] = nelt / 2 + i;
   6408  1.1  mrg       indices.new_vector (sel, 2, nelt);
   6409  1.1  mrg       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
   6410  1.1  mrg 	{
   6411  1.1  mrg 	  if (dump_enabled_p ())
   6412  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6413  1.1  mrg 			     "shift permutation is not supported by target\n");
   6414  1.1  mrg 	  return false;
   6415  1.1  mrg 	}
   6416  1.1  mrg       shift1_mask = vect_gen_perm_mask_checked (vectype, indices);
   6417  1.1  mrg 
    6418  1.1  mrg       /* Generating permutation constant to select a vector from the two.
   6419  1.1  mrg 	 For vector length 8 it is {0 1 2 3 12 13 14 15}.  */
   6420  1.1  mrg       for (i = 0; i < nelt / 2; i++)
   6421  1.1  mrg 	sel[i] = i;
   6422  1.1  mrg       for (i = nelt / 2; i < nelt; i++)
   6423  1.1  mrg 	sel[i] = nelt + i;
   6424  1.1  mrg       indices.new_vector (sel, 2, nelt);
   6425  1.1  mrg       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
   6426  1.1  mrg 	{
   6427  1.1  mrg 	  if (dump_enabled_p ())
   6428  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6429  1.1  mrg 			     "select is not supported by target\n");
   6430  1.1  mrg 	  return false;
   6431  1.1  mrg 	}
   6432  1.1  mrg       select_mask = vect_gen_perm_mask_checked (vectype, indices);
   6433  1.1  mrg 
   6434  1.1  mrg       for (i = 0; i < log_length; i++)
   6435  1.1  mrg 	{
   6436  1.1  mrg 	  for (j = 0; j < length; j += 2)
   6437  1.1  mrg 	    {
   6438  1.1  mrg 	      first_vect = dr_chain[j];
   6439  1.1  mrg 	      second_vect = dr_chain[j + 1];
   6440  1.1  mrg 
   6441  1.1  mrg 	      data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
   6442  1.1  mrg 	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
   6443  1.1  mrg 					       first_vect, first_vect,
   6444  1.1  mrg 					       perm2_mask1);
   6445  1.1  mrg 	      vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
   6446  1.1  mrg 	      vect[0] = data_ref;
   6447  1.1  mrg 
   6448  1.1  mrg 	      data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
   6449  1.1  mrg 	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
   6450  1.1  mrg 					       second_vect, second_vect,
   6451  1.1  mrg 					       perm2_mask2);
   6452  1.1  mrg 	      vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
   6453  1.1  mrg 	      vect[1] = data_ref;
   6454  1.1  mrg 
   6455  1.1  mrg 	      data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift");
   6456  1.1  mrg 	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
   6457  1.1  mrg 					       vect[0], vect[1], shift1_mask);
   6458  1.1  mrg 	      vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
   6459  1.1  mrg 	      (*result_chain)[j/2 + length/2] = data_ref;
   6460  1.1  mrg 
   6461  1.1  mrg 	      data_ref = make_temp_ssa_name (vectype, NULL, "vect_select");
   6462  1.1  mrg 	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
   6463  1.1  mrg 					       vect[0], vect[1], select_mask);
   6464  1.1  mrg 	      vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
   6465  1.1  mrg 	      (*result_chain)[j/2] = data_ref;
   6466  1.1  mrg 	    }
   6467  1.1  mrg 	  memcpy (dr_chain.address (), result_chain->address (),
   6468  1.1  mrg 		  length * sizeof (tree));
   6469  1.1  mrg 	}
   6470  1.1  mrg       return true;
   6471  1.1  mrg     }
   6472  1.1  mrg   if (length == 3 && vf > 2)
   6473  1.1  mrg     {
   6474  1.1  mrg       unsigned int k = 0, l = 0;
   6475  1.1  mrg 
    6476  1.1  mrg       /* Generating permutation constant to get all elements in the right order.
   6477  1.1  mrg 	 For vector length 8 it is {0 3 6 1 4 7 2 5}.  */
   6478  1.1  mrg       for (i = 0; i < nelt; i++)
   6479  1.1  mrg 	{
   6480  1.1  mrg 	  if (3 * k + (l % 3) >= nelt)
   6481  1.1  mrg 	    {
   6482  1.1  mrg 	      k = 0;
   6483  1.1  mrg 	      l += (3 - (nelt % 3));
   6484  1.1  mrg 	    }
   6485  1.1  mrg 	  sel[i] = 3 * k + (l % 3);
   6486  1.1  mrg 	  k++;
   6487  1.1  mrg 	}
   6488  1.1  mrg       vec_perm_indices indices (sel, 2, nelt);
   6489  1.1  mrg       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
   6490  1.1  mrg 	{
   6491  1.1  mrg 	  if (dump_enabled_p ())
   6492  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6493  1.1  mrg 			     "shuffle of 3 fields structure is not"
    6494  1.1  mrg 			     " supported by target\n");
   6495  1.1  mrg 	  return false;
   6496  1.1  mrg 	}
   6497  1.1  mrg       perm3_mask = vect_gen_perm_mask_checked (vectype, indices);
   6498  1.1  mrg 
   6499  1.1  mrg       /* Generating permutation constant to shift all elements.
   6500  1.1  mrg 	 For vector length 8 it is {6 7 8 9 10 11 12 13}.  */
   6501  1.1  mrg       for (i = 0; i < nelt; i++)
   6502  1.1  mrg 	sel[i] = 2 * (nelt / 3) + (nelt % 3) + i;
   6503  1.1  mrg       indices.new_vector (sel, 2, nelt);
   6504  1.1  mrg       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
   6505  1.1  mrg 	{
   6506  1.1  mrg 	  if (dump_enabled_p ())
   6507  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6508  1.1  mrg 			     "shift permutation is not supported by target\n");
   6509  1.1  mrg 	  return false;
   6510  1.1  mrg 	}
   6511  1.1  mrg       shift1_mask = vect_gen_perm_mask_checked (vectype, indices);
   6512  1.1  mrg 
   6513  1.1  mrg       /* Generating permutation constant to shift all elements.
   6514  1.1  mrg 	 For vector length 8 it is {5 6 7 8 9 10 11 12}.  */
   6515  1.1  mrg       for (i = 0; i < nelt; i++)
   6516  1.1  mrg 	sel[i] = 2 * (nelt / 3) + 1 + i;
   6517  1.1  mrg       indices.new_vector (sel, 2, nelt);
   6518  1.1  mrg       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
   6519  1.1  mrg 	{
   6520  1.1  mrg 	  if (dump_enabled_p ())
   6521  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6522  1.1  mrg 			     "shift permutation is not supported by target\n");
   6523  1.1  mrg 	  return false;
   6524  1.1  mrg 	}
   6525  1.1  mrg       shift2_mask = vect_gen_perm_mask_checked (vectype, indices);
   6526  1.1  mrg 
   6527  1.1  mrg       /* Generating permutation constant to shift all elements.
   6528  1.1  mrg 	 For vector length 8 it is {3 4 5 6 7 8 9 10}.  */
   6529  1.1  mrg       for (i = 0; i < nelt; i++)
   6530  1.1  mrg 	sel[i] = (nelt / 3) + (nelt % 3) / 2 + i;
   6531  1.1  mrg       indices.new_vector (sel, 2, nelt);
   6532  1.1  mrg       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
   6533  1.1  mrg 	{
   6534  1.1  mrg 	  if (dump_enabled_p ())
   6535  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6536  1.1  mrg 			     "shift permutation is not supported by target\n");
   6537  1.1  mrg 	  return false;
   6538  1.1  mrg 	}
   6539  1.1  mrg       shift3_mask = vect_gen_perm_mask_checked (vectype, indices);
   6540  1.1  mrg 
   6541  1.1  mrg       /* Generating permutation constant to shift all elements.
   6542  1.1  mrg 	 For vector length 8 it is {5 6 7 8 9 10 11 12}.  */
   6543  1.1  mrg       for (i = 0; i < nelt; i++)
   6544  1.1  mrg 	sel[i] = 2 * (nelt / 3) + (nelt % 3) / 2 + i;
   6545  1.1  mrg       indices.new_vector (sel, 2, nelt);
   6546  1.1  mrg       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
   6547  1.1  mrg 	{
   6548  1.1  mrg 	  if (dump_enabled_p ())
   6549  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6550  1.1  mrg 			     "shift permutation is not supported by target\n");
   6551  1.1  mrg 	  return false;
   6552  1.1  mrg 	}
   6553  1.1  mrg       shift4_mask = vect_gen_perm_mask_checked (vectype, indices);
   6554  1.1  mrg 
   6555  1.1  mrg       for (k = 0; k < 3; k++)
   6556  1.1  mrg 	{
   6557  1.1  mrg 	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3");
   6558  1.1  mrg 	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
   6559  1.1  mrg 					   dr_chain[k], dr_chain[k],
   6560  1.1  mrg 					   perm3_mask);
   6561  1.1  mrg 	  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
   6562  1.1  mrg 	  vect[k] = data_ref;
   6563  1.1  mrg 	}
   6564  1.1  mrg 
   6565  1.1  mrg       for (k = 0; k < 3; k++)
   6566  1.1  mrg 	{
   6567  1.1  mrg 	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift1");
   6568  1.1  mrg 	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
   6569  1.1  mrg 					   vect[k % 3], vect[(k + 1) % 3],
   6570  1.1  mrg 					   shift1_mask);
   6571  1.1  mrg 	  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
   6572  1.1  mrg 	  vect_shift[k] = data_ref;
   6573  1.1  mrg 	}
   6574  1.1  mrg 
   6575  1.1  mrg       for (k = 0; k < 3; k++)
   6576  1.1  mrg 	{
   6577  1.1  mrg 	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift2");
   6578  1.1  mrg 	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
   6579  1.1  mrg 					   vect_shift[(4 - k) % 3],
   6580  1.1  mrg 					   vect_shift[(3 - k) % 3],
   6581  1.1  mrg 					   shift2_mask);
   6582  1.1  mrg 	  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
   6583  1.1  mrg 	  vect[k] = data_ref;
   6584  1.1  mrg 	}
   6585  1.1  mrg 
   6586  1.1  mrg       (*result_chain)[3 - (nelt % 3)] = vect[2];
   6587  1.1  mrg 
   6588  1.1  mrg       data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift3");
   6589  1.1  mrg       perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect[0],
   6590  1.1  mrg 				       vect[0], shift3_mask);
   6591  1.1  mrg       vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
   6592  1.1  mrg       (*result_chain)[nelt % 3] = data_ref;
   6593  1.1  mrg 
   6594  1.1  mrg       data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift4");
   6595  1.1  mrg       perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect[1],
   6596  1.1  mrg 				       vect[1], shift4_mask);
   6597  1.1  mrg       vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
   6598  1.1  mrg       (*result_chain)[0] = data_ref;
   6599  1.1  mrg       return true;
   6600  1.1  mrg     }
   6601  1.1  mrg   return false;
   6602  1.1  mrg }
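
/* Editor's illustration, not part of GCC and never compiled: one pair
   step of the power-of-two branch above, in scalar form.  Each input is
   first shuffled into its even|odd (resp. odd|even) halves as with
   perm2_mask1/perm2_mask2; shift1_mask then concatenates the two odd
   halves and select_mask the two even halves.  The function name and
   the array form are ours.  */
#if 0
static void
model_shift_permute_pair (const int *first, const int *second,
			  int *even_out, int *odd_out, int nelt)
{
  int v0[64], v1[64];			/* Assumes nelt <= 64.  */
  for (int i = 0; i < nelt / 2; i++)
    {
      v0[i] = first[2 * i];		/* perm2_mask1: evens, then odds.  */
      v0[nelt / 2 + i] = first[2 * i + 1];
      v1[i] = second[2 * i + 1];	/* perm2_mask2: odds, then evens.  */
      v1[nelt / 2 + i] = second[2 * i];
    }
  for (int i = 0; i < nelt / 2; i++)
    {
      /* shift1_mask: high half of V0 followed by low half of V1.  */
      odd_out[i] = v0[nelt / 2 + i];
      odd_out[nelt / 2 + i] = v1[i];
      /* select_mask: low half of V0 and high half of V1.  */
      even_out[i] = v0[i];
      even_out[nelt / 2 + i] = v1[nelt / 2 + i];
    }
}
#endif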
   6603  1.1  mrg 
   6604  1.1  mrg /* Function vect_transform_grouped_load.
   6605  1.1  mrg 
   6606  1.1  mrg    Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
    6607  1.1  mrg    to perform their permutation and ascribe the resulting vectorized statements
    6608  1.1  mrg    to the scalar statements.
   6608  1.1  mrg    the scalar statements.
   6609  1.1  mrg */
   6610  1.1  mrg 
   6611  1.1  mrg void
   6612  1.1  mrg vect_transform_grouped_load (vec_info *vinfo, stmt_vec_info stmt_info,
   6613  1.1  mrg 			     vec<tree> dr_chain,
   6614  1.1  mrg 			     int size, gimple_stmt_iterator *gsi)
   6615  1.1  mrg {
   6616  1.1  mrg   machine_mode mode;
   6617  1.1  mrg   vec<tree> result_chain = vNULL;
   6618  1.1  mrg 
   6619  1.1  mrg   /* DR_CHAIN contains input data-refs that are a part of the interleaving.
    6620  1.1  mrg      RESULT_CHAIN is the output of vect_permute_load_chain; it contains
    6621  1.1  mrg      permuted vectors that are ready for vector computation.  */
   6622  1.1  mrg   result_chain.create (size);
   6623  1.1  mrg 
  /* If the reassociation width for the vector type is 2 or greater, the
     target machine can execute 2 or more vector instructions in parallel.
     Otherwise try to get the chain for the load group using
     vect_shift_permute_load_chain.  */
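  /* E.g. (a sketch): with a load group of SIZE 3 on a target whose
     reassociation width for VEC_PERM_EXPR is 1, SIZE is not a power of
     two, so vect_shift_permute_load_chain is tried first and the generic
     vect_permute_load_chain is used only if it fails.  */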
  mode = TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info));
  if (targetm.sched.reassociation_width (VEC_PERM_EXPR, mode) > 1
      || pow2p_hwi (size)
      || !vect_shift_permute_load_chain (vinfo, dr_chain, size, stmt_info,
					 gsi, &result_chain))
    vect_permute_load_chain (vinfo, dr_chain,
			     size, stmt_info, gsi, &result_chain);
  vect_record_grouped_load_vectors (vinfo, stmt_info, result_chain);
  result_chain.release ();
}

/* RESULT_CHAIN contains the output of a group of grouped loads that were
   generated as part of the vectorization of STMT_INFO.  Assign the statement
   for each vector to the associated scalar statement.  */

void
vect_record_grouped_load_vectors (vec_info *, stmt_vec_info stmt_info,
				  vec<tree> result_chain)
{
  stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
  unsigned int i, gap_count;
  tree tmp_data_ref;

  /* Put a permuted data-ref in the VECTORIZED_STMT field.
     Since we scan the chain starting from its first node, the order of the
     statements corresponds to the order of data-refs in RESULT_CHAIN.  */
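  /* E.g. (a sketch): for a gap-free group of three scalar loads,
     RESULT_CHAIN[0] is recorded on the first scalar stmt of the group,
     RESULT_CHAIN[1] on the second and RESULT_CHAIN[2] on the third.  */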
  stmt_vec_info next_stmt_info = first_stmt_info;
  gap_count = 1;
  FOR_EACH_VEC_ELT (result_chain, i, tmp_data_ref)
    {
      if (!next_stmt_info)
	break;

      /* Skip the gaps.  Loads created for the gaps will be removed by the
	 dead code elimination pass later.  No need to check for the first
	 stmt in the group, since it always exists.
	 DR_GROUP_GAP is the number of steps in elements from the previous
	 access (if there is no gap DR_GROUP_GAP is 1).  We skip loads that
	 correspond to the gaps.  */
      if (next_stmt_info != first_stmt_info
	  && gap_count < DR_GROUP_GAP (next_stmt_info))
	{
	  gap_count++;
	  continue;
	}
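
      /* E.g. (a sketch): if the group accesses a[0] and a[2], the second
	 stmt's DR_GROUP_GAP is 2, so the vector loaded for the unused
	 element at offset 1 is skipped here and later removed as dead
	 code.  */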

      /* ???  The following needs cleanup after the removal of
         DR_GROUP_SAME_DR_STMT.  */
      if (next_stmt_info)
        {
	  gimple *new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
	  /* We assume that if VEC_STMT is not NULL, this is a case of multiple
	     copies, and we put the new vector statement last.  */
	  STMT_VINFO_VEC_STMTS (next_stmt_info).safe_push (new_stmt);

	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
	  gap_count = 1;
        }
    }
}

/* Function vect_can_force_dr_alignment_p.

   Return whether the alignment of DECL can be forced to an ALIGNMENT-bit
   boundary.  */
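/* For instance (a sketch): a TREE_STATIC "int a[256]" can typically be
   re-aligned up to MAX_OFILE_ALIGNMENT, an automatic variable only up to
   MAX_STACK_ALIGNMENT, and a DECL that is not a VAR_DECL (e.g. a
   PARM_DECL) cannot be forced at all.  */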

bool
vect_can_force_dr_alignment_p (const_tree decl, poly_uint64 alignment)
{
  if (!VAR_P (decl))
    return false;

  if (decl_in_symtab_p (decl)
      && !symtab_node::get (decl)->can_increase_alignment_p ())
    return false;

  if (TREE_STATIC (decl))
    return (known_le (alignment,
		      (unsigned HOST_WIDE_INT) MAX_OFILE_ALIGNMENT));
  else
    return (known_le (alignment, (unsigned HOST_WIDE_INT) MAX_STACK_ALIGNMENT));
}

/* Return whether the data reference DR_INFO, with vector type VECTYPE and
   misalignment MISALIGNMENT (DR_MISALIGNMENT_UNKNOWN if the misalignment
   is not known at compile time), is supported with respect to its
   alignment.  */
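/* A sketch of the outcomes: MISALIGNMENT 0 yields dr_aligned; masked
   internal-fn loads/stores are assumed dr_unaligned_supported; other reads
   may get one of the explicit realignment schemes; otherwise the target
   hook support_vector_misalignment decides between dr_unaligned_supported
   and dr_unaligned_unsupported.  */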

enum dr_alignment_support
vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info,
			       tree vectype, int misalignment)
{
  data_reference *dr = dr_info->dr;
  stmt_vec_info stmt_info = dr_info->stmt;
  machine_mode mode = TYPE_MODE (vectype);
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop *vect_loop = NULL;
  bool nested_in_vect_loop = false;

  if (misalignment == 0)
    return dr_aligned;

  /* For now assume all conditional loads/stores support unaligned
     access without any special code.  */
  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    if (gimple_call_internal_p (stmt)
	&& (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
	    || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
      return dr_unaligned_supported;

  if (loop_vinfo)
    {
      vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt_info);
    }

  /* Possibly unaligned access.  */

  /* We can choose between using the implicit realignment scheme (generating
     a misaligned_move stmt) and the explicit realignment scheme (generating
     aligned loads with a REALIGN_LOAD).  There are two variants of the
     explicit realignment scheme: optimized and unoptimized.
     We can optimize the realignment only if the step between consecutive
     vector loads is equal to the vector size.  Since the vector memory
     accesses advance in steps of VS (Vector Size) in the vectorized loop, it
     is guaranteed that the misalignment amount remains the same throughout the
     execution of the vectorized loop.  Therefore, we can create the
     "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
     at the loop preheader.

     However, in the case of outer-loop vectorization, when vectorizing a
     memory access in the inner-loop nested within the LOOP that is now being
     vectorized, while it is guaranteed that the misalignment of the
     vectorized memory access will remain the same in different outer-loop
     iterations, it is *not* guaranteed that it will remain the same throughout
     the execution of the inner-loop.  This is because the inner-loop advances
     with the original scalar step (and not in steps of VS).  If the inner-loop
     step happens to be a multiple of VS, then the misalignment remains fixed
     and we can use the optimized realignment scheme.  For example:

      for (i=0; i<N; i++)
        for (j=0; j<M; j++)
          s += a[i+j];

     When vectorizing the i-loop in the above example, the step between
     consecutive vector loads is 1, and so the misalignment does not remain
     fixed across the execution of the inner-loop, and the realignment cannot
     be optimized (as illustrated in the following pseudo vectorized loop):

      for (i=0; i<N; i+=4)
        for (j=0; j<M; j++){
          vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
                         // when j is {0,1,2,3,4,5,6,7,...} respectively.
                         // (assuming that we start from an aligned address).
          }

     We therefore have to use the unoptimized realignment scheme:

      for (i=0; i<N; i+=4)
        for (j=k; j<M; j+=4)
          vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
                         // that the misalignment of the initial address is
                         // 0).

     The loop can then be vectorized as follows:

      for (k=0; k<4; k++){
        rt = get_realignment_token (&vp[k]);
        for (i=0; i<N; i+=4){
          v1 = vp[i+k];
          for (j=k; j<M; j+=4){
            v2 = vp[i+j+VS-1];
            va = REALIGN_LOAD <v1,v2,rt>;
            vs += va;
            v1 = v2;
          }
        }
      } */

  if (DR_IS_READ (dr))
    {
      if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
	  && (!targetm.vectorize.builtin_mask_for_load
	      || targetm.vectorize.builtin_mask_for_load ()))
	{
	  /* If we are doing SLP then the accesses need not have the
	     same alignment; instead it depends on the SLP group size.  */
	  if (loop_vinfo
	      && STMT_SLP_TYPE (stmt_info)
	      && !multiple_p (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
			      * (DR_GROUP_SIZE
				 (DR_GROUP_FIRST_ELEMENT (stmt_info))),
			      TYPE_VECTOR_SUBPARTS (vectype)))
	    ;
	  else if (!loop_vinfo
		   || (nested_in_vect_loop
		       && maybe_ne (TREE_INT_CST_LOW (DR_STEP (dr)),
				    GET_MODE_SIZE (TYPE_MODE (vectype)))))
	    return dr_explicit_realign;
	  else
	    return dr_explicit_realign_optimized;
	}
    }

  bool is_packed = false;
  tree type = TREE_TYPE (DR_REF (dr));
  if (misalignment == DR_MISALIGNMENT_UNKNOWN)
    is_packed = not_size_aligned (DR_REF (dr));
  if (targetm.vectorize.support_vector_misalignment (mode, type, misalignment,
						     is_packed))
    return dr_unaligned_supported;

  /* Unsupported.  */
  return dr_unaligned_unsupported;
}