Home | History | Annotate | Line # | Download | only in gcc
tree-vect-data-refs.cc revision 1.1.1.1
      1 /* Data References Analysis and Manipulation Utilities for Vectorization.
      2    Copyright (C) 2003-2022 Free Software Foundation, Inc.
      3    Contributed by Dorit Naishlos <dorit (at) il.ibm.com>
      4    and Ira Rosen <irar (at) il.ibm.com>
      5 
      6 This file is part of GCC.
      7 
      8 GCC is free software; you can redistribute it and/or modify it under
      9 the terms of the GNU General Public License as published by the Free
     10 Software Foundation; either version 3, or (at your option) any later
     11 version.
     12 
     13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
     14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
     15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     16 for more details.
     17 
     18 You should have received a copy of the GNU General Public License
     19 along with GCC; see the file COPYING3.  If not see
     20 <http://www.gnu.org/licenses/>.  */
     21 
     22 #define INCLUDE_ALGORITHM
     23 #include "config.h"
     24 #include "system.h"
     25 #include "coretypes.h"
     26 #include "backend.h"
     27 #include "target.h"
     28 #include "rtl.h"
     29 #include "tree.h"
     30 #include "gimple.h"
     31 #include "predict.h"
     32 #include "memmodel.h"
     33 #include "tm_p.h"
     34 #include "ssa.h"
     35 #include "optabs-tree.h"
     36 #include "cgraph.h"
     37 #include "dumpfile.h"
     38 #include "alias.h"
     39 #include "fold-const.h"
     40 #include "stor-layout.h"
     41 #include "tree-eh.h"
     42 #include "gimplify.h"
     43 #include "gimple-iterator.h"
     44 #include "gimplify-me.h"
     45 #include "tree-ssa-loop-ivopts.h"
     46 #include "tree-ssa-loop-manip.h"
     47 #include "tree-ssa-loop.h"
     48 #include "cfgloop.h"
     49 #include "tree-scalar-evolution.h"
     50 #include "tree-vectorizer.h"
     51 #include "expr.h"
     52 #include "builtins.h"
     53 #include "tree-cfg.h"
     54 #include "tree-hash-traits.h"
     55 #include "vec-perm-indices.h"
     56 #include "internal-fn.h"
     57 #include "gimple-fold.h"
     58 
     59 /* Return true if load- or store-lanes optab OPTAB is implemented for
     60    COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */
     61 
     62 static bool
     63 vect_lanes_optab_supported_p (const char *name, convert_optab optab,
     64 			      tree vectype, unsigned HOST_WIDE_INT count)
     65 {
     66   machine_mode mode, array_mode;
     67   bool limit_p;
     68 
     69   mode = TYPE_MODE (vectype);
     70   if (!targetm.array_mode (mode, count).exists (&array_mode))
     71     {
     72       poly_uint64 bits = count * GET_MODE_BITSIZE (mode);
     73       limit_p = !targetm.array_mode_supported_p (mode, count);
     74       if (!int_mode_for_size (bits, limit_p).exists (&array_mode))
     75 	{
     76 	  if (dump_enabled_p ())
     77 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
     78 			     "no array mode for %s[%wu]\n",
     79 			     GET_MODE_NAME (mode), count);
     80 	  return false;
     81 	}
     82     }
     83 
     84   if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
     85     {
     86       if (dump_enabled_p ())
     87 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
     88                          "cannot use %s<%s><%s>\n", name,
     89                          GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
     90       return false;
     91     }
     92 
     93   if (dump_enabled_p ())
     94     dump_printf_loc (MSG_NOTE, vect_location,
     95                      "can use %s<%s><%s>\n", name, GET_MODE_NAME (array_mode),
     96                      GET_MODE_NAME (mode));
     97 
     98   return true;
     99 }
    100 
    101 
/* Return the smallest scalar part of STMT_INFO.
   This is used to determine the vectype of the stmt.  We generally set the
   vectype according to the type of the result (lhs).  For stmts whose
   result-type is different than the type of the arguments (e.g., demotion,
   promotion), vectype will be reset appropriately (later).  Note that we have
   to visit the smallest datatype in this function, because that determines the
   VF.  If the smallest datatype in the loop is present only as the rhs of a
   promotion operation - we'd miss it.
   Such a case, where a variable of this datatype does not appear in the lhs
   anywhere in the loop, can only occur if it's an invariant: e.g.:
   'int_x = (int) short_inv', which we'd expect to have been optimized away by
   invariant motion.  However, we cannot rely on invariant motion to always
   take invariants out of the loop, and so in the case of promotion we also
   have to check the rhs.
   SCALAR_TYPE is the scalar type determined so far; it is returned
   unchanged when STMT_INFO does not provide a smaller candidate.  */

tree
vect_get_smallest_scalar_type (stmt_vec_info stmt_info, tree scalar_type)
{
  HOST_WIDE_INT lhs, rhs;

  /* During the analysis phase, this function is called on arbitrary
     statements that might not have scalar results.  */
  if (!tree_fits_uhwi_p (TYPE_SIZE_UNIT (scalar_type)))
    return scalar_type;

  /* Start with the size of SCALAR_TYPE on both sides.  */
  lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (assign)
    {
      scalar_type = TREE_TYPE (gimple_assign_lhs (assign));
      /* For conversions and widening operations the rhs type may be
	 narrower than the lhs type; prefer it if so.  */
      if (gimple_assign_cast_p (assign)
	  || gimple_assign_rhs_code (assign) == DOT_PROD_EXPR
	  || gimple_assign_rhs_code (assign) == WIDEN_SUM_EXPR
	  || gimple_assign_rhs_code (assign) == WIDEN_MULT_EXPR
	  || gimple_assign_rhs_code (assign) == WIDEN_LSHIFT_EXPR
	  || gimple_assign_rhs_code (assign) == WIDEN_PLUS_EXPR
	  || gimple_assign_rhs_code (assign) == WIDEN_MINUS_EXPR
	  || gimple_assign_rhs_code (assign) == FLOAT_EXPR)
	{
	  tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (assign));

	  rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
	  if (rhs < lhs)
	    scalar_type = rhs_type;
	}
    }
  else if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
    {
      /* I is the index of the call argument to consider as the rhs,
	 or ~0U when no argument needs checking.  */
      unsigned int i = 0;
      if (gimple_call_internal_p (call))
	{
	  internal_fn ifn = gimple_call_internal_fn (call);
	  if (internal_load_fn_p (ifn))
	    /* For loads the LHS type does the trick.  */
	    i = ~0U;
	  else if (internal_store_fn_p (ifn))
	    {
	      /* For stores use the type of the stored value.  */
	      i = internal_fn_stored_value_index (ifn);
	      scalar_type = TREE_TYPE (gimple_call_arg (call, i));
	      i = ~0U;
	    }
	  else if (internal_fn_mask_index (ifn) == 0)
	    /* Skip the mask argument.  */
	    i = 1;
	}
      if (i < gimple_call_num_args (call))
	{
	  tree rhs_type = TREE_TYPE (gimple_call_arg (call, i));
	  if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (rhs_type)))
	    {
	      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
	      if (rhs < lhs)
		scalar_type = rhs_type;
	    }
	}
    }

  return scalar_type;
}
    184 
    185 
    186 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
    187    tested at run-time.  Return TRUE if DDR was successfully inserted.
    188    Return false if versioning is not supported.  */
    189 
    190 static opt_result
    191 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
    192 {
    193   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    194 
    195   if ((unsigned) param_vect_max_version_for_alias_checks == 0)
    196     return opt_result::failure_at (vect_location,
    197 				   "will not create alias checks, as"
    198 				   " --param vect-max-version-for-alias-checks"
    199 				   " == 0\n");
    200 
    201   opt_result res
    202     = runtime_alias_check_p (ddr, loop,
    203 			     optimize_loop_nest_for_speed_p (loop));
    204   if (!res)
    205     return res;
    206 
    207   LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (ddr);
    208   return opt_result::success ();
    209 }
    210 
    211 /* Record that loop LOOP_VINFO needs to check that VALUE is nonzero.  */
    212 
    213 static void
    214 vect_check_nonzero_value (loop_vec_info loop_vinfo, tree value)
    215 {
    216   const vec<tree> &checks = LOOP_VINFO_CHECK_NONZERO (loop_vinfo);
    217   for (unsigned int i = 0; i < checks.length(); ++i)
    218     if (checks[i] == value)
    219       return;
    220 
    221   if (dump_enabled_p ())
    222     dump_printf_loc (MSG_NOTE, vect_location,
    223 		     "need run-time check that %T is nonzero\n",
    224 		     value);
    225   LOOP_VINFO_CHECK_NONZERO (loop_vinfo).safe_push (value);
    226 }
    227 
    228 /* Return true if we know that the order of vectorized DR_INFO_A and
    229    vectorized DR_INFO_B will be the same as the order of DR_INFO_A and
    230    DR_INFO_B.  At least one of the accesses is a write.  */
    231 
    232 static bool
    233 vect_preserves_scalar_order_p (dr_vec_info *dr_info_a, dr_vec_info *dr_info_b)
    234 {
    235   stmt_vec_info stmtinfo_a = dr_info_a->stmt;
    236   stmt_vec_info stmtinfo_b = dr_info_b->stmt;
    237 
    238   /* Single statements are always kept in their original order.  */
    239   if (!STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
    240       && !STMT_VINFO_GROUPED_ACCESS (stmtinfo_b))
    241     return true;
    242 
    243   /* STMT_A and STMT_B belong to overlapping groups.  All loads are
    244      emitted at the position of the first scalar load.
    245      Stores in a group are emitted at the position of the last scalar store.
    246      Compute that position and check whether the resulting order matches
    247      the current one.  */
    248   stmt_vec_info il_a = DR_GROUP_FIRST_ELEMENT (stmtinfo_a);
    249   if (il_a)
    250     {
    251       if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_a)))
    252 	for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s;
    253 	     s = DR_GROUP_NEXT_ELEMENT (s))
    254 	  il_a = get_later_stmt (il_a, s);
    255       else /* DR_IS_READ */
    256 	for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s;
    257 	     s = DR_GROUP_NEXT_ELEMENT (s))
    258 	  if (get_later_stmt (il_a, s) == il_a)
    259 	    il_a = s;
    260     }
    261   else
    262     il_a = stmtinfo_a;
    263   stmt_vec_info il_b = DR_GROUP_FIRST_ELEMENT (stmtinfo_b);
    264   if (il_b)
    265     {
    266       if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_b)))
    267 	for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s;
    268 	     s = DR_GROUP_NEXT_ELEMENT (s))
    269 	  il_b = get_later_stmt (il_b, s);
    270       else /* DR_IS_READ */
    271 	for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s;
    272 	     s = DR_GROUP_NEXT_ELEMENT (s))
    273 	  if (get_later_stmt (il_b, s) == il_b)
    274 	    il_b = s;
    275     }
    276   else
    277     il_b = stmtinfo_b;
    278   bool a_after_b = (get_later_stmt (stmtinfo_a, stmtinfo_b) == stmtinfo_a);
    279   return (get_later_stmt (il_a, il_b) == il_a) == a_after_b;
    280 }
    281 
    282 /* A subroutine of vect_analyze_data_ref_dependence.  Handle
    283    DDR_COULD_BE_INDEPENDENT_P ddr DDR that has a known set of dependence
    284    distances.  These distances are conservatively correct but they don't
    285    reflect a guaranteed dependence.
    286 
    287    Return true if this function does all the work necessary to avoid
    288    an alias or false if the caller should use the dependence distances
    289    to limit the vectorization factor in the usual way.  LOOP_DEPTH is
    290    the depth of the loop described by LOOP_VINFO and the other arguments
    291    are as for vect_analyze_data_ref_dependence.  */
    292 
    293 static bool
    294 vect_analyze_possibly_independent_ddr (data_dependence_relation *ddr,
    295 				       loop_vec_info loop_vinfo,
    296 				       int loop_depth, unsigned int *max_vf)
    297 {
    298   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    299   for (lambda_vector &dist_v : DDR_DIST_VECTS (ddr))
    300     {
    301       int dist = dist_v[loop_depth];
    302       if (dist != 0 && !(dist > 0 && DDR_REVERSED_P (ddr)))
    303 	{
    304 	  /* If the user asserted safelen >= DIST consecutive iterations
    305 	     can be executed concurrently, assume independence.
    306 
    307 	     ??? An alternative would be to add the alias check even
    308 	     in this case, and vectorize the fallback loop with the
    309 	     maximum VF set to safelen.  However, if the user has
    310 	     explicitly given a length, it's less likely that that
    311 	     would be a win.  */
    312 	  if (loop->safelen >= 2 && abs_hwi (dist) <= loop->safelen)
    313 	    {
    314 	      if ((unsigned int) loop->safelen < *max_vf)
    315 		*max_vf = loop->safelen;
    316 	      LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
    317 	      continue;
    318 	    }
    319 
    320 	  /* For dependence distances of 2 or more, we have the option
    321 	     of limiting VF or checking for an alias at runtime.
    322 	     Prefer to check at runtime if we can, to avoid limiting
    323 	     the VF unnecessarily when the bases are in fact independent.
    324 
    325 	     Note that the alias checks will be removed if the VF ends up
    326 	     being small enough.  */
    327 	  dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (DDR_A (ddr));
    328 	  dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr));
    329 	  return (!STMT_VINFO_GATHER_SCATTER_P (dr_info_a->stmt)
    330 		  && !STMT_VINFO_GATHER_SCATTER_P (dr_info_b->stmt)
    331 		  && vect_mark_for_runtime_alias_test (ddr, loop_vinfo));
    332 	}
    333     }
    334   return true;
    335 }
    336 
    337 
/* Function vect_analyze_data_ref_dependence.

   FIXME: I needed to change the sense of the returned flag.

   Return a failure result if there (might) exist a dependence between a
   memory-reference DRA and a memory-reference DRB that prevents
   vectorization.  When versioning for alias may check a dependence at
   run-time, return a success result.  Adjust *MAX_VF according to
   the data dependence.  */

static opt_result
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
				  loop_vec_info loop_vinfo,
				  unsigned int *max_vf)
{
  unsigned int i;
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (dra);
  dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (drb);
  stmt_vec_info stmtinfo_a = dr_info_a->stmt;
  stmt_vec_info stmtinfo_b = dr_info_b->stmt;
  lambda_vector dist_v;
  unsigned int loop_depth;

  /* If user asserted safelen consecutive iterations can be
     executed concurrently, assume independence.  Returns true when the
     assertion applies (and caps *MAX_VF at safelen); false otherwise.  */
  auto apply_safelen = [&]()
    {
      if (loop->safelen >= 2)
	{
	  if ((unsigned int) loop->safelen < *max_vf)
	    *max_vf = loop->safelen;
	  LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
	  return true;
	}
      return false;
    };

  /* In loop analysis all data references should be vectorizable.  */
  if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
      || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
    gcc_unreachable ();

  /* Independent data accesses.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return opt_result::success ();

  /* The same reference, or two reads, can never form a harmful
     dependence.  */
  if (dra == drb
      || (DR_IS_READ (dra) && DR_IS_READ (drb)))
    return opt_result::success ();

  /* We do not have to consider dependences between accesses that belong
     to the same group, unless the stride could be smaller than the
     group size.  */
  if (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)
      && (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)
	  == DR_GROUP_FIRST_ELEMENT (stmtinfo_b))
      && !STMT_VINFO_STRIDED_P (stmtinfo_a))
    return opt_result::success ();

  /* Even if we have an anti-dependence then, as the vectorized loop covers at
     least two scalar iterations, there is always also a true dependence.
     As the vectorizer does not re-order loads and stores we can ignore
     the anti-dependence if TBAA can disambiguate both DRs similar to the
     case with known negative distance anti-dependences (positive
     distance anti-dependences would violate TBAA constraints).  */
  if (((DR_IS_READ (dra) && DR_IS_WRITE (drb))
       || (DR_IS_WRITE (dra) && DR_IS_READ (drb)))
      && !alias_sets_conflict_p (get_alias_set (DR_REF (dra)),
				 get_alias_set (DR_REF (drb))))
    return opt_result::success ();

  /* Gather/scatter accesses have no analyzable dependence distance;
     either the safelen assertion saves us or we must give up.  */
  if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
      || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
    {
      if (apply_safelen ())
	return opt_result::success ();

      return opt_result::failure_at
	(stmtinfo_a->stmt,
	 "possible alias involving gather/scatter between %T and %T\n",
	 DR_REF (dra), DR_REF (drb));
    }

  /* Unknown data dependence.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (apply_safelen ())
	return opt_result::success ();

      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmtinfo_a->stmt,
			 "versioning for alias required: "
			 "can't determine dependence between %T and %T\n",
			 DR_REF (dra), DR_REF (drb));

      /* Add to list of ddrs that need to be tested at run-time.  */
      return vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  /* Known data dependence.  */
  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (apply_safelen ())
	return opt_result::success ();

      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmtinfo_a->stmt,
			 "versioning for alias required: "
			 "bad dist vector for %T and %T\n",
			 DR_REF (dra), DR_REF (drb));
      /* Add to list of ddrs that need to be tested at run-time.  */
      return vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));

  /* Let the DDR_COULD_BE_INDEPENDENT_P analysis try to avoid the
     dependence via a run-time alias check before we fall back to
     limiting the VF.  */
  if (DDR_COULD_BE_INDEPENDENT_P (ddr)
      && vect_analyze_possibly_independent_ddr (ddr, loop_vinfo,
						loop_depth, max_vf))
    return opt_result::success ();

  /* Walk all known dependence distances and decide per distance whether
     it is harmless, limits the VF, or prevents vectorization.  */
  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
    {
      int dist = dist_v[loop_depth];

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
                         "dependence distance  = %d.\n", dist);

      if (dist == 0)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "dependence distance == 0 between %T and %T\n",
			     DR_REF (dra), DR_REF (drb));

	  /* When we perform grouped accesses and perform implicit CSE
	     by detecting equal accesses and doing disambiguation with
	     runtime alias tests like for
	        .. = a[i];
		.. = a[i+1];
		a[i] = ..;
		a[i+1] = ..;
		*p = ..;
		.. = a[i];
		.. = a[i+1];
	     where we will end up loading { a[i], a[i+1] } once, make
	     sure that inserting group loads before the first load and
	     stores after the last store will do the right thing.
	     Similar for groups like
	        a[i] = ...;
		... = a[i];
		a[i+1] = ...;
	     where loads from the group interleave with the store.  */
	  if (!vect_preserves_scalar_order_p (dr_info_a, dr_info_b))
	    return opt_result::failure_at (stmtinfo_a->stmt,
					   "READ_WRITE dependence"
					   " in interleaving.\n");

	  /* A zero-step access would make every vector lane touch the
	     same location; without a safelen assertion we need a
	     nonzero-step guarantee, possibly checked at run time.  */
	  if (loop->safelen < 2)
	    {
	      tree indicator = dr_zero_step_indicator (dra);
	      if (!indicator || integer_zerop (indicator))
		return opt_result::failure_at (stmtinfo_a->stmt,
					       "access also has a zero step\n");
	      else if (TREE_CODE (indicator) != INTEGER_CST)
		vect_check_nonzero_value (loop_vinfo, indicator);
	    }
	  continue;
	}

      if (dist > 0 && DDR_REVERSED_P (ddr))
	{
	  /* If DDR_REVERSED_P the order of the data-refs in DDR was
	     reversed (to make distance vector positive), and the actual
	     distance is negative.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
	                     "dependence distance negative.\n");
	  /* When doing outer loop vectorization, we need to check if there is
	     a backward dependence at the inner loop level if the dependence
	     at the outer loop is reversed.  See PR81740.  */
	  if (nested_in_vect_loop_p (loop, stmtinfo_a)
	      || nested_in_vect_loop_p (loop, stmtinfo_b))
	    {
	      unsigned inner_depth = index_in_loop_nest (loop->inner->num,
							 DDR_LOOP_NEST (ddr));
	      if (dist_v[inner_depth] < 0)
		return opt_result::failure_at (stmtinfo_a->stmt,
					       "not vectorized, dependence "
					       "between data-refs %T and %T\n",
					       DR_REF (dra), DR_REF (drb));
	    }
	  /* Record a negative dependence distance to later limit the
	     amount of stmt copying / unrolling we can perform.
	     Only need to handle read-after-write dependence.  */
	  if (DR_IS_READ (drb)
	      && (STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) == 0
		  || STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) > (unsigned)dist))
	    STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) = dist;
	  continue;
	}

      unsigned int abs_dist = abs (dist);
      if (abs_dist >= 2 && abs_dist < *max_vf)
	{
	  /* The dependence distance requires reduction of the maximal
	     vectorization factor.  */
	  *max_vf = abs_dist;
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
	                     "adjusting maximal vectorization factor to %i\n",
	                     *max_vf);
	}

      if (abs_dist >= *max_vf)
	{
	  /* Dependence distance does not create dependence, as far as
	     vectorization is concerned, in this case.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
	                     "dependence distance >= VF.\n");
	  continue;
	}

      /* Distance 1 (or -1 after reversal handling): a true dependence
	 between adjacent iterations that vectorization cannot honor.  */
      return opt_result::failure_at (stmtinfo_a->stmt,
				     "not vectorized, possible dependence "
				     "between data-refs %T and %T\n",
				     DR_REF (dra), DR_REF (drb));
    }

  return opt_result::success ();
}
    573 
    574 /* Function vect_analyze_data_ref_dependences.
    575 
    576    Examine all the data references in the loop, and make sure there do not
    577    exist any data dependences between them.  Set *MAX_VF according to
    578    the maximum vectorization factor the data dependences allow.  */
    579 
    580 opt_result
    581 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
    582 				   unsigned int *max_vf)
    583 {
    584   unsigned int i;
    585   struct data_dependence_relation *ddr;
    586 
    587   DUMP_VECT_SCOPE ("vect_analyze_data_ref_dependences");
    588 
    589   if (!LOOP_VINFO_DDRS (loop_vinfo).exists ())
    590     {
    591       LOOP_VINFO_DDRS (loop_vinfo)
    592 	.create (LOOP_VINFO_DATAREFS (loop_vinfo).length ()
    593 		 * LOOP_VINFO_DATAREFS (loop_vinfo).length ());
    594       /* We do not need read-read dependences.  */
    595       bool res = compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo),
    596 					  &LOOP_VINFO_DDRS (loop_vinfo),
    597 					  LOOP_VINFO_LOOP_NEST (loop_vinfo),
    598 					  false);
    599       gcc_assert (res);
    600     }
    601 
    602   LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = true;
    603 
    604   /* For epilogues we either have no aliases or alias versioning
    605      was applied to original loop.  Therefore we may just get max_vf
    606      using VF of original loop.  */
    607   if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
    608     *max_vf = LOOP_VINFO_ORIG_MAX_VECT_FACTOR (loop_vinfo);
    609   else
    610     FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr)
    611       {
    612 	opt_result res
    613 	  = vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf);
    614 	if (!res)
    615 	  return res;
    616       }
    617 
    618   return opt_result::success ();
    619 }
    620 
    621 
    622 /* Function vect_slp_analyze_data_ref_dependence.
    623 
    624    Return TRUE if there (might) exist a dependence between a memory-reference
    625    DRA and a memory-reference DRB for VINFO.  When versioning for alias
    626    may check a dependence at run-time, return FALSE.  Adjust *MAX_VF
    627    according to the data dependence.  */
    628 
    629 static bool
    630 vect_slp_analyze_data_ref_dependence (vec_info *vinfo,
    631 				      struct data_dependence_relation *ddr)
    632 {
    633   struct data_reference *dra = DDR_A (ddr);
    634   struct data_reference *drb = DDR_B (ddr);
    635   dr_vec_info *dr_info_a = vinfo->lookup_dr (dra);
    636   dr_vec_info *dr_info_b = vinfo->lookup_dr (drb);
    637 
    638   /* We need to check dependences of statements marked as unvectorizable
    639      as well, they still can prohibit vectorization.  */
    640 
    641   /* Independent data accesses.  */
    642   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    643     return false;
    644 
    645   if (dra == drb)
    646     return false;
    647 
    648   /* Read-read is OK.  */
    649   if (DR_IS_READ (dra) && DR_IS_READ (drb))
    650     return false;
    651 
    652   /* If dra and drb are part of the same interleaving chain consider
    653      them independent.  */
    654   if (STMT_VINFO_GROUPED_ACCESS (dr_info_a->stmt)
    655       && (DR_GROUP_FIRST_ELEMENT (dr_info_a->stmt)
    656 	  == DR_GROUP_FIRST_ELEMENT (dr_info_b->stmt)))
    657     return false;
    658 
    659   /* Unknown data dependence.  */
    660   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    661     {
    662       if  (dump_enabled_p ())
    663 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    664 			 "can't determine dependence between %T and %T\n",
    665 			 DR_REF (dra), DR_REF (drb));
    666     }
    667   else if (dump_enabled_p ())
    668     dump_printf_loc (MSG_NOTE, vect_location,
    669 		     "determined dependence between %T and %T\n",
    670 		     DR_REF (dra), DR_REF (drb));
    671 
    672   return true;
    673 }
    674 
    675 
/* Analyze dependences involved in the transform of a store SLP NODE.
   Return true if all scalar stores of the node can be sunk to the
   position of the last one, false otherwise.  */

static bool
vect_slp_analyze_store_dependences (vec_info *vinfo, slp_tree node)
{
  /* This walks over all stmts involved in the SLP store done
     in NODE verifying we can sink them up to the last stmt in the
     group.  */
  stmt_vec_info last_access_info = vect_find_last_scalar_stmt_in_slp (node);
  gcc_assert (DR_IS_WRITE (STMT_VINFO_DATA_REF (last_access_info)));

  for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k)
    {
      stmt_vec_info access_info
	= vect_orig_stmt (SLP_TREE_SCALAR_STMTS (node)[k]);
      if (access_info == last_access_info)
	continue;
      data_reference *dr_a = STMT_VINFO_DATA_REF (access_info);
      /* Lazily-initialized alias-oracle reference for DR_A, used only
	 when an intervening stmt has no recorded data reference.  */
      ao_ref ref;
      bool ref_initialized_p = false;
      /* Walk every stmt between this store and the sink position,
	 checking it does not conflict with moving the store down.  */
      for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt);
	   gsi_stmt (gsi) != last_access_info->stmt; gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  /* Stmts without a virtual use do not touch memory and cannot
	     conflict.  */
	  if (! gimple_vuse (stmt))
	    continue;

	  /* If we couldn't record a (single) data reference for this
	     stmt we have to resort to the alias oracle.  */
	  stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
	  data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info);
	  if (!dr_b)
	    {
	      /* We are moving a store - this means
		 we cannot use TBAA for disambiguation.  */
	      if (!ref_initialized_p)
		ao_ref_init (&ref, DR_REF (dr_a));
	      if (stmt_may_clobber_ref_p_1 (stmt, &ref, false)
		  || ref_maybe_used_by_stmt_p (stmt, &ref, false))
		return false;
	      continue;
	    }

	  /* Presumably the visited flag marks stmts of this SLP
	     instance handled elsewhere -- TODO confirm against the
	     load-dependence analysis which tests this flag.  */
	  gcc_assert (!gimple_visited_p (stmt));

	  /* Full dependence analysis between the two recorded data
	     references.  */
	  ddr_p ddr = initialize_data_dependence_relation (dr_a,
							   dr_b, vNULL);
	  bool dependent = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
	  free_dependence_relation (ddr);
	  if (dependent)
	    return false;
	}
    }
  return true;
}
    731 
    732 /* Analyze dependences involved in the transform of a load SLP NODE.  STORES
    733    contain the vector of scalar stores of this instance if we are
    734    disambiguating the loads.  */
    735 
    736 static bool
    737 vect_slp_analyze_load_dependences (vec_info *vinfo, slp_tree node,
    738 				   vec<stmt_vec_info> stores,
    739 				   stmt_vec_info last_store_info)
    740 {
    741   /* This walks over all stmts involved in the SLP load done
    742      in NODE verifying we can hoist them up to the first stmt in the
    743      group.  */
    744   stmt_vec_info first_access_info = vect_find_first_scalar_stmt_in_slp (node);
    745   gcc_assert (DR_IS_READ (STMT_VINFO_DATA_REF (first_access_info)));
    746 
    747   for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k)
    748     {
    749       stmt_vec_info access_info
    750 	= vect_orig_stmt (SLP_TREE_SCALAR_STMTS (node)[k]);
    751       if (access_info == first_access_info)
    752 	continue;
    753       data_reference *dr_a = STMT_VINFO_DATA_REF (access_info);
    754       ao_ref ref;
    755       bool ref_initialized_p = false;
    756       hash_set<stmt_vec_info> grp_visited;
    757       for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt);
    758 	   gsi_stmt (gsi) != first_access_info->stmt; gsi_prev (&gsi))
    759 	{
    760 	  gimple *stmt = gsi_stmt (gsi);
    761 	  if (! gimple_vdef (stmt))
    762 	    continue;
    763 
    764 	  stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
    765 
    766 	  /* If we run into a store of this same instance (we've just
    767 	     marked those) then delay dependence checking until we run
    768 	     into the last store because this is where it will have
    769 	     been sunk to (and we verified that we can do that already).  */
    770 	  if (gimple_visited_p (stmt))
    771 	    {
    772 	      if (stmt_info != last_store_info)
    773 		continue;
    774 
    775 	      for (stmt_vec_info &store_info : stores)
    776 		{
    777 		  data_reference *store_dr = STMT_VINFO_DATA_REF (store_info);
    778 		  ddr_p ddr = initialize_data_dependence_relation
    779 				(dr_a, store_dr, vNULL);
    780 		  bool dependent
    781 		    = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
    782 		  free_dependence_relation (ddr);
    783 		  if (dependent)
    784 		    return false;
    785 		}
    786 	      continue;
    787 	    }
    788 
    789 	  auto check_hoist = [&] (stmt_vec_info stmt_info) -> bool
    790 	    {
    791 	      /* We are hoisting a load - this means we can use TBAA for
    792 		 disambiguation.  */
    793 	      if (!ref_initialized_p)
    794 		ao_ref_init (&ref, DR_REF (dr_a));
    795 	      if (stmt_may_clobber_ref_p_1 (stmt_info->stmt, &ref, true))
    796 		{
    797 		  /* If we couldn't record a (single) data reference for this
    798 		     stmt we have to give up now.  */
    799 		  data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info);
    800 		  if (!dr_b)
    801 		    return false;
    802 		  ddr_p ddr = initialize_data_dependence_relation (dr_a,
    803 								   dr_b, vNULL);
    804 		  bool dependent
    805 		    = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
    806 		  free_dependence_relation (ddr);
    807 		  if (dependent)
    808 		    return false;
    809 		}
    810 	      /* No dependence.  */
    811 	      return true;
    812 	    };
    813 	  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    814 	    {
    815 	      /* When we run into a store group we have to honor
    816 		 that earlier stores might be moved here.  We don't
    817 		 know exactly which and where to since we lack a
    818 		 back-mapping from DR to SLP node, so assume all
    819 		 earlier stores are sunk here.  It's enough to
    820 		 consider the last stmt of a group for this.
    821 		 ???  Both this and the fact that we disregard that
    822 		 the conflicting instance might be removed later
    823 		 is overly conservative.  */
    824 	      if (!grp_visited.add (DR_GROUP_FIRST_ELEMENT (stmt_info)))
    825 		for (auto store_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
    826 		     store_info != NULL;
    827 		     store_info = DR_GROUP_NEXT_ELEMENT (store_info))
    828 		  if ((store_info == stmt_info
    829 		       || get_later_stmt (store_info, stmt_info) == stmt_info)
    830 		      && !check_hoist (store_info))
    831 		    return false;
    832 	    }
    833 	  else
    834 	    {
    835 	      if (!check_hoist (stmt_info))
    836 		return false;
    837 	    }
    838 	}
    839     }
    840   return true;
    841 }
    842 
    843 
/* Function vect_slp_analyze_instance_dependence.

   Verify that vectorizing SLP INSTANCE does not violate any data
   dependence: check that the instance's stores can be sunk to the
   vectorized-stmt insert location and that its loads can be hoisted
   there, disambiguating the loads against this instance's own stores.
   Return true if the instance is dependence-wise OK to vectorize.  */

bool
vect_slp_analyze_instance_dependence (vec_info *vinfo, slp_instance instance)
{
  DUMP_VECT_SCOPE ("vect_slp_analyze_instance_dependence");

  /* The stores of this instance are at the root of the SLP tree.  */
  slp_tree store = NULL;
  if (SLP_INSTANCE_KIND (instance) == slp_inst_kind_store)
    store = SLP_INSTANCE_TREE (instance);

  /* Verify we can sink stores to the vectorized stmt insert location.  */
  stmt_vec_info last_store_info = NULL;
  if (store)
    {
      if (! vect_slp_analyze_store_dependences (vinfo, store))
	return false;

      /* Mark stores in this instance and remember the last one.
	 The visited flag lets the load analysis recognize stores that
	 belong to this very instance.  */
      last_store_info = vect_find_last_scalar_stmt_in_slp (store);
      for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (store).length (); ++k)
	gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k]->stmt, true);
    }

  bool res = true;

  /* Verify we can sink loads to the vectorized stmt insert location,
     special-casing stores of this instance.  */
  for (slp_tree &load : SLP_INSTANCE_LOADS (instance))
    if (! vect_slp_analyze_load_dependences (vinfo, load,
					     store
					     ? SLP_TREE_SCALAR_STMTS (store)
					     : vNULL, last_store_info))
      {
	res = false;
	break;
      }

  /* Unset the visited flag.  */
  if (store)
    for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (store).length (); ++k)
      gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k]->stmt, false);

  return res;
}
    894 
/* Return the misalignment of DR_INFO accessed in VECTYPE with OFFSET
   applied.  The result is a byte offset modulo the target alignment of
   the access, or DR_MISALIGNMENT_UNKNOWN.  */

int
dr_misalignment (dr_vec_info *dr_info, tree vectype, poly_int64 offset)
{
  HOST_WIDE_INT diff = 0;
  /* Alignment is only analyzed for the first element of a DR group,
     use that but adjust misalignment by the offset of the access.  */
  if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
    {
      dr_vec_info *first_dr
	= STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
      /* vect_analyze_data_ref_accesses guarantees that DR_INIT are
	 INTEGER_CSTs and the first element in the group has the lowest
	 address.  */
      diff = (TREE_INT_CST_LOW (DR_INIT (dr_info->dr))
	      - TREE_INT_CST_LOW (DR_INIT (first_dr->dr)));
      gcc_assert (diff >= 0);
      dr_info = first_dr;
    }

  int misalign = dr_info->misalignment;
  gcc_assert (misalign != DR_MISALIGNMENT_UNINITIALIZED);
  if (misalign == DR_MISALIGNMENT_UNKNOWN)
    return misalign;

  /* If the access is only aligned for a vector type with smaller alignment
     requirement the access has unknown misalignment.  */
  if (maybe_lt (dr_info->target_alignment * BITS_PER_UNIT,
		targetm.vectorize.preferred_vector_alignment (vectype)))
    return DR_MISALIGNMENT_UNKNOWN;

  /* Apply the offset from the DR group start and the externally supplied
     offset which can for example result from a negative stride access.  */
  poly_int64 misalignment = misalign + diff + offset;

  /* vect_compute_data_ref_alignment will have ensured that target_alignment
     is constant and otherwise set misalign to DR_MISALIGNMENT_UNKNOWN.  */
  unsigned HOST_WIDE_INT target_alignment_c
    = dr_info->target_alignment.to_constant ();
  if (!known_misalignment (misalignment, target_alignment_c, &misalign))
    return DR_MISALIGNMENT_UNKNOWN;
  return misalign;
}
    940 
    941 /* Record the base alignment guarantee given by DRB, which occurs
    942    in STMT_INFO.  */
    943 
    944 static void
    945 vect_record_base_alignment (vec_info *vinfo, stmt_vec_info stmt_info,
    946 			    innermost_loop_behavior *drb)
    947 {
    948   bool existed;
    949   std::pair<stmt_vec_info, innermost_loop_behavior *> &entry
    950     = vinfo->base_alignments.get_or_insert (drb->base_address, &existed);
    951   if (!existed || entry.second->base_alignment < drb->base_alignment)
    952     {
    953       entry = std::make_pair (stmt_info, drb);
    954       if (dump_enabled_p ())
    955 	dump_printf_loc (MSG_NOTE, vect_location,
    956 			 "recording new base alignment for %T\n"
    957 			 "  alignment:    %d\n"
    958 			 "  misalignment: %d\n"
    959 			 "  based on:     %G",
    960 			 drb->base_address,
    961 			 drb->base_alignment,
    962 			 drb->base_misalignment,
    963 			 stmt_info->stmt);
    964     }
    965 }
    966 
    967 /* If the region we're going to vectorize is reached, all unconditional
    968    data references occur at least once.  We can therefore pool the base
    969    alignment guarantees from each unconditional reference.  Do this by
    970    going through all the data references in VINFO and checking whether
    971    the containing statement makes the reference unconditionally.  If so,
    972    record the alignment of the base address in VINFO so that it can be
    973    used for all other references with the same base.  */
    974 
    975 void
    976 vect_record_base_alignments (vec_info *vinfo)
    977 {
    978   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    979   class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
    980   for (data_reference *dr : vinfo->shared->datarefs)
    981     {
    982       dr_vec_info *dr_info = vinfo->lookup_dr (dr);
    983       stmt_vec_info stmt_info = dr_info->stmt;
    984       if (!DR_IS_CONDITIONAL_IN_STMT (dr)
    985 	  && STMT_VINFO_VECTORIZABLE (stmt_info)
    986 	  && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    987 	{
    988 	  vect_record_base_alignment (vinfo, stmt_info, &DR_INNERMOST (dr));
    989 
    990 	  /* If DR is nested in the loop that is being vectorized, we can also
    991 	     record the alignment of the base wrt the outer loop.  */
    992 	  if (loop && nested_in_vect_loop_p (loop, stmt_info))
    993 	    vect_record_base_alignment
    994 	      (vinfo, stmt_info, &STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info));
    995 	}
    996     }
    997 }
    998 
/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR_INFO when vectorizing
   with VECTYPE.

   Output:
   1. initialized misalignment info (target alignment and misalignment)
      for DR_INFO

   FOR NOW: No analysis is actually performed. Misalignment is calculated
   only for trivial cases. TODO.  */

static void
vect_compute_data_ref_alignment (vec_info *vinfo, dr_vec_info *dr_info,
				 tree vectype)
{
  stmt_vec_info stmt_info = dr_info->stmt;
  vec_base_alignments *base_alignments = &vinfo->base_alignments;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop *loop = NULL;
  tree ref = DR_REF (dr_info->dr);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_compute_data_ref_alignment:\n");

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Initialize misalignment to unknown.  */
  SET_DR_MISALIGNMENT (dr_info, DR_MISALIGNMENT_UNKNOWN);

  /* Gather/scatter accesses address individual scalars, vector
     alignment is not meaningful for them.  */
  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    return;

  innermost_loop_behavior *drb = vect_dr_behavior (vinfo, dr_info);
  bool step_preserves_misalignment_p;

  poly_uint64 vector_alignment
    = exact_div (targetm.vectorize.preferred_vector_alignment (vectype),
		 BITS_PER_UNIT);
  SET_DR_TARGET_ALIGNMENT (dr_info, vector_alignment);

  /* If the main loop has peeled for alignment we have no way of knowing
     whether the data accesses in the epilogues are aligned.  We can't at
     compile time answer the question whether we have entered the main loop or
     not.  Fixes PR 92351.  */
  if (loop_vinfo)
    {
      loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
      if (orig_loop_vinfo
	  && LOOP_VINFO_PEELING_FOR_ALIGNMENT (orig_loop_vinfo) != 0)
	return;
    }

  /* A non-constant target alignment leaves the misalignment unknown.  */
  unsigned HOST_WIDE_INT vect_align_c;
  if (!vector_alignment.is_constant (&vect_align_c))
    return;

  /* No step for BB vectorization.  */
  if (!loop)
    {
      gcc_assert (integer_zerop (drb->step));
      step_preserves_misalignment_p = true;
    }

  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop evenly
     divides by the vector alignment.  */
  else if (nested_in_vect_loop_p (loop, stmt_info))
    {
      step_preserves_misalignment_p
	= (DR_STEP_ALIGNMENT (dr_info->dr) % vect_align_c) == 0;

      if (dump_enabled_p ())
	{
	  if (step_preserves_misalignment_p)
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "inner step divides the vector alignment.\n");
	  else
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "inner step doesn't divide the vector"
			     " alignment.\n");
	}
    }

  /* Similarly we can only use base and misalignment information relative to
     an innermost loop if the misalignment stays the same throughout the
     execution of the loop.  As above, this is the case if the stride of
     the dataref evenly divides by the alignment.  */
  else
    {
      poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
      step_preserves_misalignment_p
	= multiple_p (DR_STEP_ALIGNMENT (dr_info->dr) * vf, vect_align_c);

      if (!step_preserves_misalignment_p && dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "step doesn't divide the vector alignment.\n");
    }

  unsigned int base_alignment = drb->base_alignment;
  unsigned int base_misalignment = drb->base_misalignment;

  /* Calculate the maximum of the pooled base address alignment and the
     alignment that we can compute for DR itself.  For BB vectorization
     the pooled guarantee is only usable if the recording stmt dominates
     this one (same-BB ties broken by DR group order).  */
  std::pair<stmt_vec_info, innermost_loop_behavior *> *entry
    = base_alignments->get (drb->base_address);
  if (entry
      && base_alignment < (*entry).second->base_alignment
      && (loop_vinfo
	  || (dominated_by_p (CDI_DOMINATORS, gimple_bb (stmt_info->stmt),
			      gimple_bb (entry->first->stmt))
	      && (gimple_bb (stmt_info->stmt) != gimple_bb (entry->first->stmt)
		  || (entry->first->dr_aux.group <= dr_info->group)))))
    {
      base_alignment = entry->second->base_alignment;
      base_misalignment = entry->second->base_misalignment;
    }

  if (drb->offset_alignment < vect_align_c
      || !step_preserves_misalignment_p
      /* We need to know whether the step wrt the vectorized loop is
	 negative when computing the starting misalignment below.  */
      || TREE_CODE (drb->step) != INTEGER_CST)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Unknown alignment for access: %T\n", ref);
      return;
    }

  /* The base is under-aligned: try to force-align the underlying decl.  */
  if (base_alignment < vect_align_c)
    {
      unsigned int max_alignment;
      tree base = get_base_for_alignment (drb->base_address, &max_alignment);
      if (max_alignment < vect_align_c
	  || !vect_can_force_dr_alignment_p (base,
					     vect_align_c * BITS_PER_UNIT))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "can't force alignment of ref: %T\n", ref);
	  return;
	}

      /* Force the alignment of the decl.
	 NOTE: This is the only change to the code we make during
	 the analysis phase, before deciding to vectorize the loop.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "force alignment of %T\n", ref);

      dr_info->base_decl = base;
      dr_info->base_misaligned = true;
      base_misalignment = 0;
    }
  poly_int64 misalignment
    = base_misalignment + wi::to_poly_offset (drb->init).force_shwi ();

  unsigned int const_misalignment;
  if (!known_misalignment (misalignment, vect_align_c, &const_misalignment))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Non-constant misalignment for access: %T\n", ref);
      return;
    }

  SET_DR_MISALIGNMENT (dr_info, const_misalignment);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
		     "misalign = %d bytes of ref %T\n",
		     const_misalignment, ref);

  return;
}
   1179 
   1180 /* Return whether DR_INFO, which is related to DR_PEEL_INFO in
   1181    that it only differs in DR_INIT, is aligned if DR_PEEL_INFO
   1182    is made aligned via peeling.  */
   1183 
   1184 static bool
   1185 vect_dr_aligned_if_related_peeled_dr_is (dr_vec_info *dr_info,
   1186 					 dr_vec_info *dr_peel_info)
   1187 {
   1188   if (multiple_p (DR_TARGET_ALIGNMENT (dr_peel_info),
   1189 		  DR_TARGET_ALIGNMENT (dr_info)))
   1190     {
   1191       poly_offset_int diff
   1192 	= (wi::to_poly_offset (DR_INIT (dr_peel_info->dr))
   1193 	   - wi::to_poly_offset (DR_INIT (dr_info->dr)));
   1194       if (known_eq (diff, 0)
   1195 	  || multiple_p (diff, DR_TARGET_ALIGNMENT (dr_info)))
   1196 	return true;
   1197     }
   1198   return false;
   1199 }
   1200 
   1201 /* Return whether DR_INFO is aligned if DR_PEEL_INFO is made
   1202    aligned via peeling.  */
   1203 
   1204 static bool
   1205 vect_dr_aligned_if_peeled_dr_is (dr_vec_info *dr_info,
   1206 				 dr_vec_info *dr_peel_info)
   1207 {
   1208   if (!operand_equal_p (DR_BASE_ADDRESS (dr_info->dr),
   1209 			DR_BASE_ADDRESS (dr_peel_info->dr), 0)
   1210       || !operand_equal_p (DR_OFFSET (dr_info->dr),
   1211 			   DR_OFFSET (dr_peel_info->dr), 0)
   1212       || !operand_equal_p (DR_STEP (dr_info->dr),
   1213 			   DR_STEP (dr_peel_info->dr), 0))
   1214     return false;
   1215 
   1216   return vect_dr_aligned_if_related_peeled_dr_is (dr_info, dr_peel_info);
   1217 }
   1218 
   1219 /* Compute the value for dr_info->misalign so that the access appears
   1220    aligned.  This is used by peeling to compensate for dr_misalignment
   1221    applying the offset for negative step.  */
   1222 
   1223 int
   1224 vect_dr_misalign_for_aligned_access (dr_vec_info *dr_info)
   1225 {
   1226   if (tree_int_cst_sgn (DR_STEP (dr_info->dr)) >= 0)
   1227     return 0;
   1228 
   1229   tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
   1230   poly_int64 misalignment
   1231     = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
   1232        * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
   1233 
   1234   unsigned HOST_WIDE_INT target_alignment_c;
   1235   int misalign;
   1236   if (!dr_info->target_alignment.is_constant (&target_alignment_c)
   1237       || !known_misalignment (misalignment, target_alignment_c, &misalign))
   1238     return DR_MISALIGNMENT_UNKNOWN;
   1239   return misalign;
   1240 }
   1241 
/* Function vect_update_misalignment_for_peel.
   Sets DR_INFO's misalignment
   - so the access appears aligned if DR_INFO has the same alignment
     as DR_PEEL_INFO,
   - to the misalignment computed using NPEEL if DR_INFO's alignment is known,
   - to -1 (unknown) otherwise.

   DR_INFO - the data reference whose misalignment is to be adjusted.
   DR_PEEL_INFO - the data reference whose misalignment is being made
		  zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
           of DR_PEEL_INFO is known at compile time.  */

static void
vect_update_misalignment_for_peel (dr_vec_info *dr_info,
				   dr_vec_info *dr_peel_info, int npeel)
{
  /* If dr_info is aligned whenever dr_peel_info is, then mark it so.  */
  if (vect_dr_aligned_if_peeled_dr_is (dr_info, dr_peel_info))
    {
      SET_DR_MISALIGNMENT (dr_info,
			   vect_dr_misalign_for_aligned_access (dr_peel_info));
      return;
    }

  /* With constant target alignment and known misalignments, advance
     DR_INFO's misalignment by NPEEL scalar steps, modulo alignment.  */
  unsigned HOST_WIDE_INT alignment;
  if (DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment)
      && known_alignment_for_access_p (dr_info,
				       STMT_VINFO_VECTYPE (dr_info->stmt))
      && known_alignment_for_access_p (dr_peel_info,
				       STMT_VINFO_VECTYPE (dr_peel_info->stmt)))
    {
      int misal = dr_info->misalignment;
      misal += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
      misal &= alignment - 1;
      set_dr_misalignment (dr_info, misal);
      return;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Setting misalignment " \
		     "to unknown (-1).\n");
  SET_DR_MISALIGNMENT (dr_info, DR_MISALIGNMENT_UNKNOWN);
}
   1285 
   1286 /* Return true if alignment is relevant for DR_INFO.  */
   1287 
   1288 static bool
   1289 vect_relevant_for_alignment_p (dr_vec_info *dr_info)
   1290 {
   1291   stmt_vec_info stmt_info = dr_info->stmt;
   1292 
   1293   if (!STMT_VINFO_RELEVANT_P (stmt_info))
   1294     return false;
   1295 
   1296   /* For interleaving, only the alignment of the first access matters.  */
   1297   if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
   1298       && DR_GROUP_FIRST_ELEMENT (stmt_info) != stmt_info)
   1299     return false;
   1300 
   1301   /* Scatter-gather and invariant accesses continue to address individual
   1302      scalars, so vector-level alignment is irrelevant.  */
   1303   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
   1304       || integer_zerop (DR_STEP (dr_info->dr)))
   1305     return false;
   1306 
   1307   /* Strided accesses perform only component accesses, alignment is
   1308      irrelevant for them.  */
   1309   if (STMT_VINFO_STRIDED_P (stmt_info)
   1310       && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
   1311     return false;
   1312 
   1313   return true;
   1314 }
   1315 
   1316 /* Given an memory reference EXP return whether its alignment is less
   1317    than its size.  */
   1318 
   1319 static bool
   1320 not_size_aligned (tree exp)
   1321 {
   1322   if (!tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (exp))))
   1323     return true;
   1324 
   1325   return (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (exp)))
   1326 	  > get_object_alignment (exp));
   1327 }
   1328 
/* Function vector_alignment_reachable_p

   Return true if vector alignment for DR_INFO is reachable by peeling
   a few loop iterations.  Return false otherwise.  */

static bool
vector_alignment_reachable_p (dr_vec_info *dr_info)
{
  stmt_vec_info stmt_info = dr_info->stmt;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* For interleaved access we peel only if number of iterations in
	 the prolog loop ({VF - misalignment}), is a multiple of the
	 number of the interleaved accesses.  */
      int elem_size, mis_in_elements;

      /* FORNOW: handle only known alignment.  */
      if (!known_alignment_for_access_p (dr_info, vectype))
	return false;

      /* Convert the byte misalignment into a count of scalar elements.  */
      poly_uint64 nelements = TYPE_VECTOR_SUBPARTS (vectype);
      poly_uint64 vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
      elem_size = vector_element_size (vector_size, nelements);
      mis_in_elements = dr_misalignment (dr_info, vectype) / elem_size;

      if (!multiple_p (nelements - mis_in_elements, DR_GROUP_SIZE (stmt_info)))
	return false;
    }

  /* If misalignment is known at the compile time then allow peeling
     only if natural alignment is reachable through peeling.  */
  if (known_alignment_for_access_p (dr_info, vectype)
      && !aligned_access_p (dr_info, vectype))
    {
      HOST_WIDE_INT elmsize =
		int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
	                   "data size = %wd. misalignment = %d.\n", elmsize,
			   dr_misalignment (dr_info, vectype));
	}
      /* Peeling advances the address in whole elements, so alignment is
	 only reachable if the element size divides the misalignment.  */
      if (dr_misalignment (dr_info, vectype) % elmsize)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
	                     "data size does not divide the misalignment.\n");
	  return false;
	}
    }

  /* Unknown misalignment: let the target decide, telling it whether the
     reference is packed (less aligned than its size).  */
  if (!known_alignment_for_access_p (dr_info, vectype))
    {
      tree type = TREE_TYPE (DR_REF (dr_info->dr));
      bool is_packed = not_size_aligned (DR_REF (dr_info->dr));
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
	                 "Unknown misalignment, %snaturally aligned\n",
			 is_packed ? "not " : "");
      return targetm.vectorize.vector_alignment_reachable (type, is_packed);
    }

  return true;
}
   1395 
   1396 
   1397 /* Calculate the cost of the memory access represented by DR_INFO.  */
   1398 
   1399 static void
   1400 vect_get_data_access_cost (vec_info *vinfo, dr_vec_info *dr_info,
   1401 			   dr_alignment_support alignment_support_scheme,
   1402 			   int misalignment,
   1403 			   unsigned int *inside_cost,
   1404                            unsigned int *outside_cost,
   1405 			   stmt_vector_for_cost *body_cost_vec,
   1406 			   stmt_vector_for_cost *prologue_cost_vec)
   1407 {
   1408   stmt_vec_info stmt_info = dr_info->stmt;
   1409   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   1410   int ncopies;
   1411 
   1412   if (PURE_SLP_STMT (stmt_info))
   1413     ncopies = 1;
   1414   else
   1415     ncopies = vect_get_num_copies (loop_vinfo, STMT_VINFO_VECTYPE (stmt_info));
   1416 
   1417   if (DR_IS_READ (dr_info->dr))
   1418     vect_get_load_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
   1419 			misalignment, true, inside_cost,
   1420 			outside_cost, prologue_cost_vec, body_cost_vec, false);
   1421   else
   1422     vect_get_store_cost (vinfo,stmt_info, ncopies, alignment_support_scheme,
   1423 			 misalignment, inside_cost, body_cost_vec);
   1424 
   1425   if (dump_enabled_p ())
   1426     dump_printf_loc (MSG_NOTE, vect_location,
   1427                      "vect_get_data_access_cost: inside_cost = %d, "
   1428                      "outside_cost = %d.\n", *inside_cost, *outside_cost);
   1429 }
   1430 
   1431 
/* One candidate peeling amount: peel NPEEL iterations so that DR_INFO
   becomes aligned; COUNT DRs benefit from this choice.  */
typedef struct _vect_peel_info
{
  dr_vec_info *dr_info;		/* The DR this peeling amount aligns.  */
  int npeel;			/* Number of peeled iterations (hash key).  */
  unsigned int count;		/* Number of DRs aligned by this peeling.  */
} *vect_peel_info;

/* A peeling candidate together with the costs it implies.  */
typedef struct _vect_peel_extended_info
{
  vec_info *vinfo;		/* The enclosing vectorization info.  */
  struct _vect_peel_info peel_info;	/* The peeling candidate.  */
  unsigned int inside_cost;	/* Cost inside the vectorized loop.  */
  unsigned int outside_cost;	/* Prologue/epilogue cost.  */
} *vect_peel_extended_info;
   1446 
   1447 
/* Peeling hashtable helpers: hash _vect_peel_info entries by their
   peeling amount NPEEL.  */

struct peel_info_hasher : free_ptr_hash <_vect_peel_info>
{
  static inline hashval_t hash (const _vect_peel_info *);
  static inline bool equal (const _vect_peel_info *, const _vect_peel_info *);
};

/* Hash a peeling candidate by its iteration count.  */

inline hashval_t
peel_info_hasher::hash (const _vect_peel_info *peel_info)
{
  return (hashval_t) peel_info->npeel;
}

/* Two peeling candidates are equal iff they peel the same number of
   iterations.  */

inline bool
peel_info_hasher::equal (const _vect_peel_info *a, const _vect_peel_info *b)
{
  return (a->npeel == b->npeel);
}
   1467 
   1468 
   1469 /* Insert DR_INFO into peeling hash table with NPEEL as key.  */
   1470 
   1471 static void
   1472 vect_peeling_hash_insert (hash_table<peel_info_hasher> *peeling_htab,
   1473 			  loop_vec_info loop_vinfo, dr_vec_info *dr_info,
   1474 			  int npeel, bool supportable_if_not_aligned)
   1475 {
   1476   struct _vect_peel_info elem, *slot;
   1477   _vect_peel_info **new_slot;
   1478 
   1479   elem.npeel = npeel;
   1480   slot = peeling_htab->find (&elem);
   1481   if (slot)
   1482     slot->count++;
   1483   else
   1484     {
   1485       slot = XNEW (struct _vect_peel_info);
   1486       slot->npeel = npeel;
   1487       slot->dr_info = dr_info;
   1488       slot->count = 1;
   1489       new_slot = peeling_htab->find_slot (slot, INSERT);
   1490       *new_slot = slot;
   1491     }
   1492 
   1493   /* If this DR is not supported with unknown misalignment then bias
   1494      this slot when the cost model is disabled.  */
   1495   if (!supportable_if_not_aligned
   1496       && unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
   1497     slot->count += VECT_MAX_COST;
   1498 }
   1499 
   1500 
   1501 /* Traverse peeling hash table to find peeling option that aligns maximum
   1502    number of data accesses.  */
   1503 
   1504 int
   1505 vect_peeling_hash_get_most_frequent (_vect_peel_info **slot,
   1506 				     _vect_peel_extended_info *max)
   1507 {
   1508   vect_peel_info elem = *slot;
   1509 
   1510   if (elem->count > max->peel_info.count
   1511       || (elem->count == max->peel_info.count
   1512           && max->peel_info.npeel > elem->npeel))
   1513     {
   1514       max->peel_info.npeel = elem->npeel;
   1515       max->peel_info.count = elem->count;
   1516       max->peel_info.dr_info = elem->dr_info;
   1517     }
   1518 
   1519   return 1;
   1520 }
   1521 
/* Get the costs of peeling NPEEL iterations for LOOP_VINFO, checking
   data access costs for all data refs.  DR0_INFO is the data reference
   the peeling is meant to align; with NPEEL zero the current
   misalignments are costed, otherwise misalignments are adjusted
   assuming NPEEL scalar iterations are peeled.  Costs are accumulated
   into *INSIDE_COST and *OUTSIDE_COST via BODY_COST_VEC and
   PROLOGUE_COST_VEC.  */

static void
vect_get_peeling_costs_all_drs (loop_vec_info loop_vinfo,
				dr_vec_info *dr0_info,
				unsigned int *inside_cost,
				unsigned int *outside_cost,
				stmt_vector_for_cost *body_cost_vec,
				stmt_vector_for_cost *prologue_cost_vec,
				unsigned int npeel)
{
  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);

  bool dr0_alignment_known_p
    = (dr0_info
       && known_alignment_for_access_p (dr0_info,
					STMT_VINFO_VECTYPE (dr0_info->stmt)));

  for (data_reference *dr : datarefs)
    {
      dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
      if (!vect_relevant_for_alignment_p (dr_info))
	continue;

      tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
      dr_alignment_support alignment_support_scheme;
      int misalignment;
      unsigned HOST_WIDE_INT alignment;

      /* A negative-step access starts at the last element; apply the
	 corresponding offset when computing the misalignment.  */
      bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
					    size_zero_node) < 0;
      poly_int64 off = 0;
      if (negative)
	off = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
	       * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));

      if (npeel == 0)
	misalignment = dr_misalignment (dr_info, vectype, off);
      /* Peeling aligns DR0_INFO and everything mutually aligned with it.  */
      else if (dr_info == dr0_info
	       || vect_dr_aligned_if_peeled_dr_is (dr_info, dr0_info))
	misalignment = 0;
      else if (!dr0_alignment_known_p
	       || !known_alignment_for_access_p (dr_info, vectype)
	       || !DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment))
	misalignment = DR_MISALIGNMENT_UNKNOWN;
      else
	{
	  /* Advance the known misalignment by NPEEL scalar steps,
	     modulo the target alignment.  */
	  misalignment = dr_misalignment (dr_info, vectype, off);
	  misalignment += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
	  misalignment &= alignment - 1;
	}
      alignment_support_scheme
	= vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
					 misalignment);

      vect_get_data_access_cost (loop_vinfo, dr_info,
				 alignment_support_scheme, misalignment,
				 inside_cost, outside_cost,
				 body_cost_vec, prologue_cost_vec);
    }
}
   1586 
/* Traverse peeling hash table and calculate cost for each peeling option.
   Find the one with the lowest cost.
   Callback for hash_table::traverse; updates *MIN whenever this slot's
   option is cheaper, comparing inside cost first and using outside cost
   as the tie-breaker.  Returns 1 so that the traversal continues.  */

int
vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
				   _vect_peel_extended_info *min)
{
  vect_peel_info elem = *slot;
  int dummy;
  unsigned int inside_cost = 0, outside_cost = 0;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (min->vinfo);
  stmt_vector_for_cost prologue_cost_vec, body_cost_vec,
		       epilogue_cost_vec;

  prologue_cost_vec.create (2);
  body_cost_vec.create (2);
  epilogue_cost_vec.create (2);

  /* Cost of all data accesses assuming we peel elem->npeel iterations
     to align elem->dr_info.  */
  vect_get_peeling_costs_all_drs (loop_vinfo, elem->dr_info, &inside_cost,
				  &outside_cost, &body_cost_vec,
				  &prologue_cost_vec, elem->npeel);

  /* Only the accumulated inside_cost is needed; discard the per-stmt
     cost records.  */
  body_cost_vec.release ();

  /* Add the outside cost of the peel loop itself.  */
  outside_cost += vect_get_known_peeling_cost
    (loop_vinfo, elem->npeel, &dummy,
     &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
     &prologue_cost_vec, &epilogue_cost_vec);

  /* Prologue and epilogue costs are added to the target model later.
     These costs depend only on the scalar iteration cost, the
     number of peeling iterations finally chosen, and the number of
     misaligned statements.  So discard the information found here.  */
  prologue_cost_vec.release ();
  epilogue_cost_vec.release ();

  if (inside_cost < min->inside_cost
      || (inside_cost == min->inside_cost
	  && outside_cost < min->outside_cost))
    {
      /* This peeling option is the new minimum.  */
      min->inside_cost = inside_cost;
      min->outside_cost = outside_cost;
      min->peel_info.dr_info = elem->dr_info;
      min->peel_info.npeel = elem->npeel;
      min->peel_info.count = elem->count;
    }

  return 1;
}
   1636 
   1637 
   1638 /* Choose best peeling option by traversing peeling hash table and either
   1639    choosing an option with the lowest cost (if cost model is enabled) or the
   1640    option that aligns as many accesses as possible.  */
   1641 
   1642 static struct _vect_peel_extended_info
   1643 vect_peeling_hash_choose_best_peeling (hash_table<peel_info_hasher> *peeling_htab,
   1644 				       loop_vec_info loop_vinfo)
   1645 {
   1646    struct _vect_peel_extended_info res;
   1647 
   1648    res.peel_info.dr_info = NULL;
   1649    res.vinfo = loop_vinfo;
   1650 
   1651    if (!unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
   1652      {
   1653        res.inside_cost = INT_MAX;
   1654        res.outside_cost = INT_MAX;
   1655        peeling_htab->traverse <_vect_peel_extended_info *,
   1656 	   		       vect_peeling_hash_get_lowest_cost> (&res);
   1657      }
   1658    else
   1659      {
   1660        res.peel_info.count = 0;
   1661        peeling_htab->traverse <_vect_peel_extended_info *,
   1662 	   		       vect_peeling_hash_get_most_frequent> (&res);
   1663        res.inside_cost = 0;
   1664        res.outside_cost = 0;
   1665      }
   1666 
   1667    return res;
   1668 }
   1669 
/* Return true if peeling NPEEL iterations to align DR0_INFO leaves every
   other data reference in LOOP_VINFO with a supported alignment scheme,
   i.e. the loop remains vectorizable after the peel.  */

static bool
vect_peeling_supportable (loop_vec_info loop_vinfo, dr_vec_info *dr0_info,
			  unsigned npeel)
{
  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;

  /* Whether DR0_INFO's misalignment is known at compile time; needed
     below to derive the other refs' misalignment after the peel.  */
  bool dr0_alignment_known_p
    = known_alignment_for_access_p (dr0_info,
				    STMT_VINFO_VECTYPE (dr0_info->stmt));

  /* Ensure that all data refs can be vectorized after the peel.  */
  for (data_reference *dr : datarefs)
    {
      /* The peeled-for ref itself becomes aligned by construction.  */
      if (dr == dr0_info->dr)
	continue;

      dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
      /* Refs whose alignment is irrelevant, or which are known to become
	 aligned together with DR0_INFO, need no check.  */
      if (!vect_relevant_for_alignment_p (dr_info)
	  || vect_dr_aligned_if_peeled_dr_is (dr_info, dr0_info))
	continue;

      tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
      int misalignment;
      unsigned HOST_WIDE_INT alignment;
      if (!dr0_alignment_known_p
	  || !known_alignment_for_access_p (dr_info, vectype)
	  || !DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment))
	/* The post-peel misalignment cannot be computed; let the target
	   decide assuming unknown misalignment.  */
	misalignment = DR_MISALIGNMENT_UNKNOWN;
      else
	{
	  /* Advance the known misalignment by NPEEL scalar steps,
	     modulo the target alignment (assumed a power of two).  */
	  misalignment = dr_misalignment (dr_info, vectype);
	  misalignment += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
	  misalignment &= alignment - 1;
	}
      supportable_dr_alignment
	= vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
					 misalignment);
      if (supportable_dr_alignment == dr_unaligned_unsupported)
	return false;
    }

  return true;
}
   1716 
   1717 /* Compare two data-references DRA and DRB to group them into chunks
   1718    with related alignment.  */
   1719 
   1720 static int
   1721 dr_align_group_sort_cmp (const void *dra_, const void *drb_)
   1722 {
   1723   data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_);
   1724   data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_);
   1725   int cmp;
   1726 
   1727   /* Stabilize sort.  */
   1728   if (dra == drb)
   1729     return 0;
   1730 
   1731   /* Ordering of DRs according to base.  */
   1732   cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra),
   1733 			       DR_BASE_ADDRESS (drb));
   1734   if (cmp != 0)
   1735     return cmp;
   1736 
   1737   /* And according to DR_OFFSET.  */
   1738   cmp = data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
   1739   if (cmp != 0)
   1740     return cmp;
   1741 
   1742   /* And after step.  */
   1743   cmp = data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb));
   1744   if (cmp != 0)
   1745     return cmp;
   1746 
   1747   /* Then sort after DR_INIT.  In case of identical DRs sort after stmt UID.  */
   1748   cmp = data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb));
   1749   if (cmp == 0)
   1750     return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
   1751   return cmp;
   1752 }
   1753 
   1754 /* Function vect_enhance_data_refs_alignment
   1755 
   1756    This pass will use loop versioning and loop peeling in order to enhance
   1757    the alignment of data references in the loop.
   1758 
   1759    FOR NOW: we assume that whatever versioning/peeling takes place, only the
   1760    original loop is to be vectorized.  Any other loops that are created by
   1761    the transformations performed in this pass - are not supposed to be
   1762    vectorized.  This restriction will be relaxed.
   1763 
   1764    This pass will require a cost model to guide it whether to apply peeling
   1765    or versioning or a combination of the two.  For example, the scheme that
   1766    intel uses when given a loop with several memory accesses, is as follows:
   1767    choose one memory access ('p') which alignment you want to force by doing
   1768    peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
   1769    other accesses are not necessarily aligned, or (2) use loop versioning to
   1770    generate one loop in which all accesses are aligned, and another loop in
   1771    which only 'p' is necessarily aligned.
   1772 
   1773    ("Automatic Intra-Register Vectorization for the Intel Architecture",
   1774    Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
   1775    Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
   1776 
   1777    Devising a cost model is the most critical aspect of this work.  It will
   1778    guide us on which access to peel for, whether to use loop versioning, how
   1779    many versions to create, etc.  The cost model will probably consist of
   1780    generic considerations as well as target specific considerations (on
   1781    powerpc for example, misaligned stores are more painful than misaligned
   1782    loads).
   1783 
   1784    Here are the general steps involved in alignment enhancements:
   1785 
   1786      -- original loop, before alignment analysis:
   1787 	for (i=0; i<N; i++){
   1788 	  x = q[i];			# DR_MISALIGNMENT(q) = unknown
   1789 	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
   1790 	}
   1791 
   1792      -- After vect_compute_data_refs_alignment:
   1793 	for (i=0; i<N; i++){
   1794 	  x = q[i];			# DR_MISALIGNMENT(q) = 3
   1795 	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
   1796 	}
   1797 
   1798      -- Possibility 1: we do loop versioning:
   1799      if (p is aligned) {
   1800 	for (i=0; i<N; i++){	# loop 1A
   1801 	  x = q[i];			# DR_MISALIGNMENT(q) = 3
   1802 	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
   1803 	}
   1804      }
   1805      else {
   1806 	for (i=0; i<N; i++){	# loop 1B
   1807 	  x = q[i];			# DR_MISALIGNMENT(q) = 3
   1808 	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
   1809 	}
   1810      }
   1811 
   1812      -- Possibility 2: we do loop peeling:
   1813      for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
   1814 	x = q[i];
   1815 	p[i] = y;
   1816      }
   1817      for (i = 3; i < N; i++){	# loop 2A
   1818 	x = q[i];			# DR_MISALIGNMENT(q) = 0
   1819 	p[i] = y;			# DR_MISALIGNMENT(p) = unknown
   1820      }
   1821 
   1822      -- Possibility 3: combination of loop peeling and versioning:
   1823      for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
   1824 	x = q[i];
   1825 	p[i] = y;
   1826      }
   1827      if (p is aligned) {
   1828 	for (i = 3; i<N; i++){	# loop 3A
   1829 	  x = q[i];			# DR_MISALIGNMENT(q) = 0
   1830 	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
   1831 	}
   1832      }
   1833      else {
   1834 	for (i = 3; i<N; i++){	# loop 3B
   1835 	  x = q[i];			# DR_MISALIGNMENT(q) = 0
   1836 	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
   1837 	}
   1838      }
   1839 
   1840      These loops are later passed to loop_transform to be vectorized.  The
   1841      vectorizer will use the alignment information to guide the transformation
   1842      (whether to generate regular loads/stores, or with special handling for
   1843      misalignment).  */
   1844 
   1845 opt_result
   1846 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
   1847 {
   1848   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   1849   dr_vec_info *first_store = NULL;
   1850   dr_vec_info *dr0_info = NULL;
   1851   struct data_reference *dr;
   1852   unsigned int i;
   1853   bool do_peeling = false;
   1854   bool do_versioning = false;
   1855   unsigned int npeel = 0;
   1856   bool one_misalignment_known = false;
   1857   bool one_misalignment_unknown = false;
   1858   bool one_dr_unsupportable = false;
   1859   dr_vec_info *unsupportable_dr_info = NULL;
   1860   unsigned int dr0_same_align_drs = 0, first_store_same_align_drs = 0;
   1861   hash_table<peel_info_hasher> peeling_htab (1);
   1862 
   1863   DUMP_VECT_SCOPE ("vect_enhance_data_refs_alignment");
   1864 
   1865   /* Reset data so we can safely be called multiple times.  */
   1866   LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
   1867   LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = 0;
   1868 
   1869   if (LOOP_VINFO_DATAREFS (loop_vinfo).is_empty ())
   1870     return opt_result::success ();
   1871 
   1872   /* Sort the vector of datarefs so DRs that have the same or dependent
   1873      alignment are next to each other.  */
   1874   auto_vec<data_reference_p> datarefs
   1875     = LOOP_VINFO_DATAREFS (loop_vinfo).copy ();
   1876   datarefs.qsort (dr_align_group_sort_cmp);
   1877 
   1878   /* Compute the number of DRs that become aligned when we peel
   1879      a dataref so it becomes aligned.  */
   1880   auto_vec<unsigned> n_same_align_refs (datarefs.length ());
   1881   n_same_align_refs.quick_grow_cleared (datarefs.length ());
   1882   unsigned i0;
   1883   for (i0 = 0; i0 < datarefs.length (); ++i0)
   1884     if (DR_BASE_ADDRESS (datarefs[i0]))
   1885       break;
   1886   for (i = i0 + 1; i <= datarefs.length (); ++i)
   1887     {
   1888       if (i == datarefs.length ()
   1889 	  || !operand_equal_p (DR_BASE_ADDRESS (datarefs[i0]),
   1890 			       DR_BASE_ADDRESS (datarefs[i]), 0)
   1891 	  || !operand_equal_p (DR_OFFSET (datarefs[i0]),
   1892 			       DR_OFFSET (datarefs[i]), 0)
   1893 	  || !operand_equal_p (DR_STEP (datarefs[i0]),
   1894 			       DR_STEP (datarefs[i]), 0))
   1895 	{
   1896 	  /* The subgroup [i0, i-1] now only differs in DR_INIT and
   1897 	     possibly DR_TARGET_ALIGNMENT.  Still the whole subgroup
   1898 	     will get known misalignment if we align one of the refs
   1899 	     with the largest DR_TARGET_ALIGNMENT.  */
   1900 	  for (unsigned j = i0; j < i; ++j)
   1901 	    {
   1902 	      dr_vec_info *dr_infoj = loop_vinfo->lookup_dr (datarefs[j]);
   1903 	      for (unsigned k = i0; k < i; ++k)
   1904 		{
   1905 		  if (k == j)
   1906 		    continue;
   1907 		  dr_vec_info *dr_infok = loop_vinfo->lookup_dr (datarefs[k]);
   1908 		  if (vect_dr_aligned_if_related_peeled_dr_is (dr_infok,
   1909 							       dr_infoj))
   1910 		    n_same_align_refs[j]++;
   1911 		}
   1912 	    }
   1913 	  i0 = i;
   1914 	}
   1915     }
   1916 
   1917   /* While cost model enhancements are expected in the future, the high level
   1918      view of the code at this time is as follows:
   1919 
   1920      A) If there is a misaligned access then see if peeling to align
   1921         this access can make all data references satisfy
   1922         vect_supportable_dr_alignment.  If so, update data structures
   1923         as needed and return true.
   1924 
   1925      B) If peeling wasn't possible and there is a data reference with an
   1926         unknown misalignment that does not satisfy vect_supportable_dr_alignment
   1927         then see if loop versioning checks can be used to make all data
   1928         references satisfy vect_supportable_dr_alignment.  If so, update
   1929         data structures as needed and return true.
   1930 
   1931      C) If neither peeling nor versioning were successful then return false if
   1932         any data reference does not satisfy vect_supportable_dr_alignment.
   1933 
   1934      D) Return true (all data references satisfy vect_supportable_dr_alignment).
   1935 
   1936      Note, Possibility 3 above (which is peeling and versioning together) is not
   1937      being done at this time.  */
   1938 
   1939   /* (1) Peeling to force alignment.  */
   1940 
   1941   /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
   1942      Considerations:
   1943      + How many accesses will become aligned due to the peeling
   1944      - How many accesses will become unaligned due to the peeling,
   1945        and the cost of misaligned accesses.
   1946      - The cost of peeling (the extra runtime checks, the increase
   1947        in code size).  */
   1948 
   1949   FOR_EACH_VEC_ELT (datarefs, i, dr)
   1950     {
   1951       dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
   1952       if (!vect_relevant_for_alignment_p (dr_info))
   1953 	continue;
   1954 
   1955       stmt_vec_info stmt_info = dr_info->stmt;
   1956       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   1957       do_peeling = vector_alignment_reachable_p (dr_info);
   1958       if (do_peeling)
   1959         {
   1960 	  if (known_alignment_for_access_p (dr_info, vectype))
   1961             {
   1962 	      unsigned int npeel_tmp = 0;
   1963 	      bool negative = tree_int_cst_compare (DR_STEP (dr),
   1964 						    size_zero_node) < 0;
   1965 
   1966 	      /* If known_alignment_for_access_p then we have set
   1967 	         DR_MISALIGNMENT which is only done if we know it at compiler
   1968 	         time, so it is safe to assume target alignment is constant.
   1969 	       */
   1970 	      unsigned int target_align =
   1971 		DR_TARGET_ALIGNMENT (dr_info).to_constant ();
   1972 	      unsigned HOST_WIDE_INT dr_size = vect_get_scalar_dr_size (dr_info);
   1973 	      poly_int64 off = 0;
   1974 	      if (negative)
   1975 		off = (TYPE_VECTOR_SUBPARTS (vectype) - 1) * -dr_size;
   1976 	      unsigned int mis = dr_misalignment (dr_info, vectype, off);
   1977 	      mis = negative ? mis : -mis;
   1978 	      if (mis != 0)
   1979 		npeel_tmp = (mis & (target_align - 1)) / dr_size;
   1980 
   1981               /* For multiple types, it is possible that the bigger type access
   1982                  will have more than one peeling option.  E.g., a loop with two
   1983                  types: one of size (vector size / 4), and the other one of
   1984                  size (vector size / 8).  Vectorization factor will 8.  If both
   1985                  accesses are misaligned by 3, the first one needs one scalar
   1986                  iteration to be aligned, and the second one needs 5.  But the
   1987 		 first one will be aligned also by peeling 5 scalar
   1988                  iterations, and in that case both accesses will be aligned.
   1989                  Hence, except for the immediate peeling amount, we also want
   1990                  to try to add full vector size, while we don't exceed
   1991                  vectorization factor.
   1992                  We do this automatically for cost model, since we calculate
   1993 		 cost for every peeling option.  */
   1994 	      poly_uint64 nscalars = npeel_tmp;
   1995               if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
   1996 		{
   1997 		  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
   1998 		  nscalars = (STMT_SLP_TYPE (stmt_info)
   1999 			      ? vf * DR_GROUP_SIZE (stmt_info) : vf);
   2000 		}
   2001 
   2002 	      /* Save info about DR in the hash table.  Also include peeling
   2003 		 amounts according to the explanation above.  Indicate
   2004 		 the alignment status when the ref is not aligned.
   2005 		 ???  Rather than using unknown alignment here we should
   2006 		 prune all entries from the peeling hashtable which cause
   2007 		 DRs to be not supported.  */
   2008 	      bool supportable_if_not_aligned
   2009 		= vect_supportable_dr_alignment
   2010 		    (loop_vinfo, dr_info, vectype, DR_MISALIGNMENT_UNKNOWN);
   2011 	      while (known_le (npeel_tmp, nscalars))
   2012                 {
   2013                   vect_peeling_hash_insert (&peeling_htab, loop_vinfo,
   2014 					    dr_info, npeel_tmp,
   2015 					    supportable_if_not_aligned);
   2016 		  npeel_tmp += MAX (1, target_align / dr_size);
   2017                 }
   2018 
   2019 	      one_misalignment_known = true;
   2020             }
   2021           else
   2022             {
   2023               /* If we don't know any misalignment values, we prefer
   2024                  peeling for data-ref that has the maximum number of data-refs
   2025                  with the same alignment, unless the target prefers to align
   2026                  stores over load.  */
   2027 	      unsigned same_align_drs = n_same_align_refs[i];
   2028 	      if (!dr0_info
   2029 		  || dr0_same_align_drs < same_align_drs)
   2030 		{
   2031 		  dr0_same_align_drs = same_align_drs;
   2032 		  dr0_info = dr_info;
   2033 		}
   2034 	      /* For data-refs with the same number of related
   2035 		 accesses prefer the one where the misalign
   2036 		 computation will be invariant in the outermost loop.  */
   2037 	      else if (dr0_same_align_drs == same_align_drs)
   2038 		{
   2039 		  class loop *ivloop0, *ivloop;
   2040 		  ivloop0 = outermost_invariant_loop_for_expr
   2041 		    (loop, DR_BASE_ADDRESS (dr0_info->dr));
   2042 		  ivloop = outermost_invariant_loop_for_expr
   2043 		    (loop, DR_BASE_ADDRESS (dr));
   2044 		  if ((ivloop && !ivloop0)
   2045 		      || (ivloop && ivloop0
   2046 			  && flow_loop_nested_p (ivloop, ivloop0)))
   2047 		    dr0_info = dr_info;
   2048 		}
   2049 
   2050 	      one_misalignment_unknown = true;
   2051 
   2052 	      /* Check for data refs with unsupportable alignment that
   2053 	         can be peeled.  */
   2054 	      enum dr_alignment_support supportable_dr_alignment
   2055 		= vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
   2056 						 DR_MISALIGNMENT_UNKNOWN);
   2057 	      if (supportable_dr_alignment == dr_unaligned_unsupported)
   2058 		{
   2059 		  one_dr_unsupportable = true;
   2060 		  unsupportable_dr_info = dr_info;
   2061 		}
   2062 
   2063 	      if (!first_store && DR_IS_WRITE (dr))
   2064 		{
   2065 		  first_store = dr_info;
   2066 		  first_store_same_align_drs = same_align_drs;
   2067 		}
   2068             }
   2069         }
   2070       else
   2071         {
   2072 	  if (!aligned_access_p (dr_info, vectype))
   2073             {
   2074               if (dump_enabled_p ())
   2075                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   2076                                  "vector alignment may not be reachable\n");
   2077               break;
   2078             }
   2079         }
   2080     }
   2081 
   2082   /* Check if we can possibly peel the loop.  */
   2083   if (!vect_can_advance_ivs_p (loop_vinfo)
   2084       || !slpeel_can_duplicate_loop_p (loop, single_exit (loop))
   2085       || loop->inner)
   2086     do_peeling = false;
   2087 
   2088   struct _vect_peel_extended_info peel_for_known_alignment;
   2089   struct _vect_peel_extended_info peel_for_unknown_alignment;
   2090   struct _vect_peel_extended_info best_peel;
   2091 
   2092   peel_for_unknown_alignment.inside_cost = INT_MAX;
   2093   peel_for_unknown_alignment.outside_cost = INT_MAX;
   2094   peel_for_unknown_alignment.peel_info.count = 0;
   2095 
   2096   if (do_peeling
   2097       && one_misalignment_unknown)
   2098     {
   2099       /* Check if the target requires to prefer stores over loads, i.e., if
   2100          misaligned stores are more expensive than misaligned loads (taking
   2101          drs with same alignment into account).  */
   2102       unsigned int load_inside_cost = 0;
   2103       unsigned int load_outside_cost = 0;
   2104       unsigned int store_inside_cost = 0;
   2105       unsigned int store_outside_cost = 0;
   2106       unsigned int estimated_npeels = vect_vf_for_cost (loop_vinfo) / 2;
   2107 
   2108       stmt_vector_for_cost dummy;
   2109       dummy.create (2);
   2110       vect_get_peeling_costs_all_drs (loop_vinfo, dr0_info,
   2111 				      &load_inside_cost,
   2112 				      &load_outside_cost,
   2113 				      &dummy, &dummy, estimated_npeels);
   2114       dummy.release ();
   2115 
   2116       if (first_store)
   2117 	{
   2118 	  dummy.create (2);
   2119 	  vect_get_peeling_costs_all_drs (loop_vinfo, first_store,
   2120 					  &store_inside_cost,
   2121 					  &store_outside_cost,
   2122 					  &dummy, &dummy,
   2123 					  estimated_npeels);
   2124 	  dummy.release ();
   2125 	}
   2126       else
   2127 	{
   2128 	  store_inside_cost = INT_MAX;
   2129 	  store_outside_cost = INT_MAX;
   2130 	}
   2131 
   2132       if (load_inside_cost > store_inside_cost
   2133 	  || (load_inside_cost == store_inside_cost
   2134 	      && load_outside_cost > store_outside_cost))
   2135 	{
   2136 	  dr0_info = first_store;
   2137 	  dr0_same_align_drs = first_store_same_align_drs;
   2138 	  peel_for_unknown_alignment.inside_cost = store_inside_cost;
   2139 	  peel_for_unknown_alignment.outside_cost = store_outside_cost;
   2140 	}
   2141       else
   2142 	{
   2143 	  peel_for_unknown_alignment.inside_cost = load_inside_cost;
   2144 	  peel_for_unknown_alignment.outside_cost = load_outside_cost;
   2145 	}
   2146 
   2147       stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
   2148       prologue_cost_vec.create (2);
   2149       epilogue_cost_vec.create (2);
   2150 
   2151       int dummy2;
   2152       peel_for_unknown_alignment.outside_cost += vect_get_known_peeling_cost
   2153 	(loop_vinfo, estimated_npeels, &dummy2,
   2154 	 &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
   2155 	 &prologue_cost_vec, &epilogue_cost_vec);
   2156 
   2157       prologue_cost_vec.release ();
   2158       epilogue_cost_vec.release ();
   2159 
   2160       peel_for_unknown_alignment.peel_info.count = dr0_same_align_drs + 1;
   2161     }
   2162 
   2163   peel_for_unknown_alignment.peel_info.npeel = 0;
   2164   peel_for_unknown_alignment.peel_info.dr_info = dr0_info;
   2165 
   2166   best_peel = peel_for_unknown_alignment;
   2167 
   2168   peel_for_known_alignment.inside_cost = INT_MAX;
   2169   peel_for_known_alignment.outside_cost = INT_MAX;
   2170   peel_for_known_alignment.peel_info.count = 0;
   2171   peel_for_known_alignment.peel_info.dr_info = NULL;
   2172 
   2173   if (do_peeling && one_misalignment_known)
   2174     {
   2175       /* Peeling is possible, but there is no data access that is not supported
   2176          unless aligned.  So we try to choose the best possible peeling from
   2177 	 the hash table.  */
   2178       peel_for_known_alignment = vect_peeling_hash_choose_best_peeling
   2179 	(&peeling_htab, loop_vinfo);
   2180     }
   2181 
   2182   /* Compare costs of peeling for known and unknown alignment. */
   2183   if (peel_for_known_alignment.peel_info.dr_info != NULL
   2184       && peel_for_unknown_alignment.inside_cost
   2185       >= peel_for_known_alignment.inside_cost)
   2186     {
   2187       best_peel = peel_for_known_alignment;
   2188 
   2189       /* If the best peeling for known alignment has NPEEL == 0, perform no
   2190          peeling at all except if there is an unsupportable dr that we can
   2191          align.  */
   2192       if (best_peel.peel_info.npeel == 0 && !one_dr_unsupportable)
   2193 	do_peeling = false;
   2194     }
   2195 
   2196   /* If there is an unsupportable data ref, prefer this over all choices so far
   2197      since we'd have to discard a chosen peeling except when it accidentally
   2198      aligned the unsupportable data ref.  */
   2199   if (one_dr_unsupportable)
   2200     dr0_info = unsupportable_dr_info;
   2201   else if (do_peeling)
   2202     {
   2203       /* Calculate the penalty for no peeling, i.e. leaving everything as-is.
   2204 	 TODO: Use nopeel_outside_cost or get rid of it?  */
   2205       unsigned nopeel_inside_cost = 0;
   2206       unsigned nopeel_outside_cost = 0;
   2207 
   2208       stmt_vector_for_cost dummy;
   2209       dummy.create (2);
   2210       vect_get_peeling_costs_all_drs (loop_vinfo, NULL, &nopeel_inside_cost,
   2211 				      &nopeel_outside_cost, &dummy, &dummy, 0);
   2212       dummy.release ();
   2213 
   2214       /* Add epilogue costs.  As we do not peel for alignment here, no prologue
   2215 	 costs will be recorded.  */
   2216       stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
   2217       prologue_cost_vec.create (2);
   2218       epilogue_cost_vec.create (2);
   2219 
   2220       int dummy2;
   2221       nopeel_outside_cost += vect_get_known_peeling_cost
   2222 	(loop_vinfo, 0, &dummy2,
   2223 	 &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
   2224 	 &prologue_cost_vec, &epilogue_cost_vec);
   2225 
   2226       prologue_cost_vec.release ();
   2227       epilogue_cost_vec.release ();
   2228 
   2229       npeel = best_peel.peel_info.npeel;
   2230       dr0_info = best_peel.peel_info.dr_info;
   2231 
   2232       /* If no peeling is not more expensive than the best peeling we
   2233 	 have so far, don't perform any peeling.  */
   2234       if (nopeel_inside_cost <= best_peel.inside_cost)
   2235 	do_peeling = false;
   2236     }
   2237 
   2238   if (do_peeling)
   2239     {
   2240       stmt_vec_info stmt_info = dr0_info->stmt;
   2241       if (known_alignment_for_access_p (dr0_info,
   2242 					STMT_VINFO_VECTYPE (stmt_info)))
   2243         {
   2244 	  bool negative = tree_int_cst_compare (DR_STEP (dr0_info->dr),
   2245 						size_zero_node) < 0;
   2246           if (!npeel)
   2247             {
   2248               /* Since it's known at compile time, compute the number of
   2249                  iterations in the peeled loop (the peeling factor) for use in
   2250                  updating DR_MISALIGNMENT values.  The peeling factor is the
   2251                  vectorization factor minus the misalignment as an element
   2252                  count.  */
   2253 	      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   2254 	      poly_int64 off = 0;
   2255 	      if (negative)
   2256 		off = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
   2257 		       * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
   2258 	      unsigned int mis
   2259 		= dr_misalignment (dr0_info, vectype, off);
   2260 	      mis = negative ? mis : -mis;
   2261 	      /* If known_alignment_for_access_p then we have set
   2262 	         DR_MISALIGNMENT which is only done if we know it at compiler
   2263 	         time, so it is safe to assume target alignment is constant.
   2264 	       */
   2265 	      unsigned int target_align =
   2266 		DR_TARGET_ALIGNMENT (dr0_info).to_constant ();
   2267 	      npeel = ((mis & (target_align - 1))
   2268 		       / vect_get_scalar_dr_size (dr0_info));
   2269             }
   2270 
   2271 	  /* For interleaved data access every iteration accesses all the
   2272 	     members of the group, therefore we divide the number of iterations
   2273 	     by the group size.  */
   2274 	  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
   2275 	    npeel /= DR_GROUP_SIZE (stmt_info);
   2276 
   2277           if (dump_enabled_p ())
   2278             dump_printf_loc (MSG_NOTE, vect_location,
   2279                              "Try peeling by %d\n", npeel);
   2280         }
   2281 
   2282       /* Ensure that all datarefs can be vectorized after the peel.  */
   2283       if (!vect_peeling_supportable (loop_vinfo, dr0_info, npeel))
   2284 	do_peeling = false;
   2285 
   2286       /* Check if all datarefs are supportable and log.  */
   2287       if (do_peeling
   2288 	  && npeel == 0
   2289 	  && known_alignment_for_access_p (dr0_info,
   2290 					   STMT_VINFO_VECTYPE (stmt_info)))
   2291 	return opt_result::success ();
   2292 
   2293       /* Cost model #1 - honor --param vect-max-peeling-for-alignment.  */
   2294       if (do_peeling)
   2295         {
   2296           unsigned max_allowed_peel
   2297 	    = param_vect_max_peeling_for_alignment;
   2298 	  if (loop_cost_model (loop) <= VECT_COST_MODEL_CHEAP)
   2299 	    max_allowed_peel = 0;
   2300           if (max_allowed_peel != (unsigned)-1)
   2301             {
   2302               unsigned max_peel = npeel;
   2303               if (max_peel == 0)
   2304                 {
   2305 		  poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr0_info);
   2306 		  unsigned HOST_WIDE_INT target_align_c;
   2307 		  if (target_align.is_constant (&target_align_c))
   2308 		    max_peel =
   2309 		      target_align_c / vect_get_scalar_dr_size (dr0_info) - 1;
   2310 		  else
   2311 		    {
   2312 		      do_peeling = false;
   2313 		      if (dump_enabled_p ())
   2314 			dump_printf_loc (MSG_NOTE, vect_location,
   2315 			  "Disable peeling, max peels set and vector"
   2316 			  " alignment unknown\n");
   2317 		    }
   2318                 }
   2319               if (max_peel > max_allowed_peel)
   2320                 {
   2321                   do_peeling = false;
   2322                   if (dump_enabled_p ())
   2323                     dump_printf_loc (MSG_NOTE, vect_location,
   2324                         "Disable peeling, max peels reached: %d\n", max_peel);
   2325                 }
   2326             }
   2327         }
   2328 
   2329       /* Cost model #2 - if peeling may result in a remaining loop not
   2330 	 iterating enough to be vectorized then do not peel.  Since this
   2331 	 is a cost heuristic rather than a correctness decision, use the
   2332 	 most likely runtime value for variable vectorization factors.  */
   2333       if (do_peeling
   2334 	  && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
   2335 	{
   2336 	  unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
   2337 	  unsigned int max_peel = npeel == 0 ? assumed_vf - 1 : npeel;
   2338 	  if ((unsigned HOST_WIDE_INT) LOOP_VINFO_INT_NITERS (loop_vinfo)
   2339 	      < assumed_vf + max_peel)
   2340 	    do_peeling = false;
   2341 	}
   2342 
   2343       if (do_peeling)
   2344         {
   2345           /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
   2346              If the misalignment of DR_i is identical to that of dr0 then set
   2347              DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
   2348              dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
   2349              by the peeling factor times the element size of DR_i (MOD the
   2350              vectorization factor times the size).  Otherwise, the
   2351              misalignment of DR_i must be set to unknown.  */
   2352 	  FOR_EACH_VEC_ELT (datarefs, i, dr)
   2353 	    if (dr != dr0_info->dr)
   2354 	      {
   2355 		dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
   2356 		if (!vect_relevant_for_alignment_p (dr_info))
   2357 		  continue;
   2358 
   2359 		vect_update_misalignment_for_peel (dr_info, dr0_info, npeel);
   2360 	      }
   2361 
   2362           LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0_info;
   2363           if (npeel)
   2364             LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
   2365           else
   2366 	    LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = -1;
   2367 	  SET_DR_MISALIGNMENT (dr0_info,
   2368 			       vect_dr_misalign_for_aligned_access (dr0_info));
   2369 	  if (dump_enabled_p ())
   2370             {
   2371               dump_printf_loc (MSG_NOTE, vect_location,
   2372                                "Alignment of access forced using peeling.\n");
   2373               dump_printf_loc (MSG_NOTE, vect_location,
   2374                                "Peeling for alignment will be applied.\n");
   2375             }
   2376 
   2377 	  /* The inside-loop cost will be accounted for in vectorizable_load
   2378 	     and vectorizable_store correctly with adjusted alignments.
   2379 	     Drop the body_cst_vec on the floor here.  */
   2380 	  return opt_result::success ();
   2381         }
   2382     }
   2383 
   2384   /* (2) Versioning to force alignment.  */
   2385 
   2386   /* Try versioning if:
   2387      1) optimize loop for speed and the cost-model is not cheap
   2388      2) there is at least one unsupported misaligned data ref with an unknown
   2389         misalignment, and
   2390      3) all misaligned data refs with a known misalignment are supported, and
   2391      4) the number of runtime alignment checks is within reason.  */
   2392 
   2393   do_versioning
   2394     = (optimize_loop_nest_for_speed_p (loop)
   2395        && !loop->inner /* FORNOW */
   2396        && loop_cost_model (loop) > VECT_COST_MODEL_CHEAP);
   2397 
   2398   if (do_versioning)
   2399     {
   2400       FOR_EACH_VEC_ELT (datarefs, i, dr)
   2401         {
   2402 	  dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
   2403 	  if (!vect_relevant_for_alignment_p (dr_info))
   2404 	    continue;
   2405 
   2406 	  stmt_vec_info stmt_info = dr_info->stmt;
   2407 	  if (STMT_VINFO_STRIDED_P (stmt_info))
   2408 	    {
   2409 	      do_versioning = false;
   2410 	      break;
   2411 	    }
   2412 
   2413 	  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   2414 	  bool negative = tree_int_cst_compare (DR_STEP (dr),
   2415 						size_zero_node) < 0;
   2416 	  poly_int64 off = 0;
   2417 	  if (negative)
   2418 	    off = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
   2419 		   * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
   2420 	  int misalignment;
   2421 	  if ((misalignment = dr_misalignment (dr_info, vectype, off)) == 0)
   2422 	    continue;
   2423 
   2424 	  enum dr_alignment_support supportable_dr_alignment
   2425 	    = vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
   2426 					     misalignment);
   2427 	  if (supportable_dr_alignment == dr_unaligned_unsupported)
   2428             {
   2429 	      if (misalignment != DR_MISALIGNMENT_UNKNOWN
   2430 		  || (LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ()
   2431 		      >= (unsigned) param_vect_max_version_for_alignment_checks))
   2432                 {
   2433                   do_versioning = false;
   2434                   break;
   2435                 }
   2436 
   2437 	      /* At present we don't support versioning for alignment
   2438 		 with variable VF, since there's no guarantee that the
   2439 		 VF is a power of two.  We could relax this if we added
   2440 		 a way of enforcing a power-of-two size.  */
   2441 	      unsigned HOST_WIDE_INT size;
   2442 	      if (!GET_MODE_SIZE (TYPE_MODE (vectype)).is_constant (&size))
   2443 		{
   2444 		  do_versioning = false;
   2445 		  break;
   2446 		}
   2447 
   2448 	      /* Forcing alignment in the first iteration is no good if
   2449 		 we don't keep it across iterations.  For now, just disable
   2450 		 versioning in this case.
   2451 		 ?? We could actually unroll the loop to achieve the required
   2452 		 overall step alignment, and forcing the alignment could be
   2453 		 done by doing some iterations of the non-vectorized loop.  */
   2454 	      if (!multiple_p (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
   2455 			       * DR_STEP_ALIGNMENT (dr),
   2456 			       DR_TARGET_ALIGNMENT (dr_info)))
   2457 		{
   2458 		  do_versioning = false;
   2459 		  break;
   2460 		}
   2461 
   2462               /* The rightmost bits of an aligned address must be zeros.
   2463                  Construct the mask needed for this test.  For example,
   2464                  GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
   2465                  mask must be 15 = 0xf. */
   2466 	      int mask = size - 1;
   2467 
   2468 	      /* FORNOW: use the same mask to test all potentially unaligned
   2469 		 references in the loop.  */
   2470 	      if (LOOP_VINFO_PTR_MASK (loop_vinfo)
   2471 		  && LOOP_VINFO_PTR_MASK (loop_vinfo) != mask)
   2472 		{
   2473 		  do_versioning = false;
   2474 		  break;
   2475 		}
   2476 
   2477               LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
   2478 	      LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (stmt_info);
   2479             }
   2480         }
   2481 
   2482       /* Versioning requires at least one misaligned data reference.  */
   2483       if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
   2484         do_versioning = false;
   2485       else if (!do_versioning)
   2486         LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
   2487     }
   2488 
   2489   if (do_versioning)
   2490     {
   2491       const vec<stmt_vec_info> &may_misalign_stmts
   2492 	= LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
   2493       stmt_vec_info stmt_info;
   2494 
   2495       /* It can now be assumed that the data references in the statements
   2496          in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
   2497          of the loop being vectorized.  */
   2498       FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt_info)
   2499         {
   2500 	  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
   2501 	  SET_DR_MISALIGNMENT (dr_info,
   2502 			       vect_dr_misalign_for_aligned_access (dr_info));
   2503 	  if (dump_enabled_p ())
   2504             dump_printf_loc (MSG_NOTE, vect_location,
   2505                              "Alignment of access forced using versioning.\n");
   2506         }
   2507 
   2508       if (dump_enabled_p ())
   2509         dump_printf_loc (MSG_NOTE, vect_location,
   2510                          "Versioning for alignment will be applied.\n");
   2511 
   2512       /* Peeling and versioning can't be done together at this time.  */
   2513       gcc_assert (! (do_peeling && do_versioning));
   2514 
   2515       return opt_result::success ();
   2516     }
   2517 
   2518   /* This point is reached if neither peeling nor versioning is being done.  */
   2519   gcc_assert (! (do_peeling || do_versioning));
   2520 
   2521   return opt_result::success ();
   2522 }
   2523 
   2524 
   2525 /* Function vect_analyze_data_refs_alignment
   2526 
   2527    Analyze the alignment of the data-references in the loop.
   2528    Return FALSE if a data reference is found that cannot be vectorized.  */
   2529 
   2530 opt_result
   2531 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
   2532 {
   2533   DUMP_VECT_SCOPE ("vect_analyze_data_refs_alignment");
   2534 
   2535   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
   2536   struct data_reference *dr;
   2537   unsigned int i;
   2538 
   2539   vect_record_base_alignments (loop_vinfo);
   2540   FOR_EACH_VEC_ELT (datarefs, i, dr)
   2541     {
   2542       dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
   2543       if (STMT_VINFO_VECTORIZABLE (dr_info->stmt))
   2544 	{
   2545 	  if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt)
   2546 	      && DR_GROUP_FIRST_ELEMENT (dr_info->stmt) != dr_info->stmt)
   2547 	    continue;
   2548 	  vect_compute_data_ref_alignment (loop_vinfo, dr_info,
   2549 					   STMT_VINFO_VECTYPE (dr_info->stmt));
   2550 	}
   2551     }
   2552 
   2553   return opt_result::success ();
   2554 }
   2555 
   2556 
   2557 /* Analyze alignment of DRs of stmts in NODE.  */
   2558 
   2559 static bool
   2560 vect_slp_analyze_node_alignment (vec_info *vinfo, slp_tree node)
   2561 {
   2562   /* Alignment is maintained in the first element of the group.  */
   2563   stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
   2564   first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
   2565   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
   2566   tree vectype = SLP_TREE_VECTYPE (node);
   2567   poly_uint64 vector_alignment
   2568     = exact_div (targetm.vectorize.preferred_vector_alignment (vectype),
   2569 		 BITS_PER_UNIT);
   2570   if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
   2571     vect_compute_data_ref_alignment (vinfo, dr_info, SLP_TREE_VECTYPE (node));
   2572   /* Re-analyze alignment when we're facing a vectorization with a bigger
   2573      alignment requirement.  */
   2574   else if (known_lt (dr_info->target_alignment, vector_alignment))
   2575     {
   2576       poly_uint64 old_target_alignment = dr_info->target_alignment;
   2577       int old_misalignment = dr_info->misalignment;
   2578       vect_compute_data_ref_alignment (vinfo, dr_info, SLP_TREE_VECTYPE (node));
   2579       /* But keep knowledge about a smaller alignment.  */
   2580       if (old_misalignment != DR_MISALIGNMENT_UNKNOWN
   2581 	  && dr_info->misalignment == DR_MISALIGNMENT_UNKNOWN)
   2582 	{
   2583 	  dr_info->target_alignment = old_target_alignment;
   2584 	  dr_info->misalignment = old_misalignment;
   2585 	}
   2586     }
   2587   /* When we ever face unordered target alignments the first one wins in terms
   2588      of analyzing and the other will become unknown in dr_misalignment.  */
   2589   return true;
   2590 }
   2591 
   2592 /* Function vect_slp_analyze_instance_alignment
   2593 
   2594    Analyze the alignment of the data-references in the SLP instance.
   2595    Return FALSE if a data reference is found that cannot be vectorized.  */
   2596 
   2597 bool
   2598 vect_slp_analyze_instance_alignment (vec_info *vinfo,
   2599 						slp_instance instance)
   2600 {
   2601   DUMP_VECT_SCOPE ("vect_slp_analyze_instance_alignment");
   2602 
   2603   slp_tree node;
   2604   unsigned i;
   2605   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, node)
   2606     if (! vect_slp_analyze_node_alignment (vinfo, node))
   2607       return false;
   2608 
   2609   if (SLP_INSTANCE_KIND (instance) == slp_inst_kind_store
   2610       && ! vect_slp_analyze_node_alignment
   2611 	     (vinfo, SLP_INSTANCE_TREE (instance)))
   2612     return false;
   2613 
   2614   return true;
   2615 }
   2616 
   2617 
   2618 /* Analyze groups of accesses: check that DR_INFO belongs to a group of
   2619    accesses of legal size, step, etc.  Detect gaps, single element
   2620    interleaving, and other special cases. Set grouped access info.
   2621    Collect groups of strided stores for further use in SLP analysis.
   2622    Worker for vect_analyze_group_access.  */
   2623 
static bool
vect_analyze_group_access_1 (vec_info *vinfo, dr_vec_info *dr_info)
{
  data_reference *dr = dr_info->dr;
  tree step = DR_STEP (dr);
  tree scalar_type = TREE_TYPE (DR_REF (dr));
  /* Size in bytes of a single scalar element of the access.  */
  HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
  stmt_vec_info stmt_info = dr_info->stmt;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
  /* DR_STEP as a signed HWI; stays -1 when the step is not a
     compile-time constant (GROUPSIZE is then 0, which guards the
     places below that read DR_STEP).  */
  HOST_WIDE_INT dr_step = -1;
  HOST_WIDE_INT groupsize, last_accessed_element = 1;
  bool slp_impossible = false;

  /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
     size of the interleaving group (including gaps).  */
  if (tree_fits_shwi_p (step))
    {
      dr_step = tree_to_shwi (step);
      /* Check that STEP is a multiple of type size.  Otherwise there is
         a non-element-sized gap at the end of the group which we
	 cannot represent in DR_GROUP_GAP or DR_GROUP_SIZE.
	 ???  As we can handle non-constant step fine here we should
	 simply remove uses of DR_GROUP_GAP between the last and first
	 element and instead rely on DR_STEP.  DR_GROUP_SIZE then would
	 simply not include that gap.  */
      if ((dr_step % type_size) != 0)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "Step %T is not a multiple of the element size"
			     " for %T\n",
			     step, DR_REF (dr));
	  return false;
	}
      groupsize = absu_hwi (dr_step) / type_size;
    }
  else
    groupsize = 0;

  /* Not consecutive access is possible only if it is a part of interleaving.  */
  if (!DR_GROUP_FIRST_ELEMENT (stmt_info))
    {
      /* Check if it this DR is a part of interleaving, and is a single
	 element of the group that is accessed in the loop.  */

      /* Gaps are supported only for loads. STEP must be a multiple of the type
	 size.  */
      if (DR_IS_READ (dr)
	  && (dr_step % type_size) == 0
	  && groupsize > 0
	  /* This could be UINT_MAX but as we are generating code in a very
	     inefficient way we have to cap earlier.
	     See PR91403 for example.  */
	  && groupsize <= 4096)
	{
	  /* Model the access as a group of GROUPSIZE elements of which
	     only the first is really accessed each iteration.  */
	  DR_GROUP_FIRST_ELEMENT (stmt_info) = stmt_info;
	  DR_GROUP_SIZE (stmt_info) = groupsize;
	  DR_GROUP_GAP (stmt_info) = groupsize - 1;
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "Detected single element interleaving %T"
			     " step %T\n",
			     DR_REF (dr), step);

	  return true;
	}

      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not consecutive access %G", stmt_info->stmt);

      if (bb_vinfo)
	{
	  /* Mark the statement as unvectorizable.  */
	  STMT_VINFO_VECTORIZABLE (stmt_info) = false;
	  return true;
	}

      /* In a loop fall back to handling the access as strided.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "using strided accesses\n");
      STMT_VINFO_STRIDED_P (stmt_info) = true;
      return true;
    }

  /* Only the first element of an interleaving chain validates the whole
     group; for other members there is nothing to do here.  */
  if (DR_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info)
    {
      /* First stmt in the interleaving chain. Check the chain.  */
      stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (stmt_info);
      struct data_reference *data_ref = dr;
      unsigned int count = 1;
      tree prev_init = DR_INIT (data_ref);
      HOST_WIDE_INT diff, gaps = 0;

      /* By construction, all group members have INTEGER_CST DR_INITs.  */
      while (next)
        {
          /* We never have the same DR multiple times.  */
          gcc_assert (tree_int_cst_compare (DR_INIT (data_ref),
				DR_INIT (STMT_VINFO_DATA_REF (next))) != 0);

	  data_ref = STMT_VINFO_DATA_REF (next);

	  /* All group members have the same STEP by construction.  */
	  gcc_checking_assert (operand_equal_p (DR_STEP (data_ref), step, 0));

          /* Check that the distance between two accesses is equal to the type
             size. Otherwise, we have gaps.  */
          diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
		  - TREE_INT_CST_LOW (prev_init)) / type_size;
	  if (diff < 1 || diff > UINT_MAX)
	    {
	      /* For artificial testcases with array accesses with large
		 constant indices we can run into overflow issues which
		 can end up fooling the groupsize constraint below so
		 check the individual gaps (which are represented as
		 unsigned int) as well.  */
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "interleaved access with gap larger "
				 "than representable\n");
	      return false;
	    }
	  if (diff != 1)
	    {
	      /* FORNOW: SLP of accesses with gaps is not supported.  */
	      slp_impossible = true;
	      if (DR_IS_WRITE (data_ref))
		{
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "interleaved store with gaps\n");
		  return false;
		}

              gaps += diff - 1;
	    }

	  last_accessed_element += diff;

          /* Store the gap from the previous member of the group. If there is no
             gap in the access, DR_GROUP_GAP is always 1.  */
	  DR_GROUP_GAP (next) = diff;

	  prev_init = DR_INIT (data_ref);
	  next = DR_GROUP_NEXT_ELEMENT (next);
	  /* Count the number of data-refs in the chain.  */
	  count++;
        }

      /* A non-constant step leaves GROUPSIZE zero; derive it from the
	 number of members plus the gaps between them.  */
      if (groupsize == 0)
        groupsize = count + gaps;

      /* This could be UINT_MAX but as we are generating code in a very
         inefficient way we have to cap earlier.  See PR78699 for example.  */
      if (groupsize > 4096)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "group is too large\n");
	  return false;
	}

      /* Check that the size of the interleaving is equal to count for stores,
         i.e., that there are no gaps.  */
      if (groupsize != count
	  && !DR_IS_READ (dr))
        {
	  groupsize = count;
	  STMT_VINFO_STRIDED_P (stmt_info) = true;
	}

      /* If there is a gap after the last load in the group it is the
	 difference between the groupsize and the last accessed
	 element.
	 When there is no gap, this difference should be 0.  */
      DR_GROUP_GAP (stmt_info) = groupsize - last_accessed_element;

      DR_GROUP_SIZE (stmt_info) = groupsize;
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Detected interleaving ");
	  if (DR_IS_READ (dr))
	    dump_printf (MSG_NOTE, "load ");
	  else if (STMT_VINFO_STRIDED_P (stmt_info))
	    dump_printf (MSG_NOTE, "strided store ");
	  else
	    dump_printf (MSG_NOTE, "store ");
	  dump_printf (MSG_NOTE, "of size %u\n",
		       (unsigned)groupsize);
	  dump_printf_loc (MSG_NOTE, vect_location, "\t%G", stmt_info->stmt);
	  next = DR_GROUP_NEXT_ELEMENT (stmt_info);
	  while (next)
	    {
	      if (DR_GROUP_GAP (next) != 1)
		dump_printf_loc (MSG_NOTE, vect_location,
				 "\t<gap of %d elements>\n",
				 DR_GROUP_GAP (next) - 1);
	      dump_printf_loc (MSG_NOTE, vect_location, "\t%G", next->stmt);
	      next = DR_GROUP_NEXT_ELEMENT (next);
	    }
	  if (DR_GROUP_GAP (stmt_info) != 0)
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "\t<gap of %d elements>\n",
			     DR_GROUP_GAP (stmt_info));
	}

      /* SLP: create an SLP data structure for every interleaving group of
	 stores for further analysis in vect_analyse_slp.  */
      if (DR_IS_WRITE (dr) && !slp_impossible)
	{
	  if (loop_vinfo)
	    LOOP_VINFO_GROUPED_STORES (loop_vinfo).safe_push (stmt_info);
	  if (bb_vinfo)
	    BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
	}
    }

  return true;
}
   2845 
   2846 /* Analyze groups of accesses: check that DR_INFO belongs to a group of
   2847    accesses of legal size, step, etc.  Detect gaps, single element
   2848    interleaving, and other special cases. Set grouped access info.
   2849    Collect groups of strided stores for further use in SLP analysis.  */
   2850 
   2851 static bool
   2852 vect_analyze_group_access (vec_info *vinfo, dr_vec_info *dr_info)
   2853 {
   2854   if (!vect_analyze_group_access_1 (vinfo, dr_info))
   2855     {
   2856       /* Dissolve the group if present.  */
   2857       stmt_vec_info stmt_info = DR_GROUP_FIRST_ELEMENT (dr_info->stmt);
   2858       while (stmt_info)
   2859 	{
   2860 	  stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (stmt_info);
   2861 	  DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
   2862 	  DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL;
   2863 	  stmt_info = next;
   2864 	}
   2865       return false;
   2866     }
   2867   return true;
   2868 }
   2869 
   2870 /* Analyze the access pattern of the data-reference DR_INFO.
   2871    In case of non-consecutive accesses call vect_analyze_group_access() to
   2872    analyze groups of accesses.  */
   2873 
static bool
vect_analyze_data_ref_access (vec_info *vinfo, dr_vec_info *dr_info)
{
  data_reference *dr = dr_info->dr;
  tree step = DR_STEP (dr);
  tree scalar_type = TREE_TYPE (DR_REF (dr));
  stmt_vec_info stmt_info = dr_info->stmt;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop *loop = NULL;

  /* Gather/scatter accesses have no regular step to analyze.  */
  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    return true;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* In loop vectorization every DR must have an analyzable step.  */
  if (loop_vinfo && !step)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
	                 "bad data-ref access in loop\n");
      return false;
    }

  /* Allow loads with zero step in inner-loop vectorization.  */
  if (loop_vinfo && integer_zerop (step))
    {
      DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
      if (!nested_in_vect_loop_p (loop, stmt_info))
	return DR_IS_READ (dr);
      /* Allow references with zero step for outer loops marked
	 with pragma omp simd only - it guarantees absence of
	 loop-carried dependencies between inner loop iterations.  */
      if (loop->safelen < 2)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "zero step in inner loop of nest\n");
	  return false;
	}
    }

  if (loop && nested_in_vect_loop_p (loop, stmt_info))
    {
      /* Interleaved accesses are not yet supported within outer-loop
        vectorization for references in the inner-loop.  */
      DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;

      /* For the rest of the analysis we use the outer-loop step.  */
      step = STMT_VINFO_DR_STEP (stmt_info);
      if (integer_zerop (step))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
	                     "zero step in outer loop.\n");
	  return DR_IS_READ (dr);
	}
    }

  /* Consecutive?  A constant step equal to the element size (forward),
     or to minus the element size (backward), means a simple
     consecutive access - no interleaving analysis needed.  */
  if (TREE_CODE (step) == INTEGER_CST)
    {
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
      if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
	  || (dr_step < 0
	      && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
	{
	  /* Mark that it is not interleaving.  */
	  DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
	  return true;
	}
    }

  if (loop && nested_in_vect_loop_p (loop, stmt_info))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
	                 "grouped access in outer loop.\n");
      return false;
    }


  /* Assume this is a DR handled by non-constant strided load case.  */
  if (TREE_CODE (step) != INTEGER_CST)
    return (STMT_VINFO_STRIDED_P (stmt_info)
	    && (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
		|| vect_analyze_group_access (vinfo, dr_info)));

  /* Not consecutive access - check if it's a part of interleaving group.  */
  return vect_analyze_group_access (vinfo, dr_info);
}
   2965 
   2966 /* Compare two data-references DRA and DRB to group them into chunks
   2967    suitable for grouping.  */
   2968 
   2969 static int
   2970 dr_group_sort_cmp (const void *dra_, const void *drb_)
   2971 {
   2972   dr_vec_info *dra_info = *(dr_vec_info **)const_cast<void *>(dra_);
   2973   dr_vec_info *drb_info = *(dr_vec_info **)const_cast<void *>(drb_);
   2974   data_reference_p dra = dra_info->dr;
   2975   data_reference_p drb = drb_info->dr;
   2976   int cmp;
   2977 
   2978   /* Stabilize sort.  */
   2979   if (dra == drb)
   2980     return 0;
   2981 
   2982   /* Different group IDs lead never belong to the same group.  */
   2983   if (dra_info->group != drb_info->group)
   2984     return dra_info->group < drb_info->group ? -1 : 1;
   2985 
   2986   /* Ordering of DRs according to base.  */
   2987   cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra),
   2988 			       DR_BASE_ADDRESS (drb));
   2989   if (cmp != 0)
   2990     return cmp;
   2991 
   2992   /* And according to DR_OFFSET.  */
   2993   cmp = data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
   2994   if (cmp != 0)
   2995     return cmp;
   2996 
   2997   /* Put reads before writes.  */
   2998   if (DR_IS_READ (dra) != DR_IS_READ (drb))
   2999     return DR_IS_READ (dra) ? -1 : 1;
   3000 
   3001   /* Then sort after access size.  */
   3002   cmp = data_ref_compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
   3003 			       TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
   3004   if (cmp != 0)
   3005     return cmp;
   3006 
   3007   /* And after step.  */
   3008   cmp = data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb));
   3009   if (cmp != 0)
   3010     return cmp;
   3011 
   3012   /* Then sort after DR_INIT.  In case of identical DRs sort after stmt UID.  */
   3013   cmp = data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb));
   3014   if (cmp == 0)
   3015     return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
   3016   return cmp;
   3017 }
   3018 
   3019 /* If OP is the result of a conversion, return the unconverted value,
   3020    otherwise return null.  */
   3021 
   3022 static tree
   3023 strip_conversion (tree op)
   3024 {
   3025   if (TREE_CODE (op) != SSA_NAME)
   3026     return NULL_TREE;
   3027   gimple *stmt = SSA_NAME_DEF_STMT (op);
   3028   if (!is_gimple_assign (stmt)
   3029       || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)))
   3030     return NULL_TREE;
   3031   return gimple_assign_rhs1 (stmt);
   3032 }
   3033 
   3034 /* Return true if vectorizable_* routines can handle statements STMT1_INFO
   3035    and STMT2_INFO being in a single group.  When ALLOW_SLP_P, masked loads can
   3036    be grouped in SLP mode.  */
   3037 
   3038 static bool
   3039 can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info,
   3040 		   bool allow_slp_p)
   3041 {
   3042   if (gimple_assign_single_p (stmt1_info->stmt))
   3043     return gimple_assign_single_p (stmt2_info->stmt);
   3044 
   3045   gcall *call1 = dyn_cast <gcall *> (stmt1_info->stmt);
   3046   if (call1 && gimple_call_internal_p (call1))
   3047     {
   3048       /* Check for two masked loads or two masked stores.  */
   3049       gcall *call2 = dyn_cast <gcall *> (stmt2_info->stmt);
   3050       if (!call2 || !gimple_call_internal_p (call2))
   3051 	return false;
   3052       internal_fn ifn = gimple_call_internal_fn (call1);
   3053       if (ifn != IFN_MASK_LOAD && ifn != IFN_MASK_STORE)
   3054 	return false;
   3055       if (ifn != gimple_call_internal_fn (call2))
   3056 	return false;
   3057 
   3058       /* Check that the masks are the same.  Cope with casts of masks,
   3059 	 like those created by build_mask_conversion.  */
   3060       tree mask1 = gimple_call_arg (call1, 2);
   3061       tree mask2 = gimple_call_arg (call2, 2);
   3062       if (!operand_equal_p (mask1, mask2, 0)
   3063           && (ifn == IFN_MASK_STORE || !allow_slp_p))
   3064 	{
   3065 	  mask1 = strip_conversion (mask1);
   3066 	  if (!mask1)
   3067 	    return false;
   3068 	  mask2 = strip_conversion (mask2);
   3069 	  if (!mask2)
   3070 	    return false;
   3071 	  if (!operand_equal_p (mask1, mask2, 0))
   3072 	    return false;
   3073 	}
   3074       return true;
   3075     }
   3076 
   3077   return false;
   3078 }
   3079 
/* Function vect_analyze_data_ref_accesses.

   Analyze the access pattern of all the data references in the loop.

   FORNOW: the only access pattern that is considered vectorizable is a
	   simple step 1 (consecutive) access.

   FORNOW: handle only arrays and pointer accesses.  */

opt_result
vect_analyze_data_ref_accesses (vec_info *vinfo,
				vec<int> *dataref_groups)
{
  unsigned int i;
  vec<data_reference_p> datarefs = vinfo->shared->datarefs;

  DUMP_VECT_SCOPE ("vect_analyze_data_ref_accesses");

  if (datarefs.is_empty ())
    return opt_result::success ();

  /* Sort the array of datarefs to make building the interleaving chains
     linear.  Don't modify the original vector's order, it is needed for
     determining what dependencies are reversed.  */
  vec<dr_vec_info *> datarefs_copy;
  datarefs_copy.create (datarefs.length ());
  for (unsigned i = 0; i < datarefs.length (); i++)
    {
      dr_vec_info *dr_info = vinfo->lookup_dr (datarefs[i]);
      /* If the caller computed DR grouping use that, otherwise group by
	 basic blocks.  */
      if (dataref_groups)
	dr_info->group = (*dataref_groups)[i];
      else
	dr_info->group = gimple_bb (DR_STMT (datarefs[i]))->index;
      datarefs_copy.quick_push (dr_info);
    }
  datarefs_copy.qsort (dr_group_sort_cmp);
  /* Group leaders whose chains end up containing duplicate accesses;
     these are split again in the fixup pass below.  */
  hash_set<stmt_vec_info> to_fixup;

  /* Build the interleaving chains.  The outer loop fixes the candidate
     group leader DRA; the inner loop advances the shared index I, linking
     in every following DR that is compatible.  When the inner loop breaks,
     I is left at the first DR that could not be grouped, which becomes the
     next candidate leader.  */
  for (i = 0; i < datarefs_copy.length () - 1;)
    {
      dr_vec_info *dr_info_a = datarefs_copy[i];
      data_reference_p dra = dr_info_a->dr;
      int dra_group_id = dr_info_a->group;
      stmt_vec_info stmtinfo_a = dr_info_a->stmt;
      stmt_vec_info lastinfo = NULL;
      if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
	  || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a))
	{
	  ++i;
	  continue;
	}
      for (i = i + 1; i < datarefs_copy.length (); ++i)
	{
	  dr_vec_info *dr_info_b = datarefs_copy[i];
	  data_reference_p drb = dr_info_b->dr;
	  int drb_group_id = dr_info_b->group;
	  stmt_vec_info stmtinfo_b = dr_info_b->stmt;
	  if (!STMT_VINFO_VECTORIZABLE (stmtinfo_b)
	      || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
	    break;

	  /* ???  Imperfect sorting (non-compatible types, non-modulo
	     accesses, same accesses) can lead to a group to be artificially
	     split here as we don't just skip over those.  If it really
	     matters we can push those to a worklist and re-iterate
	     over them.  The we can just skip ahead to the next DR here.  */

	  /* DRs in a different DR group should not be put into the same
	     interleaving group.  */
	  if (dra_group_id != drb_group_id)
	    break;

	  /* Check that the data-refs have same first location (except init)
	     and they are both either store or load (not load and store,
	     not masked loads or stores).  */
	  if (DR_IS_READ (dra) != DR_IS_READ (drb)
	      || data_ref_compare_tree (DR_BASE_ADDRESS (dra),
					DR_BASE_ADDRESS (drb)) != 0
	      || data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)) != 0
	      || !can_group_stmts_p (stmtinfo_a, stmtinfo_b, true))
	    break;

	  /* Check that the data-refs have the same constant size.  */
	  tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
	  tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
	  if (!tree_fits_uhwi_p (sza)
	      || !tree_fits_uhwi_p (szb)
	      || !tree_int_cst_equal (sza, szb))
	    break;

	  /* Check that the data-refs have the same step.  */
	  if (data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb)) != 0)
	    break;

	  /* Check the types are compatible.
	     ???  We don't distinguish this during sorting.  */
	  if (!types_compatible_p (TREE_TYPE (DR_REF (dra)),
				   TREE_TYPE (DR_REF (drb))))
	    break;

	  /* Check that the DR_INITs are compile-time constants.  */
	  if (!tree_fits_shwi_p (DR_INIT (dra))
	      || !tree_fits_shwi_p (DR_INIT (drb)))
	    break;

	  /* Different .GOMP_SIMD_LANE calls still give the same lane,
	     just hold extra information.  */
	  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_a)
	      && STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_b)
	      && data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb)) == 0)
	    break;

	  /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb).  */
	  HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
	  HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
	  /* INIT_PREV is the init of the previously linked DR; the qsort
	     order guarantees init_a <= init_prev <= init_b.  */
	  HOST_WIDE_INT init_prev
	    = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1]->dr));
	  gcc_assert (init_a <= init_b
		      && init_a <= init_prev
		      && init_prev <= init_b);

	  /* Do not place the same access in the interleaving chain twice.  */
	  if (init_b == init_prev)
	    {
	      gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1]->dr))
			  < gimple_uid (DR_STMT (drb)));
	      /* Simply link in duplicates and fix up the chain below.  */
	    }
	  else
	    {
	      /* If init_b == init_a + the size of the type * k, we have an
		 interleaving, and DRA is accessed before DRB.  */
	      unsigned HOST_WIDE_INT type_size_a = tree_to_uhwi (sza);
	      if (type_size_a == 0
		  || (((unsigned HOST_WIDE_INT)init_b - init_a)
		      % type_size_a != 0))
		break;

	      /* If we have a store, the accesses are adjacent.  This splits
		 groups into chunks we support (we don't support vectorization
		 of stores with gaps).  */
	      if (!DR_IS_READ (dra)
		  && (((unsigned HOST_WIDE_INT)init_b - init_prev)
		      != type_size_a))
		break;

	      /* If the step (if not zero or non-constant) is smaller than the
		 difference between data-refs' inits this splits groups into
		 suitable sizes.  */
	      if (tree_fits_shwi_p (DR_STEP (dra)))
		{
		  unsigned HOST_WIDE_INT step
		    = absu_hwi (tree_to_shwi (DR_STEP (dra)));
		  if (step != 0
		      && step <= ((unsigned HOST_WIDE_INT)init_b - init_a))
		    break;
		}
	    }

	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     DR_IS_READ (dra)
			     ? "Detected interleaving load %T and %T\n"
			     : "Detected interleaving store %T and %T\n",
			     DR_REF (dra), DR_REF (drb));

	  /* Link the found element into the group list.  LASTINFO tracks
	     the current tail of DRA's chain.  */
	  if (!DR_GROUP_FIRST_ELEMENT (stmtinfo_a))
	    {
	      DR_GROUP_FIRST_ELEMENT (stmtinfo_a) = stmtinfo_a;
	      lastinfo = stmtinfo_a;
	    }
	  DR_GROUP_FIRST_ELEMENT (stmtinfo_b) = stmtinfo_a;
	  DR_GROUP_NEXT_ELEMENT (lastinfo) = stmtinfo_b;
	  lastinfo = stmtinfo_b;

	  if (! STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a))
	    {
	      STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a)
		= !can_group_stmts_p (stmtinfo_a, stmtinfo_b, false);

	      if (dump_enabled_p () && STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a))
		dump_printf_loc (MSG_NOTE, vect_location,
				 "Load suitable for SLP vectorization only.\n");
	    }

	  if (init_b == init_prev
	      && !to_fixup.add (DR_GROUP_FIRST_ELEMENT (stmtinfo_a))
	      && dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "Queuing group with duplicate access for fixup\n");
	}
    }

  /* Fixup groups with duplicate entries by splitting it.  */
  while (1)
    {
      hash_set<stmt_vec_info>::iterator it = to_fixup.begin ();
      if (!(it != to_fixup.end ()))
	break;
      stmt_vec_info grp = *it;
      to_fixup.remove (grp);

      /* Find the earliest duplicate group member.  A duplicate is a member
	 with the same DR_INIT as its predecessor; we record the smallest
	 stmt UID among all such duplicates.  */
      unsigned first_duplicate = -1u;
      stmt_vec_info next, g = grp;
      while ((next = DR_GROUP_NEXT_ELEMENT (g)))
	{
	  if (tree_int_cst_equal (DR_INIT (STMT_VINFO_DR_INFO (next)->dr),
				  DR_INIT (STMT_VINFO_DR_INFO (g)->dr))
	      && gimple_uid (STMT_VINFO_STMT (next)) < first_duplicate)
	    first_duplicate = gimple_uid (STMT_VINFO_STMT (next));
	  g = next;
	}
      if (first_duplicate == -1U)
	continue;

      /* Then move all stmts after the first duplicate to a new group.
         Note this is a heuristic but one with the property that *it
	 is fixed up completely.  */
      g = grp;
      stmt_vec_info newgroup = NULL, ng = grp;
      while ((next = DR_GROUP_NEXT_ELEMENT (g)))
	{
	  if (gimple_uid (STMT_VINFO_STMT (next)) >= first_duplicate)
	    {
	      /* Unlink NEXT from the old chain and append it to the new
		 group, making NEWGROUP its leader.  */
	      DR_GROUP_NEXT_ELEMENT (g) = DR_GROUP_NEXT_ELEMENT (next);
	      if (!newgroup)
		{
		  newgroup = next;
		  STMT_VINFO_SLP_VECT_ONLY (newgroup)
		    = STMT_VINFO_SLP_VECT_ONLY (grp);
		}
	      else
		DR_GROUP_NEXT_ELEMENT (ng) = next;
	      ng = next;
	      DR_GROUP_FIRST_ELEMENT (ng) = newgroup;
	    }
	  else
	    g = DR_GROUP_NEXT_ELEMENT (g);
	}
      DR_GROUP_NEXT_ELEMENT (ng) = NULL;

      /* Fixup the new group which still may contain duplicates.  */
      to_fixup.add (newgroup);
    }

  /* Finally analyze the access pattern of each (remaining) data-ref.  */
  dr_vec_info *dr_info;
  FOR_EACH_VEC_ELT (datarefs_copy, i, dr_info)
    {
      if (STMT_VINFO_VECTORIZABLE (dr_info->stmt)
	  && !vect_analyze_data_ref_access (vinfo, dr_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: complicated access pattern.\n");

	  if (is_a <bb_vec_info> (vinfo))
	    {
	      /* Mark the statement as not vectorizable.  */
	      STMT_VINFO_VECTORIZABLE (dr_info->stmt) = false;
	      continue;
	    }
	  else
	    {
	      datarefs_copy.release ();
	      return opt_result::failure_at (dr_info->stmt->stmt,
					     "not vectorized:"
					     " complicated access pattern.\n");
	    }
	}
    }

  datarefs_copy.release ();
  return opt_result::success ();
}
   3359 
   3360 /* Function vect_vfa_segment_size.
   3361 
   3362    Input:
   3363      DR_INFO: The data reference.
   3364      LENGTH_FACTOR: segment length to consider.
   3365 
   3366    Return a value suitable for the dr_with_seg_len::seg_len field.
   3367    This is the "distance travelled" by the pointer from the first
   3368    iteration in the segment to the last.  Note that it does not include
   3369    the size of the access; in effect it only describes the first byte.  */
   3370 
   3371 static tree
   3372 vect_vfa_segment_size (dr_vec_info *dr_info, tree length_factor)
   3373 {
   3374   length_factor = size_binop (MINUS_EXPR,
   3375 			      fold_convert (sizetype, length_factor),
   3376 			      size_one_node);
   3377   return size_binop (MULT_EXPR, fold_convert (sizetype, DR_STEP (dr_info->dr)),
   3378 		     length_factor);
   3379 }
   3380 
   3381 /* Return a value that, when added to abs (vect_vfa_segment_size (DR_INFO)),
   3382    gives the worst-case number of bytes covered by the segment.  */
   3383 
   3384 static unsigned HOST_WIDE_INT
   3385 vect_vfa_access_size (vec_info *vinfo, dr_vec_info *dr_info)
   3386 {
   3387   stmt_vec_info stmt_vinfo = dr_info->stmt;
   3388   tree ref_type = TREE_TYPE (DR_REF (dr_info->dr));
   3389   unsigned HOST_WIDE_INT ref_size = tree_to_uhwi (TYPE_SIZE_UNIT (ref_type));
   3390   unsigned HOST_WIDE_INT access_size = ref_size;
   3391   if (DR_GROUP_FIRST_ELEMENT (stmt_vinfo))
   3392     {
   3393       gcc_assert (DR_GROUP_FIRST_ELEMENT (stmt_vinfo) == stmt_vinfo);
   3394       access_size *= DR_GROUP_SIZE (stmt_vinfo) - DR_GROUP_GAP (stmt_vinfo);
   3395     }
   3396   tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
   3397   int misalignment;
   3398   if (STMT_VINFO_VEC_STMTS (stmt_vinfo).exists ()
   3399       && ((misalignment = dr_misalignment (dr_info, vectype)), true)
   3400       && (vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment)
   3401 	  == dr_explicit_realign_optimized))
   3402     {
   3403       /* We might access a full vector's worth.  */
   3404       access_size += tree_to_uhwi (TYPE_SIZE_UNIT (vectype)) - ref_size;
   3405     }
   3406   return access_size;
   3407 }
   3408 
/* Get the minimum alignment for all the scalar accesses that DR_INFO
   describes.  */

static unsigned int
vect_vfa_align (dr_vec_info *dr_info)
{
  /* Delegate to the generic data-reference alignment computation.  */
  return dr_alignment (dr_info->dr);
}
   3417 
/* Function vect_no_alias_p.

   Given data references A and B with equal base and offset, see whether
   the alias relation can be decided at compilation time.  Return 1 if
   it can and the references alias, 0 if it can and the references do
   not alias, and -1 if we cannot decide at compile time.  SEGMENT_LENGTH_A,
   SEGMENT_LENGTH_B, ACCESS_SIZE_A and ACCESS_SIZE_B are the equivalent
   of dr_with_seg_len::{seg_len,access_size} for A and B.  */

static int
vect_compile_time_alias (dr_vec_info *a, dr_vec_info *b,
			 tree segment_length_a, tree segment_length_b,
			 unsigned HOST_WIDE_INT access_size_a,
			 unsigned HOST_WIDE_INT access_size_b)
{
  /* Since base and offset are equal, the DR_INITs alone position the
     two accesses relative to each other.  */
  poly_offset_int offset_a = wi::to_poly_offset (DR_INIT (a->dr));
  poly_offset_int offset_b = wi::to_poly_offset (DR_INIT (b->dr));
  poly_uint64 const_length_a;
  poly_uint64 const_length_b;

  /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
     bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
     [a, a+12) */
  if (tree_int_cst_compare (DR_STEP (a->dr), size_zero_node) < 0)
    {
      /* Negative step: the segment extends downwards, so negate the
	 (negative) segment length and move the range start back.  */
      const_length_a = (-wi::to_poly_wide (segment_length_a)).force_uhwi ();
      offset_a -= const_length_a;
    }
  else
    const_length_a = tree_to_poly_uint64 (segment_length_a);
  if (tree_int_cst_compare (DR_STEP (b->dr), size_zero_node) < 0)
    {
      const_length_b = (-wi::to_poly_wide (segment_length_b)).force_uhwi ();
      offset_b -= const_length_b;
    }
  else
    const_length_b = tree_to_poly_uint64 (segment_length_b);

  /* The segment length only describes the first byte of the last access;
     add the access size to get the full byte range covered.  */
  const_length_a += access_size_a;
  const_length_b += access_size_b;

  /* Ranges provably overlap -> the references alias.  */
  if (ranges_known_overlap_p (offset_a, const_length_a,
			      offset_b, const_length_b))
    return 1;

  /* Ranges provably disjoint -> no alias.  */
  if (!ranges_maybe_overlap_p (offset_a, const_length_a,
			       offset_b, const_length_b))
    return 0;

  /* Otherwise the answer depends on runtime values.  */
  return -1;
}
   3469 
   3470 /* Return true if the minimum nonzero dependence distance for loop LOOP_DEPTH
   3471    in DDR is >= VF.  */
   3472 
   3473 static bool
   3474 dependence_distance_ge_vf (data_dependence_relation *ddr,
   3475 			   unsigned int loop_depth, poly_uint64 vf)
   3476 {
   3477   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE
   3478       || DDR_NUM_DIST_VECTS (ddr) == 0)
   3479     return false;
   3480 
   3481   /* If the dependence is exact, we should have limited the VF instead.  */
   3482   gcc_checking_assert (DDR_COULD_BE_INDEPENDENT_P (ddr));
   3483 
   3484   unsigned int i;
   3485   lambda_vector dist_v;
   3486   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
   3487     {
   3488       HOST_WIDE_INT dist = dist_v[loop_depth];
   3489       if (dist != 0
   3490 	  && !(dist > 0 && DDR_REVERSED_P (ddr))
   3491 	  && maybe_lt ((unsigned HOST_WIDE_INT) abs_hwi (dist), vf))
   3492 	return false;
   3493     }
   3494 
   3495   if (dump_enabled_p ())
   3496     dump_printf_loc (MSG_NOTE, vect_location,
   3497 		     "dependence distance between %T and %T is >= VF\n",
   3498 		     DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
   3499 
   3500   return true;
   3501 }
   3502 
   3503 /* Dump LOWER_BOUND using flags DUMP_KIND.  Dumps are known to be enabled.  */
   3504 
   3505 static void
   3506 dump_lower_bound (dump_flags_t dump_kind, const vec_lower_bound &lower_bound)
   3507 {
   3508   dump_printf (dump_kind, "%s (%T) >= ",
   3509 	       lower_bound.unsigned_p ? "unsigned" : "abs",
   3510 	       lower_bound.expr);
   3511   dump_dec (dump_kind, lower_bound.min_value);
   3512 }
   3513 
/* Record that the vectorized loop requires the vec_lower_bound described
   by EXPR, UNSIGNED_P and MIN_VALUE.  */

static void
vect_check_lower_bound (loop_vec_info loop_vinfo, tree expr, bool unsigned_p,
			poly_uint64 min_value)
{
  vec<vec_lower_bound> &lower_bounds
    = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo);
  /* If a bound for EXPR is already recorded, merge the two requests:
     the merged check is unsigned only if both were, and it uses the
     larger of the two minimum values.  */
  for (unsigned int i = 0; i < lower_bounds.length (); ++i)
    if (operand_equal_p (lower_bounds[i].expr, expr, 0))
      {
	unsigned_p &= lower_bounds[i].unsigned_p;
	min_value = upper_bound (lower_bounds[i].min_value, min_value);
	/* Only rewrite (and dump) the entry if the merge actually
	   changed something.  */
	if (lower_bounds[i].unsigned_p != unsigned_p
	    || maybe_lt (lower_bounds[i].min_value, min_value))
	  {
	    lower_bounds[i].unsigned_p = unsigned_p;
	    lower_bounds[i].min_value = min_value;
	    if (dump_enabled_p ())
	      {
		dump_printf_loc (MSG_NOTE, vect_location,
				 "updating run-time check to ");
		dump_lower_bound (MSG_NOTE, lower_bounds[i]);
		dump_printf (MSG_NOTE, "\n");
	      }
	  }
	return;
      }

  /* No existing bound for EXPR: record a fresh one.  */
  vec_lower_bound lower_bound (expr, unsigned_p, min_value);
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "need a run-time check that ");
      dump_lower_bound (MSG_NOTE, lower_bound);
      dump_printf (MSG_NOTE, "\n");
    }
  LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).safe_push (lower_bound);
}
   3553 
   3554 /* Return true if it's unlikely that the step of the vectorized form of DR_INFO
   3555    will span fewer than GAP bytes.  */
   3556 
   3557 static bool
   3558 vect_small_gap_p (loop_vec_info loop_vinfo, dr_vec_info *dr_info,
   3559 		  poly_int64 gap)
   3560 {
   3561   stmt_vec_info stmt_info = dr_info->stmt;
   3562   HOST_WIDE_INT count
   3563     = estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
   3564   if (DR_GROUP_FIRST_ELEMENT (stmt_info))
   3565     count *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
   3566   return (estimated_poly_value (gap)
   3567 	  <= count * vect_get_scalar_dr_size (dr_info));
   3568 }
   3569 
/* Return true if we know that there is no alias between DR_INFO_A and
   DR_INFO_B when abs (DR_STEP (DR_INFO_A->dr)) >= N for some N.
   When returning true, set *LOWER_BOUND_OUT to this N.  */

static bool
vectorizable_with_step_bound_p (dr_vec_info *dr_info_a, dr_vec_info *dr_info_b,
				poly_uint64 *lower_bound_out)
{
  /* Check that there is a constant gap of known sign between DR_A
     and DR_B.  Base, offset and step must all match exactly so that
     the DR_INITs alone determine the gap.  */
  data_reference *dr_a = dr_info_a->dr;
  data_reference *dr_b = dr_info_b->dr;
  poly_int64 init_a, init_b;
  if (!operand_equal_p (DR_BASE_ADDRESS (dr_a), DR_BASE_ADDRESS (dr_b), 0)
      || !operand_equal_p (DR_OFFSET (dr_a), DR_OFFSET (dr_b), 0)
      || !operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0)
      || !poly_int_tree_p (DR_INIT (dr_a), &init_a)
      || !poly_int_tree_p (DR_INIT (dr_b), &init_b)
      || !ordered_p (init_a, init_b))
    return false;

  /* Sort DR_A and DR_B by the address they access.  */
  if (maybe_lt (init_b, init_a))
    {
      std::swap (init_a, init_b);
      std::swap (dr_info_a, dr_info_b);
      std::swap (dr_a, dr_b);
    }

  /* If the two accesses could be dependent within a scalar iteration,
     make sure that we'd retain their order.  */
  if (maybe_gt (init_a + vect_get_scalar_dr_size (dr_info_a), init_b)
      && !vect_preserves_scalar_order_p (dr_info_a, dr_info_b))
    return false;

  /* There is no alias if abs (DR_STEP) is greater than or equal to
     the bytes spanned by the combination of the two accesses.  */
  *lower_bound_out = init_b + vect_get_scalar_dr_size (dr_info_b) - init_a;
  return true;
}
   3610 
   3611 /* Function vect_prune_runtime_alias_test_list.
   3612 
   3613    Prune a list of ddrs to be tested at run-time by versioning for alias.
   3614    Merge several alias checks into one if possible.
   3615    Return FALSE if resulting list of ddrs is longer then allowed by
   3616    PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE.  */
   3617 
   3618 opt_result
   3619 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
   3620 {
   3621   typedef pair_hash <tree_operand_hash, tree_operand_hash> tree_pair_hash;
   3622   hash_set <tree_pair_hash> compared_objects;
   3623 
   3624   const vec<ddr_p> &may_alias_ddrs = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
   3625   vec<dr_with_seg_len_pair_t> &comp_alias_ddrs
   3626     = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
   3627   const vec<vec_object_pair> &check_unequal_addrs
   3628     = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
   3629   poly_uint64 vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
   3630   tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
   3631 
   3632   ddr_p ddr;
   3633   unsigned int i;
   3634   tree length_factor;
   3635 
   3636   DUMP_VECT_SCOPE ("vect_prune_runtime_alias_test_list");
   3637 
   3638   /* Step values are irrelevant for aliasing if the number of vector
   3639      iterations is equal to the number of scalar iterations (which can
   3640      happen for fully-SLP loops).  */
   3641   bool vf_one_p = known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 1U);
   3642 
   3643   if (!vf_one_p)
   3644     {
   3645       /* Convert the checks for nonzero steps into bound tests.  */
   3646       tree value;
   3647       FOR_EACH_VEC_ELT (LOOP_VINFO_CHECK_NONZERO (loop_vinfo), i, value)
   3648 	vect_check_lower_bound (loop_vinfo, value, true, 1);
   3649     }
   3650 
   3651   if (may_alias_ddrs.is_empty ())
   3652     return opt_result::success ();
   3653 
   3654   comp_alias_ddrs.create (may_alias_ddrs.length ());
   3655 
   3656   unsigned int loop_depth
   3657     = index_in_loop_nest (LOOP_VINFO_LOOP (loop_vinfo)->num,
   3658 			  LOOP_VINFO_LOOP_NEST (loop_vinfo));
   3659 
   3660   /* First, we collect all data ref pairs for aliasing checks.  */
   3661   FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
   3662     {
   3663       poly_uint64 lower_bound;
   3664       tree segment_length_a, segment_length_b;
   3665       unsigned HOST_WIDE_INT access_size_a, access_size_b;
   3666       unsigned HOST_WIDE_INT align_a, align_b;
   3667 
   3668       /* Ignore the alias if the VF we chose ended up being no greater
   3669 	 than the dependence distance.  */
   3670       if (dependence_distance_ge_vf (ddr, loop_depth, vect_factor))
   3671 	continue;
   3672 
   3673       if (DDR_OBJECT_A (ddr))
   3674 	{
   3675 	  vec_object_pair new_pair (DDR_OBJECT_A (ddr), DDR_OBJECT_B (ddr));
   3676 	  if (!compared_objects.add (new_pair))
   3677 	    {
   3678 	      if (dump_enabled_p ())
   3679 		dump_printf_loc (MSG_NOTE, vect_location,
   3680 				 "checking that %T and %T"
   3681 				 " have different addresses\n",
   3682 				 new_pair.first, new_pair.second);
   3683 	      LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).safe_push (new_pair);
   3684 	    }
   3685 	  continue;
   3686 	}
   3687 
   3688       dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (DDR_A (ddr));
   3689       stmt_vec_info stmt_info_a = dr_info_a->stmt;
   3690 
   3691       dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr));
   3692       stmt_vec_info stmt_info_b = dr_info_b->stmt;
   3693 
   3694       bool preserves_scalar_order_p
   3695 	= vect_preserves_scalar_order_p (dr_info_a, dr_info_b);
   3696       bool ignore_step_p
   3697 	  = (vf_one_p
   3698 	     && (preserves_scalar_order_p
   3699 		 || operand_equal_p (DR_STEP (dr_info_a->dr),
   3700 				     DR_STEP (dr_info_b->dr))));
   3701 
   3702       /* Skip the pair if inter-iteration dependencies are irrelevant
   3703 	 and intra-iteration dependencies are guaranteed to be honored.  */
   3704       if (ignore_step_p
   3705 	  && (preserves_scalar_order_p
   3706 	      || vectorizable_with_step_bound_p (dr_info_a, dr_info_b,
   3707 						 &lower_bound)))
   3708 	{
   3709 	  if (dump_enabled_p ())
   3710 	    dump_printf_loc (MSG_NOTE, vect_location,
   3711 			     "no need for alias check between "
   3712 			     "%T and %T when VF is 1\n",
   3713 			     DR_REF (dr_info_a->dr), DR_REF (dr_info_b->dr));
   3714 	  continue;
   3715 	}
   3716 
   3717       /* See whether we can handle the alias using a bounds check on
   3718 	 the step, and whether that's likely to be the best approach.
   3719 	 (It might not be, for example, if the minimum step is much larger
   3720 	 than the number of bytes handled by one vector iteration.)  */
   3721       if (!ignore_step_p
   3722 	  && TREE_CODE (DR_STEP (dr_info_a->dr)) != INTEGER_CST
   3723 	  && vectorizable_with_step_bound_p (dr_info_a, dr_info_b,
   3724 					     &lower_bound)
   3725 	  && (vect_small_gap_p (loop_vinfo, dr_info_a, lower_bound)
   3726 	      || vect_small_gap_p (loop_vinfo, dr_info_b, lower_bound)))
   3727 	{
   3728 	  bool unsigned_p = dr_known_forward_stride_p (dr_info_a->dr);
   3729 	  if (dump_enabled_p ())
   3730 	    {
   3731 	      dump_printf_loc (MSG_NOTE, vect_location, "no alias between "
   3732 			       "%T and %T when the step %T is outside ",
   3733 			       DR_REF (dr_info_a->dr),
   3734 			       DR_REF (dr_info_b->dr),
   3735 			       DR_STEP (dr_info_a->dr));
   3736 	      if (unsigned_p)
   3737 		dump_printf (MSG_NOTE, "[0");
   3738 	      else
   3739 		{
   3740 		  dump_printf (MSG_NOTE, "(");
   3741 		  dump_dec (MSG_NOTE, poly_int64 (-lower_bound));
   3742 		}
   3743 	      dump_printf (MSG_NOTE, ", ");
   3744 	      dump_dec (MSG_NOTE, lower_bound);
   3745 	      dump_printf (MSG_NOTE, ")\n");
   3746 	    }
   3747 	  vect_check_lower_bound (loop_vinfo, DR_STEP (dr_info_a->dr),
   3748 				  unsigned_p, lower_bound);
   3749 	  continue;
   3750 	}
   3751 
   3752       stmt_vec_info dr_group_first_a = DR_GROUP_FIRST_ELEMENT (stmt_info_a);
   3753       if (dr_group_first_a)
   3754 	{
   3755 	  stmt_info_a = dr_group_first_a;
   3756 	  dr_info_a = STMT_VINFO_DR_INFO (stmt_info_a);
   3757 	}
   3758 
   3759       stmt_vec_info dr_group_first_b = DR_GROUP_FIRST_ELEMENT (stmt_info_b);
   3760       if (dr_group_first_b)
   3761 	{
   3762 	  stmt_info_b = dr_group_first_b;
   3763 	  dr_info_b = STMT_VINFO_DR_INFO (stmt_info_b);
   3764 	}
   3765 
   3766       if (ignore_step_p)
   3767 	{
   3768 	  segment_length_a = size_zero_node;
   3769 	  segment_length_b = size_zero_node;
   3770 	}
   3771       else
   3772 	{
   3773 	  if (!operand_equal_p (DR_STEP (dr_info_a->dr),
   3774 				DR_STEP (dr_info_b->dr), 0))
   3775 	    length_factor = scalar_loop_iters;
   3776 	  else
   3777 	    length_factor = size_int (vect_factor);
   3778 	  segment_length_a = vect_vfa_segment_size (dr_info_a, length_factor);
   3779 	  segment_length_b = vect_vfa_segment_size (dr_info_b, length_factor);
   3780 	}
   3781       access_size_a = vect_vfa_access_size (loop_vinfo, dr_info_a);
   3782       access_size_b = vect_vfa_access_size (loop_vinfo, dr_info_b);
   3783       align_a = vect_vfa_align (dr_info_a);
   3784       align_b = vect_vfa_align (dr_info_b);
   3785 
   3786       /* See whether the alias is known at compilation time.  */
   3787       if (operand_equal_p (DR_BASE_ADDRESS (dr_info_a->dr),
   3788 			   DR_BASE_ADDRESS (dr_info_b->dr), 0)
   3789 	  && operand_equal_p (DR_OFFSET (dr_info_a->dr),
   3790 			      DR_OFFSET (dr_info_b->dr), 0)
   3791 	  && TREE_CODE (DR_STEP (dr_info_a->dr)) == INTEGER_CST
   3792 	  && TREE_CODE (DR_STEP (dr_info_b->dr)) == INTEGER_CST
   3793 	  && poly_int_tree_p (segment_length_a)
   3794 	  && poly_int_tree_p (segment_length_b))
   3795 	{
   3796 	  int res = vect_compile_time_alias (dr_info_a, dr_info_b,
   3797 					     segment_length_a,
   3798 					     segment_length_b,
   3799 					     access_size_a,
   3800 					     access_size_b);
   3801 	  if (res >= 0 && dump_enabled_p ())
   3802 	    {
   3803 	      dump_printf_loc (MSG_NOTE, vect_location,
   3804 			       "can tell at compile time that %T and %T",
   3805 			       DR_REF (dr_info_a->dr), DR_REF (dr_info_b->dr));
   3806 	      if (res == 0)
   3807 		dump_printf (MSG_NOTE, " do not alias\n");
   3808 	      else
   3809 		dump_printf (MSG_NOTE, " alias\n");
   3810 	    }
   3811 
   3812 	  if (res == 0)
   3813 	    continue;
   3814 
   3815 	  if (res == 1)
   3816 	    return opt_result::failure_at (stmt_info_b->stmt,
   3817 					   "not vectorized:"
   3818 					   " compilation time alias: %G%G",
   3819 					   stmt_info_a->stmt,
   3820 					   stmt_info_b->stmt);
   3821 	}
   3822 
   3823       /* dr_with_seg_len requires the alignment to apply to the segment length
   3824 	 and access size, not just the start address.  The access size can be
   3825 	 smaller than the pointer alignment for grouped accesses and bitfield
   3826 	 references; see PR115192 and PR116125 respectively.  */
   3827       align_a = std::min (align_a, least_bit_hwi (access_size_a));
   3828       align_b = std::min (align_b, least_bit_hwi (access_size_b));
   3829 
   3830       dr_with_seg_len dr_a (dr_info_a->dr, segment_length_a,
   3831 			    access_size_a, align_a);
   3832       dr_with_seg_len dr_b (dr_info_b->dr, segment_length_b,
   3833 			    access_size_b, align_b);
   3834       /* Canonicalize the order to be the one that's needed for accurate
   3835 	 RAW, WAR and WAW flags, in cases where the data references are
   3836 	 well-ordered.  The order doesn't really matter otherwise,
   3837 	 but we might as well be consistent.  */
   3838       if (get_later_stmt (stmt_info_a, stmt_info_b) == stmt_info_a)
   3839 	std::swap (dr_a, dr_b);
   3840 
   3841       dr_with_seg_len_pair_t dr_with_seg_len_pair
   3842 	(dr_a, dr_b, (preserves_scalar_order_p
   3843 		      ? dr_with_seg_len_pair_t::WELL_ORDERED
   3844 		      : dr_with_seg_len_pair_t::REORDERED));
   3845 
   3846       comp_alias_ddrs.safe_push (dr_with_seg_len_pair);
   3847     }
   3848 
   3849   prune_runtime_alias_test_list (&comp_alias_ddrs, vect_factor);
   3850 
   3851   unsigned int count = (comp_alias_ddrs.length ()
   3852 			+ check_unequal_addrs.length ());
   3853 
   3854   if (count
   3855       && (loop_cost_model (LOOP_VINFO_LOOP (loop_vinfo))
   3856 	  == VECT_COST_MODEL_VERY_CHEAP))
   3857     return opt_result::failure_at
   3858       (vect_location, "would need a runtime alias check\n");
   3859 
   3860   if (dump_enabled_p ())
   3861     dump_printf_loc (MSG_NOTE, vect_location,
   3862 		     "improved number of alias checks from %d to %d\n",
   3863 		     may_alias_ddrs.length (), count);
   3864   unsigned limit = param_vect_max_version_for_alias_checks;
   3865   if (loop_cost_model (LOOP_VINFO_LOOP (loop_vinfo)) == VECT_COST_MODEL_CHEAP)
   3866     limit = param_vect_max_version_for_alias_checks * 6 / 10;
   3867   if (count > limit)
   3868     return opt_result::failure_at
   3869       (vect_location,
   3870        "number of versioning for alias run-time tests exceeds %d "
   3871        "(--param vect-max-version-for-alias-checks)\n", limit);
   3872 
   3873   return opt_result::success ();
   3874 }
   3875 
   3876 /* Check whether we can use an internal function for a gather load
   3877    or scatter store.  READ_P is true for loads and false for stores.
   3878    MASKED_P is true if the load or store is conditional.  MEMORY_TYPE is
   3879    the type of the memory elements being loaded or stored.  OFFSET_TYPE
   3880    is the type of the offset that is being applied to the invariant
   3881    base address.  SCALE is the amount by which the offset should
   3882    be multiplied *after* it has been converted to address width.
   3883 
   3884    Return true if the function is supported, storing the function id in
   3885    *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT.  */
   3886 
   3887 bool
   3888 vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
   3889 			  tree vectype, tree memory_type, tree offset_type,
   3890 			  int scale, internal_fn *ifn_out,
   3891 			  tree *offset_vectype_out)
   3892 {
   3893   unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
   3894   unsigned int element_bits = vector_element_bits (vectype);
   3895   if (element_bits != memory_bits)
   3896     /* For now the vector elements must be the same width as the
   3897        memory elements.  */
   3898     return false;
   3899 
   3900   /* Work out which function we need.  */
   3901   internal_fn ifn, alt_ifn;
   3902   if (read_p)
   3903     {
   3904       ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
   3905       alt_ifn = IFN_MASK_GATHER_LOAD;
   3906     }
   3907   else
   3908     {
   3909       ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
   3910       alt_ifn = IFN_MASK_SCATTER_STORE;
   3911     }
   3912 
   3913   for (;;)
   3914     {
   3915       tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type);
   3916       if (!offset_vectype)
   3917 	return false;
   3918 
   3919       /* Test whether the target supports this combination.  */
   3920       if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
   3921 						  offset_vectype, scale))
   3922 	{
   3923 	  *ifn_out = ifn;
   3924 	  *offset_vectype_out = offset_vectype;
   3925 	  return true;
   3926 	}
   3927       else if (!masked_p
   3928 	       && internal_gather_scatter_fn_supported_p (alt_ifn, vectype,
   3929 							  memory_type,
   3930 							  offset_vectype,
   3931 							  scale))
   3932 	{
   3933 	  *ifn_out = alt_ifn;
   3934 	  *offset_vectype_out = offset_vectype;
   3935 	  return true;
   3936 	}
   3937 
   3938       if (TYPE_PRECISION (offset_type) >= POINTER_SIZE
   3939 	  && TYPE_PRECISION (offset_type) >= element_bits)
   3940 	return false;
   3941 
   3942       offset_type = build_nonstandard_integer_type
   3943 	(TYPE_PRECISION (offset_type) * 2, TYPE_UNSIGNED (offset_type));
   3944     }
   3945 }
   3946 
   3947 /* STMT_INFO is a call to an internal gather load or scatter store function.
   3948    Describe the operation in INFO.  */
   3949 
   3950 static void
   3951 vect_describe_gather_scatter_call (stmt_vec_info stmt_info,
   3952 				   gather_scatter_info *info)
   3953 {
   3954   gcall *call = as_a <gcall *> (stmt_info->stmt);
   3955   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   3956   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
   3957 
   3958   info->ifn = gimple_call_internal_fn (call);
   3959   info->decl = NULL_TREE;
   3960   info->base = gimple_call_arg (call, 0);
   3961   info->offset = gimple_call_arg (call, 1);
   3962   info->offset_dt = vect_unknown_def_type;
   3963   info->offset_vectype = NULL_TREE;
   3964   info->scale = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
   3965   info->element_type = TREE_TYPE (vectype);
   3966   info->memory_type = TREE_TYPE (DR_REF (dr));
   3967 }
   3968 
   3969 /* Return true if a non-affine read or write in STMT_INFO is suitable for a
   3970    gather load or scatter store.  Describe the operation in *INFO if so.  */
   3971 
bool
vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
			   gather_scatter_info *info)
{
  HOST_WIDE_INT scale = 1;
  poly_int64 pbitpos, pbitsize;
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree offtype = NULL_TREE;
  tree decl = NULL_TREE, base, off;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree memory_type = TREE_TYPE (DR_REF (dr));
  machine_mode pmode;
  int punsignedp, reversep, pvolatilep = 0;
  internal_fn ifn;
  tree offset_vectype;
  bool masked_p = false;

  /* See whether this is already a call to a gather/scatter internal function.
     If not, see whether it's a masked load or store.  */
  gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
  if (call && gimple_call_internal_p (call))
    {
      ifn = gimple_call_internal_fn (call);
      if (internal_gather_scatter_fn_p (ifn))
	{
	  vect_describe_gather_scatter_call (stmt_info, info);
	  return true;
	}
      masked_p = (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE);
    }

  /* True if we should aim to use internal functions rather than
     built-in functions.  */
  bool use_ifn_p = (DR_IS_READ (dr)
		    ? supports_vec_gather_load_p (TYPE_MODE (vectype))
		    : supports_vec_scatter_store_p (TYPE_MODE (vectype)));

  base = DR_REF (dr);
  /* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF,
     see if we can use the def stmt of the address.  */
  if (masked_p
      && TREE_CODE (base) == MEM_REF
      && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
      && integer_zerop (TREE_OPERAND (base, 1))
      && !expr_invariant_in_loop_p (loop, TREE_OPERAND (base, 0)))
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base, 0));
      if (is_gimple_assign (def_stmt)
	  && gimple_assign_rhs_code (def_stmt) == ADDR_EXPR)
	base = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
    }

  /* The gather and scatter builtins need address of the form
     loop_invariant + vector * {1, 2, 4, 8}
     or
     loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
     Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
     of loop invariants/SSA_NAMEs defined in the loop, with casts,
     multiplications and additions in it.  To get a vector, we need
     a single SSA_NAME that will be defined in the loop and will
     contain everything that is not loop invariant and that can be
     vectorized.  The following code attempts to find such a preexisting
     SSA_NAME OFF and put the loop invariants into a tree BASE
     that can be gimplified before the loop.  */
  base = get_inner_reference (base, &pbitsize, &pbitpos, &off, &pmode,
			      &punsignedp, &reversep, &pvolatilep);
  /* Reverse storage order accesses are not supported here.  */
  if (reversep)
    return false;

  /* get_inner_reference splits the access at a byte boundary, so this
     division is exact (exact_div would assert otherwise).  */
  poly_int64 pbytepos = exact_div (pbitpos, BITS_PER_UNIT);

  if (TREE_CODE (base) == MEM_REF)
    {
      /* Fold a constant MEM_REF offset into OFF and strip the MEM_REF.  */
      if (!integer_zerop (TREE_OPERAND (base, 1)))
	{
	  if (off == NULL_TREE)
	    off = wide_int_to_tree (sizetype, mem_ref_offset (base));
	  else
	    off = size_binop (PLUS_EXPR, off,
			      fold_convert (sizetype, TREE_OPERAND (base, 1)));
	}
      base = TREE_OPERAND (base, 0);
    }
  else
    base = build_fold_addr_expr (base);

  if (off == NULL_TREE)
    off = size_zero_node;

  /* If base is not loop invariant, either off is 0, then we start with just
     the constant offset in the loop invariant BASE and continue with base
     as OFF, otherwise give up.
     We could handle that case by gimplifying the addition of base + off
     into some SSA_NAME and use that as off, but for now punt.  */
  if (!expr_invariant_in_loop_p (loop, base))
    {
      if (!integer_zerop (off))
	return false;
      off = base;
      base = size_int (pbytepos);
    }
  /* Otherwise put base + constant offset into the loop invariant BASE
     and continue with OFF.  */
  else
    {
      base = fold_convert (sizetype, base);
      base = size_binop (PLUS_EXPR, base, size_int (pbytepos));
    }

  /* OFF at this point may be either a SSA_NAME or some tree expression
     from get_inner_reference.  Try to peel off loop invariants from it
     into BASE as long as possible.  */
  STRIP_NOPS (off);
  while (offtype == NULL_TREE)
    {
      enum tree_code code;
      tree op0, op1, add = NULL_TREE;

      /* Fetch the next operation to look through, either from the
	 defining statement of an SSA_NAME or from the expression
	 itself.  */
      if (TREE_CODE (off) == SSA_NAME)
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (off);

	  /* A loop-invariant OFF means there is no vector component
	     left, so this is not a gather/scatter candidate.  */
	  if (expr_invariant_in_loop_p (loop, off))
	    return false;

	  if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
	    break;

	  op0 = gimple_assign_rhs1 (def_stmt);
	  code = gimple_assign_rhs_code (def_stmt);
	  op1 = gimple_assign_rhs2 (def_stmt);
	}
      else
	{
	  if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
	    return false;
	  code = TREE_CODE (off);
	  extract_ops_from_tree (off, &code, &op0, &op1);
	}
      switch (code)
	{
	case POINTER_PLUS_EXPR:
	case PLUS_EXPR:
	  /* Move a loop-invariant addend into BASE and keep peeling the
	     variant operand.  BASE is in bytes while OFF is pre-scale,
	     so the addend is multiplied by SCALE first.  */
	  if (expr_invariant_in_loop_p (loop, op0))
	    {
	      add = op0;
	      off = op1;
	    do_add:
	      add = fold_convert (sizetype, add);
	      if (scale != 1)
		add = size_binop (MULT_EXPR, add, size_int (scale));
	      base = size_binop (PLUS_EXPR, base, add);
	      continue;
	    }
	  if (expr_invariant_in_loop_p (loop, op1))
	    {
	      add = op1;
	      off = op0;
	      goto do_add;
	    }
	  break;
	case MINUS_EXPR:
	  /* OFF - invariant == OFF + (-invariant): negate the addend
	     and reuse the PLUS path.  */
	  if (expr_invariant_in_loop_p (loop, op1))
	    {
	      add = fold_convert (sizetype, op1);
	      add = size_binop (MINUS_EXPR, size_zero_node, add);
	      off = op0;
	      goto do_add;
	    }
	  break;
	case MULT_EXPR:
	  if (scale == 1 && tree_fits_shwi_p (op1))
	    {
	      int new_scale = tree_to_shwi (op1);
	      /* Only treat this as a scaling operation if the target
		 supports it for at least some offset type.  */
	      if (use_ifn_p
		  && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
						masked_p, vectype, memory_type,
						signed_char_type_node,
						new_scale, &ifn,
						&offset_vectype)
		  && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
						masked_p, vectype, memory_type,
						unsigned_char_type_node,
						new_scale, &ifn,
						&offset_vectype))
		break;
	      scale = new_scale;
	      off = op0;
	      continue;
	    }
	  break;
	case SSA_NAME:
	  /* Look through a plain copy.  */
	  off = op0;
	  continue;
	CASE_CONVERT:
	  if (!POINTER_TYPE_P (TREE_TYPE (op0))
	      && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
	    break;

	  /* Don't include the conversion if the target is happy with
	     the current offset type.  */
	  if (use_ifn_p
	      && !POINTER_TYPE_P (TREE_TYPE (off))
	      && vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
					   masked_p, vectype, memory_type,
					   TREE_TYPE (off), scale, &ifn,
					   &offset_vectype))
	    break;

	  /* Strip a same-precision conversion.  */
	  if (TYPE_PRECISION (TREE_TYPE (op0))
	      == TYPE_PRECISION (TREE_TYPE (off)))
	    {
	      off = op0;
	      continue;
	    }

	  /* Include the conversion if it is widening and we're using
	     the IFN path or the target can handle the converted from
	     offset or the current size is not already the same as the
	     data vector element size.  */
	  if ((TYPE_PRECISION (TREE_TYPE (op0))
	       < TYPE_PRECISION (TREE_TYPE (off)))
	      && (use_ifn_p
		  || (DR_IS_READ (dr)
		      ? (targetm.vectorize.builtin_gather
			 && targetm.vectorize.builtin_gather (vectype,
							      TREE_TYPE (op0),
							      scale))
		      : (targetm.vectorize.builtin_scatter
			 && targetm.vectorize.builtin_scatter (vectype,
							       TREE_TYPE (op0),
							       scale)))
		  || !operand_equal_p (TYPE_SIZE (TREE_TYPE (off)),
				       TYPE_SIZE (TREE_TYPE (vectype)), 0)))
	    {
	      off = op0;
	      offtype = TREE_TYPE (off);
	      STRIP_NOPS (off);
	      continue;
	    }
	  break;
	default:
	  break;
	}
      break;
    }

  /* If at the end OFF still isn't a SSA_NAME or isn't
     defined in the loop, punt.  */
  if (TREE_CODE (off) != SSA_NAME
      || expr_invariant_in_loop_p (loop, off))
    return false;

  if (offtype == NULL_TREE)
    offtype = TREE_TYPE (off);

  /* Work out how the access can be implemented: via internal functions
     (IFN set, DECL null) or via target builtins (DECL set, IFN_LAST).
     Note that lookup failure still leaves IFN == IFN_LAST and
     DECL == NULL_TREE in INFO for the caller to inspect.  */
  if (use_ifn_p)
    {
      if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
				     vectype, memory_type, offtype, scale,
				     &ifn, &offset_vectype))
	ifn = IFN_LAST;
      decl = NULL_TREE;
    }
  else
    {
      if (DR_IS_READ (dr))
	{
	  if (targetm.vectorize.builtin_gather)
	    decl = targetm.vectorize.builtin_gather (vectype, offtype, scale);
	}
      else
	{
	  if (targetm.vectorize.builtin_scatter)
	    decl = targetm.vectorize.builtin_scatter (vectype, offtype, scale);
	}
      ifn = IFN_LAST;
      /* The offset vector type will be read from DECL when needed.  */
      offset_vectype = NULL_TREE;
    }

  /* Fill in the descriptor for the caller.  */
  info->ifn = ifn;
  info->decl = decl;
  info->base = base;
  info->offset = off;
  info->offset_dt = vect_unknown_def_type;
  info->offset_vectype = offset_vectype;
  info->scale = scale;
  info->element_type = TREE_TYPE (vectype);
  info->memory_type = memory_type;
  return true;
}
   4267 
   4268 /* Find the data references in STMT, analyze them with respect to LOOP and
   4269    append them to DATAREFS.  Return false if datarefs in this stmt cannot
   4270    be handled.  */
   4271 
opt_result
vect_find_stmt_data_reference (loop_p loop, gimple *stmt,
			       vec<data_reference_p> *datarefs,
			       vec<int> *dataref_groups, int group_id)
{
  /* We can ignore clobbers for dataref analysis - they are removed during
     loop vectorization and BB vectorization checks dependences with a
     stmt walk.  */
  if (gimple_clobber_p (stmt))
    return opt_result::success ();

  if (gimple_has_volatile_ops (stmt))
    return opt_result::failure_at (stmt, "not vectorized: volatile type: %G",
				   stmt);

  if (stmt_can_throw_internal (cfun, stmt))
    return opt_result::failure_at (stmt,
				   "not vectorized:"
				   " statement can throw an exception: %G",
				   stmt);

  auto_vec<data_reference_p, 2> refs;
  opt_result res = find_data_references_in_stmt (loop, stmt, &refs);
  if (!res)
    return res;

  if (refs.is_empty ())
    return opt_result::success ();

  /* Only a single data reference per statement is supported; free the
     refs we own before failing.  */
  if (refs.length () > 1)
    {
      while (!refs.is_empty ())
	free_data_ref (refs.pop ());
      return opt_result::failure_at (stmt,
				     "not vectorized: more than one "
				     "data ref in stmt: %G", stmt);
    }

  data_reference_p dr = refs.pop ();
  /* The only calls allowed to carry a data reference here are the
     internal masked load/store functions.  */
  if (gcall *call = dyn_cast <gcall *> (stmt))
    if (!gimple_call_internal_p (call)
	|| (gimple_call_internal_fn (call) != IFN_MASK_LOAD
	    && gimple_call_internal_fn (call) != IFN_MASK_STORE))
      {
	free_data_ref (dr);
	return opt_result::failure_at (stmt,
				       "not vectorized: dr in a call %G", stmt);
      }

  if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
      && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
    {
      free_data_ref (dr);
      return opt_result::failure_at (stmt,
				     "not vectorized:"
				     " statement is bitfield access %G", stmt);
    }

  if (DR_BASE_ADDRESS (dr)
      && TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
    {
      free_data_ref (dr);
      return opt_result::failure_at (stmt,
				     "not vectorized:"
				     " base addr of dr is a constant\n");
    }

  /* Check whether this may be a SIMD lane access and adjust the
     DR to make it easier for us to handle it.  */
  if (loop
      && loop->simduid
      && (!DR_BASE_ADDRESS (dr)
	  || !DR_OFFSET (dr)
	  || !DR_INIT (dr)
	  || !DR_STEP (dr)))
    {
      /* Re-analyze the reference against the innermost loop containing
	 the stmt; a SIMD lane access has a constant init and zero step
	 there even though analysis against LOOP failed.  */
      struct data_reference *newdr
	= create_data_ref (NULL, loop_containing_stmt (stmt), DR_REF (dr), stmt,
			   DR_IS_READ (dr), DR_IS_CONDITIONAL_IN_STMT (dr));
      if (DR_BASE_ADDRESS (newdr)
	  && DR_OFFSET (newdr)
	  && DR_INIT (newdr)
	  && DR_STEP (newdr)
	  && TREE_CODE (DR_INIT (newdr)) == INTEGER_CST
	  && integer_zerop (DR_STEP (newdr)))
	{
	  tree base_address = DR_BASE_ADDRESS (newdr);
	  tree off = DR_OFFSET (newdr);
	  tree step = ssize_int (1);
	  if (integer_zerop (off)
	      && TREE_CODE (base_address) == POINTER_PLUS_EXPR)
	    {
	      off = TREE_OPERAND (base_address, 1);
	      base_address = TREE_OPERAND (base_address, 0);
	    }
	  STRIP_NOPS (off);
	  /* Peel a constant multiplication OFF * STEP off the offset.  */
	  if (TREE_CODE (off) == MULT_EXPR
	      && tree_fits_uhwi_p (TREE_OPERAND (off, 1)))
	    {
	      step = TREE_OPERAND (off, 1);
	      off = TREE_OPERAND (off, 0);
	      STRIP_NOPS (off);
	    }
	  if (CONVERT_EXPR_P (off)
	      && (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (off, 0)))
		  < TYPE_PRECISION (TREE_TYPE (off))))
	    off = TREE_OPERAND (off, 0);
	  if (TREE_CODE (off) == SSA_NAME)
	    {
	      gimple *def = SSA_NAME_DEF_STMT (off);
	      /* Look through widening conversion.  */
	      if (is_gimple_assign (def)
		  && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
		{
		  tree rhs1 = gimple_assign_rhs1 (def);
		  if (TREE_CODE (rhs1) == SSA_NAME
		      && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
		      && (TYPE_PRECISION (TREE_TYPE (off))
			  > TYPE_PRECISION (TREE_TYPE (rhs1))))
		    def = SSA_NAME_DEF_STMT (rhs1);
		}
	      /* The variable index must come from an IFN_GOMP_SIMD_LANE
		 call for LOOP's own simduid.  */
	      if (is_gimple_call (def)
		  && gimple_call_internal_p (def)
		  && (gimple_call_internal_fn (def) == IFN_GOMP_SIMD_LANE))
		{
		  tree arg = gimple_call_arg (def, 0);
		  tree reft = TREE_TYPE (DR_REF (newdr));
		  gcc_assert (TREE_CODE (arg) == SSA_NAME);
		  arg = SSA_NAME_VAR (arg);
		  if (arg == loop->simduid
		      /* For now.  */
		      && tree_int_cst_equal (TYPE_SIZE_UNIT (reft), step))
		    {
		      DR_BASE_ADDRESS (newdr) = base_address;
		      DR_OFFSET (newdr) = ssize_int (0);
		      DR_STEP (newdr) = step;
		      DR_OFFSET_ALIGNMENT (newdr) = BIGGEST_ALIGNMENT;
		      DR_STEP_ALIGNMENT (newdr) = highest_pow2_factor (step);
		      /* Mark as simd-lane access: the second argument of
			 the IFN_GOMP_SIMD_LANE call is encoded negatively
			 in AUX; vect_analyze_data_refs decodes it again
			 via -(uintptr_t) dr->aux.  */
		      tree arg2 = gimple_call_arg (def, 1);
		      newdr->aux = (void *) (-1 - tree_to_uhwi (arg2));
		      free_data_ref (dr);
		      datarefs->safe_push (newdr);
		      if (dataref_groups)
			dataref_groups->safe_push (group_id);
		      return opt_result::success ();
		    }
		}
	    }
	}
      /* Not a SIMD lane access; discard the re-analyzed copy and fall
	 through to record the original DR.  */
      free_data_ref (newdr);
    }

  datarefs->safe_push (dr);
  if (dataref_groups)
    dataref_groups->safe_push (group_id);
  return opt_result::success ();
}
   4430 
   4431 /* Function vect_analyze_data_refs.
   4432 
   4433   Find all the data references in the loop or basic block.
   4434 
   4435    The general structure of the analysis of data refs in the vectorizer is as
   4436    follows:
   4437    1- vect_analyze_data_refs(loop/bb): call
   4438       compute_data_dependences_for_loop/bb to find and analyze all data-refs
   4439       in the loop/bb and their dependences.
   4440    2- vect_analyze_dependences(): apply dependence testing using ddrs.
   4441    3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
   4442    4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
   4443 
   4444 */
   4445 
   4446 opt_result
   4447 vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf, bool *fatal)
   4448 {
   4449   class loop *loop = NULL;
   4450   unsigned int i;
   4451   struct data_reference *dr;
   4452   tree scalar_type;
   4453 
   4454   DUMP_VECT_SCOPE ("vect_analyze_data_refs");
   4455 
   4456   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
   4457     loop = LOOP_VINFO_LOOP (loop_vinfo);
   4458 
   4459   /* Go through the data-refs, check that the analysis succeeded.  Update
   4460      pointer from stmt_vec_info struct to DR and vectype.  */
   4461 
   4462   vec<data_reference_p> datarefs = vinfo->shared->datarefs;
   4463   FOR_EACH_VEC_ELT (datarefs, i, dr)
   4464     {
   4465       enum { SG_NONE, GATHER, SCATTER } gatherscatter = SG_NONE;
   4466       poly_uint64 vf;
   4467 
   4468       gcc_assert (DR_REF (dr));
   4469       stmt_vec_info stmt_info = vinfo->lookup_stmt (DR_STMT (dr));
   4470       gcc_assert (!stmt_info->dr_aux.dr);
   4471       stmt_info->dr_aux.dr = dr;
   4472       stmt_info->dr_aux.stmt = stmt_info;
   4473 
   4474       /* Check that analysis of the data-ref succeeded.  */
   4475       if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
   4476 	  || !DR_STEP (dr))
   4477         {
   4478 	  bool maybe_gather
   4479 	    = DR_IS_READ (dr)
   4480 	      && !TREE_THIS_VOLATILE (DR_REF (dr));
   4481 	  bool maybe_scatter
   4482 	    = DR_IS_WRITE (dr)
   4483 	      && !TREE_THIS_VOLATILE (DR_REF (dr))
   4484 	      && (targetm.vectorize.builtin_scatter != NULL
   4485 		  || supports_vec_scatter_store_p ());
   4486 
   4487 	  /* If target supports vector gather loads or scatter stores,
   4488 	     see if they can't be used.  */
   4489 	  if (is_a <loop_vec_info> (vinfo)
   4490 	      && !nested_in_vect_loop_p (loop, stmt_info))
   4491 	    {
   4492 	      if (maybe_gather || maybe_scatter)
   4493 		{
   4494 		  if (maybe_gather)
   4495 		    gatherscatter = GATHER;
   4496 		  else
   4497 		    gatherscatter = SCATTER;
   4498 		}
   4499 	    }
   4500 
   4501 	  if (gatherscatter == SG_NONE)
   4502 	    {
   4503 	      if (dump_enabled_p ())
   4504 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   4505 				 "not vectorized: data ref analysis "
   4506 				 "failed %G", stmt_info->stmt);
   4507 	      if (is_a <bb_vec_info> (vinfo))
   4508 		{
   4509 		  /* In BB vectorization the ref can still participate
   4510 		     in dependence analysis, we just can't vectorize it.  */
   4511 		  STMT_VINFO_VECTORIZABLE (stmt_info) = false;
   4512 		  continue;
   4513 		}
   4514 	      return opt_result::failure_at (stmt_info->stmt,
   4515 					     "not vectorized:"
   4516 					     " data ref analysis failed: %G",
   4517 					     stmt_info->stmt);
   4518 	    }
   4519         }
   4520 
   4521       /* See if this was detected as SIMD lane access.  */
   4522       if (dr->aux == (void *)-1
   4523 	  || dr->aux == (void *)-2
   4524 	  || dr->aux == (void *)-3
   4525 	  || dr->aux == (void *)-4)
   4526 	{
   4527 	  if (nested_in_vect_loop_p (loop, stmt_info))
   4528 	    return opt_result::failure_at (stmt_info->stmt,
   4529 					   "not vectorized:"
   4530 					   " data ref analysis failed: %G",
   4531 					   stmt_info->stmt);
   4532 	  STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)
   4533 	    = -(uintptr_t) dr->aux;
   4534 	}
   4535 
   4536       tree base = get_base_address (DR_REF (dr));
   4537       if (base && VAR_P (base) && DECL_NONALIASED (base))
   4538 	{
   4539           if (dump_enabled_p ())
   4540 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   4541 			     "not vectorized: base object not addressable "
   4542 			     "for stmt: %G", stmt_info->stmt);
   4543           if (is_a <bb_vec_info> (vinfo))
   4544 	    {
   4545 	      /* In BB vectorization the ref can still participate
   4546 	         in dependence analysis, we just can't vectorize it.  */
   4547 	      STMT_VINFO_VECTORIZABLE (stmt_info) = false;
   4548 	      continue;
   4549 	    }
   4550 	  return opt_result::failure_at (stmt_info->stmt,
   4551 					 "not vectorized: base object not"
   4552 					 " addressable for stmt: %G",
   4553 					 stmt_info->stmt);
   4554 	}
   4555 
   4556       if (is_a <loop_vec_info> (vinfo)
   4557 	  && DR_STEP (dr)
   4558 	  && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
   4559 	{
   4560 	  if (nested_in_vect_loop_p (loop, stmt_info))
   4561 	    return opt_result::failure_at (stmt_info->stmt,
   4562 					   "not vectorized: "
   4563 					   "not suitable for strided load %G",
   4564 					   stmt_info->stmt);
   4565 	  STMT_VINFO_STRIDED_P (stmt_info) = true;
   4566 	}
   4567 
   4568       /* Update DR field in stmt_vec_info struct.  */
   4569 
      /* If the dataref is in an inner-loop of the loop that is considered
	 for vectorization, we also want to analyze the access relative to
   4572 	 the outer-loop (DR contains information only relative to the
   4573 	 inner-most enclosing loop).  We do that by building a reference to the
   4574 	 first location accessed by the inner-loop, and analyze it relative to
   4575 	 the outer-loop.  */
   4576       if (loop && nested_in_vect_loop_p (loop, stmt_info))
   4577 	{
   4578 	  /* Build a reference to the first location accessed by the
   4579 	     inner loop: *(BASE + INIT + OFFSET).  By construction,
   4580 	     this address must be invariant in the inner loop, so we
   4581 	     can consider it as being used in the outer loop.  */
   4582 	  tree base = unshare_expr (DR_BASE_ADDRESS (dr));
   4583 	  tree offset = unshare_expr (DR_OFFSET (dr));
   4584 	  tree init = unshare_expr (DR_INIT (dr));
   4585 	  tree init_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset),
   4586 					  init, offset);
   4587 	  tree init_addr = fold_build_pointer_plus (base, init_offset);
   4588 	  tree init_ref = build_fold_indirect_ref (init_addr);
   4589 
   4590 	  if (dump_enabled_p ())
   4591 	    dump_printf_loc (MSG_NOTE, vect_location,
   4592 			     "analyze in outer loop: %T\n", init_ref);
   4593 
   4594 	  opt_result res
   4595 	    = dr_analyze_innermost (&STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info),
   4596 				    init_ref, loop, stmt_info->stmt);
   4597 	  if (!res)
   4598 	    /* dr_analyze_innermost already explained the failure.  */
   4599 	    return res;
   4600 
   4601           if (dump_enabled_p ())
   4602 	    dump_printf_loc (MSG_NOTE, vect_location,
   4603 			     "\touter base_address: %T\n"
   4604 			     "\touter offset from base address: %T\n"
   4605 			     "\touter constant offset from base address: %T\n"
   4606 			     "\touter step: %T\n"
   4607 			     "\touter base alignment: %d\n\n"
   4608 			     "\touter base misalignment: %d\n"
   4609 			     "\touter offset alignment: %d\n"
   4610 			     "\touter step alignment: %d\n",
   4611 			     STMT_VINFO_DR_BASE_ADDRESS (stmt_info),
   4612 			     STMT_VINFO_DR_OFFSET (stmt_info),
   4613 			     STMT_VINFO_DR_INIT (stmt_info),
   4614 			     STMT_VINFO_DR_STEP (stmt_info),
   4615 			     STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info),
   4616 			     STMT_VINFO_DR_BASE_MISALIGNMENT (stmt_info),
   4617 			     STMT_VINFO_DR_OFFSET_ALIGNMENT (stmt_info),
   4618 			     STMT_VINFO_DR_STEP_ALIGNMENT (stmt_info));
   4619 	}
   4620 
   4621       /* Set vectype for STMT.  */
   4622       scalar_type = TREE_TYPE (DR_REF (dr));
   4623       tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
   4624       if (!vectype)
   4625         {
   4626           if (dump_enabled_p ())
   4627             {
   4628               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   4629                                "not vectorized: no vectype for stmt: %G",
   4630 			       stmt_info->stmt);
   4631               dump_printf (MSG_MISSED_OPTIMIZATION, " scalar_type: ");
   4632               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_DETAILS,
   4633                                  scalar_type);
   4634               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
   4635             }
   4636 
   4637           if (is_a <bb_vec_info> (vinfo))
   4638 	    {
   4639 	      /* No vector type is fine, the ref can still participate
   4640 	         in dependence analysis, we just can't vectorize it.  */
   4641 	      STMT_VINFO_VECTORIZABLE (stmt_info) = false;
   4642 	      continue;
   4643 	    }
   4644 	  if (fatal)
   4645 	    *fatal = false;
   4646 	  return opt_result::failure_at (stmt_info->stmt,
   4647 					 "not vectorized:"
   4648 					 " no vectype for stmt: %G"
   4649 					 " scalar_type: %T\n",
   4650 					 stmt_info->stmt, scalar_type);
   4651         }
   4652       else
   4653 	{
   4654 	  if (dump_enabled_p ())
   4655 	    dump_printf_loc (MSG_NOTE, vect_location,
   4656 			     "got vectype for stmt: %G%T\n",
   4657 			     stmt_info->stmt, vectype);
   4658 	}
   4659 
   4660       /* Adjust the minimal vectorization factor according to the
   4661 	 vector type.  */
   4662       vf = TYPE_VECTOR_SUBPARTS (vectype);
   4663       *min_vf = upper_bound (*min_vf, vf);
   4664 
   4665       /* Leave the BB vectorizer to pick the vector type later, based on
   4666 	 the final dataref group size and SLP node size.  */
   4667       if (is_a <loop_vec_info> (vinfo))
   4668 	STMT_VINFO_VECTYPE (stmt_info) = vectype;
   4669 
   4670       if (gatherscatter != SG_NONE)
   4671 	{
   4672 	  gather_scatter_info gs_info;
   4673 	  if (!vect_check_gather_scatter (stmt_info,
   4674 					  as_a <loop_vec_info> (vinfo),
   4675 					  &gs_info)
   4676 	      || !get_vectype_for_scalar_type (vinfo,
   4677 					       TREE_TYPE (gs_info.offset)))
   4678 	    {
   4679 	      if (fatal)
   4680 		*fatal = false;
   4681 	      return opt_result::failure_at
   4682 			(stmt_info->stmt,
   4683 			 (gatherscatter == GATHER)
   4684 			 ? "not vectorized: not suitable for gather load %G"
   4685 			 : "not vectorized: not suitable for scatter store %G",
   4686 			 stmt_info->stmt);
   4687 	    }
   4688 	  STMT_VINFO_GATHER_SCATTER_P (stmt_info) = gatherscatter;
   4689 	}
   4690     }
   4691 
   4692   /* We used to stop processing and prune the list here.  Verify we no
   4693      longer need to.  */
   4694   gcc_assert (i == datarefs.length ());
   4695 
   4696   return opt_result::success ();
   4697 }
   4698 
   4699 
   4700 /* Function vect_get_new_vect_var.
   4701 
   4702    Returns a name for a new variable.  The current naming scheme appends the
   4703    prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
   4704    the name of vectorizer generated variables, and appends that to NAME if
   4705    provided.  */
   4706 
   4707 tree
   4708 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
   4709 {
   4710   const char *prefix;
   4711   tree new_vect_var;
   4712 
   4713   switch (var_kind)
   4714   {
   4715   case vect_simple_var:
   4716     prefix = "vect";
   4717     break;
   4718   case vect_scalar_var:
   4719     prefix = "stmp";
   4720     break;
   4721   case vect_mask_var:
   4722     prefix = "mask";
   4723     break;
   4724   case vect_pointer_var:
   4725     prefix = "vectp";
   4726     break;
   4727   default:
   4728     gcc_unreachable ();
   4729   }
   4730 
   4731   if (name)
   4732     {
   4733       char* tmp = concat (prefix, "_", name, NULL);
   4734       new_vect_var = create_tmp_reg (type, tmp);
   4735       free (tmp);
   4736     }
   4737   else
   4738     new_vect_var = create_tmp_reg (type, prefix);
   4739 
   4740   return new_vect_var;
   4741 }
   4742 
   4743 /* Like vect_get_new_vect_var but return an SSA name.  */
   4744 
   4745 tree
   4746 vect_get_new_ssa_name (tree type, enum vect_var_kind var_kind, const char *name)
   4747 {
   4748   const char *prefix;
   4749   tree new_vect_var;
   4750 
   4751   switch (var_kind)
   4752   {
   4753   case vect_simple_var:
   4754     prefix = "vect";
   4755     break;
   4756   case vect_scalar_var:
   4757     prefix = "stmp";
   4758     break;
   4759   case vect_pointer_var:
   4760     prefix = "vectp";
   4761     break;
   4762   default:
   4763     gcc_unreachable ();
   4764   }
   4765 
   4766   if (name)
   4767     {
   4768       char* tmp = concat (prefix, "_", name, NULL);
   4769       new_vect_var = make_temp_ssa_name (type, NULL, tmp);
   4770       free (tmp);
   4771     }
   4772   else
   4773     new_vect_var = make_temp_ssa_name (type, NULL, prefix);
   4774 
   4775   return new_vect_var;
   4776 }
   4777 
   4778 /* Duplicate points-to info on NAME from DR_INFO.  */
   4779 
   4780 static void
   4781 vect_duplicate_ssa_name_ptr_info (tree name, dr_vec_info *dr_info)
   4782 {
   4783   duplicate_ssa_name_ptr_info (name, DR_PTR_INFO (dr_info->dr));
   4784   /* DR_PTR_INFO is for a base SSA name, not including constant or
   4785      variable offsets in the ref so its alignment info does not apply.  */
   4786   mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (name));
   4787 }
   4788 
   4789 /* Function vect_create_addr_base_for_vector_ref.
   4790 
   4791    Create an expression that computes the address of the first memory location
   4792    that will be accessed for a data reference.
   4793 
   4794    Input:
   4795    STMT_INFO: The statement containing the data reference.
   4796    NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
   4797    OFFSET: Optional. If supplied, it is be added to the initial address.
   4798    LOOP:    Specify relative to which loop-nest should the address be computed.
   4799             For example, when the dataref is in an inner-loop nested in an
   4800 	    outer-loop that is now being vectorized, LOOP can be either the
   4801 	    outer-loop, or the inner-loop.  The first memory location accessed
   4802 	    by the following dataref ('in' points to short):
   4803 
   4804 		for (i=0; i<N; i++)
   4805 		   for (j=0; j<M; j++)
   4806 		     s += in[i+j]
   4807 
   4808 	    is as follows:
   4809 	    if LOOP=i_loop:	&in		(relative to i_loop)
   4810 	    if LOOP=j_loop: 	&in+i*2B	(relative to j_loop)
   4811 
   4812    Output:
   4813    1. Return an SSA_NAME whose value is the address of the memory location of
   4814       the first vector of the data reference.
   4815    2. If new_stmt_list is not NULL_TREE after return then the caller must insert
   4816       these statement(s) which define the returned SSA_NAME.
   4817 
   4818    FORNOW: We are only handling array accesses with step 1.  */
   4819 
   4820 tree
   4821 vect_create_addr_base_for_vector_ref (vec_info *vinfo, stmt_vec_info stmt_info,
   4822 				      gimple_seq *new_stmt_list,
   4823 				      tree offset)
   4824 {
   4825   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
   4826   struct data_reference *dr = dr_info->dr;
   4827   const char *base_name;
   4828   tree addr_base;
   4829   tree dest;
   4830   gimple_seq seq = NULL;
   4831   tree vect_ptr_type;
   4832   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   4833   innermost_loop_behavior *drb = vect_dr_behavior (vinfo, dr_info);
   4834 
   4835   tree data_ref_base = unshare_expr (drb->base_address);
   4836   tree base_offset = unshare_expr (get_dr_vinfo_offset (vinfo, dr_info, true));
   4837   tree init = unshare_expr (drb->init);
   4838 
   4839   if (loop_vinfo)
   4840     base_name = get_name (data_ref_base);
   4841   else
   4842     {
   4843       base_offset = ssize_int (0);
   4844       init = ssize_int (0);
   4845       base_name = get_name (DR_REF (dr));
   4846     }
   4847 
   4848   /* Create base_offset */
   4849   base_offset = size_binop (PLUS_EXPR,
   4850 			    fold_convert (sizetype, base_offset),
   4851 			    fold_convert (sizetype, init));
   4852 
   4853   if (offset)
   4854     {
   4855       offset = fold_convert (sizetype, offset);
   4856       base_offset = fold_build2 (PLUS_EXPR, sizetype,
   4857 				 base_offset, offset);
   4858     }
   4859 
   4860   /* base + base_offset */
   4861   if (loop_vinfo)
   4862     addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
   4863   else
   4864     addr_base = build1 (ADDR_EXPR,
   4865 			build_pointer_type (TREE_TYPE (DR_REF (dr))),
   4866 			/* Strip zero offset components since we don't need
   4867 			   them and they can confuse late diagnostics if
   4868 			   we CSE them wrongly.  See PR106904 for example.  */
   4869 			unshare_expr (strip_zero_offset_components
   4870 								(DR_REF (dr))));
   4871 
   4872   vect_ptr_type = build_pointer_type (TREE_TYPE (DR_REF (dr)));
   4873   dest = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, base_name);
   4874   addr_base = force_gimple_operand (addr_base, &seq, true, dest);
   4875   gimple_seq_add_seq (new_stmt_list, seq);
   4876 
   4877   if (DR_PTR_INFO (dr)
   4878       && TREE_CODE (addr_base) == SSA_NAME
   4879       /* We should only duplicate pointer info to newly created SSA names.  */
   4880       && SSA_NAME_VAR (addr_base) == dest)
   4881     {
   4882       gcc_assert (!SSA_NAME_PTR_INFO (addr_base));
   4883       vect_duplicate_ssa_name_ptr_info (addr_base, dr_info);
   4884     }
   4885 
   4886   if (dump_enabled_p ())
   4887     dump_printf_loc (MSG_NOTE, vect_location, "created %T\n", addr_base);
   4888 
   4889   return addr_base;
   4890 }
   4891 
   4892 
   4893 /* Function vect_create_data_ref_ptr.
   4894 
   4895    Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
   4896    location accessed in the loop by STMT_INFO, along with the def-use update
   4897    chain to appropriately advance the pointer through the loop iterations.
   4898    Also set aliasing information for the pointer.  This pointer is used by
   4899    the callers to this function to create a memory reference expression for
   4900    vector load/store access.
   4901 
   4902    Input:
   4903    1. STMT_INFO: a stmt that references memory. Expected to be of the form
   4904          GIMPLE_ASSIGN <name, data-ref> or
   4905 	 GIMPLE_ASSIGN <data-ref, name>.
   4906    2. AGGR_TYPE: the type of the reference, which should be either a vector
   4907         or an array.
   4908    3. AT_LOOP: the loop where the vector memref is to be created.
   4909    4. OFFSET (optional): a byte offset to be added to the initial address
   4910 	accessed by the data-ref in STMT_INFO.
   4911    5. BSI: location where the new stmts are to be placed if there is no loop
   4912    6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
   4913         pointing to the initial address.
   7. IV_STEP (optional, defaults to NULL): the amount that should be added
   4915 	to the IV during each iteration of the loop.  NULL says to move
   4916 	by one copy of AGGR_TYPE up or down, depending on the step of the
   4917 	data reference.
   4918 
   4919    Output:
   4920    1. Declare a new ptr to vector_type, and have it point to the base of the
      data reference (initial address accessed by the data reference).
   4922       For example, for vector of type V8HI, the following code is generated:
   4923 
   4924       v8hi *ap;
   4925       ap = (v8hi *)initial_address;
   4926 
      if OFFSET is not supplied:
         initial_address = &a[init];
      if OFFSET is supplied:
	 initial_address = &a[init] + OFFSET;
   4933 
   4934       Return the initial_address in INITIAL_ADDRESS.
   4935 
   4936    2. If ONLY_INIT is true, just return the initial pointer.  Otherwise, also
   4937       update the pointer in each iteration of the loop.
   4938 
   4939       Return the increment stmt that updates the pointer in PTR_INCR.
   4940 
   4941    3. Return the pointer.  */
   4942 
   4943 tree
   4944 vect_create_data_ref_ptr (vec_info *vinfo, stmt_vec_info stmt_info,
   4945 			  tree aggr_type, class loop *at_loop, tree offset,
   4946 			  tree *initial_address, gimple_stmt_iterator *gsi,
   4947 			  gimple **ptr_incr, bool only_init,
   4948 			  tree iv_step)
   4949 {
   4950   const char *base_name;
   4951   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   4952   class loop *loop = NULL;
   4953   bool nested_in_vect_loop = false;
   4954   class loop *containing_loop = NULL;
   4955   tree aggr_ptr_type;
   4956   tree aggr_ptr;
   4957   tree new_temp;
   4958   gimple_seq new_stmt_list = NULL;
   4959   edge pe = NULL;
   4960   basic_block new_bb;
   4961   tree aggr_ptr_init;
   4962   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
   4963   struct data_reference *dr = dr_info->dr;
   4964   tree aptr;
   4965   gimple_stmt_iterator incr_gsi;
   4966   bool insert_after;
   4967   tree indx_before_incr, indx_after_incr;
   4968   gimple *incr;
   4969   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
   4970 
   4971   gcc_assert (iv_step != NULL_TREE
   4972 	      || TREE_CODE (aggr_type) == ARRAY_TYPE
   4973 	      || TREE_CODE (aggr_type) == VECTOR_TYPE);
   4974 
   4975   if (loop_vinfo)
   4976     {
   4977       loop = LOOP_VINFO_LOOP (loop_vinfo);
   4978       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
   4979       containing_loop = (gimple_bb (stmt_info->stmt))->loop_father;
   4980       pe = loop_preheader_edge (loop);
   4981     }
   4982   else
   4983     {
   4984       gcc_assert (bb_vinfo);
   4985       only_init = true;
   4986       *ptr_incr = NULL;
   4987     }
   4988 
   4989   /* Create an expression for the first address accessed by this load
   4990      in LOOP.  */
   4991   base_name = get_name (DR_BASE_ADDRESS (dr));
   4992 
   4993   if (dump_enabled_p ())
   4994     {
   4995       tree dr_base_type = TREE_TYPE (DR_BASE_OBJECT (dr));
   4996       dump_printf_loc (MSG_NOTE, vect_location,
   4997                        "create %s-pointer variable to type: %T",
   4998 		       get_tree_code_name (TREE_CODE (aggr_type)),
   4999 		       aggr_type);
   5000       if (TREE_CODE (dr_base_type) == ARRAY_TYPE)
   5001         dump_printf (MSG_NOTE, "  vectorizing an array ref: ");
   5002       else if (TREE_CODE (dr_base_type) == VECTOR_TYPE)
   5003         dump_printf (MSG_NOTE, "  vectorizing a vector ref: ");
   5004       else if (TREE_CODE (dr_base_type) == RECORD_TYPE)
   5005         dump_printf (MSG_NOTE, "  vectorizing a record based array ref: ");
   5006       else
   5007         dump_printf (MSG_NOTE, "  vectorizing a pointer ref: ");
   5008       dump_printf (MSG_NOTE, "%T\n", DR_BASE_OBJECT (dr));
   5009     }
   5010 
   5011   /* (1) Create the new aggregate-pointer variable.
   5012      Vector and array types inherit the alias set of their component
   5013      type by default so we need to use a ref-all pointer if the data
   5014      reference does not conflict with the created aggregated data
   5015      reference because it is not addressable.  */
   5016   bool need_ref_all = false;
   5017   if (!alias_sets_conflict_p (get_alias_set (aggr_type),
   5018 			      get_alias_set (DR_REF (dr))))
   5019     need_ref_all = true;
   5020   /* Likewise for any of the data references in the stmt group.  */
   5021   else if (DR_GROUP_SIZE (stmt_info) > 1)
   5022     {
   5023       stmt_vec_info sinfo = DR_GROUP_FIRST_ELEMENT (stmt_info);
   5024       do
   5025 	{
   5026 	  struct data_reference *sdr = STMT_VINFO_DATA_REF (sinfo);
   5027 	  if (!alias_sets_conflict_p (get_alias_set (aggr_type),
   5028 				      get_alias_set (DR_REF (sdr))))
   5029 	    {
   5030 	      need_ref_all = true;
   5031 	      break;
   5032 	    }
   5033 	  sinfo = DR_GROUP_NEXT_ELEMENT (sinfo);
   5034 	}
   5035       while (sinfo);
   5036     }
   5037   aggr_ptr_type = build_pointer_type_for_mode (aggr_type, ptr_mode,
   5038 					       need_ref_all);
   5039   aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, base_name);
   5040 
   5041 
   5042   /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
   5043      vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
   5044      def-use update cycles for the pointer: one relative to the outer-loop
   5045      (LOOP), which is what steps (3) and (4) below do.  The other is relative
   5046      to the inner-loop (which is the inner-most loop containing the dataref),
   5047      and this is done be step (5) below.
   5048 
   5049      When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
   5050      inner-most loop, and so steps (3),(4) work the same, and step (5) is
   5051      redundant.  Steps (3),(4) create the following:
   5052 
   5053 	vp0 = &base_addr;
   5054 	LOOP:	vp1 = phi(vp0,vp2)
   5055 		...
   5056 		...
   5057 		vp2 = vp1 + step
   5058 		goto LOOP
   5059 
   5060      If there is an inner-loop nested in loop, then step (5) will also be
   5061      applied, and an additional update in the inner-loop will be created:
   5062 
   5063 	vp0 = &base_addr;
   5064 	LOOP:   vp1 = phi(vp0,vp2)
   5065 		...
   5066         inner:     vp3 = phi(vp1,vp4)
   5067 	           vp4 = vp3 + inner_step
   5068 	           if () goto inner
   5069 		...
   5070 		vp2 = vp1 + step
   5071 		if () goto LOOP   */
   5072 
   5073   /* (2) Calculate the initial address of the aggregate-pointer, and set
   5074      the aggregate-pointer to point to it before the loop.  */
   5075 
   5076   /* Create: (&(base[init_val]+offset) in the loop preheader.  */
   5077 
   5078   new_temp = vect_create_addr_base_for_vector_ref (vinfo,
   5079 						   stmt_info, &new_stmt_list,
   5080 						   offset);
   5081   if (new_stmt_list)
   5082     {
   5083       if (pe)
   5084         {
   5085           new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
   5086           gcc_assert (!new_bb);
   5087         }
   5088       else
   5089         gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
   5090     }
   5091 
   5092   *initial_address = new_temp;
   5093   aggr_ptr_init = new_temp;
   5094 
   5095   /* (3) Handle the updating of the aggregate-pointer inside the loop.
   5096      This is needed when ONLY_INIT is false, and also when AT_LOOP is the
   5097      inner-loop nested in LOOP (during outer-loop vectorization).  */
   5098 
   5099   /* No update in loop is required.  */
   5100   if (only_init && (!loop_vinfo || at_loop == loop))
   5101     aptr = aggr_ptr_init;
   5102   else
   5103     {
   5104       /* Accesses to invariant addresses should be handled specially
   5105 	 by the caller.  */
   5106       tree step = vect_dr_behavior (vinfo, dr_info)->step;
   5107       gcc_assert (!integer_zerop (step));
   5108 
   5109       if (iv_step == NULL_TREE)
   5110 	{
   5111 	  /* The step of the aggregate pointer is the type size,
   5112 	     negated for downward accesses.  */
   5113 	  iv_step = TYPE_SIZE_UNIT (aggr_type);
   5114 	  if (tree_int_cst_sgn (step) == -1)
   5115 	    iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
   5116 	}
   5117 
   5118       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
   5119 
   5120       create_iv (aggr_ptr_init,
   5121 		 fold_convert (aggr_ptr_type, iv_step),
   5122 		 aggr_ptr, loop, &incr_gsi, insert_after,
   5123 		 &indx_before_incr, &indx_after_incr);
   5124       incr = gsi_stmt (incr_gsi);
   5125 
   5126       /* Copy the points-to information if it exists. */
   5127       if (DR_PTR_INFO (dr))
   5128 	{
   5129 	  vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr_info);
   5130 	  vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr_info);
   5131 	}
   5132       if (ptr_incr)
   5133 	*ptr_incr = incr;
   5134 
   5135       aptr = indx_before_incr;
   5136     }
   5137 
   5138   if (!nested_in_vect_loop || only_init)
   5139     return aptr;
   5140 
   5141 
   5142   /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
   5143      nested in LOOP, if exists.  */
   5144 
   5145   gcc_assert (nested_in_vect_loop);
   5146   if (!only_init)
   5147     {
   5148       standard_iv_increment_position (containing_loop, &incr_gsi,
   5149 				      &insert_after);
   5150       create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
   5151 		 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
   5152 		 &indx_after_incr);
   5153       incr = gsi_stmt (incr_gsi);
   5154 
   5155       /* Copy the points-to information if it exists. */
   5156       if (DR_PTR_INFO (dr))
   5157 	{
   5158 	  vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr_info);
   5159 	  vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr_info);
   5160 	}
   5161       if (ptr_incr)
   5162 	*ptr_incr = incr;
   5163 
   5164       return indx_before_incr;
   5165     }
   5166   else
   5167     gcc_unreachable ();
   5168 }
   5169 
   5170 
   5171 /* Function bump_vector_ptr
   5172 
   5173    Increment a pointer (to a vector type) by vector-size. If requested,
   5174    i.e. if PTR-INCR is given, then also connect the new increment stmt
   5175    to the existing def-use update-chain of the pointer, by modifying
   5176    the PTR_INCR as illustrated below:
   5177 
   5178    The pointer def-use update-chain before this function:
   5179                         DATAREF_PTR = phi (p_0, p_2)
   5180                         ....
   5181         PTR_INCR:       p_2 = DATAREF_PTR + step
   5182 
   5183    The pointer def-use update-chain after this function:
   5184                         DATAREF_PTR = phi (p_0, p_2)
   5185                         ....
   5186                         NEW_DATAREF_PTR = DATAREF_PTR + BUMP
   5187                         ....
   5188         PTR_INCR:       p_2 = NEW_DATAREF_PTR + step
   5189 
   5190    Input:
   5191    DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
   5192                  in the loop.
   5193    PTR_INCR - optional. The stmt that updates the pointer in each iteration of
   5194 	      the loop.  The increment amount across iterations is expected
   5195 	      to be vector_size.
   5196    BSI - location where the new update stmt is to be placed.
   5197    STMT_INFO - the original scalar memory-access stmt that is being vectorized.
   5198    BUMP - optional. The offset by which to bump the pointer. If not given,
   5199 	  the offset is assumed to be vector_size.
   5200 
   5201    Output: Return NEW_DATAREF_PTR as illustrated above.
   5202 
   5203 */
   5204 
   5205 tree
   5206 bump_vector_ptr (vec_info *vinfo,
   5207 		 tree dataref_ptr, gimple *ptr_incr, gimple_stmt_iterator *gsi,
   5208 		 stmt_vec_info stmt_info, tree bump)
   5209 {
   5210   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
   5211   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   5212   tree update = TYPE_SIZE_UNIT (vectype);
   5213   gimple *incr_stmt;
   5214   ssa_op_iter iter;
   5215   use_operand_p use_p;
   5216   tree new_dataref_ptr;
   5217 
   5218   if (bump)
   5219     update = bump;
   5220 
   5221   if (TREE_CODE (dataref_ptr) == SSA_NAME)
   5222     new_dataref_ptr = copy_ssa_name (dataref_ptr);
   5223   else
   5224     new_dataref_ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
   5225   incr_stmt = gimple_build_assign (new_dataref_ptr, POINTER_PLUS_EXPR,
   5226 				   dataref_ptr, update);
   5227   vect_finish_stmt_generation (vinfo, stmt_info, incr_stmt, gsi);
   5228   /* Fold the increment, avoiding excessive chains use-def chains of
   5229      those, leading to compile-time issues for passes until the next
   5230      forwprop pass which would do this as well.  */
   5231   gimple_stmt_iterator fold_gsi = gsi_for_stmt (incr_stmt);
   5232   if (fold_stmt (&fold_gsi, follow_all_ssa_edges))
   5233     {
   5234       incr_stmt = gsi_stmt (fold_gsi);
   5235       update_stmt (incr_stmt);
   5236     }
   5237 
   5238   /* Copy the points-to information if it exists. */
   5239   if (DR_PTR_INFO (dr))
   5240     {
   5241       duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
   5242       mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr));
   5243     }
   5244 
   5245   if (!ptr_incr)
   5246     return new_dataref_ptr;
   5247 
   5248   /* Update the vector-pointer's cross-iteration increment.  */
   5249   FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
   5250     {
   5251       tree use = USE_FROM_PTR (use_p);
   5252 
   5253       if (use == dataref_ptr)
   5254         SET_USE (use_p, new_dataref_ptr);
   5255       else
   5256         gcc_assert (operand_equal_p (use, update, 0));
   5257     }
   5258 
   5259   return new_dataref_ptr;
   5260 }
   5261 
   5262 
   5263 /* Copy memory reference info such as base/clique from the SRC reference
   5264    to the DEST MEM_REF.  */
   5265 
   5266 void
   5267 vect_copy_ref_info (tree dest, tree src)
   5268 {
   5269   if (TREE_CODE (dest) != MEM_REF)
   5270     return;
   5271 
   5272   tree src_base = src;
   5273   while (handled_component_p (src_base))
   5274     src_base = TREE_OPERAND (src_base, 0);
   5275   if (TREE_CODE (src_base) != MEM_REF
   5276       && TREE_CODE (src_base) != TARGET_MEM_REF)
   5277     return;
   5278 
   5279   MR_DEPENDENCE_CLIQUE (dest) = MR_DEPENDENCE_CLIQUE (src_base);
   5280   MR_DEPENDENCE_BASE (dest) = MR_DEPENDENCE_BASE (src_base);
   5281 }
   5282 
   5283 
   5284 /* Function vect_create_destination_var.
   5285 
   5286    Create a new temporary of type VECTYPE.  */
   5287 
   5288 tree
   5289 vect_create_destination_var (tree scalar_dest, tree vectype)
   5290 {
   5291   tree vec_dest;
   5292   const char *name;
   5293   char *new_name;
   5294   tree type;
   5295   enum vect_var_kind kind;
   5296 
   5297   kind = vectype
   5298     ? VECTOR_BOOLEAN_TYPE_P (vectype)
   5299     ? vect_mask_var
   5300     : vect_simple_var
   5301     : vect_scalar_var;
   5302   type = vectype ? vectype : TREE_TYPE (scalar_dest);
   5303 
   5304   gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
   5305 
   5306   name = get_name (scalar_dest);
   5307   if (name)
   5308     new_name = xasprintf ("%s_%u", name, SSA_NAME_VERSION (scalar_dest));
   5309   else
   5310     new_name = xasprintf ("_%u", SSA_NAME_VERSION (scalar_dest));
   5311   vec_dest = vect_get_new_vect_var (type, kind, new_name);
   5312   free (new_name);
   5313 
   5314   return vec_dest;
   5315 }
   5316 
   5317 /* Function vect_grouped_store_supported.
   5318 
   5319    Returns TRUE if interleave high and interleave low permutations
   5320    are supported, and FALSE otherwise.  */
   5321 
   5322 bool
   5323 vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
   5324 {
   5325   machine_mode mode = TYPE_MODE (vectype);
   5326 
   5327   /* vect_permute_store_chain requires the group size to be equal to 3 or
   5328      be a power of two.  */
   5329   if (count != 3 && exact_log2 (count) == -1)
   5330     {
   5331       if (dump_enabled_p ())
   5332 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   5333 			 "the size of the group of accesses"
   5334 			 " is not a power of 2 or not eqaul to 3\n");
   5335       return false;
   5336     }
   5337 
   5338   /* Check that the permutation is supported.  */
   5339   if (VECTOR_MODE_P (mode))
   5340     {
   5341       unsigned int i;
   5342       if (count == 3)
   5343 	{
   5344 	  unsigned int j0 = 0, j1 = 0, j2 = 0;
   5345 	  unsigned int i, j;
   5346 
   5347 	  unsigned int nelt;
   5348 	  if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
   5349 	    {
   5350 	      if (dump_enabled_p ())
   5351 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   5352 				 "cannot handle groups of 3 stores for"
   5353 				 " variable-length vectors\n");
   5354 	      return false;
   5355 	    }
   5356 
   5357 	  vec_perm_builder sel (nelt, nelt, 1);
   5358 	  sel.quick_grow (nelt);
   5359 	  vec_perm_indices indices;
   5360 	  for (j = 0; j < 3; j++)
   5361 	    {
   5362 	      int nelt0 = ((3 - j) * nelt) % 3;
   5363 	      int nelt1 = ((3 - j) * nelt + 1) % 3;
   5364 	      int nelt2 = ((3 - j) * nelt + 2) % 3;
   5365 	      for (i = 0; i < nelt; i++)
   5366 		{
   5367 		  if (3 * i + nelt0 < nelt)
   5368 		    sel[3 * i + nelt0] = j0++;
   5369 		  if (3 * i + nelt1 < nelt)
   5370 		    sel[3 * i + nelt1] = nelt + j1++;
   5371 		  if (3 * i + nelt2 < nelt)
   5372 		    sel[3 * i + nelt2] = 0;
   5373 		}
   5374 	      indices.new_vector (sel, 2, nelt);
   5375 	      if (!can_vec_perm_const_p (mode, indices))
   5376 		{
   5377 		  if (dump_enabled_p ())
   5378 		    dump_printf (MSG_MISSED_OPTIMIZATION,
   5379 				 "permutation op not supported by target.\n");
   5380 		  return false;
   5381 		}
   5382 
   5383 	      for (i = 0; i < nelt; i++)
   5384 		{
   5385 		  if (3 * i + nelt0 < nelt)
   5386 		    sel[3 * i + nelt0] = 3 * i + nelt0;
   5387 		  if (3 * i + nelt1 < nelt)
   5388 		    sel[3 * i + nelt1] = 3 * i + nelt1;
   5389 		  if (3 * i + nelt2 < nelt)
   5390 		    sel[3 * i + nelt2] = nelt + j2++;
   5391 		}
   5392 	      indices.new_vector (sel, 2, nelt);
   5393 	      if (!can_vec_perm_const_p (mode, indices))
   5394 		{
   5395 		  if (dump_enabled_p ())
   5396 		    dump_printf (MSG_MISSED_OPTIMIZATION,
   5397 				 "permutation op not supported by target.\n");
   5398 		  return false;
   5399 		}
   5400 	    }
   5401 	  return true;
   5402 	}
   5403       else
   5404 	{
   5405 	  /* If length is not equal to 3 then only power of 2 is supported.  */
   5406 	  gcc_assert (pow2p_hwi (count));
   5407 	  poly_uint64 nelt = GET_MODE_NUNITS (mode);
   5408 
   5409 	  /* The encoding has 2 interleaved stepped patterns.  */
   5410 	  vec_perm_builder sel (nelt, 2, 3);
   5411 	  sel.quick_grow (6);
   5412 	  for (i = 0; i < 3; i++)
   5413 	    {
   5414 	      sel[i * 2] = i;
   5415 	      sel[i * 2 + 1] = i + nelt;
   5416 	    }
   5417 	  vec_perm_indices indices (sel, 2, nelt);
   5418 	  if (can_vec_perm_const_p (mode, indices))
   5419 	    {
   5420 	      for (i = 0; i < 6; i++)
   5421 		sel[i] += exact_div (nelt, 2);
   5422 	      indices.new_vector (sel, 2, nelt);
   5423 	      if (can_vec_perm_const_p (mode, indices))
   5424 		return true;
   5425 	    }
   5426 	}
   5427     }
   5428 
   5429   if (dump_enabled_p ())
   5430     dump_printf (MSG_MISSED_OPTIMIZATION,
   5431 		 "permutation op not supported by target.\n");
   5432   return false;
   5433 }
   5434 
   5435 
   5436 /* Return TRUE if vec_{mask_}store_lanes is available for COUNT vectors of
   5437    type VECTYPE.  MASKED_P says whether the masked form is needed.  */
   5438 
   5439 bool
   5440 vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
   5441 			    bool masked_p)
   5442 {
   5443   if (masked_p)
   5444     return vect_lanes_optab_supported_p ("vec_mask_store_lanes",
   5445 					 vec_mask_store_lanes_optab,
   5446 					 vectype, count);
   5447   else
   5448     return vect_lanes_optab_supported_p ("vec_store_lanes",
   5449 					 vec_store_lanes_optab,
   5450 					 vectype, count);
   5451 }
   5452 
   5453 
   5454 /* Function vect_permute_store_chain.
   5455 
   5456    Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
   5457    a power of 2 or equal to 3, generate interleave_high/low stmts to reorder
   5458    the data correctly for the stores.  Return the final references for stores
   5459    in RESULT_CHAIN.
   5460 
   5461    E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
   5462    The input is 4 vectors each containing 8 elements.  We assign a number to
   5463    each element, the input sequence is:
   5464 
   5465    1st vec:   0  1  2  3  4  5  6  7
   5466    2nd vec:   8  9 10 11 12 13 14 15
   5467    3rd vec:  16 17 18 19 20 21 22 23
   5468    4th vec:  24 25 26 27 28 29 30 31
   5469 
   5470    The output sequence should be:
   5471 
   5472    1st vec:  0  8 16 24  1  9 17 25
   5473    2nd vec:  2 10 18 26  3 11 19 27
   3rd vec:  4 12 20 28  5 13 21 29
   5475    4th vec:  6 14 22 30  7 15 23 31
   5476 
   5477    i.e., we interleave the contents of the four vectors in their order.
   5478 
   5479    We use interleave_high/low instructions to create such output.  The input of
   5480    each interleave_high/low operation is two vectors:
   5481    1st vec    2nd vec
   5482    0 1 2 3    4 5 6 7
   5483    the even elements of the result vector are obtained left-to-right from the
   5484    high/low elements of the first vector.  The odd elements of the result are
   5485    obtained left-to-right from the high/low elements of the second vector.
   5486    The output of interleave_high will be:   0 4 1 5
   5487    and of interleave_low:                   2 6 3 7
   5488 
   5489 
   5490    The permutation is done in log LENGTH stages.  In each stage interleave_high
   5491    and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
   5492    where the first argument is taken from the first half of DR_CHAIN and the
   second argument from its second half.
   5494    In our example,
   5495 
   5496    I1: interleave_high (1st vec, 3rd vec)
   5497    I2: interleave_low (1st vec, 3rd vec)
   5498    I3: interleave_high (2nd vec, 4th vec)
   5499    I4: interleave_low (2nd vec, 4th vec)
   5500 
   5501    The output for the first stage is:
   5502 
   5503    I1:  0 16  1 17  2 18  3 19
   5504    I2:  4 20  5 21  6 22  7 23
   5505    I3:  8 24  9 25 10 26 11 27
   5506    I4: 12 28 13 29 14 30 15 31
   5507 
   5508    The output of the second stage, i.e. the final result is:
   5509 
   5510    I1:  0  8 16 24  1  9 17 25
   5511    I2:  2 10 18 26  3 11 19 27
   I3:  4 12 20 28  5 13 21 29
   5513    I4:  6 14 22 30  7 15 23 31.  */
   5514 
void
vect_permute_store_chain (vec_info *vinfo, vec<tree> &dr_chain,
			  unsigned int length,
			  stmt_vec_info stmt_info,
			  gimple_stmt_iterator *gsi,
			  vec<tree> *result_chain)
{
  tree vect1, vect2, high, low;
  gimple *perm_stmt;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree perm_mask_low, perm_mask_high;
  tree data_ref;
  tree perm3_mask_low, perm3_mask_high;
  unsigned int i, j, n, log_length = exact_log2 (length);

  /* Start with RESULT_CHAIN as a copy of DR_CHAIN; its entries are
     overwritten below with the permuted vectors.  */
  result_chain->quick_grow (length);
  memcpy (result_chain->address (), dr_chain.address (),
	  length * sizeof (tree));

  if (length == 3)
    {
      /* vect_grouped_store_supported ensures that this is constant.  */
      unsigned int nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
      /* j0/j1/j2 count how many elements have been consumed so far from
	 each of the three input vectors.  */
      unsigned int j0 = 0, j1 = 0, j2 = 0;

      vec_perm_builder sel (nelt, nelt, 1);
      sel.quick_grow (nelt);
      vec_perm_indices indices;
      /* Each output vector is built with two VEC_PERM_EXPRs: the first
	 interleaves elements of dr_chain[0] and dr_chain[1]; the second
	 fills the remaining every-third slot from dr_chain[2].  */
      for (j = 0; j < 3; j++)
        {
	  /* Offsets (mod 3) of the output positions that receive elements
	     from the first, second and third input vector respectively.  */
	  int nelt0 = ((3 - j) * nelt) % 3;
	  int nelt1 = ((3 - j) * nelt + 1) % 3;
	  int nelt2 = ((3 - j) * nelt + 2) % 3;

	  for (i = 0; i < nelt; i++)
	    {
	      if (3 * i + nelt0 < nelt)
		sel[3 * i + nelt0] = j0++;
	      if (3 * i + nelt1 < nelt)
		sel[3 * i + nelt1] = nelt + j1++;
	      /* Placeholder; these slots are overwritten by the second
		 permutation below.  */
	      if (3 * i + nelt2 < nelt)
		sel[3 * i + nelt2] = 0;
	    }
	  indices.new_vector (sel, 2, nelt);
	  perm3_mask_low = vect_gen_perm_mask_checked (vectype, indices);

	  /* Second mask: keep the already-interleaved elements in place
	     and pull the remaining ones from the third vector.  */
	  for (i = 0; i < nelt; i++)
	    {
	      if (3 * i + nelt0 < nelt)
		sel[3 * i + nelt0] = 3 * i + nelt0;
	      if (3 * i + nelt1 < nelt)
		sel[3 * i + nelt1] = 3 * i + nelt1;
	      if (3 * i + nelt2 < nelt)
		sel[3 * i + nelt2] = nelt + j2++;
	    }
	  indices.new_vector (sel, 2, nelt);
	  perm3_mask_high = vect_gen_perm_mask_checked (vectype, indices);

	  vect1 = dr_chain[0];
	  vect2 = dr_chain[1];

	  /* Create interleaving stmt:
	     low = VEC_PERM_EXPR <vect1, vect2,
				  {j, nelt, *, j + 1, nelt + j + 1, *,
				   j + 2, nelt + j + 2, *, ...}>  */
	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
					   vect2, perm3_mask_low);
	  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);

	  vect1 = data_ref;
	  vect2 = dr_chain[2];
	  /* Create interleaving stmt:
	     low = VEC_PERM_EXPR <vect1, vect2,
				  {0, 1, nelt + j, 3, 4, nelt + j + 1,
				   6, 7, nelt + j + 2, ...}>  */
	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
					   vect2, perm3_mask_high);
	  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
	  (*result_chain)[j] = data_ref;
	}
    }
  else
    {
      /* If length is not equal to 3 then only power of 2 is supported.  */
      gcc_assert (pow2p_hwi (length));

      /* The encoding has 2 interleaved stepped patterns.  */
      poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (vectype);
      vec_perm_builder sel (nelt, 2, 3);
      sel.quick_grow (6);
      for (i = 0; i < 3; i++)
	{
	  sel[i * 2] = i;
	  sel[i * 2 + 1] = i + nelt;
	}
	vec_perm_indices indices (sel, 2, nelt);
	perm_mask_high = vect_gen_perm_mask_checked (vectype, indices);

	/* The "low" mask selects the second halves of the two inputs.  */
	for (i = 0; i < 6; i++)
	  sel[i] += exact_div (nelt, 2);
	indices.new_vector (sel, 2, nelt);
	perm_mask_low = vect_gen_perm_mask_checked (vectype, indices);

	/* log2(LENGTH) interleaving stages; each stage pairs the first
	   half of DR_CHAIN with its second half.  */
	for (i = 0, n = log_length; i < n; i++)
	  {
	    for (j = 0; j < length/2; j++)
	      {
		vect1 = dr_chain[j];
		vect2 = dr_chain[j+length/2];

		/* Create interleaving stmt:
		   high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1,
							...}>  */
		high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
		perm_stmt = gimple_build_assign (high, VEC_PERM_EXPR, vect1,
						 vect2, perm_mask_high);
		vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
		(*result_chain)[2*j] = high;

		/* Create interleaving stmt:
		   low = VEC_PERM_EXPR <vect1, vect2,
					{nelt/2, nelt*3/2, nelt/2+1, nelt*3/2+1,
					 ...}>  */
		low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
		perm_stmt = gimple_build_assign (low, VEC_PERM_EXPR, vect1,
						 vect2, perm_mask_low);
		vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
		(*result_chain)[2*j+1] = low;
	      }
	    /* Feed this stage's outputs into the next stage.  */
	    memcpy (dr_chain.address (), result_chain->address (),
		    length * sizeof (tree));
	  }
    }
}
   5651 
   5652 /* Function vect_setup_realignment
   5653 
   5654    This function is called when vectorizing an unaligned load using
   5655    the dr_explicit_realign[_optimized] scheme.
   5656    This function generates the following code at the loop prolog:
   5657 
   5658       p = initial_addr;
   5659    x  msq_init = *(floor(p));   # prolog load
   5660       realignment_token = call target_builtin;
   5661     loop:
   5662    x  msq = phi (msq_init, ---)
   5663 
   5664    The stmts marked with x are generated only for the case of
   5665    dr_explicit_realign_optimized.
   5666 
   5667    The code above sets up a new (vector) pointer, pointing to the first
   5668    location accessed by STMT_INFO, and a "floor-aligned" load using that
   5669    pointer.  It also generates code to compute the "realignment-token"
   5670    (if the relevant target hook was defined), and creates a phi-node at the
   5671    loop-header bb whose arguments are the result of the prolog-load (created
   5672    by this function) and the result of a load that takes place in the loop
   5673    (to be created by the caller to this function).
   5674 
   5675    For the case of dr_explicit_realign_optimized:
   5676    The caller to this function uses the phi-result (msq) to create the
   5677    realignment code inside the loop, and sets up the missing phi argument,
   5678    as follows:
   5679     loop:
   5680       msq = phi (msq_init, lsq)
   5681       lsq = *(floor(p'));        # load in loop
   5682       result = realign_load (msq, lsq, realignment_token);
   5683 
   5684    For the case of dr_explicit_realign:
   5685     loop:
   5686       msq = *(floor(p)); 	# load in loop
   5687       p' = p + (VS-1);
   5688       lsq = *(floor(p'));	# load in loop
   5689       result = realign_load (msq, lsq, realignment_token);
   5690 
   5691    Input:
   5692    STMT_INFO - (scalar) load stmt to be vectorized. This load accesses
   5693 	       a memory location that may be unaligned.
   5694    BSI - place where new code is to be inserted.
   5695    ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
   5696 			      is used.
   5697 
   5698    Output:
   5699    REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
   5700                        target hook, if defined.
   5701    Return value - the result of the loop-header phi node.  */
   5702 
tree
vect_setup_realignment (vec_info *vinfo, stmt_vec_info stmt_info,
			gimple_stmt_iterator *gsi, tree *realignment_token,
			enum dr_alignment_support alignment_support_scheme,
			tree init_addr,
			class loop **at_loop)
{
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  struct data_reference *dr = dr_info->dr;
  class loop *loop = NULL;
  edge pe = NULL;
  tree scalar_dest = gimple_assign_lhs (stmt_info->stmt);
  tree vec_dest;
  gimple *inc;
  tree ptr;
  tree data_ref;
  basic_block new_bb;
  tree msq_init = NULL_TREE;
  tree new_temp;
  gphi *phi_stmt;
  tree msq = NULL_TREE;
  gimple_seq stmts = NULL;
  bool compute_in_loop = false;
  bool nested_in_vect_loop = false;
  class loop *containing_loop = (gimple_bb (stmt_info->stmt))->loop_father;
  class loop *loop_for_initial_load = NULL;

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
    }

  /* Only the two explicit-realignment schemes are handled here.  */
  gcc_assert (alignment_support_scheme == dr_explicit_realign
	      || alignment_support_scheme == dr_explicit_realign_optimized);

  /* We need to generate three things:
     1. the misalignment computation
     2. the extra vector load (for the optimized realignment scheme).
     3. the phi node for the two vectors from which the realignment is
      done (for the optimized realignment scheme).  */

  /* 1. Determine where to generate the misalignment computation.

     If INIT_ADDR is NULL_TREE, this indicates that the misalignment
     calculation will be generated by this function, outside the loop (in the
     preheader).  Otherwise, INIT_ADDR had already been computed for us by the
     caller, inside the loop.

     Background: If the misalignment remains fixed throughout the iterations of
     the loop, then both realignment schemes are applicable, and also the
     misalignment computation can be done outside LOOP.  This is because we are
     vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
     are a multiple of VS (the Vector Size), and therefore the misalignment in
     different vectorized LOOP iterations is always the same.
     The problem arises only if the memory access is in an inner-loop nested
     inside LOOP, which is now being vectorized using outer-loop vectorization.
     This is the only case when the misalignment of the memory access may not
     remain fixed throughout the iterations of the inner-loop (as explained in
     detail in vect_supportable_dr_alignment).  In this case, not only is the
     optimized realignment scheme not applicable, but also the misalignment
     computation (and generation of the realignment token that is passed to
     REALIGN_LOAD) have to be done inside the loop.

     In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
     or not, which in turn determines if the misalignment is computed inside
     the inner-loop, or outside LOOP.  */

  if (init_addr != NULL_TREE || !loop_vinfo)
    {
      compute_in_loop = true;
      gcc_assert (alignment_support_scheme == dr_explicit_realign);
    }


  /* 2. Determine where to generate the extra vector load.

     For the optimized realignment scheme, instead of generating two vector
     loads in each iteration, we generate a single extra vector load in the
     preheader of the loop, and in each iteration reuse the result of the
     vector load from the previous iteration.  In case the memory access is in
     an inner-loop nested inside LOOP, which is now being vectorized using
     outer-loop vectorization, we need to determine whether this initial vector
     load should be generated at the preheader of the inner-loop, or can be
     generated at the preheader of LOOP.  If the memory access has no evolution
     in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
     to be generated inside LOOP (in the preheader of the inner-loop).  */

  if (nested_in_vect_loop)
    {
      /* A zero outer-loop step means the access is invariant in LOOP, so
	 the initial load may live in LOOP's preheader rather than the
	 inner loop's.  */
      tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
      bool invariant_in_outerloop =
            (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
      loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
    }
  else
    loop_for_initial_load = loop;
  if (at_loop)
    *at_loop = loop_for_initial_load;

  if (loop_for_initial_load)
    pe = loop_preheader_edge (loop_for_initial_load);

  /* 3. For the case of the optimized realignment, create the first vector
      load at the loop preheader.  */

  if (alignment_support_scheme == dr_explicit_realign_optimized)
    {
      /* Create msq_init = *(floor(p1)) in the loop preheader  */
      gassign *new_stmt;

      gcc_assert (!compute_in_loop);
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      ptr = vect_create_data_ref_ptr (vinfo, stmt_info, vectype,
				      loop_for_initial_load, NULL_TREE,
				      &init_addr, NULL, &inc, true);
      if (TREE_CODE (ptr) == SSA_NAME)
	new_temp = copy_ssa_name (ptr);
      else
	new_temp = make_ssa_name (TREE_TYPE (ptr));
      /* Floor-align the pointer: new_temp = ptr & -align.  */
      poly_uint64 align = DR_TARGET_ALIGNMENT (dr_info);
      tree type = TREE_TYPE (ptr);
      new_stmt = gimple_build_assign
		   (new_temp, BIT_AND_EXPR, ptr,
		    fold_build2 (MINUS_EXPR, type,
				 build_int_cst (type, 0),
				 build_int_cst (type, align)));
      new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
      gcc_assert (!new_bb);
      /* Emit the prolog load from the floor-aligned address, on the
	 preheader edge if there is one, otherwise before GSI.  */
      data_ref
	= build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
		  build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
      vect_copy_ref_info (data_ref, DR_REF (dr));
      new_stmt = gimple_build_assign (vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
      if (pe)
        {
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
         gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);

      msq_init = gimple_assign_lhs (new_stmt);
    }

  /* 4. Create realignment token using a target builtin, if available.
      It is done either inside the containing loop, or before LOOP (as
      determined above).  */

  if (targetm.vectorize.builtin_mask_for_load)
    {
      gcall *new_stmt;
      tree builtin_decl;

      /* Compute INIT_ADDR - the initial addressed accessed by this memref.  */
      if (!init_addr)
	{
	  /* Generate the INIT_ADDR computation outside LOOP.  */
	  init_addr = vect_create_addr_base_for_vector_ref (vinfo,
							    stmt_info, &stmts,
							    NULL_TREE);
          if (loop)
            {
   	      pe = loop_preheader_edge (loop);
	      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
	      gcc_assert (!new_bb);
            }
          else
             gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
	}

      builtin_decl = targetm.vectorize.builtin_mask_for_load ();
      new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
      vec_dest =
	vect_create_destination_var (scalar_dest,
				     gimple_call_return_type (new_stmt));
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);

      if (compute_in_loop)
	gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
      else
	{
	  /* Generate the misalignment computation outside LOOP.  */
	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}

      *realignment_token = gimple_call_lhs (new_stmt);

      /* The result of the CALL_EXPR to this builtin is determined from
         the value of the parameter and no global variables are touched
         which makes the builtin a "const" function.  Requiring the
         builtin to have the "const" attribute makes it unnecessary
         to call mark_call_clobbered.  */
      gcc_assert (TREE_READONLY (builtin_decl));
    }

  /* For dr_explicit_realign the caller finishes the job; no phi is
     needed and MSQ is still NULL_TREE here.  */
  if (alignment_support_scheme == dr_explicit_realign)
    return msq;

  gcc_assert (!compute_in_loop);
  gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);


  /* 5. Create msq = phi <msq_init, lsq> in loop  */

  /* The loop-latch argument (lsq) is filled in later by the caller.  */
  pe = loop_preheader_edge (containing_loop);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);
  msq = make_ssa_name (vec_dest);
  phi_stmt = create_phi_node (msq, containing_loop->header);
  add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);

  return msq;
}
   5923 
   5924 
   5925 /* Function vect_grouped_load_supported.
   5926 
   5927    COUNT is the size of the load group (the number of statements plus the
   5928    number of gaps).  SINGLE_ELEMENT_P is true if there is actually
   5929    only one statement, with a gap of COUNT - 1.
   5930 
   5931    Returns true if a suitable permute exists.  */
   5932 
   5933 bool
   5934 vect_grouped_load_supported (tree vectype, bool single_element_p,
   5935 			     unsigned HOST_WIDE_INT count)
   5936 {
   5937   machine_mode mode = TYPE_MODE (vectype);
   5938 
   5939   /* If this is single-element interleaving with an element distance
   5940      that leaves unused vector loads around punt - we at least create
   5941      very sub-optimal code in that case (and blow up memory,
   5942      see PR65518).  */
   5943   if (single_element_p && maybe_gt (count, TYPE_VECTOR_SUBPARTS (vectype)))
   5944     {
   5945       if (dump_enabled_p ())
   5946 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   5947 			 "single-element interleaving not supported "
   5948 			 "for not adjacent vector loads\n");
   5949       return false;
   5950     }
   5951 
   5952   /* vect_permute_load_chain requires the group size to be equal to 3 or
   5953      be a power of two.  */
   5954   if (count != 3 && exact_log2 (count) == -1)
   5955     {
   5956       if (dump_enabled_p ())
   5957 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   5958 			 "the size of the group of accesses"
   5959 			 " is not a power of 2 or not equal to 3\n");
   5960       return false;
   5961     }
   5962 
   5963   /* Check that the permutation is supported.  */
   5964   if (VECTOR_MODE_P (mode))
   5965     {
   5966       unsigned int i, j;
   5967       if (count == 3)
   5968 	{
   5969 	  unsigned int nelt;
   5970 	  if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
   5971 	    {
   5972 	      if (dump_enabled_p ())
   5973 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   5974 				 "cannot handle groups of 3 loads for"
   5975 				 " variable-length vectors\n");
   5976 	      return false;
   5977 	    }
   5978 
   5979 	  vec_perm_builder sel (nelt, nelt, 1);
   5980 	  sel.quick_grow (nelt);
   5981 	  vec_perm_indices indices;
   5982 	  unsigned int k;
   5983 	  for (k = 0; k < 3; k++)
   5984 	    {
   5985 	      for (i = 0; i < nelt; i++)
   5986 		if (3 * i + k < 2 * nelt)
   5987 		  sel[i] = 3 * i + k;
   5988 		else
   5989 		  sel[i] = 0;
   5990 	      indices.new_vector (sel, 2, nelt);
   5991 	      if (!can_vec_perm_const_p (mode, indices))
   5992 		{
   5993 		  if (dump_enabled_p ())
   5994 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   5995 				     "shuffle of 3 loads is not supported by"
   5996 				     " target\n");
   5997 		  return false;
   5998 		}
   5999 	      for (i = 0, j = 0; i < nelt; i++)
   6000 		if (3 * i + k < 2 * nelt)
   6001 		  sel[i] = i;
   6002 		else
   6003 		  sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
   6004 	      indices.new_vector (sel, 2, nelt);
   6005 	      if (!can_vec_perm_const_p (mode, indices))
   6006 		{
   6007 		  if (dump_enabled_p ())
   6008 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6009 				     "shuffle of 3 loads is not supported by"
   6010 				     " target\n");
   6011 		  return false;
   6012 		}
   6013 	    }
   6014 	  return true;
   6015 	}
   6016       else
   6017 	{
   6018 	  /* If length is not equal to 3 then only power of 2 is supported.  */
   6019 	  gcc_assert (pow2p_hwi (count));
   6020 	  poly_uint64 nelt = GET_MODE_NUNITS (mode);
   6021 
   6022 	  /* The encoding has a single stepped pattern.  */
   6023 	  vec_perm_builder sel (nelt, 1, 3);
   6024 	  sel.quick_grow (3);
   6025 	  for (i = 0; i < 3; i++)
   6026 	    sel[i] = i * 2;
   6027 	  vec_perm_indices indices (sel, 2, nelt);
   6028 	  if (can_vec_perm_const_p (mode, indices))
   6029 	    {
   6030 	      for (i = 0; i < 3; i++)
   6031 		sel[i] = i * 2 + 1;
   6032 	      indices.new_vector (sel, 2, nelt);
   6033 	      if (can_vec_perm_const_p (mode, indices))
   6034 		return true;
   6035 	    }
   6036         }
   6037     }
   6038 
   6039   if (dump_enabled_p ())
   6040     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6041 		     "extract even/odd not supported by target\n");
   6042   return false;
   6043 }
   6044 
   6045 /* Return TRUE if vec_{masked_}load_lanes is available for COUNT vectors of
   6046    type VECTYPE.  MASKED_P says whether the masked form is needed.  */
   6047 
   6048 bool
   6049 vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
   6050 			   bool masked_p)
   6051 {
   6052   if (masked_p)
   6053     return vect_lanes_optab_supported_p ("vec_mask_load_lanes",
   6054 					 vec_mask_load_lanes_optab,
   6055 					 vectype, count);
   6056   else
   6057     return vect_lanes_optab_supported_p ("vec_load_lanes",
   6058 					 vec_load_lanes_optab,
   6059 					 vectype, count);
   6060 }
   6061 
   6062 /* Function vect_permute_load_chain.
   6063 
   6064    Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
   6065    a power of 2 or equal to 3, generate extract_even/odd stmts to reorder
   6066    the input data correctly.  Return the final references for loads in
   6067    RESULT_CHAIN.
   6068 
   6069    E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
   6070    The input is 4 vectors each containing 8 elements. We assign a number to each
   6071    element, the input sequence is:
   6072 
   6073    1st vec:   0  1  2  3  4  5  6  7
   6074    2nd vec:   8  9 10 11 12 13 14 15
   6075    3rd vec:  16 17 18 19 20 21 22 23
   6076    4th vec:  24 25 26 27 28 29 30 31
   6077 
   6078    The output sequence should be:
   6079 
   6080    1st vec:  0 4  8 12 16 20 24 28
   6081    2nd vec:  1 5  9 13 17 21 25 29
   6082    3rd vec:  2 6 10 14 18 22 26 30
   6083    4th vec:  3 7 11 15 19 23 27 31
   6084 
   6085    i.e., the first output vector should contain the first elements of each
   6086    interleaving group, etc.
   6087 
   6088    We use extract_even/odd instructions to create such output.  The input of
   6089    each extract_even/odd operation is two vectors
   6090    1st vec    2nd vec
   6091    0 1 2 3    4 5 6 7
   6092 
   6093    and the output is the vector of extracted even/odd elements.  The output of
   6094    extract_even will be:   0 2 4 6
   6095    and of extract_odd:     1 3 5 7
   6096 
   6097 
   6098    The permutation is done in log LENGTH stages.  In each stage extract_even
   6099    and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
   6100    their order.  In our example,
   6101 
   6102    E1: extract_even (1st vec, 2nd vec)
   6103    E2: extract_odd (1st vec, 2nd vec)
   6104    E3: extract_even (3rd vec, 4th vec)
   6105    E4: extract_odd (3rd vec, 4th vec)
   6106 
   6107    The output for the first stage will be:
   6108 
   6109    E1:  0  2  4  6  8 10 12 14
   6110    E2:  1  3  5  7  9 11 13 15
   6111    E3: 16 18 20 22 24 26 28 30
   6112    E4: 17 19 21 23 25 27 29 31
   6113 
   6114    In order to proceed and create the correct sequence for the next stage (or
   6115    for the correct output, if the second stage is the last one, as in our
   6116    example), we first put the output of extract_even operation and then the
   6117    output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
   6118    The input for the second stage is:
   6119 
   6120    1st vec (E1):  0  2  4  6  8 10 12 14
   6121    2nd vec (E3): 16 18 20 22 24 26 28 30
   6122    3rd vec (E2):  1  3  5  7  9 11 13 15
   6123    4th vec (E4): 17 19 21 23 25 27 29 31
   6124 
   6125    The output of the second stage:
   6126 
   6127    E1: 0 4  8 12 16 20 24 28
   6128    E2: 2 6 10 14 18 22 26 30
   6129    E3: 1 5  9 13 17 21 25 29
   6130    E4: 3 7 11 15 19 23 27 31
   6131 
   6132    And RESULT_CHAIN after reordering:
   6133 
   6134    1st vec (E1):  0 4  8 12 16 20 24 28
   6135    2nd vec (E3):  1 5  9 13 17 21 25 29
   6136    3rd vec (E2):  2 6 10 14 18 22 26 30
   6137    4th vec (E4):  3 7 11 15 19 23 27 31.  */
   6138 
static void
vect_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain,
			 unsigned int length,
			 stmt_vec_info stmt_info,
			 gimple_stmt_iterator *gsi,
			 vec<tree> *result_chain)
{
  tree data_ref, first_vect, second_vect;
  tree perm_mask_even, perm_mask_odd;
  tree perm3_mask_low, perm3_mask_high;
  gimple *perm_stmt;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* For power-of-two LENGTH the permutation is done in LOG_LENGTH
     extract-even/extract-odd rounds.  */
  unsigned int i, j, log_length = exact_log2 (length);

  /* Seed RESULT_CHAIN with the input vectors; the entries are replaced
     by the permuted vectors below.  */
  result_chain->quick_grow (length);
  memcpy (result_chain->address (), dr_chain.address (),
	  length * sizeof (tree));

  if (length == 3)
    {
      /* vect_grouped_load_supported ensures that this is constant.  */
      unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
      unsigned int k;

      vec_perm_builder sel (nelt, nelt, 1);
      sel.quick_grow (nelt);
      vec_perm_indices indices;
      /* K selects which of the three interleaved fields is gathered
	 into output vector K.  */
      for (k = 0; k < 3; k++)
	{
	  /* First mask: take elements K, K+3, K+6, ... that reside in
	     the first two input vectors; the remaining lanes get a dummy
	     index (0) and are fixed up by the second permute below.  */
	  for (i = 0; i < nelt; i++)
	    if (3 * i + k < 2 * nelt)
	      sel[i] = 3 * i + k;
	    else
	      sel[i] = 0;
	  indices.new_vector (sel, 2, nelt);
	  perm3_mask_low = vect_gen_perm_mask_checked (vectype, indices);

	  /* Second mask: keep the lanes produced above and fill the tail
	     lanes from the third input vector.  */
	  for (i = 0, j = 0; i < nelt; i++)
	    if (3 * i + k < 2 * nelt)
	      sel[i] = i;
	    else
	      sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
	  indices.new_vector (sel, 2, nelt);
	  perm3_mask_high = vect_gen_perm_mask_checked (vectype, indices);

	  first_vect = dr_chain[0];
	  second_vect = dr_chain[1];

	  /* Create interleaving stmt (low part of):
	     low = VEC_PERM_EXPR <first_vect, second_vect2, {k, 3 + k, 6 + k,
							     ...}>  */
	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, first_vect,
					   second_vect, perm3_mask_low);
	  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);

	  /* Create interleaving stmt (high part of):
	     high = VEC_PERM_EXPR <first_vect, second_vect2, {k, 3 + k, 6 + k,
							      ...}>  */
	  first_vect = data_ref;
	  second_vect = dr_chain[2];
	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, first_vect,
					   second_vect, perm3_mask_high);
	  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
	  (*result_chain)[k] = data_ref;
	}
    }
  else
    {
      /* If length is not equal to 3 then only power of 2 is supported.  */
      gcc_assert (pow2p_hwi (length));

      /* The encoding has a single stepped pattern.  */
      poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (vectype);
      vec_perm_builder sel (nelt, 1, 3);
      sel.quick_grow (3);
      /* Mask selecting the even elements of a two-vector concatenation:
	 { 0, 2, 4, ... }.  */
      for (i = 0; i < 3; ++i)
	sel[i] = i * 2;
      vec_perm_indices indices (sel, 2, nelt);
      perm_mask_even = vect_gen_perm_mask_checked (vectype, indices);

      /* Mask selecting the odd elements: { 1, 3, 5, ... }.  */
      for (i = 0; i < 3; ++i)
	sel[i] = i * 2 + 1;
      indices.new_vector (sel, 2, nelt);
      perm_mask_odd = vect_gen_perm_mask_checked (vectype, indices);

      /* Each round halves the interleaving stride; after LOG_LENGTH rounds
	 the elements are in linear order (see the function comment).  */
      for (i = 0; i < log_length; i++)
	{
	  for (j = 0; j < length; j += 2)
	    {
	      first_vect = dr_chain[j];
	      second_vect = dr_chain[j+1];

	      /* data_ref = permute_even (first_data_ref, second_data_ref);  */
	      data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_even");
	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
					       first_vect, second_vect,
					       perm_mask_even);
	      vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
	      (*result_chain)[j/2] = data_ref;

	      /* data_ref = permute_odd (first_data_ref, second_data_ref);  */
	      data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_odd");
	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
					       first_vect, second_vect,
					       perm_mask_odd);
	      vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
	      (*result_chain)[j/2+length/2] = data_ref;
	    }
	  /* Feed this round's output back in as the next round's input.  */
	  memcpy (dr_chain.address (), result_chain->address (),
		  length * sizeof (tree));
	}
    }
}
   6254 
   6255 /* Function vect_shift_permute_load_chain.
   6256 
   6257    Given a chain of loads in DR_CHAIN of LENGTH 2 or 3, generate
   6258    sequence of stmts to reorder the input data accordingly.
   6259    Return the final references for loads in RESULT_CHAIN.
   Return true if successful, false otherwise.
   6261 
   6262    E.g., LENGTH is 3 and the scalar type is short, i.e., VF is 8.
   6263    The input is 3 vectors each containing 8 elements.  We assign a
   6264    number to each element, the input sequence is:
   6265 
   6266    1st vec:   0  1  2  3  4  5  6  7
   6267    2nd vec:   8  9 10 11 12 13 14 15
   6268    3rd vec:  16 17 18 19 20 21 22 23
   6269 
   6270    The output sequence should be:
   6271 
   6272    1st vec:  0 3 6  9 12 15 18 21
   6273    2nd vec:  1 4 7 10 13 16 19 22
   6274    3rd vec:  2 5 8 11 14 17 20 23
   6275 
   6276    We use 3 shuffle instructions and 3 * 3 - 1 shifts to create such output.
   6277 
   6278    First we shuffle all 3 vectors to get correct elements order:
   6279 
   6280    1st vec:  ( 0  3  6) ( 1  4  7) ( 2  5)
   6281    2nd vec:  ( 8 11 14) ( 9 12 15) (10 13)
   6282    3rd vec:  (16 19 22) (17 20 23) (18 21)
   6283 
   6284    Next we unite and shift vector 3 times:
   6285 
   6286    1st step:
   6287      shift right by 6 the concatenation of:
   6288      "1st vec" and  "2nd vec"
   6289        ( 0  3  6) ( 1  4  7) |( 2  5) _ ( 8 11 14) ( 9 12 15)| (10 13)
   6290      "2nd vec" and  "3rd vec"
   6291        ( 8 11 14) ( 9 12 15) |(10 13) _ (16 19 22) (17 20 23)| (18 21)
   6292      "3rd vec" and  "1st vec"
   6293        (16 19 22) (17 20 23) |(18 21) _ ( 0  3  6) ( 1  4  7)| ( 2  5)
   6294 			     | New vectors                   |
   6295 
   6296      So that now new vectors are:
   6297 
   6298      1st vec:  ( 2  5) ( 8 11 14) ( 9 12 15)
   6299      2nd vec:  (10 13) (16 19 22) (17 20 23)
   6300      3rd vec:  (18 21) ( 0  3  6) ( 1  4  7)
   6301 
   6302    2nd step:
   6303      shift right by 5 the concatenation of:
   6304      "1st vec" and  "3rd vec"
   6305        ( 2  5) ( 8 11 14) |( 9 12 15) _ (18 21) ( 0  3  6)| ( 1  4  7)
   6306      "2nd vec" and  "1st vec"
   6307        (10 13) (16 19 22) |(17 20 23) _ ( 2  5) ( 8 11 14)| ( 9 12 15)
   6308      "3rd vec" and  "2nd vec"
   6309        (18 21) ( 0  3  6) |( 1  4  7) _ (10 13) (16 19 22)| (17 20 23)
   6310 			  | New vectors                   |
   6311 
   6312      So that now new vectors are:
   6313 
   6314      1st vec:  ( 9 12 15) (18 21) ( 0  3  6)
   6315      2nd vec:  (17 20 23) ( 2  5) ( 8 11 14)
   6316      3rd vec:  ( 1  4  7) (10 13) (16 19 22) READY
   6317 
   6318    3rd step:
   6319      shift right by 5 the concatenation of:
   6320      "1st vec" and  "1st vec"
   6321        ( 9 12 15) (18 21) |( 0  3  6) _ ( 9 12 15) (18 21)| ( 0  3  6)
   6322      shift right by 3 the concatenation of:
   6323      "2nd vec" and  "2nd vec"
   6324                (17 20 23) |( 2  5) ( 8 11 14) _ (17 20 23)| ( 2  5) ( 8 11 14)
   6325 			  | New vectors                   |
   6326 
   6327      So that now all vectors are READY:
   6328      1st vec:  ( 0  3  6) ( 9 12 15) (18 21)
   6329      2nd vec:  ( 2  5) ( 8 11 14) (17 20 23)
   6330      3rd vec:  ( 1  4  7) (10 13) (16 19 22)
   6331 
   6332    This algorithm is faster than one in vect_permute_load_chain if:
     1.  "shift of a concatenation" is faster than general permutation.
   6334 	 This is usually so.
   6335      2.  The TARGET machine can't execute vector instructions in parallel.
   6336 	 This is because each step of the algorithm depends on previous.
   6337 	 The algorithm in vect_permute_load_chain is much more parallel.
   6338 
   6339    The algorithm is applicable only for LOAD CHAIN LENGTH less than VF.
   6340 */
   6341 
   6342 static bool
   6343 vect_shift_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain,
   6344 			       unsigned int length,
   6345 			       stmt_vec_info stmt_info,
   6346 			       gimple_stmt_iterator *gsi,
   6347 			       vec<tree> *result_chain)
   6348 {
   6349   tree vect[3], vect_shift[3], data_ref, first_vect, second_vect;
   6350   tree perm2_mask1, perm2_mask2, perm3_mask;
   6351   tree select_mask, shift1_mask, shift2_mask, shift3_mask, shift4_mask;
   6352   gimple *perm_stmt;
   6353 
   6354   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   6355   unsigned int i;
   6356   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   6357 
   6358   unsigned HOST_WIDE_INT nelt, vf;
   6359   if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nelt)
   6360       || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
   6361     /* Not supported for variable-length vectors.  */
   6362     return false;
   6363 
   6364   vec_perm_builder sel (nelt, nelt, 1);
   6365   sel.quick_grow (nelt);
   6366 
   6367   result_chain->quick_grow (length);
   6368   memcpy (result_chain->address (), dr_chain.address (),
   6369 	  length * sizeof (tree));
   6370 
   6371   if (pow2p_hwi (length) && vf > 4)
   6372     {
   6373       unsigned int j, log_length = exact_log2 (length);
   6374       for (i = 0; i < nelt / 2; ++i)
   6375 	sel[i] = i * 2;
   6376       for (i = 0; i < nelt / 2; ++i)
   6377 	sel[nelt / 2 + i] = i * 2 + 1;
   6378       vec_perm_indices indices (sel, 2, nelt);
   6379       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
   6380 	{
   6381 	  if (dump_enabled_p ())
   6382 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6383 			     "shuffle of 2 fields structure is not \
   6384 			      supported by target\n");
   6385 	  return false;
   6386 	}
   6387       perm2_mask1 = vect_gen_perm_mask_checked (vectype, indices);
   6388 
   6389       for (i = 0; i < nelt / 2; ++i)
   6390 	sel[i] = i * 2 + 1;
   6391       for (i = 0; i < nelt / 2; ++i)
   6392 	sel[nelt / 2 + i] = i * 2;
   6393       indices.new_vector (sel, 2, nelt);
   6394       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
   6395 	{
   6396 	  if (dump_enabled_p ())
   6397 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6398 			     "shuffle of 2 fields structure is not \
   6399 			      supported by target\n");
   6400 	  return false;
   6401 	}
   6402       perm2_mask2 = vect_gen_perm_mask_checked (vectype, indices);
   6403 
   6404       /* Generating permutation constant to shift all elements.
   6405 	 For vector length 8 it is {4 5 6 7 8 9 10 11}.  */
   6406       for (i = 0; i < nelt; i++)
   6407 	sel[i] = nelt / 2 + i;
   6408       indices.new_vector (sel, 2, nelt);
   6409       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
   6410 	{
   6411 	  if (dump_enabled_p ())
   6412 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6413 			     "shift permutation is not supported by target\n");
   6414 	  return false;
   6415 	}
   6416       shift1_mask = vect_gen_perm_mask_checked (vectype, indices);
   6417 
   6418       /* Generating permutation constant to select vector from 2.
   6419 	 For vector length 8 it is {0 1 2 3 12 13 14 15}.  */
   6420       for (i = 0; i < nelt / 2; i++)
   6421 	sel[i] = i;
   6422       for (i = nelt / 2; i < nelt; i++)
   6423 	sel[i] = nelt + i;
   6424       indices.new_vector (sel, 2, nelt);
   6425       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
   6426 	{
   6427 	  if (dump_enabled_p ())
   6428 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6429 			     "select is not supported by target\n");
   6430 	  return false;
   6431 	}
   6432       select_mask = vect_gen_perm_mask_checked (vectype, indices);
   6433 
   6434       for (i = 0; i < log_length; i++)
   6435 	{
   6436 	  for (j = 0; j < length; j += 2)
   6437 	    {
   6438 	      first_vect = dr_chain[j];
   6439 	      second_vect = dr_chain[j + 1];
   6440 
   6441 	      data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
   6442 	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
   6443 					       first_vect, first_vect,
   6444 					       perm2_mask1);
   6445 	      vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
   6446 	      vect[0] = data_ref;
   6447 
   6448 	      data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
   6449 	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
   6450 					       second_vect, second_vect,
   6451 					       perm2_mask2);
   6452 	      vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
   6453 	      vect[1] = data_ref;
   6454 
   6455 	      data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift");
   6456 	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
   6457 					       vect[0], vect[1], shift1_mask);
   6458 	      vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
   6459 	      (*result_chain)[j/2 + length/2] = data_ref;
   6460 
   6461 	      data_ref = make_temp_ssa_name (vectype, NULL, "vect_select");
   6462 	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
   6463 					       vect[0], vect[1], select_mask);
   6464 	      vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
   6465 	      (*result_chain)[j/2] = data_ref;
   6466 	    }
   6467 	  memcpy (dr_chain.address (), result_chain->address (),
   6468 		  length * sizeof (tree));
   6469 	}
   6470       return true;
   6471     }
   6472   if (length == 3 && vf > 2)
   6473     {
   6474       unsigned int k = 0, l = 0;
   6475 
   6476       /* Generating permutation constant to get all elements in rigth order.
   6477 	 For vector length 8 it is {0 3 6 1 4 7 2 5}.  */
   6478       for (i = 0; i < nelt; i++)
   6479 	{
   6480 	  if (3 * k + (l % 3) >= nelt)
   6481 	    {
   6482 	      k = 0;
   6483 	      l += (3 - (nelt % 3));
   6484 	    }
   6485 	  sel[i] = 3 * k + (l % 3);
   6486 	  k++;
   6487 	}
   6488       vec_perm_indices indices (sel, 2, nelt);
   6489       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
   6490 	{
   6491 	  if (dump_enabled_p ())
   6492 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6493 			     "shuffle of 3 fields structure is not \
   6494 			      supported by target\n");
   6495 	  return false;
   6496 	}
   6497       perm3_mask = vect_gen_perm_mask_checked (vectype, indices);
   6498 
   6499       /* Generating permutation constant to shift all elements.
   6500 	 For vector length 8 it is {6 7 8 9 10 11 12 13}.  */
   6501       for (i = 0; i < nelt; i++)
   6502 	sel[i] = 2 * (nelt / 3) + (nelt % 3) + i;
   6503       indices.new_vector (sel, 2, nelt);
   6504       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
   6505 	{
   6506 	  if (dump_enabled_p ())
   6507 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6508 			     "shift permutation is not supported by target\n");
   6509 	  return false;
   6510 	}
   6511       shift1_mask = vect_gen_perm_mask_checked (vectype, indices);
   6512 
   6513       /* Generating permutation constant to shift all elements.
   6514 	 For vector length 8 it is {5 6 7 8 9 10 11 12}.  */
   6515       for (i = 0; i < nelt; i++)
   6516 	sel[i] = 2 * (nelt / 3) + 1 + i;
   6517       indices.new_vector (sel, 2, nelt);
   6518       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
   6519 	{
   6520 	  if (dump_enabled_p ())
   6521 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6522 			     "shift permutation is not supported by target\n");
   6523 	  return false;
   6524 	}
   6525       shift2_mask = vect_gen_perm_mask_checked (vectype, indices);
   6526 
   6527       /* Generating permutation constant to shift all elements.
   6528 	 For vector length 8 it is {3 4 5 6 7 8 9 10}.  */
   6529       for (i = 0; i < nelt; i++)
   6530 	sel[i] = (nelt / 3) + (nelt % 3) / 2 + i;
   6531       indices.new_vector (sel, 2, nelt);
   6532       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
   6533 	{
   6534 	  if (dump_enabled_p ())
   6535 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6536 			     "shift permutation is not supported by target\n");
   6537 	  return false;
   6538 	}
   6539       shift3_mask = vect_gen_perm_mask_checked (vectype, indices);
   6540 
   6541       /* Generating permutation constant to shift all elements.
   6542 	 For vector length 8 it is {5 6 7 8 9 10 11 12}.  */
   6543       for (i = 0; i < nelt; i++)
   6544 	sel[i] = 2 * (nelt / 3) + (nelt % 3) / 2 + i;
   6545       indices.new_vector (sel, 2, nelt);
   6546       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
   6547 	{
   6548 	  if (dump_enabled_p ())
   6549 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6550 			     "shift permutation is not supported by target\n");
   6551 	  return false;
   6552 	}
   6553       shift4_mask = vect_gen_perm_mask_checked (vectype, indices);
   6554 
   6555       for (k = 0; k < 3; k++)
   6556 	{
   6557 	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3");
   6558 	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
   6559 					   dr_chain[k], dr_chain[k],
   6560 					   perm3_mask);
   6561 	  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
   6562 	  vect[k] = data_ref;
   6563 	}
   6564 
   6565       for (k = 0; k < 3; k++)
   6566 	{
   6567 	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift1");
   6568 	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
   6569 					   vect[k % 3], vect[(k + 1) % 3],
   6570 					   shift1_mask);
   6571 	  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
   6572 	  vect_shift[k] = data_ref;
   6573 	}
   6574 
   6575       for (k = 0; k < 3; k++)
   6576 	{
   6577 	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift2");
   6578 	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
   6579 					   vect_shift[(4 - k) % 3],
   6580 					   vect_shift[(3 - k) % 3],
   6581 					   shift2_mask);
   6582 	  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
   6583 	  vect[k] = data_ref;
   6584 	}
   6585 
   6586       (*result_chain)[3 - (nelt % 3)] = vect[2];
   6587 
   6588       data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift3");
   6589       perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect[0],
   6590 				       vect[0], shift3_mask);
   6591       vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
   6592       (*result_chain)[nelt % 3] = data_ref;
   6593 
   6594       data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift4");
   6595       perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect[1],
   6596 				       vect[1], shift4_mask);
   6597       vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
   6598       (*result_chain)[0] = data_ref;
   6599       return true;
   6600     }
   6601   return false;
   6602 }
   6603 
   6604 /* Function vect_transform_grouped_load.
   6605 
   6606    Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
   6607    to perform their permutation and ascribe the result vectorized statements to
   6608    the scalar statements.
   6609 */
   6610 
   6611 void
   6612 vect_transform_grouped_load (vec_info *vinfo, stmt_vec_info stmt_info,
   6613 			     vec<tree> dr_chain,
   6614 			     int size, gimple_stmt_iterator *gsi)
   6615 {
   6616   machine_mode mode;
   6617   vec<tree> result_chain = vNULL;
   6618 
   6619   /* DR_CHAIN contains input data-refs that are a part of the interleaving.
   6620      RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
   6621      vectors, that are ready for vector computation.  */
   6622   result_chain.create (size);
   6623 
   6624   /* If reassociation width for vector type is 2 or greater target machine can
   6625      execute 2 or more vector instructions in parallel.  Otherwise try to
   6626      get chain for loads group using vect_shift_permute_load_chain.  */
   6627   mode = TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info));
   6628   if (targetm.sched.reassociation_width (VEC_PERM_EXPR, mode) > 1
   6629       || pow2p_hwi (size)
   6630       || !vect_shift_permute_load_chain (vinfo, dr_chain, size, stmt_info,
   6631 					 gsi, &result_chain))
   6632     vect_permute_load_chain (vinfo, dr_chain,
   6633 			     size, stmt_info, gsi, &result_chain);
   6634   vect_record_grouped_load_vectors (vinfo, stmt_info, result_chain);
   6635   result_chain.release ();
   6636 }
   6637 
   6638 /* RESULT_CHAIN contains the output of a group of grouped loads that were
   6639    generated as part of the vectorization of STMT_INFO.  Assign the statement
   6640    for each vector to the associated scalar statement.  */
   6641 
   6642 void
   6643 vect_record_grouped_load_vectors (vec_info *, stmt_vec_info stmt_info,
   6644 				  vec<tree> result_chain)
   6645 {
   6646   stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
   6647   unsigned int i, gap_count;
   6648   tree tmp_data_ref;
   6649 
   6650   /* Put a permuted data-ref in the VECTORIZED_STMT field.
   6651      Since we scan the chain starting from it's first node, their order
   6652      corresponds the order of data-refs in RESULT_CHAIN.  */
   6653   stmt_vec_info next_stmt_info = first_stmt_info;
   6654   gap_count = 1;
   6655   FOR_EACH_VEC_ELT (result_chain, i, tmp_data_ref)
   6656     {
   6657       if (!next_stmt_info)
   6658 	break;
   6659 
   6660       /* Skip the gaps.  Loads created for the gaps will be removed by dead
   6661        code elimination pass later.  No need to check for the first stmt in
   6662        the group, since it always exists.
   6663        DR_GROUP_GAP is the number of steps in elements from the previous
   6664        access (if there is no gap DR_GROUP_GAP is 1).  We skip loads that
   6665        correspond to the gaps.  */
   6666       if (next_stmt_info != first_stmt_info
   6667 	  && gap_count < DR_GROUP_GAP (next_stmt_info))
   6668 	{
   6669 	  gap_count++;
   6670 	  continue;
   6671 	}
   6672 
   6673       /* ???  The following needs cleanup after the removal of
   6674          DR_GROUP_SAME_DR_STMT.  */
   6675       if (next_stmt_info)
   6676         {
   6677 	  gimple *new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
   6678 	  /* We assume that if VEC_STMT is not NULL, this is a case of multiple
   6679 	     copies, and we put the new vector statement last.  */
   6680 	  STMT_VINFO_VEC_STMTS (next_stmt_info).safe_push (new_stmt);
   6681 
   6682 	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
   6683 	  gap_count = 1;
   6684         }
   6685     }
   6686 }
   6687 
/* Function vect_can_force_dr_alignment_p.

   Returns whether the alignment of a DECL can be forced to be aligned
   on ALIGNMENT bit boundary.  */
   6692 
   6693 bool
   6694 vect_can_force_dr_alignment_p (const_tree decl, poly_uint64 alignment)
   6695 {
   6696   if (!VAR_P (decl))
   6697     return false;
   6698 
   6699   if (decl_in_symtab_p (decl)
   6700       && !symtab_node::get (decl)->can_increase_alignment_p ())
   6701     return false;
   6702 
   6703   if (TREE_STATIC (decl))
   6704     return (known_le (alignment,
   6705 		      (unsigned HOST_WIDE_INT) MAX_OFILE_ALIGNMENT));
   6706   else
   6707     return (known_le (alignment, (unsigned HOST_WIDE_INT) MAX_STACK_ALIGNMENT));
   6708 }
   6709 
   6710 /* Return whether the data reference DR_INFO is supported with respect to its
   6711    alignment.
   6712    If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
   6713    it is aligned, i.e., check if it is possible to vectorize it with different
   6714    alignment.  */
   6715 
enum dr_alignment_support
vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info,
			       tree vectype, int misalignment)
{
  data_reference *dr = dr_info->dr;
  stmt_vec_info stmt_info = dr_info->stmt;
  machine_mode mode = TYPE_MODE (vectype);
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop *vect_loop = NULL;
  bool nested_in_vect_loop = false;

  /* A known-aligned access needs no special handling.  */
  if (misalignment == 0)
    return dr_aligned;

  /* For now assume all conditional loads/stores support unaligned
     access without any special code.  */
  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    if (gimple_call_internal_p (stmt)
	&& (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
	    || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
      return dr_unaligned_supported;

  if (loop_vinfo)
    {
      vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt_info);
    }

  /* Possibly unaligned access.  */

  /* We can choose between using the implicit realignment scheme (generating
     a misaligned_move stmt) and the explicit realignment scheme (generating
     aligned loads with a REALIGN_LOAD).  There are two variants to the
     explicit realignment scheme: optimized, and unoptimized.
     We can optimize the realignment only if the step between consecutive
     vector loads is equal to the vector size.  Since the vector memory
     accesses advance in steps of VS (Vector Size) in the vectorized loop, it
     is guaranteed that the misalignment amount remains the same throughout the
     execution of the vectorized loop.  Therefore, we can create the
     "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
     at the loop preheader.

     However, in the case of outer-loop vectorization, when vectorizing a
     memory access in the inner-loop nested within the LOOP that is now being
     vectorized, while it is guaranteed that the misalignment of the
     vectorized memory access will remain the same in different outer-loop
     iterations, it is *not* guaranteed that is will remain the same throughout
     the execution of the inner-loop.  This is because the inner-loop advances
     with the original scalar step (and not in steps of VS).  If the inner-loop
     step happens to be a multiple of VS, then the misalignment remains fixed
     and we can use the optimized realignment scheme.  For example:

      for (i=0; i<N; i++)
        for (j=0; j<M; j++)
          s += a[i+j];

     When vectorizing the i-loop in the above example, the step between
     consecutive vector loads is 1, and so the misalignment does not remain
     fixed across the execution of the inner-loop, and the realignment cannot
     be optimized (as illustrated in the following pseudo vectorized loop):

      for (i=0; i<N; i+=4)
        for (j=0; j<M; j++){
          vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
                         // when j is {0,1,2,3,4,5,6,7,...} respectively.
                         // (assuming that we start from an aligned address).
          }

     We therefore have to use the unoptimized realignment scheme:

      for (i=0; i<N; i+=4)
          for (j=k; j<M; j+=4)
          vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
                           // that the misalignment of the initial address is
                           // 0).

     The loop can then be vectorized as follows:

      for (k=0; k<4; k++){
        rt = get_realignment_token (&vp[k]);
        for (i=0; i<N; i+=4){
          v1 = vp[i+k];
          for (j=k; j<M; j+=4){
            v2 = vp[i+j+VS-1];
            va = REALIGN_LOAD <v1,v2,rt>;
            vs += va;
            v1 = v2;
          }
        }
    } */

  if (DR_IS_READ (dr))
    {
      /* Explicit realignment is only available for loads, and only when
	 the target provides vec_realign_load and, if it defines the
	 mask-for-load hook at all, a usable mask builtin.  */
      if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
	  && (!targetm.vectorize.builtin_mask_for_load
	      || targetm.vectorize.builtin_mask_for_load ()))
	{
	  /* If we are doing SLP then the accesses need not have the
	     same alignment, instead it depends on the SLP group size.  */
	  if (loop_vinfo
	      && STMT_SLP_TYPE (stmt_info)
	      && !multiple_p (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
			      * (DR_GROUP_SIZE
				 (DR_GROUP_FIRST_ELEMENT (stmt_info))),
			      TYPE_VECTOR_SUBPARTS (vectype)))
	    ;
	  else if (!loop_vinfo
		   || (nested_in_vect_loop
		       && maybe_ne (TREE_INT_CST_LOW (DR_STEP (dr)),
				    GET_MODE_SIZE (TYPE_MODE (vectype)))))
	    return dr_explicit_realign;
	  else
	    return dr_explicit_realign_optimized;
	}
    }

  /* Otherwise ask the target whether it supports a misaligned vector
     access of this mode; accesses with statically unknown misalignment
     of a not-size-aligned ref are reported as "packed".  */
  bool is_packed = false;
  tree type = TREE_TYPE (DR_REF (dr));
  if (misalignment == DR_MISALIGNMENT_UNKNOWN)
    is_packed = not_size_aligned (DR_REF (dr));
  if (targetm.vectorize.support_vector_misalignment (mode, type, misalignment,
						     is_packed))
    return dr_unaligned_supported;

  /* Unsupported.  */
  return dr_unaligned_unsupported;
}
   6843