Home | History | Annotate | Line # | Download | only in gcc
tree-vectorizer.cc revision 1.1.1.1
      1 /* Vectorizer
      2    Copyright (C) 2003-2022 Free Software Foundation, Inc.
      3    Contributed by Dorit Naishlos <dorit (at) il.ibm.com>
      4 
      5 This file is part of GCC.
      6 
      7 GCC is free software; you can redistribute it and/or modify it under
      8 the terms of the GNU General Public License as published by the Free
      9 Software Foundation; either version 3, or (at your option) any later
     10 version.
     11 
     12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
     13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
     14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     15 for more details.
     16 
     17 You should have received a copy of the GNU General Public License
     18 along with GCC; see the file COPYING3.  If not see
     19 <http://www.gnu.org/licenses/>.  */
     20 
     21 /* Loop and basic block vectorizer.
     22 
     23   This file contains drivers for the three vectorizers:
     24   (1) loop vectorizer (inter-iteration parallelism),
     25   (2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop
     26       vectorizer)
     27   (3) BB vectorizer (out-of-loops), aka SLP
     28 
     29   The rest of the vectorizer's code is organized as follows:
     30   - tree-vect-loop.cc - loop specific parts such as reductions, etc. These are
     31     used by drivers (1) and (2).
     32   - tree-vect-loop-manip.cc - vectorizer's loop control-flow utilities, used by
     33     drivers (1) and (2).
     34   - tree-vect-slp.cc - BB vectorization specific analysis and transformation,
     35     used by drivers (2) and (3).
     36   - tree-vect-stmts.cc - statements analysis and transformation (used by all).
     37   - tree-vect-data-refs.cc - vectorizer specific data-refs analysis and
     38     manipulations (used by all).
     39   - tree-vect-patterns.cc - vectorizable code patterns detector (used by all)
     40 
     41   Here's a poor attempt at illustrating that:
     42 
     43      tree-vectorizer.cc:
     44      loop_vect()  loop_aware_slp()  slp_vect()
     45           |        /           \          /
     46           |       /             \        /
     47           tree-vect-loop.cc  tree-vect-slp.cc
     48                 | \      \  /      /   |
     49                 |  \      \/      /    |
     50                 |   \     /\     /     |
     51                 |    \   /  \   /      |
     52          tree-vect-stmts.cc  tree-vect-data-refs.cc
     53                        \      /
     54                     tree-vect-patterns.cc
     55 */
     56 
     57 #include "config.h"
     58 #include "system.h"
     59 #include "coretypes.h"
     60 #include "backend.h"
     61 #include "tree.h"
     62 #include "gimple.h"
     63 #include "predict.h"
     64 #include "tree-pass.h"
     65 #include "ssa.h"
     66 #include "cgraph.h"
     67 #include "fold-const.h"
     68 #include "stor-layout.h"
     69 #include "gimple-iterator.h"
     70 #include "gimple-walk.h"
     71 #include "tree-ssa-loop-manip.h"
     72 #include "tree-ssa-loop-niter.h"
     73 #include "tree-cfg.h"
     74 #include "cfgloop.h"
     75 #include "tree-vectorizer.h"
     76 #include "tree-ssa-propagate.h"
     77 #include "dbgcnt.h"
     78 #include "tree-scalar-evolution.h"
     79 #include "stringpool.h"
     80 #include "attribs.h"
     81 #include "gimple-pretty-print.h"
     82 #include "opt-problem.h"
     83 #include "internal-fn.h"
     84 #include "tree-ssa-sccvn.h"
     85 
/* Loop or bb location, with hotness information, used when emitting
   vectorizer dump messages.  */
dump_user_location_t vect_location;

/* auto_purge_vect_location's dtor: reset the vect_location
   global, to avoid stale location_t values that could reference
   GC-ed blocks.  */

auto_purge_vect_location::~auto_purge_vect_location ()
{
  vect_location = dump_user_location_t ();
}
     97 
     98 /* Dump a cost entry according to args to F.  */
     99 
    100 void
    101 dump_stmt_cost (FILE *f, int count, enum vect_cost_for_stmt kind,
    102 		stmt_vec_info stmt_info, slp_tree node, tree,
    103 		int misalign, unsigned cost,
    104 		enum vect_cost_model_location where)
    105 {
    106   if (stmt_info)
    107     {
    108       print_gimple_expr (f, STMT_VINFO_STMT (stmt_info), 0, TDF_SLIM);
    109       fprintf (f, " ");
    110     }
    111   else if (node)
    112     fprintf (f, "node %p ", (void *)node);
    113   else
    114     fprintf (f, "<unknown> ");
    115   fprintf (f, "%d times ", count);
    116   const char *ks = "unknown";
    117   switch (kind)
    118     {
    119     case scalar_stmt:
    120       ks = "scalar_stmt";
    121       break;
    122     case scalar_load:
    123       ks = "scalar_load";
    124       break;
    125     case scalar_store:
    126       ks = "scalar_store";
    127       break;
    128     case vector_stmt:
    129       ks = "vector_stmt";
    130       break;
    131     case vector_load:
    132       ks = "vector_load";
    133       break;
    134     case vector_gather_load:
    135       ks = "vector_gather_load";
    136       break;
    137     case unaligned_load:
    138       ks = "unaligned_load";
    139       break;
    140     case unaligned_store:
    141       ks = "unaligned_store";
    142       break;
    143     case vector_store:
    144       ks = "vector_store";
    145       break;
    146     case vector_scatter_store:
    147       ks = "vector_scatter_store";
    148       break;
    149     case vec_to_scalar:
    150       ks = "vec_to_scalar";
    151       break;
    152     case scalar_to_vec:
    153       ks = "scalar_to_vec";
    154       break;
    155     case cond_branch_not_taken:
    156       ks = "cond_branch_not_taken";
    157       break;
    158     case cond_branch_taken:
    159       ks = "cond_branch_taken";
    160       break;
    161     case vec_perm:
    162       ks = "vec_perm";
    163       break;
    164     case vec_promote_demote:
    165       ks = "vec_promote_demote";
    166       break;
    167     case vec_construct:
    168       ks = "vec_construct";
    169       break;
    170     }
    171   fprintf (f, "%s ", ks);
    172   if (kind == unaligned_load || kind == unaligned_store)
    173     fprintf (f, "(misalign %d) ", misalign);
    174   fprintf (f, "costs %u ", cost);
    175   const char *ws = "unknown";
    176   switch (where)
    177     {
    178     case vect_prologue:
    179       ws = "prologue";
    180       break;
    181     case vect_body:
    182       ws = "body";
    183       break;
    184     case vect_epilogue:
    185       ws = "epilogue";
    186       break;
    187     }
    188   fprintf (f, "in %s\n", ws);
    189 }
    190 
/* For mapping simduid to vectorization factor.  */

class simduid_to_vf : public free_ptr_hash<simduid_to_vf>
{
public:
  /* DECL_UID of the simduid variable identifying an OMP simd loop.  */
  unsigned int simduid;
  /* Vectorization factor recorded for that loop.  */
  poly_uint64 vf;

  /* hash_table support.  */
  static inline hashval_t hash (const simduid_to_vf *);
  static inline int equal (const simduid_to_vf *, const simduid_to_vf *);
};
    204 
    205 inline hashval_t
    206 simduid_to_vf::hash (const simduid_to_vf *p)
    207 {
    208   return p->simduid;
    209 }
    210 
    211 inline int
    212 simduid_to_vf::equal (const simduid_to_vf *p1, const simduid_to_vf *p2)
    213 {
    214   return p1->simduid == p2->simduid;
    215 }
    216 
/* This hash maps the OMP simd array to the corresponding simduid used
   to index into it.  Like thus,

        _7 = GOMP_SIMD_LANE (simduid.0)
        ...
        ...
        D.1737[_7] = stuff;


   This hash maps from the OMP simd array (D.1737[]) to DECL_UID of
   simduid.0.  */

struct simd_array_to_simduid : free_ptr_hash<simd_array_to_simduid>
{
  /* The "omp simd array" VAR_DECL.  */
  tree decl;
  /* DECL_UID of the simduid indexing it; -1U when the array is used
     with more than one simduid (see note_simd_array_uses_cb).  */
  unsigned int simduid;

  /* hash_table support.  */
  static inline hashval_t hash (const simd_array_to_simduid *);
  static inline int equal (const simd_array_to_simduid *,
			   const simd_array_to_simduid *);
};
    239 
    240 inline hashval_t
    241 simd_array_to_simduid::hash (const simd_array_to_simduid *p)
    242 {
    243   return DECL_UID (p->decl);
    244 }
    245 
    246 inline int
    247 simd_array_to_simduid::equal (const simd_array_to_simduid *p1,
    248 			      const simd_array_to_simduid *p2)
    249 {
    250   return p1->decl == p2->decl;
    251 }
    252 
/* Fold IFN_GOMP_SIMD_LANE, IFN_GOMP_SIMD_VF, IFN_GOMP_SIMD_LAST_LANE,
   into their corresponding constants and remove
   IFN_GOMP_SIMD_ORDERED_{START,END}.  HTAB, if non-NULL, maps each
   simduid to the vectorization factor its loop received; a simduid
   without an entry folds as if the VF were 1.  */

static void
adjust_simduid_builtins (hash_table<simduid_to_vf> *htab, function *fun)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator i;

      /* No gsi_next in the loop header: removal/replacement of the
	 current stmt advances the iterator, the other paths call
	 gsi_next explicitly before 'continue'.  */
      for (i = gsi_start_bb (bb); !gsi_end_p (i); )
	{
	  poly_uint64 vf = 1;
	  enum internal_fn ifn;
	  gimple *stmt = gsi_stmt (i);
	  tree t;
	  /* Only internal function calls are of interest.  */
	  if (!is_gimple_call (stmt)
	      || !gimple_call_internal_p (stmt))
	    {
	      gsi_next (&i);
	      continue;
	    }
	  ifn = gimple_call_internal_fn (stmt);
	  switch (ifn)
	    {
	    case IFN_GOMP_SIMD_LANE:
	    case IFN_GOMP_SIMD_VF:
	    case IFN_GOMP_SIMD_LAST_LANE:
	      break;
	    case IFN_GOMP_SIMD_ORDERED_START:
	    case IFN_GOMP_SIMD_ORDERED_END:
	      /* With a first argument of 1, replace the marker by the
		 real GOMP ordered builtin; otherwise just delete it.  */
	      if (integer_onep (gimple_call_arg (stmt, 0)))
		{
		  enum built_in_function bcode
		    = (ifn == IFN_GOMP_SIMD_ORDERED_START
		       ? BUILT_IN_GOMP_ORDERED_START
		       : BUILT_IN_GOMP_ORDERED_END);
		  gimple *g
		    = gimple_build_call (builtin_decl_explicit (bcode), 0);
		  gimple_move_vops (g, stmt);
		  gsi_replace (&i, g, true);
		  continue;
		}
	      gsi_remove (&i, true);
	      unlink_stmt_vdef (stmt);
	      continue;
	    default:
	      gsi_next (&i);
	      continue;
	    }
	  /* Here for the three foldable calls; their first argument is
	     the SSA name of the simduid variable.  */
	  tree arg = gimple_call_arg (stmt, 0);
	  gcc_assert (arg != NULL_TREE);
	  gcc_assert (TREE_CODE (arg) == SSA_NAME);
	  simduid_to_vf *p = NULL, data;
	  data.simduid = DECL_UID (SSA_NAME_VAR (arg));
	  /* Need to nullify loop safelen field since its value is not
	     valid after transformation.  */
	  if (bb->loop_father && bb->loop_father->safelen > 0)
	    bb->loop_father->safelen = 0;
	  if (htab)
	    {
	      p = htab->find (&data);
	      if (p)
		vf = p->vf;
	    }
	  /* Pick the replacement value for the call's result.  */
	  switch (ifn)
	    {
	    case IFN_GOMP_SIMD_VF:
	      t = build_int_cst (unsigned_type_node, vf);
	      break;
	    case IFN_GOMP_SIMD_LANE:
	      t = build_int_cst (unsigned_type_node, 0);
	      break;
	    case IFN_GOMP_SIMD_LAST_LANE:
	      t = gimple_call_arg (stmt, 1);
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  /* Propagate the folded value into all uses and delete the call.  */
	  tree lhs = gimple_call_lhs (stmt);
	  if (lhs)
	    replace_uses_by (lhs, t);
	  release_defs (stmt);
	  gsi_remove (&i, true);
	}
    }
}
    343 
/* Helper structure for note_simd_array_uses.  */

struct note_simd_array_uses_struct
{
  /* Pointer to the (lazily created) array -> simduid hash table.  */
  hash_table<simd_array_to_simduid> **htab;
  /* DECL_UID of the simduid for the statement currently being walked.  */
  unsigned int simduid;
};
    351 
/* Callback for note_simd_array_uses, called through walk_gimple_op.
   When *TP is a VAR_DECL of the current function carrying the
   "omp simd array" attribute, record the mapping from it to the
   simduid stashed in the walk info; an array seen with two different
   simduids is marked with the sentinel -1U.  */

static tree
note_simd_array_uses_cb (tree *tp, int *walk_subtrees, void *data)
{
  struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
  struct note_simd_array_uses_struct *ns
    = (struct note_simd_array_uses_struct *) wi->info;

  if (TYPE_P (*tp))
    *walk_subtrees = 0;
  else if (VAR_P (*tp)
	   && lookup_attribute ("omp simd array", DECL_ATTRIBUTES (*tp))
	   && DECL_CONTEXT (*tp) == current_function_decl)
    {
      simd_array_to_simduid data;
      /* Create the hash table lazily on first hit.  */
      if (!*ns->htab)
	*ns->htab = new hash_table<simd_array_to_simduid> (15);
      data.decl = *tp;
      data.simduid = ns->simduid;
      simd_array_to_simduid **slot = (*ns->htab)->find_slot (&data, INSERT);
      if (*slot == NULL)
	{
	  simd_array_to_simduid *p = XNEW (simd_array_to_simduid);
	  *p = data;
	  *slot = p;
	}
      else if ((*slot)->simduid != ns->simduid)
	/* Already recorded with a different simduid: invalidate.  */
	(*slot)->simduid = -1U;
      *walk_subtrees = 0;
    }
  return NULL_TREE;
}
    385 
/* Find "omp simd array" temporaries and map them to corresponding
   simduid.  The mapping is collected into *HTAB, which is created
   lazily by the walk callback and stays NULL if no such arrays are
   used.  */

static void
note_simd_array_uses (hash_table<simd_array_to_simduid> **htab, function *fun)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  struct walk_stmt_info wi;
  struct note_simd_array_uses_struct ns;

  memset (&wi, 0, sizeof (wi));
  wi.info = &ns;
  ns.htab = htab;

  FOR_EACH_BB_FN (bb, fun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *stmt = gsi_stmt (gsi);
	/* Look only at the GOMP_SIMD lane-related internal calls.  */
	if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
	  continue;
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_SIMD_LANE:
	  case IFN_GOMP_SIMD_VF:
	  case IFN_GOMP_SIMD_LAST_LANE:
	    break;
	  default:
	    continue;
	  }
	tree lhs = gimple_call_lhs (stmt);
	if (lhs == NULL_TREE)
	  continue;
	imm_use_iterator use_iter;
	gimple *use_stmt;
	/* The call's first argument is the simduid variable; walk every
	   non-debug use of the call's result looking for simd arrays.  */
	ns.simduid = DECL_UID (SSA_NAME_VAR (gimple_call_arg (stmt, 0)));
	FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs)
	  if (!is_gimple_debug (use_stmt))
	    walk_gimple_op (use_stmt, note_simd_array_uses_cb, &wi);
      }
}
    427 
/* Shrink arrays with "omp simd array" attribute to the corresponding
   vectorization factor.  Entries invalidated with simduid -1U are
   skipped; arrays whose simduid has no recorded VF shrink to a single
   element.  Frees SIMD_ARRAY_TO_SIMDUID_HTAB when done.  */

static void
shrink_simd_arrays
  (hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab,
   hash_table<simduid_to_vf> *simduid_to_vf_htab)
{
  for (hash_table<simd_array_to_simduid>::iterator iter
	 = simd_array_to_simduid_htab->begin ();
       iter != simd_array_to_simduid_htab->end (); ++iter)
    if ((*iter)->simduid != -1U)
      {
	tree decl = (*iter)->decl;
	/* Default to a VF of 1 if no factor was recorded.  */
	poly_uint64 vf = 1;
	if (simduid_to_vf_htab)
	  {
	    simduid_to_vf *p = NULL, data;
	    data.simduid = (*iter)->simduid;
	    p = simduid_to_vf_htab->find (&data);
	    if (p)
	      vf = p->vf;
	  }
	/* Re-type the array to VF elements and recompute its layout.  */
	tree atype
	  = build_array_type_nelts (TREE_TYPE (TREE_TYPE (decl)), vf);
	TREE_TYPE (decl) = atype;
	relayout_decl (decl);
      }

  delete simd_array_to_simduid_htab;
}
    459 
/* Initialize the vec_info with kind KIND_IN and shared analysis data
   SHARED_.  The stmt_vec_info array starts out writable with room for
   50 entries.  */

vec_info::vec_info (vec_info::vec_kind kind_in, vec_info_shared *shared_)
  : kind (kind_in),
    shared (shared_),
    stmt_vec_info_ro (false)
{
  stmt_vec_infos.create (50);
}
    471 
    472 vec_info::~vec_info ()
    473 {
    474   for (slp_instance &instance : slp_instances)
    475     vect_free_slp_instance (instance);
    476 
    477   free_stmt_vec_infos ();
    478 }
    479 
/* Construct an empty vec_info_shared: no statements counted and no
   data references or dependence relations gathered yet.  */

vec_info_shared::vec_info_shared ()
  : n_stmts (0),
    datarefs (vNULL),
    datarefs_copy (vNULL),
    ddrs (vNULL)
{
}
    487 
/* Release the data references, the dependence relations, and the
   checking copy of the datarefs.  */

vec_info_shared::~vec_info_shared ()
{
  free_data_refs (datarefs);
  free_dependence_relations (ddrs);
  datarefs_copy.release ();
}
    494 
    495 void
    496 vec_info_shared::save_datarefs ()
    497 {
    498   if (!flag_checking)
    499     return;
    500   datarefs_copy.reserve_exact (datarefs.length ());
    501   for (unsigned i = 0; i < datarefs.length (); ++i)
    502     datarefs_copy.quick_push (*datarefs[i]);
    503 }
    504 
/* Verify that the datarefs are unchanged since save_datarefs, comparing
   each one bytewise against its saved copy up to (but excluding) the
   alt_indices member.  No-op unless checking is enabled.  */

void
vec_info_shared::check_datarefs ()
{
  if (!flag_checking)
    return;
  gcc_assert (datarefs.length () == datarefs_copy.length ());
  for (unsigned i = 0; i < datarefs.length (); ++i)
    if (memcmp (&datarefs_copy[i], datarefs[i],
		offsetof (data_reference, alt_indices)) != 0)
      gcc_unreachable ();
}
    516 
    517 /* Record that STMT belongs to the vectorizable region.  Create and return
    518    an associated stmt_vec_info.  */
    519 
    520 stmt_vec_info
    521 vec_info::add_stmt (gimple *stmt)
    522 {
    523   stmt_vec_info res = new_stmt_vec_info (stmt);
    524   set_vinfo_for_stmt (stmt, res);
    525   return res;
    526 }
    527 
    528 /* Record that STMT belongs to the vectorizable region.  Create a new
    529    stmt_vec_info and mark VECINFO as being related and return the new
    530    stmt_vec_info.  */
    531 
    532 stmt_vec_info
    533 vec_info::add_pattern_stmt (gimple *stmt, stmt_vec_info stmt_info)
    534 {
    535   stmt_vec_info res = new_stmt_vec_info (stmt);
    536   set_vinfo_for_stmt (stmt, res, false);
    537   STMT_VINFO_RELATED_STMT (res) = stmt_info;
    538   return res;
    539 }
    540 
    541 /* If STMT has an associated stmt_vec_info, return that vec_info, otherwise
    542    return null.  It is safe to call this function on any statement, even if
    543    it might not be part of the vectorizable region.  */
    544 
    545 stmt_vec_info
    546 vec_info::lookup_stmt (gimple *stmt)
    547 {
    548   unsigned int uid = gimple_uid (stmt);
    549   if (uid > 0 && uid - 1 < stmt_vec_infos.length ())
    550     {
    551       stmt_vec_info res = stmt_vec_infos[uid - 1];
    552       if (res && res->stmt == stmt)
    553 	return res;
    554     }
    555   return NULL;
    556 }
    557 
    558 /* If NAME is an SSA_NAME and its definition has an associated stmt_vec_info,
    559    return that stmt_vec_info, otherwise return null.  It is safe to call
    560    this on arbitrary operands.  */
    561 
    562 stmt_vec_info
    563 vec_info::lookup_def (tree name)
    564 {
    565   if (TREE_CODE (name) == SSA_NAME
    566       && !SSA_NAME_IS_DEFAULT_DEF (name))
    567     return lookup_stmt (SSA_NAME_DEF_STMT (name));
    568   return NULL;
    569 }
    570 
    571 /* See whether there is a single non-debug statement that uses LHS and
    572    whether that statement has an associated stmt_vec_info.  Return the
    573    stmt_vec_info if so, otherwise return null.  */
    574 
    575 stmt_vec_info
    576 vec_info::lookup_single_use (tree lhs)
    577 {
    578   use_operand_p dummy;
    579   gimple *use_stmt;
    580   if (single_imm_use (lhs, &dummy, &use_stmt))
    581     return lookup_stmt (use_stmt);
    582   return NULL;
    583 }
    584 
/* Return vectorization information about DR.  */

dr_vec_info *
vec_info::lookup_dr (data_reference *dr)
{
  stmt_vec_info stmt_info = lookup_stmt (DR_STMT (dr));
  /* DR_STMT should never refer to a stmt in a pattern replacement.  */
  gcc_checking_assert (!is_pattern_stmt_p (stmt_info));
  /* dr_aux.stmt tracks the stmt_vec_info currently implementing the
     data reference (kept up to date by move_dr).  */
  return STMT_VINFO_DR_INFO (stmt_info->dr_aux.stmt);
}
    595 
/* Record that NEW_STMT_INFO now implements the same data reference
   as OLD_STMT_INFO.  */

void
vec_info::move_dr (stmt_vec_info new_stmt_info, stmt_vec_info old_stmt_info)
{
  gcc_assert (!is_pattern_stmt_p (old_stmt_info));
  /* Point the dr info back at its new owner and carry the dr-related
     auxiliary state over to it.  */
  STMT_VINFO_DR_INFO (old_stmt_info)->stmt = new_stmt_info;
  new_stmt_info->dr_aux = old_stmt_info->dr_aux;
  STMT_VINFO_DR_WRT_VEC_LOOP (new_stmt_info)
    = STMT_VINFO_DR_WRT_VEC_LOOP (old_stmt_info);
  STMT_VINFO_GATHER_SCATTER_P (new_stmt_info)
    = STMT_VINFO_GATHER_SCATTER_P (old_stmt_info);
}
    610 
/* Permanently remove the statement described by STMT_INFO from the
   function, releasing its stmt_vec_info, its virtual operand and its
   SSA definitions.  */

void
vec_info::remove_stmt (stmt_vec_info stmt_info)
{
  /* Pattern stmts are not in the IL, so they cannot be removed here.  */
  gcc_assert (!stmt_info->pattern_stmt_p);
  /* Clear the vinfo slot while the stmt's UID is still valid.  */
  set_vinfo_for_stmt (stmt_info->stmt, NULL);
  unlink_stmt_vdef (stmt_info->stmt);
  gimple_stmt_iterator si = gsi_for_stmt (stmt_info->stmt);
  gsi_remove (&si, true);
  release_defs (stmt_info->stmt);
  free_stmt_vec_info (stmt_info);
}
    625 
/* Replace the statement at GSI by NEW_STMT, both the vectorization
   information and the function itself.  STMT_INFO describes the statement
   at GSI.  */

void
vec_info::replace_stmt (gimple_stmt_iterator *gsi, stmt_vec_info stmt_info,
			gimple *new_stmt)
{
  gimple *old_stmt = stmt_info->stmt;
  gcc_assert (!stmt_info->pattern_stmt_p && old_stmt == gsi_stmt (*gsi));
  /* Carry the UID over so stmt_vec_info lookup keeps working.  */
  gimple_set_uid (new_stmt, gimple_uid (old_stmt));
  stmt_info->stmt = new_stmt;
  gsi_replace (gsi, new_stmt, true);
}
    640 
/* Insert stmts in SEQ on the VEC_INFO region entry.  If CONTEXT is
   not NULL it specifies whether to use the sub-region entry
   determined by it, currently used for loop vectorization to insert
   on the inner loop entry vs. the outer loop entry.  */

void
vec_info::insert_seq_on_entry (stmt_vec_info context, gimple_seq seq)
{
  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (this))
    {
      class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      basic_block new_bb;
      edge pe;

      /* For a stmt of the inner loop of an outer-loop vectorization,
	 insert on the inner loop's preheader instead.  */
      if (context && nested_in_vect_loop_p (loop, context))
	loop = loop->inner;

      pe = loop_preheader_edge (loop);
      new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
      /* Inserting on the preheader edge must not have required
	 splitting it.  */
      gcc_assert (!new_bb);
    }
  else
    {
      /* For BB vectorization insert before the first stmt (after any
	 labels) of the first block of the region.  */
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (this);
      gimple_stmt_iterator gsi_region_begin
	= gsi_after_labels (bb_vinfo->bbs[0]);
      gsi_insert_seq_before (&gsi_region_begin, seq, GSI_SAME_STMT);
    }
}
    670 
/* Like insert_seq_on_entry but just inserts the single stmt NEW_STMT.  */

void
vec_info::insert_on_entry (stmt_vec_info context, gimple *new_stmt)
{
  /* Wrap NEW_STMT into a fresh sequence and defer to the seq variant.  */
  gimple_seq seq = NULL;
  gimple_stmt_iterator gsi = gsi_start (seq);
  gsi_insert_before_without_update (&gsi, new_stmt, GSI_SAME_STMT);
  insert_seq_on_entry (context, seq);
}
    681 
/* Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
vec_info::new_stmt_vec_info (gimple *stmt)
{
  stmt_vec_info res = XCNEW (class _stmt_vec_info);
  res->stmt = stmt;

  /* Start from a conservative default state.  */
  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_REDUC_TYPE (res) = TREE_CODE_REDUCTION;
  STMT_VINFO_REDUC_CODE (res) = ERROR_MARK;
  STMT_VINFO_REDUC_FN (res) = IFN_LAST;
  STMT_VINFO_REDUC_IDX (res) = -1;
  STMT_VINFO_SLP_VECT_ONLY (res) = false;
  STMT_VINFO_SLP_VECT_ONLY_PATTERN (res) = false;
  STMT_VINFO_VEC_STMTS (res) = vNULL;
  res->reduc_initial_values = vNULL;
  res->reduc_scalar_results = vNULL;

  /* For loop vectorization the def type of a loop-header PHI is not
     known yet; everything else starts as an internal def.  */
  if (is_a <loop_vec_info> (this)
      && gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_SLP_TYPE (res) = loop_vect;

  /* This is really "uninitialized" until vect_compute_data_ref_alignment.  */
  res->dr_aux.misalignment = DR_MISALIGNMENT_UNINITIALIZED;

  return res;
}
    717 
/* Associate STMT with INFO.  A statement with UID 0 is not yet known
   to this vec_info: assign it the next UID and push INFO (forbidden
   when CHECK_RO and the info array has been marked read-only).  A
   statement that already has a UID may only have its slot cleared
   (INFO == NULL).  */

void
vec_info::set_vinfo_for_stmt (gimple *stmt, stmt_vec_info info, bool check_ro)
{
  unsigned int uid = gimple_uid (stmt);
  if (uid == 0)
    {
      gcc_assert (!check_ro || !stmt_vec_info_ro);
      gcc_checking_assert (info);
      /* UIDs are 1-based indices into stmt_vec_infos.  */
      uid = stmt_vec_infos.length () + 1;
      gimple_set_uid (stmt, uid);
      stmt_vec_infos.safe_push (info);
    }
  else
    {
      gcc_checking_assert (info == NULL);
      stmt_vec_infos[uid - 1] = info;
    }
}
    738 
    739 /* Free the contents of stmt_vec_infos.  */
    740 
    741 void
    742 vec_info::free_stmt_vec_infos (void)
    743 {
    744   for (stmt_vec_info &info : stmt_vec_infos)
    745     if (info != NULL)
    746       free_stmt_vec_info (info);
    747   stmt_vec_infos.release ();
    748 }
    749 
/* Free STMT_INFO and the vectors it owns.  For a pattern statement,
   which never made it into the IL, also detach it from its block and
   release its SSA lhs.  */

void
vec_info::free_stmt_vec_info (stmt_vec_info stmt_info)
{
  if (stmt_info->pattern_stmt_p)
    {
      gimple_set_bb (stmt_info->stmt, NULL);
      tree lhs = gimple_get_lhs (stmt_info->stmt);
      if (lhs && TREE_CODE (lhs) == SSA_NAME)
	release_ssa_name (lhs);
    }

  stmt_info->reduc_initial_values.release ();
  stmt_info->reduc_scalar_results.release ();
  STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
  STMT_VINFO_VEC_STMTS (stmt_info).release ();
  free (stmt_info);
}
    769 
/* Returns true if S1 dominates S2.  */

bool
vect_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
{
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);

  /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
     SSA_NAME.  Assume it lives at the beginning of function and
     thus dominates everything.  */
  if (!bb1 || s1 == s2)
    return true;

  /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
  if (!bb2)
    return false;

  /* Different blocks: defer to the dominator tree.  */
  if (bb1 != bb2)
    return dominated_by_p (CDI_DOMINATORS, bb2, bb1);

  /* PHIs in the same basic block are assumed to be
     executed all in parallel, if only one stmt is a PHI,
     it dominates the other stmt in the same basic block.  */
  if (gimple_code (s1) == GIMPLE_PHI)
    return true;

  if (gimple_code (s2) == GIMPLE_PHI)
    return false;

  /* Inserted vectorized stmts all have UID 0 while the original stmts
     in the IL have UID increasing within a BB.  Walk from both sides
     until we find the other stmt or a stmt with UID != 0.  */
  gimple_stmt_iterator gsi1 = gsi_for_stmt (s1);
  while (gimple_uid (gsi_stmt (gsi1)) == 0)
    {
      gsi_next (&gsi1);
      if (gsi_end_p (gsi1))
	return false;
      if (gsi_stmt (gsi1) == s2)
	return true;
    }
  /* A UID of -1 cannot be ordered; conservatively answer false.  */
  if (gimple_uid (gsi_stmt (gsi1)) == -1u)
    return false;

  gimple_stmt_iterator gsi2 = gsi_for_stmt (s2);
  while (gimple_uid (gsi_stmt (gsi2)) == 0)
    {
      gsi_prev (&gsi2);
      if (gsi_end_p (gsi2))
	return false;
      if (gsi_stmt (gsi2) == s1)
	return true;
    }
  if (gimple_uid (gsi_stmt (gsi2)) == -1u)
    return false;

  /* Both anchor stmts have valid UIDs; compare their order in the BB.  */
  if (gimple_uid (gsi_stmt (gsi1)) <= gimple_uid (gsi_stmt (gsi2)))
    return true;
  return false;
}
    830 
/* A helper function to free scev and LOOP niter information, as well as
   clear loop constraint LOOP_C_FINITE.  Used when previously computed
   niter/scev results for LOOP must no longer be trusted.  */

void
vect_free_loop_info_assumptions (class loop *loop)
{
  scev_reset_htab ();
  /* We need to explicitly reset upper bound information since they are
     used even after free_numbers_of_iterations_estimates.  */
  loop->any_upper_bound = false;
  loop->any_likely_upper_bound = false;
  free_numbers_of_iterations_estimates (loop);
  loop_constraint_clear (loop, LOOP_C_FINITE);
}
    845 
/* If LOOP has been versioned during ifcvt, return the internal call
   guarding it, i.e. the IFN_LOOP_VECTORIZED call directly preceding
   the GIMPLE_COND that selects between the two loop versions.  If
   COND is non-NULL also store that GIMPLE_COND in *COND.  */

gimple *
vect_loop_vectorized_call (class loop *loop, gcond **cond)
{
  /* Walk up through single-pred/single-succ blocks from the preheader
     until a block ending in a condition (or a CFG fork/join) is
     found.  */
  basic_block bb = loop_preheader_edge (loop)->src;
  gimple *g;
  do
    {
      g = last_stmt (bb);
      if ((g && gimple_code (g) == GIMPLE_COND)
	  || !single_succ_p (bb))
	break;
      if (!single_pred_p (bb))
	break;
      bb = single_pred (bb);
    }
  while (1);
  if (g && gimple_code (g) == GIMPLE_COND)
    {
      if (cond)
	*cond = as_a <gcond *> (g);
      gimple_stmt_iterator gsi = gsi_for_stmt (g);
      gsi_prev (&gsi);
      if (!gsi_end_p (gsi))
	{
	  g = gsi_stmt (gsi);
	  /* The call's two arguments are loop numbers; accept the call
	     when either one names LOOP.  */
	  if (gimple_call_internal_p (g, IFN_LOOP_VECTORIZED)
	      && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->num
		  || tree_to_shwi (gimple_call_arg (g, 1)) == loop->num))
	    return g;
	}
    }
  return NULL;
}
    882 
/* If LOOP has been versioned during loop distribution, return the guarding
   internal call, i.e. the IFN_LOOP_DIST_ALIAS call whose first argument
   matches LOOP's original loop number.  */

static gimple *
vect_loop_dist_alias_call (class loop *loop, function *fun)
{
  basic_block bb;
  basic_block entry;
  class loop *outer, *orig;
  gimple_stmt_iterator gsi;
  gimple *g;

  /* Only distributed loops carry a non-zero original loop number.  */
  if (loop->orig_loop_num == 0)
    return NULL;

  orig = get_loop (fun, loop->orig_loop_num);
  if (orig == NULL)
    {
      /* The original loop is somehow destroyed.  Clear the information.  */
      loop->orig_loop_num = 0;
      return NULL;
    }

  /* Start the search at the closest block dominating both versions
     (or at the preheader when LOOP is the original loop itself).  */
  if (loop != orig)
    bb = nearest_common_dominator (CDI_DOMINATORS, loop->header, orig->header);
  else
    bb = loop_preheader_edge (loop)->src;

  outer = bb->loop_father;
  entry = ENTRY_BLOCK_PTR_FOR_FN (fun);

  /* Look upward in dominance tree.  */
  for (; bb != entry && flow_bb_inside_loop_p (outer, bb);
       bb = get_immediate_dominator (CDI_DOMINATORS, bb))
    {
      g = last_stmt (bb);
      if (g == NULL || gimple_code (g) != GIMPLE_COND)
	continue;

      /* The candidate call is the stmt right before the condition.  */
      gsi = gsi_for_stmt (g);
      gsi_prev (&gsi);
      if (gsi_end_p (gsi))
	continue;

      g = gsi_stmt (gsi);
      /* The guarding internal function call must have the same distribution
	 alias id.  */
      if (gimple_call_internal_p (g, IFN_LOOP_DIST_ALIAS)
	  && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->orig_loop_num))
	return g;
    }
  return NULL;
}
    936 
    937 /* Set the uids of all the statements in basic blocks inside loop
    938    represented by LOOP_VINFO. LOOP_VECTORIZED_CALL is the internal
    939    call guarding the loop which has been if converted.  */
    940 static void
    941 set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call,
    942 		  function *fun)
    943 {
    944   tree arg = gimple_call_arg (loop_vectorized_call, 1);
    945   basic_block *bbs;
    946   unsigned int i;
    947   class loop *scalar_loop = get_loop (fun, tree_to_shwi (arg));
    948 
    949   LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop;
    950   gcc_checking_assert (vect_loop_vectorized_call (scalar_loop)
    951 		       == loop_vectorized_call);
    952   /* If we are going to vectorize outer loop, prevent vectorization
    953      of the inner loop in the scalar loop - either the scalar loop is
    954      thrown away, so it is a wasted work, or is used only for
    955      a few iterations.  */
    956   if (scalar_loop->inner)
    957     {
    958       gimple *g = vect_loop_vectorized_call (scalar_loop->inner);
    959       if (g)
    960 	{
    961 	  arg = gimple_call_arg (g, 0);
    962 	  get_loop (fun, tree_to_shwi (arg))->dont_vectorize = true;
    963 	  fold_loop_internal_call (g, boolean_false_node);
    964 	}
    965     }
    966   bbs = get_loop_body (scalar_loop);
    967   for (i = 0; i < scalar_loop->num_nodes; i++)
    968     {
    969       basic_block bb = bbs[i];
    970       gimple_stmt_iterator gsi;
    971       for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    972 	{
    973 	  gimple *phi = gsi_stmt (gsi);
    974 	  gimple_set_uid (phi, 0);
    975 	}
    976       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    977 	{
    978 	  gimple *stmt = gsi_stmt (gsi);
    979 	  gimple_set_uid (stmt, 0);
    980 	}
    981     }
    982   free (bbs);
    983 }
    984 
    985 /* Generate vectorized code for LOOP and its epilogues.  */
    986 
    987 static void
    988 vect_transform_loops (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
    989 		      loop_p loop, gimple *loop_vectorized_call,
    990 		      function *fun)
    991 {
    992   loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);
    993 
    994   if (loop_vectorized_call)
    995     set_uid_loop_bbs (loop_vinfo, loop_vectorized_call, fun);
    996 
    997   unsigned HOST_WIDE_INT bytes;
    998   if (dump_enabled_p ())
    999     {
   1000       if (GET_MODE_SIZE (loop_vinfo->vector_mode).is_constant (&bytes))
   1001 	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
   1002 			 "loop vectorized using %wu byte vectors\n", bytes);
   1003       else
   1004 	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
   1005 			 "loop vectorized using variable length vectors\n");
   1006     }
   1007 
   1008   loop_p new_loop = vect_transform_loop (loop_vinfo,
   1009 					 loop_vectorized_call);
   1010   /* Now that the loop has been vectorized, allow it to be unrolled
   1011      etc.  */
   1012   loop->force_vectorize = false;
   1013 
   1014   if (loop->simduid)
   1015     {
   1016       simduid_to_vf *simduid_to_vf_data = XNEW (simduid_to_vf);
   1017       if (!simduid_to_vf_htab)
   1018 	simduid_to_vf_htab = new hash_table<simduid_to_vf> (15);
   1019       simduid_to_vf_data->simduid = DECL_UID (loop->simduid);
   1020       simduid_to_vf_data->vf = loop_vinfo->vectorization_factor;
   1021       *simduid_to_vf_htab->find_slot (simduid_to_vf_data, INSERT)
   1022 	  = simduid_to_vf_data;
   1023     }
   1024 
   1025   /* Epilogue of vectorized loop must be vectorized too.  */
   1026   if (new_loop)
   1027     vect_transform_loops (simduid_to_vf_htab, new_loop, NULL, fun);
   1028 }
   1029 
/* Try to vectorize LOOP.  Return a bitmask of TODO_* flags for the pass
   manager (possibly 0).  LOOP_VECTORIZED_CALL and LOOP_DIST_ALIAS_CALL
   are the internal calls guarding if-converted resp. distributed loop
   versions, or NULL.  */

static unsigned
try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
		      unsigned *num_vectorized_loops, loop_p loop,
		      gimple *loop_vectorized_call,
		      gimple *loop_dist_alias_call,
		      function *fun)
{
  unsigned ret = 0;
  vec_info_shared shared;
  /* Restore vect_location on scope exit.  */
  auto_purge_vect_location sentinel;
  vect_location = find_loop_location (loop);

  if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
      && dump_enabled_p ())
    dump_printf (MSG_NOTE | MSG_PRIORITY_INTERNALS,
		 "\nAnalyzing loop at %s:%d\n",
		 LOCATION_FILE (vect_location.get_location_t ()),
		 LOCATION_LINE (vect_location.get_location_t ()));

  /* Try to analyze the loop, retaining an opt_problem if dump_enabled_p.  */
  opt_loop_vec_info loop_vinfo = vect_analyze_loop (loop, &shared);
  /* Stash the analysis result; execute () cleans up all loop->aux later.  */
  loop->aux = loop_vinfo;

  if (!loop_vinfo)
    if (dump_enabled_p ())
      if (opt_problem *problem = loop_vinfo.get_problem ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "couldn't vectorize loop\n");
	  problem->emit_and_clear ();
	}

  if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
    {
      /* Free existing information if loop is analyzed with some
	 assumptions.  */
      if (loop_constraint_set_p (loop, LOOP_C_FINITE))
	vect_free_loop_info_assumptions (loop);

      /* If we applied if-conversion then try to vectorize the
	 BB of innermost loops.
	 ???  Ideally BB vectorization would learn to vectorize
	 control flow by applying if-conversion on-the-fly, the
	 following retains the if-converted loop body even when
	 only non-if-converted parts took part in BB vectorization.  */
      if (flag_tree_slp_vectorize != 0
	  && loop_vectorized_call
	  && ! loop->inner)
	{
	  basic_block bb = loop->header;
	  bool require_loop_vectorize = false;
	  for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
	       !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple *stmt = gsi_stmt (gsi);
	      gcall *call = dyn_cast <gcall *> (stmt);
	      if (call && gimple_call_internal_p (call))
		{
		  internal_fn ifn = gimple_call_internal_fn (call);
		  if (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE
		      /* Don't keep the if-converted parts when the ifn with
			 specific type is not supported by the backend.  */
		      || (direct_internal_fn_p (ifn)
			  && !direct_internal_fn_supported_p
			  (call, OPTIMIZE_FOR_SPEED)))
		    {
		      /* Such calls can only be lowered by loop
			 vectorization, so the if-converted body must
			 not survive as scalar code.  */
		      require_loop_vectorize = true;
		      break;
		    }
		}
	      /* Prepare the stmts for BB vectorization.  */
	      gimple_set_uid (stmt, -1);
	      gimple_set_visited (stmt, false);
	    }
	  if (!require_loop_vectorize)
	    {
	      tree arg = gimple_call_arg (loop_vectorized_call, 1);
	      class loop *scalar_loop = get_loop (fun, tree_to_shwi (arg));
	      if (vect_slp_if_converted_bb (bb, scalar_loop))
		{
		  /* Keep the if-converted body; dead-code-eliminate the
		     scalar version by folding the guard to true.  */
		  fold_loop_internal_call (loop_vectorized_call,
					   boolean_true_node);
		  loop_vectorized_call = NULL;
		  ret |= TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
		}
	    }
	}
      /* If outer loop vectorization fails for LOOP_VECTORIZED guarded
	 loop, don't vectorize its inner loop; we'll attempt to
	 vectorize LOOP_VECTORIZED guarded inner loop of the scalar
	 loop version.  */
      if (loop_vectorized_call && loop->inner)
	loop->inner->dont_vectorize = true;
      return ret;
    }

  /* Debug counter support: allow bisecting miscompiles to a loop.  */
  if (!dbg_cnt (vect_loop))
    {
      /* Free existing information if loop is analyzed with some
	 assumptions.  */
      if (loop_constraint_set_p (loop, LOOP_C_FINITE))
	vect_free_loop_info_assumptions (loop);
      return ret;
    }

  (*num_vectorized_loops)++;
  /* Transform LOOP and its epilogues.  */
  vect_transform_loops (simduid_to_vf_htab, loop, loop_vectorized_call, fun);

  if (loop_vectorized_call)
    {
      /* Select the vectorized version; the scalar copy becomes dead.  */
      fold_loop_internal_call (loop_vectorized_call, boolean_true_node);
      ret |= TODO_cleanup_cfg;
    }
  if (loop_dist_alias_call)
    {
      /* Argument 1 records which version to keep after vectorization.  */
      tree value = gimple_call_arg (loop_dist_alias_call, 1);
      fold_loop_internal_call (loop_dist_alias_call, value);
      ret |= TODO_cleanup_cfg;
    }

  return ret;
}
   1154 
   1155 /* Try to vectorize LOOP.  */
   1156 
   1157 static unsigned
   1158 try_vectorize_loop (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
   1159 		    unsigned *num_vectorized_loops, loop_p loop,
   1160 		    function *fun)
   1161 {
   1162   if (!((flag_tree_loop_vectorize
   1163 	 && optimize_loop_nest_for_speed_p (loop))
   1164 	|| loop->force_vectorize))
   1165     return 0;
   1166 
   1167   return try_vectorize_loop_1 (simduid_to_vf_htab, num_vectorized_loops, loop,
   1168 			       vect_loop_vectorized_call (loop),
   1169 			       vect_loop_dist_alias_call (loop, fun), fun);
   1170 }
   1171 
   1172 
   1173 /* Loop autovectorization.  */
   1174 
   1175 namespace {
   1176 
/* Pass metadata for the "vect" loop vectorization pass.  */
const pass_data pass_data_vectorize =
{
  GIMPLE_PASS, /* type */
  "vect", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_TREE_VECTORIZATION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
   1189 
/* GIMPLE pass driving loop vectorization (and loop-aware SLP).  */

class pass_vectorize : public gimple_opt_pass
{
public:
  pass_vectorize (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_vectorize, ctxt)
  {}

  /* opt_pass methods: */
  /* Run when -ftree-loop-vectorize is enabled, or when some loop in the
     function was marked with the force_vectorize pragma/attribute.  */
  virtual bool gate (function *fun)
    {
      return flag_tree_loop_vectorize || fun->has_force_vectorize_loops;
    }

  virtual unsigned int execute (function *);

}; // class pass_vectorize
   1206 
   1207 /* Function vectorize_loops.
   1208 
   1209    Entry point to loop vectorization phase.  */
   1210 
   1211 unsigned
   1212 pass_vectorize::execute (function *fun)
   1213 {
   1214   unsigned int i;
   1215   unsigned int num_vectorized_loops = 0;
   1216   unsigned int vect_loops_num;
   1217   hash_table<simduid_to_vf> *simduid_to_vf_htab = NULL;
   1218   hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
   1219   bool any_ifcvt_loops = false;
   1220   unsigned ret = 0;
   1221 
   1222   vect_loops_num = number_of_loops (fun);
   1223 
   1224   /* Bail out if there are no loops.  */
   1225   if (vect_loops_num <= 1)
   1226     return 0;
   1227 
   1228   vect_slp_init ();
   1229 
   1230   if (fun->has_simduid_loops)
   1231     note_simd_array_uses (&simd_array_to_simduid_htab, fun);
   1232 
   1233   /*  ----------- Analyze loops. -----------  */
   1234 
   1235   /* If some loop was duplicated, it gets bigger number
   1236      than all previously defined loops.  This fact allows us to run
   1237      only over initial loops skipping newly generated ones.  */
   1238   for (auto loop : loops_list (fun, 0))
   1239     if (loop->dont_vectorize)
   1240       {
   1241 	any_ifcvt_loops = true;
   1242 	/* If-conversion sometimes versions both the outer loop
   1243 	   (for the case when outer loop vectorization might be
   1244 	   desirable) as well as the inner loop in the scalar version
   1245 	   of the loop.  So we have:
   1246 	    if (LOOP_VECTORIZED (1, 3))
   1247 	      {
   1248 		loop1
   1249 		  loop2
   1250 	      }
   1251 	    else
   1252 	      loop3 (copy of loop1)
   1253 		if (LOOP_VECTORIZED (4, 5))
   1254 		  loop4 (copy of loop2)
   1255 		else
   1256 		  loop5 (copy of loop4)
   1257 	   If loops' iteration gives us loop3 first (which has
   1258 	   dont_vectorize set), make sure to process loop1 before loop4;
   1259 	   so that we can prevent vectorization of loop4 if loop1
   1260 	   is successfully vectorized.  */
   1261 	if (loop->inner)
   1262 	  {
   1263 	    gimple *loop_vectorized_call
   1264 	      = vect_loop_vectorized_call (loop);
   1265 	    if (loop_vectorized_call
   1266 		&& vect_loop_vectorized_call (loop->inner))
   1267 	      {
   1268 		tree arg = gimple_call_arg (loop_vectorized_call, 0);
   1269 		class loop *vector_loop
   1270 		  = get_loop (fun, tree_to_shwi (arg));
   1271 		if (vector_loop && vector_loop != loop)
   1272 		  {
   1273 		    /* Make sure we don't vectorize it twice.  */
   1274 		    vector_loop->dont_vectorize = true;
   1275 		    ret |= try_vectorize_loop (simduid_to_vf_htab,
   1276 					       &num_vectorized_loops,
   1277 					       vector_loop, fun);
   1278 		  }
   1279 	      }
   1280 	  }
   1281       }
   1282     else
   1283       ret |= try_vectorize_loop (simduid_to_vf_htab, &num_vectorized_loops,
   1284 				 loop, fun);
   1285 
   1286   vect_location = dump_user_location_t ();
   1287 
   1288   statistics_counter_event (fun, "Vectorized loops", num_vectorized_loops);
   1289   if (dump_enabled_p ()
   1290       || (num_vectorized_loops > 0 && dump_enabled_p ()))
   1291     dump_printf_loc (MSG_NOTE, vect_location,
   1292                      "vectorized %u loops in function.\n",
   1293                      num_vectorized_loops);
   1294 
   1295   /*  ----------- Finalize. -----------  */
   1296 
   1297   if (any_ifcvt_loops)
   1298     for (i = 1; i < number_of_loops (fun); i++)
   1299       {
   1300 	class loop *loop = get_loop (fun, i);
   1301 	if (loop && loop->dont_vectorize)
   1302 	  {
   1303 	    gimple *g = vect_loop_vectorized_call (loop);
   1304 	    if (g)
   1305 	      {
   1306 		fold_loop_internal_call (g, boolean_false_node);
   1307 		ret |= TODO_cleanup_cfg;
   1308 		g = NULL;
   1309 	      }
   1310 	    else
   1311 	      g = vect_loop_dist_alias_call (loop, fun);
   1312 
   1313 	    if (g)
   1314 	      {
   1315 		fold_loop_internal_call (g, boolean_false_node);
   1316 		ret |= TODO_cleanup_cfg;
   1317 	      }
   1318 	  }
   1319       }
   1320 
   1321   /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins.  */
   1322   if (fun->has_simduid_loops)
   1323     {
   1324       adjust_simduid_builtins (simduid_to_vf_htab, fun);
   1325       /* Avoid stale SCEV cache entries for the SIMD_LANE defs.  */
   1326       scev_reset ();
   1327     }
   1328   /* Shrink any "omp array simd" temporary arrays to the
   1329      actual vectorization factors.  */
   1330   if (simd_array_to_simduid_htab)
   1331     shrink_simd_arrays (simd_array_to_simduid_htab, simduid_to_vf_htab);
   1332   delete simduid_to_vf_htab;
   1333   fun->has_simduid_loops = false;
   1334 
   1335   if (num_vectorized_loops > 0)
   1336     {
   1337       /* If we vectorized any loop only virtual SSA form needs to be updated.
   1338 	 ???  Also while we try hard to update loop-closed SSA form we fail
   1339 	 to properly do this in some corner-cases (see PR56286).  */
   1340       rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals);
   1341       ret |= TODO_cleanup_cfg;
   1342     }
   1343 
   1344   for (i = 1; i < number_of_loops (fun); i++)
   1345     {
   1346       loop_vec_info loop_vinfo;
   1347       bool has_mask_store;
   1348 
   1349       class loop *loop = get_loop (fun, i);
   1350       if (!loop || !loop->aux)
   1351 	continue;
   1352       loop_vinfo = (loop_vec_info) loop->aux;
   1353       has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo);
   1354       delete loop_vinfo;
   1355       if (has_mask_store
   1356 	  && targetm.vectorize.empty_mask_is_expensive (IFN_MASK_STORE))
   1357 	optimize_mask_stores (loop);
   1358 
   1359       auto_bitmap exit_bbs;
   1360       /* Perform local CSE, this esp. helps because we emit code for
   1361 	 predicates that need to be shared for optimal predicate usage.
   1362 	 However reassoc will re-order them and prevent CSE from working
   1363 	 as it should.  CSE only the loop body, not the entry.  */
   1364       bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);
   1365 
   1366       edge entry = EDGE_PRED (loop_preheader_edge (loop)->src, 0);
   1367       do_rpo_vn (fun, entry, exit_bbs);
   1368 
   1369       loop->aux = NULL;
   1370     }
   1371 
   1372   vect_slp_fini ();
   1373 
   1374   return ret;
   1375 }
   1376 
   1377 } // anon namespace
   1378 
/* Create an instance of the loop vectorization pass for the pass
   manager.  */

gimple_opt_pass *
make_pass_vectorize (gcc::context *ctxt)
{
  return new pass_vectorize (ctxt);
}
   1384 
   1385 /* Entry point to the simduid cleanup pass.  */
   1386 
   1387 namespace {
   1388 
/* Pass metadata for the "simduid" cleanup pass, which folds the
   IFN_GOMP_SIMD_* builtins left behind when no vectorization
   happened.  */
const pass_data pass_data_simduid_cleanup =
{
  GIMPLE_PASS, /* type */
  "simduid", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  ( PROP_ssa | PROP_cfg ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
   1401 
/* GIMPLE pass folding leftover IFN_GOMP_SIMD_* builtins and shrinking
   "omp simd array" temporaries.  */

class pass_simduid_cleanup : public gimple_opt_pass
{
public:
  pass_simduid_cleanup (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_simduid_cleanup, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_simduid_cleanup (m_ctxt); }
  /* Only needed when the function still contains simduid loops.  */
  virtual bool gate (function *fun) { return fun->has_simduid_loops; }
  virtual unsigned int execute (function *);

}; // class pass_simduid_cleanup
   1415 
   1416 unsigned int
   1417 pass_simduid_cleanup::execute (function *fun)
   1418 {
   1419   hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
   1420 
   1421   note_simd_array_uses (&simd_array_to_simduid_htab, fun);
   1422 
   1423   /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins.  */
   1424   adjust_simduid_builtins (NULL, fun);
   1425 
   1426   /* Shrink any "omp array simd" temporary arrays to the
   1427      actual vectorization factors.  */
   1428   if (simd_array_to_simduid_htab)
   1429     shrink_simd_arrays (simd_array_to_simduid_htab, NULL);
   1430   fun->has_simduid_loops = false;
   1431   return 0;
   1432 }
   1433 
   1434 }  // anon namespace
   1435 
/* Create an instance of the simduid cleanup pass for the pass
   manager.  */

gimple_opt_pass *
make_pass_simduid_cleanup (gcc::context *ctxt)
{
  return new pass_simduid_cleanup (ctxt);
}
   1441 
   1442 
   1443 /*  Entry point to basic block SLP phase.  */
   1444 
   1445 namespace {
   1446 
/* Pass metadata for the "slp" basic-block vectorization pass.  */
const pass_data pass_data_slp_vectorize =
{
  GIMPLE_PASS, /* type */
  "slp", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_TREE_SLP_VECTORIZATION, /* tv_id */
  ( PROP_ssa | PROP_cfg ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
   1459 
/* GIMPLE pass performing basic-block (straight-line) SLP
   vectorization.  */

class pass_slp_vectorize : public gimple_opt_pass
{
public:
  pass_slp_vectorize (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_slp_vectorize, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_slp_vectorize (m_ctxt); }
  /* Run whenever -ftree-slp-vectorize is enabled.  */
  virtual bool gate (function *) { return flag_tree_slp_vectorize != 0; }
  virtual unsigned int execute (function *);

}; // class pass_slp_vectorize
   1473 
   1474 unsigned int
   1475 pass_slp_vectorize::execute (function *fun)
   1476 {
   1477   auto_purge_vect_location sentinel;
   1478   basic_block bb;
   1479 
   1480   bool in_loop_pipeline = scev_initialized_p ();
   1481   if (!in_loop_pipeline)
   1482     {
   1483       loop_optimizer_init (LOOPS_NORMAL);
   1484       scev_initialize ();
   1485     }
   1486 
   1487   /* Mark all stmts as not belonging to the current region and unvisited.  */
   1488   FOR_EACH_BB_FN (bb, fun)
   1489     {
   1490       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
   1491 	   gsi_next (&gsi))
   1492 	{
   1493 	  gphi *stmt = gsi.phi ();
   1494 	  gimple_set_uid (stmt, -1);
   1495 	  gimple_set_visited (stmt, false);
   1496 	}
   1497       for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
   1498 	   gsi_next (&gsi))
   1499 	{
   1500 	  gimple *stmt = gsi_stmt (gsi);
   1501 	  gimple_set_uid (stmt, -1);
   1502 	  gimple_set_visited (stmt, false);
   1503 	}
   1504     }
   1505 
   1506   vect_slp_init ();
   1507 
   1508   vect_slp_function (fun);
   1509 
   1510   vect_slp_fini ();
   1511 
   1512   if (!in_loop_pipeline)
   1513     {
   1514       scev_finalize ();
   1515       loop_optimizer_finalize ();
   1516     }
   1517 
   1518   return 0;
   1519 }
   1520 
   1521 } // anon namespace
   1522 
/* Create an instance of the basic-block SLP vectorization pass for the
   pass manager.  */

gimple_opt_pass *
make_pass_slp_vectorize (gcc::context *ctxt)
{
  return new pass_slp_vectorize (ctxt);
}
   1528 
   1529 
   1530 /* Increase alignment of global arrays to improve vectorization potential.
   1531    TODO:
   1532    - Consider also structs that have an array field.
   1533    - Use ipa analysis to prune arrays that can't be vectorized?
   1534      This should involve global alignment analysis and in the future also
   1535      array padding.  */
   1536 
   1537 static unsigned get_vec_alignment_for_type (tree);
   1538 static hash_map<tree, unsigned> *type_align_map;
   1539 
   1540 /* Return alignment of array's vector type corresponding to scalar type.
   1541    0 if no vector type exists.  */
   1542 static unsigned
   1543 get_vec_alignment_for_array_type (tree type)
   1544 {
   1545   gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
   1546   poly_uint64 array_size, vector_size;
   1547 
   1548   tree scalar_type = strip_array_types (type);
   1549   tree vectype = get_related_vectype_for_scalar_type (VOIDmode, scalar_type);
   1550   if (!vectype
   1551       || !poly_int_tree_p (TYPE_SIZE (type), &array_size)
   1552       || !poly_int_tree_p (TYPE_SIZE (vectype), &vector_size)
   1553       || maybe_lt (array_size, vector_size))
   1554     return 0;
   1555 
   1556   return TYPE_ALIGN (vectype);
   1557 }
   1558 
/* Return alignment of field having maximum alignment of vector type
   corresponding to its scalar type. For now, we only consider fields whose
   offset is a multiple of its vector alignment.
   0 if no suitable field is found.  */
static unsigned
get_vec_alignment_for_record_type (tree type)
{
  gcc_assert (TREE_CODE (type) == RECORD_TYPE);

  unsigned max_align = 0, alignment;
  HOST_WIDE_INT offset;
  tree offset_tree;

  if (TYPE_PACKED (type))
    return 0;

  /* Results are memoized in TYPE_ALIGN_MAP, keyed by the record type.  */
  unsigned *slot = type_align_map->get (type);
  if (slot)
    return *slot;

  for (tree field = first_field (type);
       field != NULL_TREE;
       field = DECL_CHAIN (field))
    {
      /* Skip if not FIELD_DECL or if alignment is set by user.  */
      if (TREE_CODE (field) != FIELD_DECL
	  || DECL_USER_ALIGN (field)
	  || DECL_ARTIFICIAL (field))
	continue;

      /* We don't need to process the type further if offset is variable,
	 since the offsets of remaining members will also be variable.  */
      if (TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST
	  || TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST)
	break;

      /* Similarly stop processing the type if offset_tree
	 does not fit in unsigned HOST_WIDE_INT.  */
      offset_tree = bit_position (field);
      if (!tree_fits_uhwi_p (offset_tree))
	break;

      offset = tree_to_uhwi (offset_tree);
      /* May recurse back into this function for nested records.  */
      alignment = get_vec_alignment_for_type (TREE_TYPE (field));

      /* Get maximum alignment of vectorized field/array among those members
	 whose offset is multiple of the vector alignment.  */
      if (alignment
	  && (offset % alignment == 0)
	  && (alignment > max_align))
	max_align = alignment;
    }

  type_align_map->put (type, max_align);
  return max_align;
}
   1615 
   1616 /* Return alignment of vector type corresponding to decl's scalar type
   1617    or 0 if it doesn't exist or the vector alignment is lesser than
   1618    decl's alignment.  */
   1619 static unsigned
   1620 get_vec_alignment_for_type (tree type)
   1621 {
   1622   if (type == NULL_TREE)
   1623     return 0;
   1624 
   1625   gcc_assert (TYPE_P (type));
   1626 
   1627   static unsigned alignment = 0;
   1628   switch (TREE_CODE (type))
   1629     {
   1630       case ARRAY_TYPE:
   1631 	alignment = get_vec_alignment_for_array_type (type);
   1632 	break;
   1633       case RECORD_TYPE:
   1634 	alignment = get_vec_alignment_for_record_type (type);
   1635 	break;
   1636       default:
   1637 	alignment = 0;
   1638 	break;
   1639     }
   1640 
   1641   return (alignment > TYPE_ALIGN (type)) ? alignment : 0;
   1642 }
   1643 
/* Entry point to increase_alignment pass.  Walks all defined global
   variables and raises their alignment to that of the corresponding
   vector type where legal, to improve vectorization potential.  */
static unsigned int
increase_alignment (void)
{
  varpool_node *vnode;

  vect_location = dump_user_location_t ();
  /* Memoization cache used by get_vec_alignment_for_record_type.  */
  type_align_map = new hash_map<tree, unsigned>;

  /* Increase the alignment of all global arrays for vectorization.  */
  FOR_EACH_DEFINED_VARIABLE (vnode)
    {
      tree decl = vnode->decl;
      unsigned int alignment;

      /* Skip decls whose alignment we must not change: symtab says no,
	 or the user set the alignment explicitly, or it is
	 compiler-generated.  */
      if ((decl_in_symtab_p (decl)
	  && !symtab_node::get (decl)->can_increase_alignment_p ())
	  || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl))
	continue;

      alignment = get_vec_alignment_for_type (TREE_TYPE (decl));
      if (alignment && vect_can_force_dr_alignment_p (decl, alignment))
        {
	  vnode->increase_alignment (alignment);
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "Increasing alignment of decl: %T\n", decl);
        }
    }

  delete type_align_map;
  return 0;
}
   1676 
   1677 
   1678 namespace {
   1679 
/* Pass metadata for the IPA "increase_alignment" pass.  */
const pass_data pass_data_ipa_increase_alignment =
{
  SIMPLE_IPA_PASS, /* type */
  "increase_alignment", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_IPA_OPT, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
   1692 
/* Simple IPA pass raising alignment of global variables for the benefit
   of the vectorizer.  */

class pass_ipa_increase_alignment : public simple_ipa_opt_pass
{
public:
  pass_ipa_increase_alignment (gcc::context *ctxt)
    : simple_ipa_opt_pass (pass_data_ipa_increase_alignment, ctxt)
  {}

  /* opt_pass methods: */
  /* Only useful with section anchors (which make the increased
     alignment exploitable) and with loop vectorization enabled.  */
  virtual bool gate (function *)
    {
      return flag_section_anchors && flag_tree_loop_vectorize;
    }

  virtual unsigned int execute (function *) { return increase_alignment (); }

}; // class pass_ipa_increase_alignment
   1709 
   1710 } // anon namespace
   1711 
/* Create an instance of the IPA increase_alignment pass for the pass
   manager.  */

simple_ipa_opt_pass *
make_pass_ipa_increase_alignment (gcc::context *ctxt)
{
  return new pass_ipa_increase_alignment (ctxt);
}
   1717 
/* If the condition represented by T is a comparison or the SSA name
   result of a comparison, extract the comparison's operands.  Represent
   T as NE_EXPR <T, 0> otherwise.  */

void
scalar_cond_masked_key::get_cond_ops_from_tree (tree t)
{
  /* Case 1: T is itself a comparison expression.  */
  if (TREE_CODE_CLASS (TREE_CODE (t)) == tcc_comparison)
    {
      this->code = TREE_CODE (t);
      this->op0 = TREE_OPERAND (t, 0);
      this->op1 = TREE_OPERAND (t, 1);
      this->inverted_p = false;
      return;
    }

  /* Case 2: T is an SSA name defined by a comparison, or by the
     bitwise NOT of an SSA name defined by a comparison (in which case
     record the comparison with inverted_p set).  */
  if (TREE_CODE (t) == SSA_NAME)
    if (gassign *stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (t)))
      {
	tree_code code = gimple_assign_rhs_code (stmt);
	if (TREE_CODE_CLASS (code) == tcc_comparison)
	  {
	    this->code = code;
	    this->op0 = gimple_assign_rhs1 (stmt);
	    this->op1 = gimple_assign_rhs2 (stmt);
	    this->inverted_p = false;
	    return;
	  }
	else if (code == BIT_NOT_EXPR)
	  {
	    tree n_op = gimple_assign_rhs1 (stmt);
	    if ((stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (n_op))))
	      {
		code = gimple_assign_rhs_code (stmt);
		if (TREE_CODE_CLASS (code) == tcc_comparison)
		  {
		    this->code = code;
		    this->op0 = gimple_assign_rhs1 (stmt);
		    this->op1 = gimple_assign_rhs2 (stmt);
		    this->inverted_p = true;
		    return;
		  }
	      }
	  }
      }

  /* Fallback: treat T as the condition T != 0.  */
  this->code = NE_EXPR;
  this->op0 = t;
  this->op1 = build_zero_cst (TREE_TYPE (t));
  this->inverted_p = false;
}
   1769 
   1770 /* See the comment above the declaration for details.  */
   1771 
   1772 unsigned int
   1773 vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
   1774 			     stmt_vec_info stmt_info, slp_tree,
   1775 			     tree vectype, int misalign,
   1776 			     vect_cost_model_location where)
   1777 {
   1778   unsigned int cost
   1779     = builtin_vectorization_cost (kind, vectype, misalign) * count;
   1780   return record_stmt_cost (stmt_info, where, cost);
   1781 }
   1782 
/* See the comment above the declaration for details.  */

void
vector_costs::finish_cost (const vector_costs *)
{
  /* finish_cost must only be called once per costing instance.  */
  gcc_assert (!m_finished);
  m_finished = true;
}
   1791 
   1792 /* Record a base cost of COST units against WHERE.  If STMT_INFO is
   1793    nonnull, use it to adjust the cost based on execution frequency
   1794    (where appropriate).  */
   1795 
   1796 unsigned int
   1797 vector_costs::record_stmt_cost (stmt_vec_info stmt_info,
   1798 				vect_cost_model_location where,
   1799 				unsigned int cost)
   1800 {
   1801   cost = adjust_cost_for_freq (stmt_info, where, cost);
   1802   m_costs[where] += cost;
   1803   return cost;
   1804 }
   1805 
   1806 /* COST is the base cost we have calculated for an operation in location WHERE.
   1807    If STMT_INFO is nonnull, use it to adjust the cost based on execution
   1808    frequency (where appropriate).  Return the adjusted cost.  */
   1809 
   1810 unsigned int
   1811 vector_costs::adjust_cost_for_freq (stmt_vec_info stmt_info,
   1812 				    vect_cost_model_location where,
   1813 				    unsigned int cost)
   1814 {
   1815   /* Statements in an inner loop relative to the loop being
   1816      vectorized are weighted more heavily.  The value here is
   1817      arbitrary and could potentially be improved with analysis.  */
   1818   if (where == vect_body
   1819       && stmt_info
   1820       && stmt_in_inner_loop_p (m_vinfo, stmt_info))
   1821     {
   1822       loop_vec_info loop_vinfo = as_a<loop_vec_info> (m_vinfo);
   1823       cost *= LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo);
   1824     }
   1825   return cost;
   1826 }
   1827 
   1828 /* See the comment above the declaration for details.  */
   1829 
   1830 bool
   1831 vector_costs::better_main_loop_than_p (const vector_costs *other) const
   1832 {
   1833   int diff = compare_inside_loop_cost (other);
   1834   if (diff != 0)
   1835     return diff < 0;
   1836 
   1837   /* If there's nothing to choose between the loop bodies, see whether
   1838      there's a difference in the prologue and epilogue costs.  */
   1839   diff = compare_outside_loop_cost (other);
   1840   if (diff != 0)
   1841     return diff < 0;
   1842 
   1843   return false;
   1844 }
   1845 
   1846 
   1847 /* See the comment above the declaration for details.  */
   1848 
bool
vector_costs::better_epilogue_loop_than_p (const vector_costs *other,
					   loop_vec_info main_loop) const
{
  /* Both this and OTHER describe candidate epilogue loops for
     MAIN_LOOP, so both m_vinfos are loop_vec_infos.  */
  loop_vec_info this_loop_vinfo = as_a<loop_vec_info> (this->m_vinfo);
  loop_vec_info other_loop_vinfo = as_a<loop_vec_info> (other->m_vinfo);

  poly_int64 this_vf = LOOP_VINFO_VECT_FACTOR (this_loop_vinfo);
  poly_int64 other_vf = LOOP_VINFO_VECT_FACTOR (other_loop_vinfo);

  poly_uint64 main_poly_vf = LOOP_VINFO_VECT_FACTOR (main_loop);
  unsigned HOST_WIDE_INT main_vf;
  unsigned HOST_WIDE_INT other_factor, this_factor, other_cost, this_cost;
  /* If we can determine how many iterations are left for the epilogue
     loop, that is if both the main loop's vectorization factor and number
     of iterations are constant, then we use them to calculate the cost of
     the epilogue loop together with a 'likely value' for the epilogues
     vectorization factor.  Otherwise we use the main loop's vectorization
     factor and the maximum poly value for the epilogue's.  If the target
     has not provided with a sensible upper bound poly vectorization
     factors are likely to be favored over constant ones.  */
  if (main_poly_vf.is_constant (&main_vf)
      && LOOP_VINFO_NITERS_KNOWN_P (main_loop))
    {
      /* NITERS is the remainder left over after the main loop's full
	 vector iterations, i.e. the scalar iteration count that the
	 epilogue must cover.  */
      unsigned HOST_WIDE_INT niters
	= LOOP_VINFO_INT_NITERS (main_loop) % main_vf;
      HOST_WIDE_INT other_likely_vf
	= estimated_poly_value (other_vf, POLY_VALUE_LIKELY);
      HOST_WIDE_INT this_likely_vf
	= estimated_poly_value (this_vf, POLY_VALUE_LIKELY);

      /* If the epilogue is using partial vectors we account for the
	 partial iteration here too.  */
      other_factor = niters / other_likely_vf;
      if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (other_loop_vinfo)
	  && niters % other_likely_vf != 0)
	other_factor++;

      this_factor = niters / this_likely_vf;
      if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (this_loop_vinfo)
	  && niters % this_likely_vf != 0)
	this_factor++;
    }
  else
    {
      unsigned HOST_WIDE_INT main_vf_max
	= estimated_poly_value (main_poly_vf, POLY_VALUE_MAX);
      unsigned HOST_WIDE_INT other_vf_max
	= estimated_poly_value (other_vf, POLY_VALUE_MAX);
      unsigned HOST_WIDE_INT this_vf_max
	= estimated_poly_value (this_vf, POLY_VALUE_MAX);

      other_factor = CEIL (main_vf_max, other_vf_max);
      this_factor = CEIL (main_vf_max, this_vf_max);

      /* If the loop is not using partial vectors then it will iterate one
	 time less than one that does.  It is safe to subtract one here,
	 because the main loop's vf is always at least 2x bigger than that
	 of an epilogue.  */
      if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (other_loop_vinfo))
	other_factor -= 1;
      if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (this_loop_vinfo))
	this_factor -= 1;
    }

  /* Compute the costs by multiplying the inside costs with the factor and
     add the outside costs for a more complete picture.  The factor is the
     amount of times we are expecting to iterate this epilogue.  */
  other_cost = other->body_cost () * other_factor;
  this_cost = this->body_cost () * this_factor;
  other_cost += other->outside_cost ();
  this_cost += this->outside_cost ();
  /* The comparison is strict, so ties favor OTHER (the incumbent).  */
  return this_cost < other_cost;
}
   1923 
   1924 /* A <=>-style subroutine of better_main_loop_than_p.  Check whether we can
   1925    determine the return value of better_main_loop_than_p by comparing the
   1926    inside (loop body) costs of THIS and OTHER.  Return:
   1927 
   1928    * -1 if better_main_loop_than_p should return true.
   1929    * 1 if better_main_loop_than_p should return false.
   1930    * 0 if we can't decide.  */
   1931 
int
vector_costs::compare_inside_loop_cost (const vector_costs *other) const
{
  loop_vec_info this_loop_vinfo = as_a<loop_vec_info> (this->m_vinfo);
  loop_vec_info other_loop_vinfo = as_a<loop_vec_info> (other->m_vinfo);

  /* Both candidates must be vectorizing the same scalar loop.  */
  struct loop *loop = LOOP_VINFO_LOOP (this_loop_vinfo);
  gcc_assert (LOOP_VINFO_LOOP (other_loop_vinfo) == loop);

  poly_int64 this_vf = LOOP_VINFO_VECT_FACTOR (this_loop_vinfo);
  poly_int64 other_vf = LOOP_VINFO_VECT_FACTOR (other_loop_vinfo);

  /* Limit the VFs to what is likely to be the maximum number of iterations,
     to handle cases in which at least one loop_vinfo is fully-masked.  */
  HOST_WIDE_INT estimated_max_niter = likely_max_stmt_executions_int (loop);
  if (estimated_max_niter != -1)
    {
      if (known_le (estimated_max_niter, this_vf))
	this_vf = estimated_max_niter;
      if (known_le (estimated_max_niter, other_vf))
	other_vf = estimated_max_niter;
    }

  /* Check whether the (fractional) cost per scalar iteration is lower or
     higher: this_inside_cost / this_vf vs. other_inside_cost / other_vf.
     Cross-multiplying each body cost by the other loop's VF avoids the
     division.  */
  poly_int64 rel_this = this_loop_vinfo->vector_costs->body_cost () * other_vf;
  poly_int64 rel_other
    = other_loop_vinfo->vector_costs->body_cost () * this_vf;

  HOST_WIDE_INT est_rel_this_min
    = estimated_poly_value (rel_this, POLY_VALUE_MIN);
  HOST_WIDE_INT est_rel_this_max
    = estimated_poly_value (rel_this, POLY_VALUE_MAX);

  HOST_WIDE_INT est_rel_other_min
    = estimated_poly_value (rel_other, POLY_VALUE_MIN);
  HOST_WIDE_INT est_rel_other_max
    = estimated_poly_value (rel_other, POLY_VALUE_MAX);

  /* Check first if we can make out an unambiguous total order from the
     minimum and maximum estimates.  */
  if (est_rel_this_min < est_rel_other_min
      && est_rel_this_max < est_rel_other_max)
    return -1;

  if (est_rel_other_min < est_rel_this_min
      && est_rel_other_max < est_rel_this_max)
    return 1;

  /* When other_loop_vinfo uses a variable vectorization factor,
     we know that it has a lower cost for at least one runtime VF.
     However, we don't know how likely that VF is.

     One option would be to compare the costs for the estimated VFs.
     The problem is that that can put too much pressure on the cost
     model.  E.g. if the estimated VF is also the lowest possible VF,
     and if other_loop_vinfo is 1 unit worse than this_loop_vinfo
     for the estimated VF, we'd then choose this_loop_vinfo even
     though (a) this_loop_vinfo might not actually be better than
     other_loop_vinfo for that VF and (b) it would be significantly
     worse at larger VFs.

     Here we go for a hacky compromise: pick this_loop_vinfo if it is
     no more expensive than other_loop_vinfo even after doubling the
     estimated other_loop_vinfo VF.  For all but trivial loops, this
     ensures that we only pick this_loop_vinfo if it is significantly
     better than other_loop_vinfo at the estimated VF.  */
  if (est_rel_other_min != est_rel_this_min
      || est_rel_other_max != est_rel_this_max)
    {
      HOST_WIDE_INT est_rel_this_likely
	= estimated_poly_value (rel_this, POLY_VALUE_LIKELY);
      HOST_WIDE_INT est_rel_other_likely
	= estimated_poly_value (rel_other, POLY_VALUE_LIKELY);

      return est_rel_this_likely * 2 <= est_rel_other_likely ? -1 : 1;
    }

  /* The minimum and maximum estimates agree exactly, so there is
     nothing to choose between the two bodies.  */
  return 0;
}
   2012 
   2013 /* A <=>-style subroutine of better_main_loop_than_p, used when there is
   2014    nothing to choose between the inside (loop body) costs of THIS and OTHER.
   2015    Check whether we can determine the return value of better_main_loop_than_p
   2016    by comparing the outside (prologue and epilogue) costs of THIS and OTHER.
   2017    Return:
   2018 
   2019    * -1 if better_main_loop_than_p should return true.
   2020    * 1 if better_main_loop_than_p should return false.
   2021    * 0 if we can't decide.  */
   2022 
   2023 int
   2024 vector_costs::compare_outside_loop_cost (const vector_costs *other) const
   2025 {
   2026   auto this_outside_cost = this->outside_cost ();
   2027   auto other_outside_cost = other->outside_cost ();
   2028   if (this_outside_cost != other_outside_cost)
   2029     return this_outside_cost < other_outside_cost ? -1 : 1;
   2030 
   2031   return 0;
   2032 }
   2033