/* Vectorizer
   Copyright (C) 2003-2022 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit (at) il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Loop and basic block vectorizer.

  This file contains drivers for the three vectorizers:
  (1) loop vectorizer (inter-iteration parallelism),
  (2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop
      vectorizer)
  (3) BB vectorizer (out-of-loops), aka SLP

  The rest of the vectorizer's code is organized as follows:
  - tree-vect-loop.cc - loop specific parts such as reductions, etc.  These are
    used by drivers (1) and (2).
  - tree-vect-loop-manip.cc - vectorizer's loop control-flow utilities, used by
    drivers (1) and (2).
  - tree-vect-slp.cc - BB vectorization specific analysis and transformation,
    used by drivers (2) and (3).
  - tree-vect-stmts.cc - statements analysis and transformation (used by all).
  - tree-vect-data-refs.cc - vectorizer specific data-refs analysis and
    manipulations (used by all).
  - tree-vect-patterns.cc - vectorizable code patterns detector (used by all).

  Here's a poor attempt at illustrating that:

     tree-vectorizer.cc:
     loop_vect()  loop_aware_slp()  slp_vect()
          |        /           \          /
          |       /             \        /
          tree-vect-loop.cc  tree-vect-slp.cc
                | \      \  /      /   |
                |  \      \/      /    |
                |   \     /\     /     |
                |    \   /  \   /      |
         tree-vect-stmts.cc  tree-vect-data-refs.cc
                       \      /
                    tree-vect-patterns.cc
*/
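
/* For orientation, a tiny illustrative example (not taken from GCC
   itself): the loop vectorizer exploits inter-iteration parallelism in

       for (i = 0; i < n; i++)
         a[i] = b[i] + c[i];

   while the BB (SLP) vectorizer targets straight-line code such as

       a[0] = b[0] + c[0];
       a[1] = b[1] + c[1];
       a[2] = b[2] + c[2];
       a[3] = b[3] + c[3];

   which it may turn into a single vector add of width 4, target
   support permitting.  */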

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-cfg.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "tree-ssa-propagate.h"
#include "dbgcnt.h"
#include "tree-scalar-evolution.h"
#include "stringpool.h"
#include "attribs.h"
#include "gimple-pretty-print.h"
#include "opt-problem.h"
#include "internal-fn.h"
#include "tree-ssa-sccvn.h"

/* Loop or bb location, with hotness information.  */
dump_user_location_t vect_location;

/* auto_purge_vect_location's dtor: reset the vect_location
   global, to avoid stale location_t values that could reference
   GC-ed blocks.  */

auto_purge_vect_location::~auto_purge_vect_location ()
{
  vect_location = dump_user_location_t ();
}

/* Dump a cost entry according to args to F.  */

void
dump_stmt_cost (FILE *f, int count, enum vect_cost_for_stmt kind,
                stmt_vec_info stmt_info, slp_tree node, tree,
                int misalign, unsigned cost,
                enum vect_cost_model_location where)
{
  if (stmt_info)
    {
      print_gimple_expr (f, STMT_VINFO_STMT (stmt_info), 0, TDF_SLIM);
      fprintf (f, " ");
    }
  else if (node)
    fprintf (f, "node %p ", (void *)node);
  else
    fprintf (f, "<unknown> ");
  fprintf (f, "%d times ", count);
  const char *ks = "unknown";
  switch (kind)
    {
    case scalar_stmt:
      ks = "scalar_stmt";
      break;
    case scalar_load:
      ks = "scalar_load";
      break;
    case scalar_store:
      ks = "scalar_store";
      break;
    case vector_stmt:
      ks = "vector_stmt";
      break;
    case vector_load:
      ks = "vector_load";
      break;
    case vector_gather_load:
      ks = "vector_gather_load";
      break;
    case unaligned_load:
      ks = "unaligned_load";
      break;
    case unaligned_store:
      ks = "unaligned_store";
      break;
    case vector_store:
      ks = "vector_store";
      break;
    case vector_scatter_store:
      ks = "vector_scatter_store";
      break;
    case vec_to_scalar:
      ks = "vec_to_scalar";
      break;
    case scalar_to_vec:
      ks = "scalar_to_vec";
      break;
    case cond_branch_not_taken:
      ks = "cond_branch_not_taken";
      break;
    case cond_branch_taken:
      ks = "cond_branch_taken";
      break;
    case vec_perm:
      ks = "vec_perm";
      break;
    case vec_promote_demote:
      ks = "vec_promote_demote";
      break;
    case vec_construct:
      ks = "vec_construct";
      break;
    }
  fprintf (f, "%s ", ks);
  if (kind == unaligned_load || kind == unaligned_store)
    fprintf (f, "(misalign %d) ", misalign);
  fprintf (f, "costs %u ", cost);
  const char *ws = "unknown";
  switch (where)
    {
    case vect_prologue:
      ws = "prologue";
      break;
    case vect_body:
      ws = "body";
      break;
    case vect_epilogue:
      ws = "epilogue";
      break;
    }
  fprintf (f, "in %s\n", ws);
}
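
/* As an illustration of the format produced above, a dumped entry
   might read (hypothetical statement and cost):

     _3 = a[i_1]; 1 times vector_load costs 1 in body

   i.e. statement (or node), count, cost kind, optional misalignment,
   cost, and cost-model location.  */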

/* For mapping simduid to vectorization factor.  */

class simduid_to_vf : public free_ptr_hash<simduid_to_vf>
{
public:
  unsigned int simduid;
  poly_uint64 vf;

  /* hash_table support.  */
  static inline hashval_t hash (const simduid_to_vf *);
  static inline int equal (const simduid_to_vf *, const simduid_to_vf *);
};

inline hashval_t
simduid_to_vf::hash (const simduid_to_vf *p)
{
  return p->simduid;
}

inline int
simduid_to_vf::equal (const simduid_to_vf *p1, const simduid_to_vf *p2)
{
  return p1->simduid == p2->simduid;
}

/* This hash maps the OMP simd array to the corresponding simduid used
   to index into it.  Like so:

        _7 = GOMP_SIMD_LANE (simduid.0)
        ...
        ...
        D.1737[_7] = stuff;


   This hash maps from the OMP simd array (D.1737[]) to DECL_UID of
   simduid.0.  */

struct simd_array_to_simduid : free_ptr_hash<simd_array_to_simduid>
{
  tree decl;
  unsigned int simduid;

  /* hash_table support.  */
  static inline hashval_t hash (const simd_array_to_simduid *);
  static inline int equal (const simd_array_to_simduid *,
                           const simd_array_to_simduid *);
};

inline hashval_t
simd_array_to_simduid::hash (const simd_array_to_simduid *p)
{
  return DECL_UID (p->decl);
}

inline int
simd_array_to_simduid::equal (const simd_array_to_simduid *p1,
                              const simd_array_to_simduid *p2)
{
  return p1->decl == p2->decl;
}

/* Fold IFN_GOMP_SIMD_LANE, IFN_GOMP_SIMD_VF and IFN_GOMP_SIMD_LAST_LANE
   into their corresponding constants and remove
   IFN_GOMP_SIMD_ORDERED_{START,END}.  */

static void
adjust_simduid_builtins (hash_table<simduid_to_vf> *htab, function *fun)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator i;

      for (i = gsi_start_bb (bb); !gsi_end_p (i); )
        {
          poly_uint64 vf = 1;
          enum internal_fn ifn;
          gimple *stmt = gsi_stmt (i);
          tree t;
          if (!is_gimple_call (stmt)
              || !gimple_call_internal_p (stmt))
            {
              gsi_next (&i);
              continue;
            }
          ifn = gimple_call_internal_fn (stmt);
          switch (ifn)
            {
            case IFN_GOMP_SIMD_LANE:
            case IFN_GOMP_SIMD_VF:
            case IFN_GOMP_SIMD_LAST_LANE:
              break;
            case IFN_GOMP_SIMD_ORDERED_START:
            case IFN_GOMP_SIMD_ORDERED_END:
              if (integer_onep (gimple_call_arg (stmt, 0)))
                {
                  enum built_in_function bcode
                    = (ifn == IFN_GOMP_SIMD_ORDERED_START
                       ? BUILT_IN_GOMP_ORDERED_START
                       : BUILT_IN_GOMP_ORDERED_END);
                  gimple *g
                    = gimple_build_call (builtin_decl_explicit (bcode), 0);
                  gimple_move_vops (g, stmt);
                  gsi_replace (&i, g, true);
                  continue;
                }
              gsi_remove (&i, true);
              unlink_stmt_vdef (stmt);
              continue;
            default:
              gsi_next (&i);
              continue;
            }
          tree arg = gimple_call_arg (stmt, 0);
          gcc_assert (arg != NULL_TREE);
          gcc_assert (TREE_CODE (arg) == SSA_NAME);
          simduid_to_vf *p = NULL, data;
          data.simduid = DECL_UID (SSA_NAME_VAR (arg));
          /* We need to nullify the loop safelen field since its value
             is no longer valid after the transformation.  */
          if (bb->loop_father && bb->loop_father->safelen > 0)
            bb->loop_father->safelen = 0;
          if (htab)
            {
              p = htab->find (&data);
              if (p)
                vf = p->vf;
            }
          switch (ifn)
            {
            case IFN_GOMP_SIMD_VF:
              t = build_int_cst (unsigned_type_node, vf);
              break;
            case IFN_GOMP_SIMD_LANE:
              t = build_int_cst (unsigned_type_node, 0);
              break;
            case IFN_GOMP_SIMD_LAST_LANE:
              t = gimple_call_arg (stmt, 1);
              break;
            default:
              gcc_unreachable ();
            }
          tree lhs = gimple_call_lhs (stmt);
          if (lhs)
            replace_uses_by (lhs, t);
          release_defs (stmt);
          gsi_remove (&i, true);
        }
    }
}
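
/* For illustration (hypothetical GIMPLE, assuming a vectorization
   factor of 8 was recorded for simduid.0 by the loop vectorizer):

     before:  _5 = GOMP_SIMD_VF (simduid.0);
     after:   all uses of _5 are replaced by the constant 8 and the
              call is removed.

   Likewise GOMP_SIMD_LANE folds to the constant 0 and
   GOMP_SIMD_LAST_LANE to its second argument.  */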

/* Helper structure for note_simd_array_uses.  */

struct note_simd_array_uses_struct
{
  hash_table<simd_array_to_simduid> **htab;
  unsigned int simduid;
};

/* Callback for note_simd_array_uses, called through walk_gimple_op.  */

static tree
note_simd_array_uses_cb (tree *tp, int *walk_subtrees, void *data)
{
  struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
  struct note_simd_array_uses_struct *ns
    = (struct note_simd_array_uses_struct *) wi->info;

  if (TYPE_P (*tp))
    *walk_subtrees = 0;
  else if (VAR_P (*tp)
           && lookup_attribute ("omp simd array", DECL_ATTRIBUTES (*tp))
           && DECL_CONTEXT (*tp) == current_function_decl)
    {
      simd_array_to_simduid data;
      if (!*ns->htab)
        *ns->htab = new hash_table<simd_array_to_simduid> (15);
      data.decl = *tp;
      data.simduid = ns->simduid;
      simd_array_to_simduid **slot = (*ns->htab)->find_slot (&data, INSERT);
      if (*slot == NULL)
        {
          simd_array_to_simduid *p = XNEW (simd_array_to_simduid);
          *p = data;
          *slot = p;
        }
      else if ((*slot)->simduid != ns->simduid)
        (*slot)->simduid = -1U;
      *walk_subtrees = 0;
    }
  return NULL_TREE;
}

/* Find "omp simd array" temporaries and map them to the corresponding
   simduid.  */

static void
note_simd_array_uses (hash_table<simd_array_to_simduid> **htab, function *fun)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  struct walk_stmt_info wi;
  struct note_simd_array_uses_struct ns;

  memset (&wi, 0, sizeof (wi));
  wi.info = &ns;
  ns.htab = htab;

  FOR_EACH_BB_FN (bb, fun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple *stmt = gsi_stmt (gsi);
        if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
          continue;
        switch (gimple_call_internal_fn (stmt))
          {
          case IFN_GOMP_SIMD_LANE:
          case IFN_GOMP_SIMD_VF:
          case IFN_GOMP_SIMD_LAST_LANE:
            break;
          default:
            continue;
          }
        tree lhs = gimple_call_lhs (stmt);
        if (lhs == NULL_TREE)
          continue;
        imm_use_iterator use_iter;
        gimple *use_stmt;
        ns.simduid = DECL_UID (SSA_NAME_VAR (gimple_call_arg (stmt, 0)));
        FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs)
          if (!is_gimple_debug (use_stmt))
            walk_gimple_op (use_stmt, note_simd_array_uses_cb, &wi);
      }
}

/* Shrink arrays with "omp simd array" attribute to the corresponding
   vectorization factor.  */

static void
shrink_simd_arrays
  (hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab,
   hash_table<simduid_to_vf> *simduid_to_vf_htab)
{
  for (hash_table<simd_array_to_simduid>::iterator iter
         = simd_array_to_simduid_htab->begin ();
       iter != simd_array_to_simduid_htab->end (); ++iter)
    if ((*iter)->simduid != -1U)
      {
        tree decl = (*iter)->decl;
        poly_uint64 vf = 1;
        if (simduid_to_vf_htab)
          {
            simduid_to_vf *p = NULL, data;
            data.simduid = (*iter)->simduid;
            p = simduid_to_vf_htab->find (&data);
            if (p)
              vf = p->vf;
          }
        tree atype
          = build_array_type_nelts (TREE_TYPE (TREE_TYPE (decl)), vf);
        TREE_TYPE (decl) = atype;
        relayout_decl (decl);
      }

  delete simd_array_to_simduid_htab;
}
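
/* Illustrative effect (hypothetical numbers): an "omp simd array"
   such as D.1737[64], sized for the maximum safe length, is re-laid
   out as D.1737[8] once its loop is known to have been vectorized
   with VF == 8; arrays recorded with conflicting simduids (-1U above)
   are left alone.  */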

/* Initialize the vec_info with kind KIND_IN and shared data
   SHARED_.  */

vec_info::vec_info (vec_info::vec_kind kind_in, vec_info_shared *shared_)
  : kind (kind_in),
    shared (shared_),
    stmt_vec_info_ro (false)
{
  stmt_vec_infos.create (50);
}

vec_info::~vec_info ()
{
  for (slp_instance &instance : slp_instances)
    vect_free_slp_instance (instance);

  free_stmt_vec_infos ();
}

vec_info_shared::vec_info_shared ()
  : n_stmts (0),
    datarefs (vNULL),
    datarefs_copy (vNULL),
    ddrs (vNULL)
{
}

vec_info_shared::~vec_info_shared ()
{
  free_data_refs (datarefs);
  free_dependence_relations (ddrs);
  datarefs_copy.release ();
}

void
vec_info_shared::save_datarefs ()
{
  if (!flag_checking)
    return;
  datarefs_copy.reserve_exact (datarefs.length ());
  for (unsigned i = 0; i < datarefs.length (); ++i)
    datarefs_copy.quick_push (*datarefs[i]);
}

void
vec_info_shared::check_datarefs ()
{
  if (!flag_checking)
    return;
  gcc_assert (datarefs.length () == datarefs_copy.length ());
  for (unsigned i = 0; i < datarefs.length (); ++i)
    if (memcmp (&datarefs_copy[i], datarefs[i],
                offsetof (data_reference, alt_indices)) != 0)
      gcc_unreachable ();
}

/* Record that STMT belongs to the vectorizable region.  Create and return
   an associated stmt_vec_info.  */

stmt_vec_info
vec_info::add_stmt (gimple *stmt)
{
  stmt_vec_info res = new_stmt_vec_info (stmt);
  set_vinfo_for_stmt (stmt, res);
  return res;
}

/* Record that STMT belongs to the vectorizable region.  Create a new
   stmt_vec_info, record STMT_INFO as its related statement, and return
   the new stmt_vec_info.  */

stmt_vec_info
vec_info::add_pattern_stmt (gimple *stmt, stmt_vec_info stmt_info)
{
  stmt_vec_info res = new_stmt_vec_info (stmt);
  set_vinfo_for_stmt (stmt, res, false);
  STMT_VINFO_RELATED_STMT (res) = stmt_info;
  return res;
}

/* If STMT has an associated stmt_vec_info, return that vec_info, otherwise
   return null.  It is safe to call this function on any statement, even if
   it might not be part of the vectorizable region.  */

stmt_vec_info
vec_info::lookup_stmt (gimple *stmt)
{
  unsigned int uid = gimple_uid (stmt);
  if (uid > 0 && uid - 1 < stmt_vec_infos.length ())
    {
      stmt_vec_info res = stmt_vec_infos[uid - 1];
      if (res && res->stmt == stmt)
        return res;
    }
  return NULL;
}

/* If NAME is an SSA_NAME and its definition has an associated stmt_vec_info,
   return that stmt_vec_info, otherwise return null.  It is safe to call
   this on arbitrary operands.  */

stmt_vec_info
vec_info::lookup_def (tree name)
{
  if (TREE_CODE (name) == SSA_NAME
      && !SSA_NAME_IS_DEFAULT_DEF (name))
    return lookup_stmt (SSA_NAME_DEF_STMT (name));
  return NULL;
}

/* See whether there is a single non-debug statement that uses LHS and
   whether that statement has an associated stmt_vec_info.  Return the
   stmt_vec_info if so, otherwise return null.  */

stmt_vec_info
vec_info::lookup_single_use (tree lhs)
{
  use_operand_p dummy;
  gimple *use_stmt;
  if (single_imm_use (lhs, &dummy, &use_stmt))
    return lookup_stmt (use_stmt);
  return NULL;
}

/* Return vectorization information about DR.  */

dr_vec_info *
vec_info::lookup_dr (data_reference *dr)
{
  stmt_vec_info stmt_info = lookup_stmt (DR_STMT (dr));
  /* DR_STMT should never refer to a stmt in a pattern replacement.  */
  gcc_checking_assert (!is_pattern_stmt_p (stmt_info));
  return STMT_VINFO_DR_INFO (stmt_info->dr_aux.stmt);
}

/* Record that NEW_STMT_INFO now implements the same data reference
   as OLD_STMT_INFO.  */

void
vec_info::move_dr (stmt_vec_info new_stmt_info, stmt_vec_info old_stmt_info)
{
  gcc_assert (!is_pattern_stmt_p (old_stmt_info));
  STMT_VINFO_DR_INFO (old_stmt_info)->stmt = new_stmt_info;
  new_stmt_info->dr_aux = old_stmt_info->dr_aux;
  STMT_VINFO_DR_WRT_VEC_LOOP (new_stmt_info)
    = STMT_VINFO_DR_WRT_VEC_LOOP (old_stmt_info);
  STMT_VINFO_GATHER_SCATTER_P (new_stmt_info)
    = STMT_VINFO_GATHER_SCATTER_P (old_stmt_info);
}

/* Permanently remove the statement described by STMT_INFO from the
   function.  */

void
vec_info::remove_stmt (stmt_vec_info stmt_info)
{
  gcc_assert (!stmt_info->pattern_stmt_p);
  set_vinfo_for_stmt (stmt_info->stmt, NULL);
  unlink_stmt_vdef (stmt_info->stmt);
  gimple_stmt_iterator si = gsi_for_stmt (stmt_info->stmt);
  gsi_remove (&si, true);
  release_defs (stmt_info->stmt);
  free_stmt_vec_info (stmt_info);
}

/* Replace the statement at GSI by NEW_STMT, updating both the
   vectorization information and the function itself.  STMT_INFO
   describes the statement at GSI.  */

void
vec_info::replace_stmt (gimple_stmt_iterator *gsi, stmt_vec_info stmt_info,
                        gimple *new_stmt)
{
  gimple *old_stmt = stmt_info->stmt;
  gcc_assert (!stmt_info->pattern_stmt_p && old_stmt == gsi_stmt (*gsi));
  gimple_set_uid (new_stmt, gimple_uid (old_stmt));
  stmt_info->stmt = new_stmt;
  gsi_replace (gsi, new_stmt, true);
}

/* Insert stmts in SEQ on the VEC_INFO region entry.  If CONTEXT is
   not NULL it selects the sub-region entry to use; currently this is
   used by loop vectorization to choose between inserting on the inner
   loop entry and the outer loop entry.  */

void
vec_info::insert_seq_on_entry (stmt_vec_info context, gimple_seq seq)
{
  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (this))
    {
      class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      basic_block new_bb;
      edge pe;

      if (context && nested_in_vect_loop_p (loop, context))
        loop = loop->inner;

      pe = loop_preheader_edge (loop);
      new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
      gcc_assert (!new_bb);
    }
  else
    {
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (this);
      gimple_stmt_iterator gsi_region_begin
        = gsi_after_labels (bb_vinfo->bbs[0]);
      gsi_insert_seq_before (&gsi_region_begin, seq, GSI_SAME_STMT);
    }
}

/* Like insert_seq_on_entry but just inserts the single stmt NEW_STMT.  */

void
vec_info::insert_on_entry (stmt_vec_info context, gimple *new_stmt)
{
  gimple_seq seq = NULL;
  gimple_stmt_iterator gsi = gsi_start (seq);
  gsi_insert_before_without_update (&gsi, new_stmt, GSI_SAME_STMT);
  insert_seq_on_entry (context, seq);
}

/* Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
vec_info::new_stmt_vec_info (gimple *stmt)
{
  stmt_vec_info res = XCNEW (class _stmt_vec_info);
  res->stmt = stmt;

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_REDUC_TYPE (res) = TREE_CODE_REDUCTION;
  STMT_VINFO_REDUC_CODE (res) = ERROR_MARK;
  STMT_VINFO_REDUC_FN (res) = IFN_LAST;
  STMT_VINFO_REDUC_IDX (res) = -1;
  STMT_VINFO_SLP_VECT_ONLY (res) = false;
  STMT_VINFO_SLP_VECT_ONLY_PATTERN (res) = false;
  STMT_VINFO_VEC_STMTS (res) = vNULL;
  res->reduc_initial_values = vNULL;
  res->reduc_scalar_results = vNULL;

  if (is_a <loop_vec_info> (this)
      && gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_SLP_TYPE (res) = loop_vect;

  /* This is really "uninitialized" until vect_compute_data_ref_alignment.  */
  res->dr_aux.misalignment = DR_MISALIGNMENT_UNINITIALIZED;

  return res;
}

/* Associate STMT with INFO.  */

void
vec_info::set_vinfo_for_stmt (gimple *stmt, stmt_vec_info info, bool check_ro)
{
  unsigned int uid = gimple_uid (stmt);
  if (uid == 0)
    {
      gcc_assert (!check_ro || !stmt_vec_info_ro);
      gcc_checking_assert (info);
      uid = stmt_vec_infos.length () + 1;
      gimple_set_uid (stmt, uid);
      stmt_vec_infos.safe_push (info);
    }
  else
    {
      gcc_checking_assert (info == NULL);
      stmt_vec_infos[uid - 1] = info;
    }
}
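
/* A note on the encoding used above (summarizing the code, not new
   behavior): gimple_uid holds 1 + the index into stmt_vec_infos, with
   uid 0 reserved for "no stmt_vec_info"; lookup_stmt relies on exactly
   this encoding.  */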

/* Free the contents of stmt_vec_infos.  */

void
vec_info::free_stmt_vec_infos (void)
{
  for (stmt_vec_info &info : stmt_vec_infos)
    if (info != NULL)
      free_stmt_vec_info (info);
  stmt_vec_infos.release ();
}

/* Free STMT_INFO.  */

void
vec_info::free_stmt_vec_info (stmt_vec_info stmt_info)
{
  if (stmt_info->pattern_stmt_p)
    {
      gimple_set_bb (stmt_info->stmt, NULL);
      tree lhs = gimple_get_lhs (stmt_info->stmt);
      if (lhs && TREE_CODE (lhs) == SSA_NAME)
        release_ssa_name (lhs);
    }

  stmt_info->reduc_initial_values.release ();
  stmt_info->reduc_scalar_results.release ();
  STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
  STMT_VINFO_VEC_STMTS (stmt_info).release ();
  free (stmt_info);
}

/* Returns true if S1 dominates S2.  */

bool
vect_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
{
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);

  /* If bb1 is NULL, it should be the GIMPLE_NOP def stmt of a
     default-definition (D) SSA_NAME.  Assume it lives at the beginning
     of the function and thus dominates everything.  */
  if (!bb1 || s1 == s2)
    return true;

  /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
  if (!bb2)
    return false;

  if (bb1 != bb2)
    return dominated_by_p (CDI_DOMINATORS, bb2, bb1);

  /* PHIs in the same basic block are assumed to be executed all in
     parallel; if only one of the stmts is a PHI, it dominates the
     other stmt in the same basic block.  */
  if (gimple_code (s1) == GIMPLE_PHI)
    return true;

  if (gimple_code (s2) == GIMPLE_PHI)
    return false;

  /* Inserted vectorized stmts all have UID 0 while the original stmts
     in the IL have UID increasing within a BB.  Walk from both sides
     until we find the other stmt or a stmt with UID != 0.  */
  gimple_stmt_iterator gsi1 = gsi_for_stmt (s1);
  while (gimple_uid (gsi_stmt (gsi1)) == 0)
    {
      gsi_next (&gsi1);
      if (gsi_end_p (gsi1))
        return false;
      if (gsi_stmt (gsi1) == s2)
        return true;
    }
  if (gimple_uid (gsi_stmt (gsi1)) == -1u)
    return false;

  gimple_stmt_iterator gsi2 = gsi_for_stmt (s2);
  while (gimple_uid (gsi_stmt (gsi2)) == 0)
    {
      gsi_prev (&gsi2);
      if (gsi_end_p (gsi2))
        return false;
      if (gsi_stmt (gsi2) == s1)
        return true;
    }
  if (gimple_uid (gsi_stmt (gsi2)) == -1u)
    return false;

  if (gimple_uid (gsi_stmt (gsi1)) <= gimple_uid (gsi_stmt (gsi2)))
    return true;
  return false;
}
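
/* Illustrative scenario for the UID walk above (hypothetical stmts):
   newly inserted vector stmts have uid 0 and sit between original
   stmts whose uids increase within a BB, e.g.

     uid 3:  _1 = a[i_2];
     uid 0:  vect__1.4_9 = MEM <vector(4) int> [(int *)&a];  <- inserted
     uid 4:  b[i_2] = _1;

   so walking forward from S1 (and backward from S2) over uid-0 stmts
   either meets the other stmt directly or reaches stmts with nonzero
   uids whose order can be compared.  */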

/* A helper function to free scev and LOOP niter information, as well as
   clear loop constraint LOOP_C_FINITE.  */

void
vect_free_loop_info_assumptions (class loop *loop)
{
  scev_reset_htab ();
  /* We need to explicitly reset upper bound information since it is
     still used even after free_numbers_of_iterations_estimates.  */
  loop->any_upper_bound = false;
  loop->any_likely_upper_bound = false;
  free_numbers_of_iterations_estimates (loop);
  loop_constraint_clear (loop, LOOP_C_FINITE);
}

/* If LOOP has been versioned during ifcvt, return the internal call
   guarding it.  */

gimple *
vect_loop_vectorized_call (class loop *loop, gcond **cond)
{
  basic_block bb = loop_preheader_edge (loop)->src;
  gimple *g;
  do
    {
      g = last_stmt (bb);
      if ((g && gimple_code (g) == GIMPLE_COND)
          || !single_succ_p (bb))
        break;
      if (!single_pred_p (bb))
        break;
      bb = single_pred (bb);
    }
  while (1);
  if (g && gimple_code (g) == GIMPLE_COND)
    {
      if (cond)
        *cond = as_a <gcond *> (g);
      gimple_stmt_iterator gsi = gsi_for_stmt (g);
      gsi_prev (&gsi);
      if (!gsi_end_p (gsi))
        {
          g = gsi_stmt (gsi);
          if (gimple_call_internal_p (g, IFN_LOOP_VECTORIZED)
              && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->num
                  || tree_to_shwi (gimple_call_arg (g, 1)) == loop->num))
            return g;
        }
    }
  return NULL;
}
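
/* The shape being matched above, for illustration (loop numbers
   hypothetical):

     _1 = .LOOP_VECTORIZED (1, 2);   <- the returned internal call
     if (_1 != 0)
       loop1   <- version to be vectorized (loop->num == 1)
     else
       loop2   <- scalar fallback (loop->num == 2)

   with the call found immediately before the GIMPLE_COND reached by
   walking up from LOOP's preheader.  */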

/* If LOOP has been versioned during loop distribution, return the
   guarding internal call.  */

static gimple *
vect_loop_dist_alias_call (class loop *loop, function *fun)
{
  basic_block bb;
  basic_block entry;
  class loop *outer, *orig;
  gimple_stmt_iterator gsi;
  gimple *g;

  if (loop->orig_loop_num == 0)
    return NULL;

  orig = get_loop (fun, loop->orig_loop_num);
  if (orig == NULL)
    {
      /* The original loop has somehow been destroyed.  Clear the
         information.  */
      loop->orig_loop_num = 0;
      return NULL;
    }

  if (loop != orig)
    bb = nearest_common_dominator (CDI_DOMINATORS, loop->header, orig->header);
  else
    bb = loop_preheader_edge (loop)->src;

  outer = bb->loop_father;
  entry = ENTRY_BLOCK_PTR_FOR_FN (fun);

  /* Look upward in dominance tree.  */
  for (; bb != entry && flow_bb_inside_loop_p (outer, bb);
       bb = get_immediate_dominator (CDI_DOMINATORS, bb))
    {
      g = last_stmt (bb);
      if (g == NULL || gimple_code (g) != GIMPLE_COND)
        continue;

      gsi = gsi_for_stmt (g);
      gsi_prev (&gsi);
      if (gsi_end_p (gsi))
        continue;

      g = gsi_stmt (gsi);
      /* The guarding internal function call must have the same distribution
         alias id.  */
      if (gimple_call_internal_p (g, IFN_LOOP_DIST_ALIAS)
          && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->orig_loop_num))
        return g;
    }
  return NULL;
}

/* Set the uids of all the statements in basic blocks inside loop
   represented by LOOP_VINFO.  LOOP_VECTORIZED_CALL is the internal
   call guarding the loop which has been if-converted.  */
static void
set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call,
                  function *fun)
{
  tree arg = gimple_call_arg (loop_vectorized_call, 1);
  basic_block *bbs;
  unsigned int i;
  class loop *scalar_loop = get_loop (fun, tree_to_shwi (arg));

  LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop;
  gcc_checking_assert (vect_loop_vectorized_call (scalar_loop)
                       == loop_vectorized_call);
  /* If we are going to vectorize the outer loop, prevent vectorization
     of the inner loop in the scalar loop - either the scalar loop is
     thrown away, in which case that work would be wasted, or it is
     used only for a few iterations.  */
  if (scalar_loop->inner)
    {
      gimple *g = vect_loop_vectorized_call (scalar_loop->inner);
      if (g)
        {
          arg = gimple_call_arg (g, 0);
          get_loop (fun, tree_to_shwi (arg))->dont_vectorize = true;
          fold_loop_internal_call (g, boolean_false_node);
        }
    }
  bbs = get_loop_body (scalar_loop);
  for (i = 0; i < scalar_loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *phi = gsi_stmt (gsi);
          gimple_set_uid (phi, 0);
        }
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          gimple_set_uid (stmt, 0);
        }
    }
  free (bbs);
}

/* Generate vectorized code for LOOP and its epilogues.  */

static void
vect_transform_loops (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
                      loop_p loop, gimple *loop_vectorized_call,
                      function *fun)
{
  loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);

  if (loop_vectorized_call)
    set_uid_loop_bbs (loop_vinfo, loop_vectorized_call, fun);

  unsigned HOST_WIDE_INT bytes;
  if (dump_enabled_p ())
    {
      if (GET_MODE_SIZE (loop_vinfo->vector_mode).is_constant (&bytes))
        dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                         "loop vectorized using %wu byte vectors\n", bytes);
      else
        dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                         "loop vectorized using variable length vectors\n");
    }

  loop_p new_loop = vect_transform_loop (loop_vinfo,
                                         loop_vectorized_call);
  /* Now that the loop has been vectorized, allow it to be unrolled
     etc.  */
  loop->force_vectorize = false;

  if (loop->simduid)
    {
      simduid_to_vf *simduid_to_vf_data = XNEW (simduid_to_vf);
      if (!simduid_to_vf_htab)
        simduid_to_vf_htab = new hash_table<simduid_to_vf> (15);
      simduid_to_vf_data->simduid = DECL_UID (loop->simduid);
      simduid_to_vf_data->vf = loop_vinfo->vectorization_factor;
      *simduid_to_vf_htab->find_slot (simduid_to_vf_data, INSERT)
          = simduid_to_vf_data;
    }

  /* Epilogue of vectorized loop must be vectorized too.  */
  if (new_loop)
    vect_transform_loops (simduid_to_vf_htab, new_loop, NULL, fun);
}

/* Try to vectorize LOOP.  */

static unsigned
try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
                      unsigned *num_vectorized_loops, loop_p loop,
                      gimple *loop_vectorized_call,
                      gimple *loop_dist_alias_call,
                      function *fun)
{
  unsigned ret = 0;
  vec_info_shared shared;
  auto_purge_vect_location sentinel;
  vect_location = find_loop_location (loop);

  if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
      && dump_enabled_p ())
    dump_printf (MSG_NOTE | MSG_PRIORITY_INTERNALS,
                 "\nAnalyzing loop at %s:%d\n",
                 LOCATION_FILE (vect_location.get_location_t ()),
                 LOCATION_LINE (vect_location.get_location_t ()));

  /* Try to analyze the loop, retaining an opt_problem if dump_enabled_p.  */
  opt_loop_vec_info loop_vinfo = vect_analyze_loop (loop, &shared);
  loop->aux = loop_vinfo;

  if (!loop_vinfo)
    if (dump_enabled_p ())
      if (opt_problem *problem = loop_vinfo.get_problem ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "couldn't vectorize loop\n");
          problem->emit_and_clear ();
        }

  if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
    {
      /* Free existing information if loop is analyzed with some
         assumptions.  */
      if (loop_constraint_set_p (loop, LOOP_C_FINITE))
        vect_free_loop_info_assumptions (loop);

      /* If we applied if-conversion then try to vectorize the
         BB of innermost loops.
         ???  Ideally BB vectorization would learn to vectorize
         control flow by applying if-conversion on-the-fly, the
         following retains the if-converted loop body even when
         only non-if-converted parts took part in BB vectorization.  */
      if (flag_tree_slp_vectorize != 0
          && loop_vectorized_call
          && ! loop->inner)
        {
          basic_block bb = loop->header;
          bool require_loop_vectorize = false;
          for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
               !gsi_end_p (gsi); gsi_next (&gsi))
            {
              gimple *stmt = gsi_stmt (gsi);
              gcall *call = dyn_cast <gcall *> (stmt);
              if (call && gimple_call_internal_p (call))
                {
                  internal_fn ifn = gimple_call_internal_fn (call);
                  if (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE
                      /* Don't keep the if-converted parts when an ifn
                         with a specific type is not supported by the
                         backend.  */
                      || (direct_internal_fn_p (ifn)
                          && !direct_internal_fn_supported_p
                          (call, OPTIMIZE_FOR_SPEED)))
                    {
                      require_loop_vectorize = true;
                      break;
                    }
                }
              gimple_set_uid (stmt, -1);
              gimple_set_visited (stmt, false);
            }
          if (!require_loop_vectorize)
            {
              tree arg = gimple_call_arg (loop_vectorized_call, 1);
              class loop *scalar_loop = get_loop (fun, tree_to_shwi (arg));
              if (vect_slp_if_converted_bb (bb, scalar_loop))
                {
                  fold_loop_internal_call (loop_vectorized_call,
                                           boolean_true_node);
                  loop_vectorized_call = NULL;
                  ret |= TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
                }
            }
        }
      /* If outer loop vectorization fails for LOOP_VECTORIZED guarded
         loop, don't vectorize its inner loop; we'll attempt to
         vectorize LOOP_VECTORIZED guarded inner loop of the scalar
         loop version.  */
      if (loop_vectorized_call && loop->inner)
        loop->inner->dont_vectorize = true;
      return ret;
    }

  if (!dbg_cnt (vect_loop))
    {
      /* Free existing information if loop is analyzed with some
         assumptions.  */
      if (loop_constraint_set_p (loop, LOOP_C_FINITE))
        vect_free_loop_info_assumptions (loop);
      return ret;
    }

  (*num_vectorized_loops)++;
  /* Transform LOOP and its epilogues.  */
  vect_transform_loops (simduid_to_vf_htab, loop, loop_vectorized_call, fun);

  if (loop_vectorized_call)
    {
      fold_loop_internal_call (loop_vectorized_call, boolean_true_node);
      ret |= TODO_cleanup_cfg;
    }
  if (loop_dist_alias_call)
    {
      tree value = gimple_call_arg (loop_dist_alias_call, 1);
      fold_loop_internal_call (loop_dist_alias_call, value);
      ret |= TODO_cleanup_cfg;
    }

  return ret;
}

/* Try to vectorize LOOP.  */

static unsigned
try_vectorize_loop (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
                    unsigned *num_vectorized_loops, loop_p loop,
                    function *fun)
{
  if (!((flag_tree_loop_vectorize
         && optimize_loop_nest_for_speed_p (loop))
        || loop->force_vectorize))
    return 0;

  return try_vectorize_loop_1 (simduid_to_vf_htab, num_vectorized_loops, loop,
                               vect_loop_vectorized_call (loop),
                               vect_loop_dist_alias_call (loop, fun), fun);
}


/* Loop autovectorization.  */

namespace {

const pass_data pass_data_vectorize =
{
  GIMPLE_PASS, /* type */
  "vect", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_TREE_VECTORIZATION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_vectorize : public gimple_opt_pass
{
public:
  pass_vectorize (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_vectorize, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      return flag_tree_loop_vectorize || fun->has_force_vectorize_loops;
    }

  virtual unsigned int execute (function *);

}; // class pass_vectorize

/* Function vectorize_loops.

   Entry point to loop vectorization phase.  */

unsigned
pass_vectorize::execute (function *fun)
{
  unsigned int i;
  unsigned int num_vectorized_loops = 0;
  unsigned int vect_loops_num;
  hash_table<simduid_to_vf> *simduid_to_vf_htab = NULL;
  hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
  bool any_ifcvt_loops = false;
  unsigned ret = 0;

  vect_loops_num = number_of_loops (fun);

  /* Bail out if there are no loops (number_of_loops also counts the
     function's root pseudo-loop, so <= 1 means none).  */
  if (vect_loops_num <= 1)
    return 0;

  vect_slp_init ();

  if (fun->has_simduid_loops)
    note_simd_array_uses (&simd_array_to_simduid_htab, fun);

  /*  ----------- Analyze loops. -----------  */

  /* If a loop was duplicated, the copy gets a bigger number than any
     previously defined loop.  This fact allows us to run only over the
     initial loops, skipping newly generated ones.  */
  for (auto loop : loops_list (fun, 0))
    if (loop->dont_vectorize)
      {
	any_ifcvt_loops = true;
	/* If-conversion sometimes versions both the outer loop
	   (for the case when outer loop vectorization might be
	   desirable) as well as the inner loop in the scalar version
	   of the loop.  So we have:
	    if (LOOP_VECTORIZED (1, 3))
	      {
		loop1
		  loop2
	      }
	    else
	      loop3 (copy of loop1)
		if (LOOP_VECTORIZED (4, 5))
		  loop4 (copy of loop2)
		else
		  loop5 (copy of loop4)
	   If the loop iteration gives us loop3 first (which has
	   dont_vectorize set), make sure to process loop1 before loop4,
	   so that we can prevent vectorization of loop4 if loop1
	   is successfully vectorized.  */
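	/* As an illustrative source-level sketch (not any particular
	   testcase), a nest such as

	     for (i = 0; i < n; i++)	  <- loop1
	       for (j = 0; j < m; j++)	  <- loop2
		 if (p[j])
		   a[i][j] += b[i][j];

	   can be if-converted and versioned into the shape above, the
	   loop3/loop4/loop5 copies being the scalar fallbacks guarded
	   by the LOOP_VECTORIZED internal calls.  */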
	if (loop->inner)
	  {
	    gimple *loop_vectorized_call
	      = vect_loop_vectorized_call (loop);
	    if (loop_vectorized_call
		&& vect_loop_vectorized_call (loop->inner))
	      {
		tree arg = gimple_call_arg (loop_vectorized_call, 0);
		class loop *vector_loop
		  = get_loop (fun, tree_to_shwi (arg));
		if (vector_loop && vector_loop != loop)
		  {
		    /* Make sure we don't vectorize it twice.  */
		    vector_loop->dont_vectorize = true;
		    ret |= try_vectorize_loop (simduid_to_vf_htab,
					       &num_vectorized_loops,
					       vector_loop, fun);
		  }
	      }
	  }
      }
    else
      ret |= try_vectorize_loop (simduid_to_vf_htab, &num_vectorized_loops,
				 loop, fun);

  vect_location = dump_user_location_t ();

  statistics_counter_event (fun, "Vectorized loops", num_vectorized_loops);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vectorized %u loops in function.\n",
                     num_vectorized_loops);

  /*  ----------- Finalize. -----------  */

  if (any_ifcvt_loops)
    for (i = 1; i < number_of_loops (fun); i++)
      {
	class loop *loop = get_loop (fun, i);
	if (loop && loop->dont_vectorize)
	  {
	    gimple *g = vect_loop_vectorized_call (loop);
	    if (g)
	      {
		fold_loop_internal_call (g, boolean_false_node);
		ret |= TODO_cleanup_cfg;
		g = NULL;
	      }
	    else
	      g = vect_loop_dist_alias_call (loop, fun);

	    if (g)
	      {
		fold_loop_internal_call (g, boolean_false_node);
		ret |= TODO_cleanup_cfg;
	      }
	  }
      }

  /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins.  */
  if (fun->has_simduid_loops)
    {
      adjust_simduid_builtins (simduid_to_vf_htab, fun);
      /* Avoid stale SCEV cache entries for the SIMD_LANE defs.  */
      scev_reset ();
    }
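  /* As an illustrative sketch (not a specific testcase), for

       #pragma omp simd
       for (i = 0; i < n; i++)
	 a[i] = b[i] + c[i];

     the OMP lowering emits IFN_GOMP_SIMD_VF and IFN_GOMP_SIMD_LANE calls
     keyed on the loop's simduid; once we know whether the loop was
     vectorized and with which factor, adjust_simduid_builtins folds them
     to the appropriate constants.  */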
  /* Shrink any "omp array simd" temporary arrays to the
     actual vectorization factors.  */
  if (simd_array_to_simduid_htab)
    shrink_simd_arrays (simd_array_to_simduid_htab, simduid_to_vf_htab);
  delete simduid_to_vf_htab;
  fun->has_simduid_loops = false;

  if (num_vectorized_loops > 0)
    {
      /* If we vectorized any loop, only virtual SSA form needs to be
	 updated.  ???  Also, while we try hard to update loop-closed SSA
	 form, we fail to do so properly in some corner cases (see PR56286).  */
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals);
      ret |= TODO_cleanup_cfg;
    }

  for (i = 1; i < number_of_loops (fun); i++)
    {
      loop_vec_info loop_vinfo;
      bool has_mask_store;

      class loop *loop = get_loop (fun, i);
      if (!loop || !loop->aux)
	continue;
      loop_vinfo = (loop_vec_info) loop->aux;
      has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo);
      delete loop_vinfo;
      if (has_mask_store
	  && targetm.vectorize.empty_mask_is_expensive (IFN_MASK_STORE))
	optimize_mask_stores (loop);

      auto_bitmap exit_bbs;
      /* Perform local CSE; this especially helps because we emit code for
	 predicates that need to be shared for optimal predicate usage.
	 However, reassoc will re-order them and prevent CSE from working
	 as it should.  CSE only the loop body, not the entry.  */
      bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);

      edge entry = EDGE_PRED (loop_preheader_edge (loop)->src, 0);
      do_rpo_vn (fun, entry, exit_bbs);

      loop->aux = NULL;
    }

  vect_slp_fini ();

  return ret;
}

} // anon namespace

gimple_opt_pass *
make_pass_vectorize (gcc::context *ctxt)
{
  return new pass_vectorize (ctxt);
}

/* Entry point to the simduid cleanup pass.  */

namespace {

const pass_data pass_data_simduid_cleanup =
{
  GIMPLE_PASS, /* type */
  "simduid", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  ( PROP_ssa | PROP_cfg ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_simduid_cleanup : public gimple_opt_pass
{
public:
  pass_simduid_cleanup (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_simduid_cleanup, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_simduid_cleanup (m_ctxt); }
  virtual bool gate (function *fun) { return fun->has_simduid_loops; }
  virtual unsigned int execute (function *);

}; // class pass_simduid_cleanup

unsigned int
pass_simduid_cleanup::execute (function *fun)
{
  hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;

  note_simd_array_uses (&simd_array_to_simduid_htab, fun);

  /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins.  */
  adjust_simduid_builtins (NULL, fun);

  /* Shrink any "omp array simd" temporary arrays to the
     actual vectorization factors.  */
  if (simd_array_to_simduid_htab)
    shrink_simd_arrays (simd_array_to_simduid_htab, NULL);
  fun->has_simduid_loops = false;
  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_simduid_cleanup (gcc::context *ctxt)
{
  return new pass_simduid_cleanup (ctxt);
}


/* Entry point to basic block SLP phase.  */
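/* For example (purely illustrative), BB SLP can replace a straight-line
   group of isomorphic statements such as

     a[0] = b[0] + c[0];
     a[1] = b[1] + c[1];
     a[2] = b[2] + c[2];
     a[3] = b[3] + c[3];

   by a single vector addition and vector store, with no enclosing loop
   required.  */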

namespace {

const pass_data pass_data_slp_vectorize =
{
  GIMPLE_PASS, /* type */
  "slp", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_TREE_SLP_VECTORIZATION, /* tv_id */
  ( PROP_ssa | PROP_cfg ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_slp_vectorize : public gimple_opt_pass
{
public:
  pass_slp_vectorize (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_slp_vectorize, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_slp_vectorize (m_ctxt); }
  virtual bool gate (function *) { return flag_tree_slp_vectorize != 0; }
  virtual unsigned int execute (function *);

}; // class pass_slp_vectorize

unsigned int
pass_slp_vectorize::execute (function *fun)
{
  auto_purge_vect_location sentinel;
  basic_block bb;

  bool in_loop_pipeline = scev_initialized_p ();
  if (!in_loop_pipeline)
    {
      loop_optimizer_init (LOOPS_NORMAL);
      scev_initialize ();
    }

  /* Mark all stmts as not belonging to the current region and unvisited.  */
  FOR_EACH_BB_FN (bb, fun)
    {
      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *stmt = gsi.phi ();
	  gimple_set_uid (stmt, -1);
	  gimple_set_visited (stmt, false);
	}
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  gimple_set_uid (stmt, -1);
	  gimple_set_visited (stmt, false);
	}
    }

  vect_slp_init ();

  vect_slp_function (fun);

  vect_slp_fini ();

  if (!in_loop_pipeline)
    {
      scev_finalize ();
      loop_optimizer_finalize ();
    }

  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_slp_vectorize (gcc::context *ctxt)
{
  return new pass_slp_vectorize (ctxt);
}


/* Increase alignment of global arrays to improve vectorization potential.
   TODO:
   - Consider also structs that have an array field.
   - Use ipa analysis to prune arrays that can't be vectorized?
     This should involve global alignment analysis and in the future also
     array padding.  */
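/* As an illustrative example: for a global such as

     static int a[256];

   a target with 128-bit vector registers may prefer 16-byte alignment,
   so this pass raises the declared alignment of A (when the symbol
   allows it), letting the vectorizer use aligned accesses without
   peeling for alignment.  */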

static unsigned get_vec_alignment_for_type (tree);
static hash_map<tree, unsigned> *type_align_map;

/* Return alignment of array's vector type corresponding to scalar type.
   0 if no vector type exists.  */
static unsigned
get_vec_alignment_for_array_type (tree type)
{
  gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
  poly_uint64 array_size, vector_size;

  tree scalar_type = strip_array_types (type);
  tree vectype = get_related_vectype_for_scalar_type (VOIDmode, scalar_type);
  if (!vectype
      || !poly_int_tree_p (TYPE_SIZE (type), &array_size)
      || !poly_int_tree_p (TYPE_SIZE (vectype), &vector_size)
      || maybe_lt (array_size, vector_size))
    return 0;

  return TYPE_ALIGN (vectype);
}

/* Return the alignment of the field having the maximum alignment of the
   vector type corresponding to its scalar type.  For now, we only consider
   fields whose offset is a multiple of their vector alignment.
   Return 0 if no suitable field is found.  */
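/* Illustrative example: in

     struct s { int a[8]; char tail; };

   on a target with 128-bit vectors the field A has vector alignment 16
   and sits at offset 0, a multiple of 16, so it yields max_align 16;
   TAIL contributes nothing since scalar fields fall through to the
   default case of get_vec_alignment_for_type and return 0.  */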
static unsigned
get_vec_alignment_for_record_type (tree type)
{
  gcc_assert (TREE_CODE (type) == RECORD_TYPE);

  unsigned max_align = 0, alignment;
  HOST_WIDE_INT offset;
  tree offset_tree;

  if (TYPE_PACKED (type))
    return 0;

  unsigned *slot = type_align_map->get (type);
  if (slot)
    return *slot;

  for (tree field = first_field (type);
       field != NULL_TREE;
       field = DECL_CHAIN (field))
    {
      /* Skip if not a FIELD_DECL or if the alignment is set by the user.  */
      if (TREE_CODE (field) != FIELD_DECL
	  || DECL_USER_ALIGN (field)
	  || DECL_ARTIFICIAL (field))
	continue;

      /* We don't need to process the type further if the offset is variable,
	 since the offsets of the remaining members will also be variable.  */
      if (TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST
	  || TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST)
	break;

      /* Similarly, stop processing the type if offset_tree
	 does not fit in unsigned HOST_WIDE_INT.  */
      offset_tree = bit_position (field);
      if (!tree_fits_uhwi_p (offset_tree))
	break;

      offset = tree_to_uhwi (offset_tree);
      alignment = get_vec_alignment_for_type (TREE_TYPE (field));

      /* Get the maximum alignment of vectorized field/array among those
	 members whose offset is a multiple of the vector alignment.  */
      if (alignment
	  && (offset % alignment == 0)
	  && (alignment > max_align))
	max_align = alignment;
    }

  type_align_map->put (type, max_align);
  return max_align;
}

/* Return the alignment of the vector type corresponding to TYPE's scalar
   type, or 0 if no such vector type exists or if its alignment is less
   than TYPE's own alignment.  */
static unsigned
get_vec_alignment_for_type (tree type)
{
  if (type == NULL_TREE)
    return 0;

  gcc_assert (TYPE_P (type));

  unsigned alignment = 0;
  switch (TREE_CODE (type))
    {
      case ARRAY_TYPE:
	alignment = get_vec_alignment_for_array_type (type);
	break;
      case RECORD_TYPE:
	alignment = get_vec_alignment_for_record_type (type);
	break;
      default:
	alignment = 0;
	break;
    }

  return (alignment > TYPE_ALIGN (type)) ? alignment : 0;
}

/* Entry point to increase_alignment pass.  */
static unsigned int
increase_alignment (void)
{
  varpool_node *vnode;

  vect_location = dump_user_location_t ();
  type_align_map = new hash_map<tree, unsigned>;

  /* Increase the alignment of all global arrays for vectorization.  */
  FOR_EACH_DEFINED_VARIABLE (vnode)
    {
      tree decl = vnode->decl;
      unsigned int alignment;

      if ((decl_in_symtab_p (decl)
	   && !symtab_node::get (decl)->can_increase_alignment_p ())
	  || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl))
	continue;

      alignment = get_vec_alignment_for_type (TREE_TYPE (decl));
      if (alignment && vect_can_force_dr_alignment_p (decl, alignment))
	{
	  vnode->increase_alignment (alignment);
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "Increasing alignment of decl: %T\n", decl);
	}
    }

  delete type_align_map;
  return 0;
}


namespace {

const pass_data pass_data_ipa_increase_alignment =
{
  SIMPLE_IPA_PASS, /* type */
  "increase_alignment", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_IPA_OPT, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_ipa_increase_alignment : public simple_ipa_opt_pass
{
public:
  pass_ipa_increase_alignment (gcc::context *ctxt)
    : simple_ipa_opt_pass (pass_data_ipa_increase_alignment, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_section_anchors && flag_tree_loop_vectorize;
    }

  virtual unsigned int execute (function *) { return increase_alignment (); }

}; // class pass_ipa_increase_alignment

} // anon namespace

simple_ipa_opt_pass *
make_pass_ipa_increase_alignment (gcc::context *ctxt)
{
  return new pass_ipa_increase_alignment (ctxt);
}

/* If the condition represented by T is a comparison or the SSA name
   result of a comparison, extract the comparison's operands.  Represent
   T as NE_EXPR <T, 0> otherwise.  */
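/* Illustrative examples of the extraction (made-up SSA names):

     T is a_1 < b_2			-> { LT_EXPR, a_1, b_2 }
     T is _3, _3 = a_1 < b_2		-> { LT_EXPR, a_1, b_2 }
     T is _4, _4 = ~_3, _3 = a_1 < b_2	-> { LT_EXPR, a_1, b_2 }, inverted
     anything else			-> { NE_EXPR, T, 0 }  */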

void
scalar_cond_masked_key::get_cond_ops_from_tree (tree t)
{
  if (TREE_CODE_CLASS (TREE_CODE (t)) == tcc_comparison)
    {
      this->code = TREE_CODE (t);
      this->op0 = TREE_OPERAND (t, 0);
      this->op1 = TREE_OPERAND (t, 1);
      this->inverted_p = false;
      return;
    }

  if (TREE_CODE (t) == SSA_NAME)
    if (gassign *stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (t)))
      {
	tree_code code = gimple_assign_rhs_code (stmt);
	if (TREE_CODE_CLASS (code) == tcc_comparison)
	  {
	    this->code = code;
	    this->op0 = gimple_assign_rhs1 (stmt);
	    this->op1 = gimple_assign_rhs2 (stmt);
	    this->inverted_p = false;
	    return;
	  }
	else if (code == BIT_NOT_EXPR)
	  {
	    tree n_op = gimple_assign_rhs1 (stmt);
	    if ((stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (n_op))))
	      {
		code = gimple_assign_rhs_code (stmt);
		if (TREE_CODE_CLASS (code) == tcc_comparison)
		  {
		    this->code = code;
		    this->op0 = gimple_assign_rhs1 (stmt);
		    this->op1 = gimple_assign_rhs2 (stmt);
		    this->inverted_p = true;
		    return;
		  }
	      }
	  }
      }

  this->code = NE_EXPR;
  this->op0 = t;
  this->op1 = build_zero_cst (TREE_TYPE (t));
  this->inverted_p = false;
}

/* See the comment above the declaration for details.  */

unsigned int
vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
			     stmt_vec_info stmt_info, slp_tree,
			     tree vectype, int misalign,
			     vect_cost_model_location where)
{
  unsigned int cost
    = builtin_vectorization_cost (kind, vectype, misalign) * count;
  return record_stmt_cost (stmt_info, where, cost);
}

/* See the comment above the declaration for details.  */

void
vector_costs::finish_cost (const vector_costs *)
{
  gcc_assert (!m_finished);
  m_finished = true;
}

/* Record a base cost of COST units against WHERE.  If STMT_INFO is
   nonnull, use it to adjust the cost based on execution frequency
   (where appropriate).  */

unsigned int
vector_costs::record_stmt_cost (stmt_vec_info stmt_info,
				vect_cost_model_location where,
				unsigned int cost)
{
  cost = adjust_cost_for_freq (stmt_info, where, cost);
  m_costs[where] += cost;
  return cost;
}

/* COST is the base cost we have calculated for an operation in location WHERE.
   If STMT_INFO is nonnull, use it to adjust the cost based on execution
   frequency (where appropriate).  Return the adjusted cost.  */

unsigned int
vector_costs::adjust_cost_for_freq (stmt_vec_info stmt_info,
				    vect_cost_model_location where,
				    unsigned int cost)
{
  /* Statements in an inner loop relative to the loop being
     vectorized are weighted more heavily.  The value here is
     arbitrary and could potentially be improved with analysis.  */
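  /* E.g. a statement with base cost 4 in such an inner loop body is
     accounted as 4 * LOOP_VINFO_INNER_LOOP_COST_FACTOR units, i.e. 200
     units with an assumed factor of 50.  */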
  if (where == vect_body
      && stmt_info
      && stmt_in_inner_loop_p (m_vinfo, stmt_info))
    {
      loop_vec_info loop_vinfo = as_a<loop_vec_info> (m_vinfo);
      cost *= LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo);
    }
  return cost;
}

/* See the comment above the declaration for details.  */

bool
vector_costs::better_main_loop_than_p (const vector_costs *other) const
{
  int diff = compare_inside_loop_cost (other);
  if (diff != 0)
    return diff < 0;

  /* If there's nothing to choose between the loop bodies, see whether
     there's a difference in the prologue and epilogue costs.  */
  diff = compare_outside_loop_cost (other);
  if (diff != 0)
    return diff < 0;

  return false;
}


/* See the comment above the declaration for details.  */

bool
vector_costs::better_epilogue_loop_than_p (const vector_costs *other,
					   loop_vec_info main_loop) const
{
  loop_vec_info this_loop_vinfo = as_a<loop_vec_info> (this->m_vinfo);
  loop_vec_info other_loop_vinfo = as_a<loop_vec_info> (other->m_vinfo);

  poly_int64 this_vf = LOOP_VINFO_VECT_FACTOR (this_loop_vinfo);
  poly_int64 other_vf = LOOP_VINFO_VECT_FACTOR (other_loop_vinfo);

  poly_uint64 main_poly_vf = LOOP_VINFO_VECT_FACTOR (main_loop);
  unsigned HOST_WIDE_INT main_vf;
  unsigned HOST_WIDE_INT other_factor, this_factor, other_cost, this_cost;
  /* If we can determine how many iterations are left for the epilogue
     loop, that is, if both the main loop's vectorization factor and number
     of iterations are constant, then we use them to calculate the cost of
     the epilogue loop together with a 'likely value' for the epilogue's
     vectorization factor.  Otherwise we use the main loop's vectorization
     factor and the maximum poly value for the epilogue's.  If the target
     has not provided a sensible upper bound, poly vectorization factors
     are likely to be favored over constant ones.  */
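  /* Worked example with made-up numbers: main_vf == 16 and 100 scalar
     iterations leave niters == 100 % 16 == 4 for the epilogues.  An
     epilogue with likely VF 4 gets factor 4 / 4 == 1; one with likely
     VF 8 gets factor 0, bumped to 1 if it can handle the remaining
     partial iteration with partial vectors.  */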
  if (main_poly_vf.is_constant (&main_vf)
      && LOOP_VINFO_NITERS_KNOWN_P (main_loop))
    {
      unsigned HOST_WIDE_INT niters
	= LOOP_VINFO_INT_NITERS (main_loop) % main_vf;
      HOST_WIDE_INT other_likely_vf
	= estimated_poly_value (other_vf, POLY_VALUE_LIKELY);
      HOST_WIDE_INT this_likely_vf
	= estimated_poly_value (this_vf, POLY_VALUE_LIKELY);

      /* If the epilogue is using partial vectors we account for the
	 partial iteration here too.  */
      other_factor = niters / other_likely_vf;
      if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (other_loop_vinfo)
	  && niters % other_likely_vf != 0)
	other_factor++;

      this_factor = niters / this_likely_vf;
      if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (this_loop_vinfo)
	  && niters % this_likely_vf != 0)
	this_factor++;
    }
  else
    {
      unsigned HOST_WIDE_INT main_vf_max
	= estimated_poly_value (main_poly_vf, POLY_VALUE_MAX);
      unsigned HOST_WIDE_INT other_vf_max
	= estimated_poly_value (other_vf, POLY_VALUE_MAX);
      unsigned HOST_WIDE_INT this_vf_max
	= estimated_poly_value (this_vf, POLY_VALUE_MAX);

      other_factor = CEIL (main_vf_max, other_vf_max);
      this_factor = CEIL (main_vf_max, this_vf_max);

      /* If the loop is not using partial vectors then it will iterate one
	 time less than one that does.  It is safe to subtract one here,
	 because the main loop's vf is always at least 2x bigger than that
	 of an epilogue.  */
      if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (other_loop_vinfo))
	other_factor -= 1;
      if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (this_loop_vinfo))
	this_factor -= 1;
    }
  /* Compute the costs by multiplying the inside costs by the factor and
     adding the outside costs for a more complete picture.  The factor is
     the number of times we expect the epilogue to iterate.  */
  other_cost = other->body_cost () * other_factor;
  this_cost = this->body_cost () * this_factor;
  other_cost += other->outside_cost ();
  this_cost += this->outside_cost ();
  return this_cost < other_cost;
}

/* A <=>-style subroutine of better_main_loop_than_p.  Check whether we can
   determine the return value of better_main_loop_than_p by comparing the
   inside (loop body) costs of THIS and OTHER.  Return:

   * -1 if better_main_loop_than_p should return true.
   * 1 if better_main_loop_than_p should return false.
   * 0 if we can't decide.  */

int
vector_costs::compare_inside_loop_cost (const vector_costs *other) const
{
  loop_vec_info this_loop_vinfo = as_a<loop_vec_info> (this->m_vinfo);
  loop_vec_info other_loop_vinfo = as_a<loop_vec_info> (other->m_vinfo);

  struct loop *loop = LOOP_VINFO_LOOP (this_loop_vinfo);
  gcc_assert (LOOP_VINFO_LOOP (other_loop_vinfo) == loop);

  poly_int64 this_vf = LOOP_VINFO_VECT_FACTOR (this_loop_vinfo);
  poly_int64 other_vf = LOOP_VINFO_VECT_FACTOR (other_loop_vinfo);

  /* Limit the VFs to what is likely to be the maximum number of iterations,
     to handle cases in which at least one loop_vinfo is fully-masked.  */
  HOST_WIDE_INT estimated_max_niter = likely_max_stmt_executions_int (loop);
  if (estimated_max_niter != -1)
    {
      if (known_le (estimated_max_niter, this_vf))
	this_vf = estimated_max_niter;
      if (known_le (estimated_max_niter, other_vf))
	other_vf = estimated_max_niter;
    }

  /* Check whether the (fractional) cost per scalar iteration is lower or
     higher: this_inside_cost / this_vf vs. other_inside_cost / other_vf.  */
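  /* E.g. with made-up numbers: a body cost of 20 at VF 4 for THIS
     against 36 at VF 8 for OTHER gives cross products 20 * 8 == 160
     and 36 * 4 == 144 below, so OTHER is cheaper per scalar iteration
     (4.5 versus 5 units).  */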
  poly_int64 rel_this = this_loop_vinfo->vector_costs->body_cost () * other_vf;
  poly_int64 rel_other
    = other_loop_vinfo->vector_costs->body_cost () * this_vf;

  HOST_WIDE_INT est_rel_this_min
    = estimated_poly_value (rel_this, POLY_VALUE_MIN);
  HOST_WIDE_INT est_rel_this_max
    = estimated_poly_value (rel_this, POLY_VALUE_MAX);

  HOST_WIDE_INT est_rel_other_min
    = estimated_poly_value (rel_other, POLY_VALUE_MIN);
  HOST_WIDE_INT est_rel_other_max
    = estimated_poly_value (rel_other, POLY_VALUE_MAX);

  /* First check whether we can derive an unambiguous total order from the
     minimum and maximum estimates.  */
  if (est_rel_this_min < est_rel_other_min
      && est_rel_this_max < est_rel_other_max)
    return -1;

  if (est_rel_other_min < est_rel_this_min
      && est_rel_other_max < est_rel_this_max)
    return 1;

  /* When other_loop_vinfo uses a variable vectorization factor,
     we know that it has a lower cost for at least one runtime VF.
     However, we don't know how likely that VF is.

     One option would be to compare the costs for the estimated VFs.
     The problem is that that can put too much pressure on the cost
     model.  E.g. if the estimated VF is also the lowest possible VF,
     and if other_loop_vinfo is 1 unit worse than this_loop_vinfo
     for the estimated VF, we'd then choose this_loop_vinfo even
     though (a) this_loop_vinfo might not actually be better than
     other_loop_vinfo for that VF and (b) it would be significantly
     worse at larger VFs.

     Here we go for a hacky compromise: pick this_loop_vinfo if it is
     no more expensive than other_loop_vinfo even after doubling the
     estimated other_loop_vinfo VF.  For all but trivial loops, this
     ensures that we only pick this_loop_vinfo if it is significantly
     better than other_loop_vinfo at the estimated VF.  */
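  /* E.g. with est_rel_this_likely == 70 and est_rel_other_likely == 150,
     70 * 2 == 140 <= 150 and this_loop_vinfo wins; with 150 replaced by
     120 it would lose, despite still being cheaper at the likely VF.  */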
  if (est_rel_other_min != est_rel_this_min
      || est_rel_other_max != est_rel_this_max)
    {
      HOST_WIDE_INT est_rel_this_likely
	= estimated_poly_value (rel_this, POLY_VALUE_LIKELY);
      HOST_WIDE_INT est_rel_other_likely
	= estimated_poly_value (rel_other, POLY_VALUE_LIKELY);

      return est_rel_this_likely * 2 <= est_rel_other_likely ? -1 : 1;
    }

  return 0;
}

/* A <=>-style subroutine of better_main_loop_than_p, used when there is
   nothing to choose between the inside (loop body) costs of THIS and OTHER.
   Check whether we can determine the return value of better_main_loop_than_p
   by comparing the outside (prologue and epilogue) costs of THIS and OTHER.
   Return:

   * -1 if better_main_loop_than_p should return true.
   * 1 if better_main_loop_than_p should return false.
   * 0 if we can't decide.  */

int
vector_costs::compare_outside_loop_cost (const vector_costs *other) const
{
  auto this_outside_cost = this->outside_cost ();
  auto other_outside_cost = other->outside_cost ();
  if (this_outside_cost != other_outside_cost)
    return this_outside_cost < other_outside_cost ? -1 : 1;

  return 0;
}