Home | History | Annotate | Line # | Download | only in gcc
      1  1.1  mrg /* Statement Analysis and Transformation for Vectorization
      2  1.1  mrg    Copyright (C) 2003-2022 Free Software Foundation, Inc.
      3  1.1  mrg    Contributed by Dorit Naishlos <dorit (at) il.ibm.com>
      4  1.1  mrg    and Ira Rosen <irar (at) il.ibm.com>
      5  1.1  mrg 
      6  1.1  mrg This file is part of GCC.
      7  1.1  mrg 
      8  1.1  mrg GCC is free software; you can redistribute it and/or modify it under
      9  1.1  mrg the terms of the GNU General Public License as published by the Free
     10  1.1  mrg Software Foundation; either version 3, or (at your option) any later
     11  1.1  mrg version.
     12  1.1  mrg 
     13  1.1  mrg GCC is distributed in the hope that it will be useful, but WITHOUT ANY
     14  1.1  mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or
     15  1.1  mrg FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     16  1.1  mrg for more details.
     17  1.1  mrg 
     18  1.1  mrg You should have received a copy of the GNU General Public License
     19  1.1  mrg along with GCC; see the file COPYING3.  If not see
     20  1.1  mrg <http://www.gnu.org/licenses/>.  */
     21  1.1  mrg 
     22  1.1  mrg #include "config.h"
     23  1.1  mrg #include "system.h"
     24  1.1  mrg #include "coretypes.h"
     25  1.1  mrg #include "backend.h"
     26  1.1  mrg #include "target.h"
     27  1.1  mrg #include "rtl.h"
     28  1.1  mrg #include "tree.h"
     29  1.1  mrg #include "gimple.h"
     30  1.1  mrg #include "ssa.h"
     31  1.1  mrg #include "optabs-tree.h"
     32  1.1  mrg #include "insn-config.h"
     33  1.1  mrg #include "recog.h"		/* FIXME: for insn_data */
     34  1.1  mrg #include "cgraph.h"
     35  1.1  mrg #include "dumpfile.h"
     36  1.1  mrg #include "alias.h"
     37  1.1  mrg #include "fold-const.h"
     38  1.1  mrg #include "stor-layout.h"
     39  1.1  mrg #include "tree-eh.h"
     40  1.1  mrg #include "gimplify.h"
     41  1.1  mrg #include "gimple-iterator.h"
     42  1.1  mrg #include "gimplify-me.h"
     43  1.1  mrg #include "tree-cfg.h"
     44  1.1  mrg #include "tree-ssa-loop-manip.h"
     45  1.1  mrg #include "cfgloop.h"
     46  1.1  mrg #include "explow.h"
     47  1.1  mrg #include "tree-ssa-loop.h"
     48  1.1  mrg #include "tree-scalar-evolution.h"
     49  1.1  mrg #include "tree-vectorizer.h"
     50  1.1  mrg #include "builtins.h"
     51  1.1  mrg #include "internal-fn.h"
     52  1.1  mrg #include "tree-vector-builder.h"
     53  1.1  mrg #include "vec-perm-indices.h"
     54  1.1  mrg #include "tree-ssa-loop-niter.h"
     55  1.1  mrg #include "gimple-fold.h"
     56  1.1  mrg #include "regs.h"
     57  1.1  mrg #include "attribs.h"
     58  1.1  mrg 
     59  1.1  mrg /* For lang_hooks.types.type_for_mode.  */
     60  1.1  mrg #include "langhooks.h"
     61  1.1  mrg 
     62  1.1  mrg /* Return the vectorized type for the given statement.  */
     63  1.1  mrg 
     64  1.1  mrg tree
     65  1.1  mrg stmt_vectype (class _stmt_vec_info *stmt_info)
     66  1.1  mrg {
     67  1.1  mrg   return STMT_VINFO_VECTYPE (stmt_info);
     68  1.1  mrg }
     69  1.1  mrg 
     70  1.1  mrg /* Return TRUE iff the given statement is in an inner loop relative to
     71  1.1  mrg    the loop being vectorized.  */
     72  1.1  mrg bool
     73  1.1  mrg stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
     74  1.1  mrg {
     75  1.1  mrg   gimple *stmt = STMT_VINFO_STMT (stmt_info);
     76  1.1  mrg   basic_block bb = gimple_bb (stmt);
     77  1.1  mrg   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
     78  1.1  mrg   class loop* loop;
     79  1.1  mrg 
     80  1.1  mrg   if (!loop_vinfo)
     81  1.1  mrg     return false;
     82  1.1  mrg 
     83  1.1  mrg   loop = LOOP_VINFO_LOOP (loop_vinfo);
     84  1.1  mrg 
     85  1.1  mrg   return (bb->loop_father == loop->inner);
     86  1.1  mrg }
     87  1.1  mrg 
     88  1.1  mrg /* Record the cost of a statement, either by directly informing the
     89  1.1  mrg    target model or by saving it in a vector for later processing.
     90  1.1  mrg    Return a preliminary estimate of the statement's cost.  */
     91  1.1  mrg 
     92  1.1  mrg static unsigned
     93  1.1  mrg record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
     94  1.1  mrg 		  enum vect_cost_for_stmt kind,
     95  1.1  mrg 		  stmt_vec_info stmt_info, slp_tree node,
     96  1.1  mrg 		  tree vectype, int misalign,
     97  1.1  mrg 		  enum vect_cost_model_location where)
     98  1.1  mrg {
     99  1.1  mrg   if ((kind == vector_load || kind == unaligned_load)
    100  1.1  mrg       && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    101  1.1  mrg     kind = vector_gather_load;
    102  1.1  mrg   if ((kind == vector_store || kind == unaligned_store)
    103  1.1  mrg       && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    104  1.1  mrg     kind = vector_scatter_store;
    105  1.1  mrg 
    106  1.1  mrg   stmt_info_for_cost si
    107  1.1  mrg     = { count, kind, where, stmt_info, node, vectype, misalign };
    108  1.1  mrg   body_cost_vec->safe_push (si);
    109  1.1  mrg 
    110  1.1  mrg   return (unsigned)
    111  1.1  mrg       (builtin_vectorization_cost (kind, vectype, misalign) * count);
    112  1.1  mrg }
    113  1.1  mrg 
    114  1.1  mrg unsigned
    115  1.1  mrg record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
    116  1.1  mrg 		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
    117  1.1  mrg 		  tree vectype, int misalign,
    118  1.1  mrg 		  enum vect_cost_model_location where)
    119  1.1  mrg {
    120  1.1  mrg   return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
    121  1.1  mrg 			   vectype, misalign, where);
    122  1.1  mrg }
    123  1.1  mrg 
    124  1.1  mrg unsigned
    125  1.1  mrg record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
    126  1.1  mrg 		  enum vect_cost_for_stmt kind, slp_tree node,
    127  1.1  mrg 		  tree vectype, int misalign,
    128  1.1  mrg 		  enum vect_cost_model_location where)
    129  1.1  mrg {
    130  1.1  mrg   return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
    131  1.1  mrg 			   vectype, misalign, where);
    132  1.1  mrg }
    133  1.1  mrg 
    134  1.1  mrg unsigned
    135  1.1  mrg record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
    136  1.1  mrg 		  enum vect_cost_for_stmt kind,
    137  1.1  mrg 		  enum vect_cost_model_location where)
    138  1.1  mrg {
    139  1.1  mrg   gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
    140  1.1  mrg 	      || kind == scalar_stmt);
    141  1.1  mrg   return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
    142  1.1  mrg 			   NULL_TREE, 0, where);
    143  1.1  mrg }
    144  1.1  mrg 
    145  1.1  mrg /* Return a variable of type ELEM_TYPE[NELEMS].  */
    146  1.1  mrg 
    147  1.1  mrg static tree
    148  1.1  mrg create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
    149  1.1  mrg {
    150  1.1  mrg   return create_tmp_var (build_array_type_nelts (elem_type, nelems),
    151  1.1  mrg 			 "vect_array");
    152  1.1  mrg }
    153  1.1  mrg 
    154  1.1  mrg /* ARRAY is an array of vectors created by create_vector_array.
    155  1.1  mrg    Return an SSA_NAME for the vector in index N.  The reference
    156  1.1  mrg    is part of the vectorization of STMT_INFO and the vector is associated
    157  1.1  mrg    with scalar destination SCALAR_DEST.  */
    158  1.1  mrg 
    159  1.1  mrg static tree
    160  1.1  mrg read_vector_array (vec_info *vinfo,
    161  1.1  mrg 		   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
    162  1.1  mrg 		   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
    163  1.1  mrg {
    164  1.1  mrg   tree vect_type, vect, vect_name, array_ref;
    165  1.1  mrg   gimple *new_stmt;
    166  1.1  mrg 
    167  1.1  mrg   gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
    168  1.1  mrg   vect_type = TREE_TYPE (TREE_TYPE (array));
    169  1.1  mrg   vect = vect_create_destination_var (scalar_dest, vect_type);
    170  1.1  mrg   array_ref = build4 (ARRAY_REF, vect_type, array,
    171  1.1  mrg 		      build_int_cst (size_type_node, n),
    172  1.1  mrg 		      NULL_TREE, NULL_TREE);
    173  1.1  mrg 
    174  1.1  mrg   new_stmt = gimple_build_assign (vect, array_ref);
    175  1.1  mrg   vect_name = make_ssa_name (vect, new_stmt);
    176  1.1  mrg   gimple_assign_set_lhs (new_stmt, vect_name);
    177  1.1  mrg   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    178  1.1  mrg 
    179  1.1  mrg   return vect_name;
    180  1.1  mrg }
    181  1.1  mrg 
    182  1.1  mrg /* ARRAY is an array of vectors created by create_vector_array.
    183  1.1  mrg    Emit code to store SSA_NAME VECT in index N of the array.
    184  1.1  mrg    The store is part of the vectorization of STMT_INFO.  */
    185  1.1  mrg 
    186  1.1  mrg static void
    187  1.1  mrg write_vector_array (vec_info *vinfo,
    188  1.1  mrg 		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
    189  1.1  mrg 		    tree vect, tree array, unsigned HOST_WIDE_INT n)
    190  1.1  mrg {
    191  1.1  mrg   tree array_ref;
    192  1.1  mrg   gimple *new_stmt;
    193  1.1  mrg 
    194  1.1  mrg   array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
    195  1.1  mrg 		      build_int_cst (size_type_node, n),
    196  1.1  mrg 		      NULL_TREE, NULL_TREE);
    197  1.1  mrg 
    198  1.1  mrg   new_stmt = gimple_build_assign (array_ref, vect);
    199  1.1  mrg   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    200  1.1  mrg }
    201  1.1  mrg 
/* PTR is a pointer to an array of type TYPE.  Build and return a
   representation of *PTR, using ALIAS_PTR_TYPE to encode the alias
   set of the access.  Also record on PTR the alignment implied by
   the access.  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
    216  1.1  mrg 
    217  1.1  mrg /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
    218  1.1  mrg    Emit the clobber before *GSI.  */
    219  1.1  mrg 
    220  1.1  mrg static void
    221  1.1  mrg vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
    222  1.1  mrg 		       gimple_stmt_iterator *gsi, tree var)
    223  1.1  mrg {
    224  1.1  mrg   tree clobber = build_clobber (TREE_TYPE (var));
    225  1.1  mrg   gimple *new_stmt = gimple_build_assign (var, clobber);
    226  1.1  mrg   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    227  1.1  mrg }
    228  1.1  mrg 
    229  1.1  mrg /* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
    230  1.1  mrg 
    231  1.1  mrg /* Function vect_mark_relevant.
    232  1.1  mrg 
    233  1.1  mrg    Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */
    234  1.1  mrg 
    235  1.1  mrg static void
    236  1.1  mrg vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
    237  1.1  mrg 		    enum vect_relevant relevant, bool live_p)
    238  1.1  mrg {
    239  1.1  mrg   enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
    240  1.1  mrg   bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
    241  1.1  mrg 
    242  1.1  mrg   if (dump_enabled_p ())
    243  1.1  mrg     dump_printf_loc (MSG_NOTE, vect_location,
    244  1.1  mrg 		     "mark relevant %d, live %d: %G", relevant, live_p,
    245  1.1  mrg 		     stmt_info->stmt);
    246  1.1  mrg 
    247  1.1  mrg   /* If this stmt is an original stmt in a pattern, we might need to mark its
    248  1.1  mrg      related pattern stmt instead of the original stmt.  However, such stmts
    249  1.1  mrg      may have their own uses that are not in any pattern, in such cases the
    250  1.1  mrg      stmt itself should be marked.  */
    251  1.1  mrg   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    252  1.1  mrg     {
    253  1.1  mrg       /* This is the last stmt in a sequence that was detected as a
    254  1.1  mrg 	 pattern that can potentially be vectorized.  Don't mark the stmt
    255  1.1  mrg 	 as relevant/live because it's not going to be vectorized.
    256  1.1  mrg 	 Instead mark the pattern-stmt that replaces it.  */
    257  1.1  mrg 
    258  1.1  mrg       if (dump_enabled_p ())
    259  1.1  mrg 	dump_printf_loc (MSG_NOTE, vect_location,
    260  1.1  mrg 			 "last stmt in pattern. don't mark"
    261  1.1  mrg 			 " relevant/live.\n");
    262  1.1  mrg       stmt_vec_info old_stmt_info = stmt_info;
    263  1.1  mrg       stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
    264  1.1  mrg       gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
    265  1.1  mrg       save_relevant = STMT_VINFO_RELEVANT (stmt_info);
    266  1.1  mrg       save_live_p = STMT_VINFO_LIVE_P (stmt_info);
    267  1.1  mrg     }
    268  1.1  mrg 
    269  1.1  mrg   STMT_VINFO_LIVE_P (stmt_info) |= live_p;
    270  1.1  mrg   if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    271  1.1  mrg     STMT_VINFO_RELEVANT (stmt_info) = relevant;
    272  1.1  mrg 
    273  1.1  mrg   if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
    274  1.1  mrg       && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    275  1.1  mrg     {
    276  1.1  mrg       if (dump_enabled_p ())
    277  1.1  mrg         dump_printf_loc (MSG_NOTE, vect_location,
    278  1.1  mrg                          "already marked relevant/live.\n");
    279  1.1  mrg       return;
    280  1.1  mrg     }
    281  1.1  mrg 
    282  1.1  mrg   worklist->safe_push (stmt_info);
    283  1.1  mrg }
    284  1.1  mrg 
    285  1.1  mrg 
    286  1.1  mrg /* Function is_simple_and_all_uses_invariant
    287  1.1  mrg 
    288  1.1  mrg    Return true if STMT_INFO is simple and all uses of it are invariant.  */
    289  1.1  mrg 
    290  1.1  mrg bool
    291  1.1  mrg is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
    292  1.1  mrg 				  loop_vec_info loop_vinfo)
    293  1.1  mrg {
    294  1.1  mrg   tree op;
    295  1.1  mrg   ssa_op_iter iter;
    296  1.1  mrg 
    297  1.1  mrg   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
    298  1.1  mrg   if (!stmt)
    299  1.1  mrg     return false;
    300  1.1  mrg 
    301  1.1  mrg   FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    302  1.1  mrg     {
    303  1.1  mrg       enum vect_def_type dt = vect_uninitialized_def;
    304  1.1  mrg 
    305  1.1  mrg       if (!vect_is_simple_use (op, loop_vinfo, &dt))
    306  1.1  mrg 	{
    307  1.1  mrg 	  if (dump_enabled_p ())
    308  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    309  1.1  mrg 			     "use not simple.\n");
    310  1.1  mrg 	  return false;
    311  1.1  mrg 	}
    312  1.1  mrg 
    313  1.1  mrg       if (dt != vect_external_def && dt != vect_constant_def)
    314  1.1  mrg 	return false;
    315  1.1  mrg     }
    316  1.1  mrg   return true;
    317  1.1  mrg }
    318  1.1  mrg 
/* Function vect_stmt_relevant_p.

   Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
   is "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   On return, *RELEVANT holds the computed relevance and *LIVE_P is true
   iff the stmt has a use outside the loop.

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt_info->stmt)
      && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  Clobbers are ignored; PHIs never have vdefs, so
     skip the query for them.  */
  if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt_info->stmt)
	&& !gimple_clobber_p (stmt_info->stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  Walk every SSA def of the stmt and look at
     all of its immediate uses.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      /* Debug uses outside the loop do not make the stmt live.  */
	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)   */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
	    }
	}
    }

  /* A stmt that is live but computes something the loop itself never
     uses only needs its final value; unless it is fully invariant it
     still has to be vectorized for that value to exist.  */
  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}
    396  1.1  mrg 
    397  1.1  mrg 
    398  1.1  mrg /* Function exist_non_indexing_operands_for_use_p
    399  1.1  mrg 
    400  1.1  mrg    USE is one of the uses attached to STMT_INFO.  Check if USE is
    401  1.1  mrg    used in STMT_INFO for anything other than indexing an array.  */
    402  1.1  mrg 
    403  1.1  mrg static bool
    404  1.1  mrg exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
    405  1.1  mrg {
    406  1.1  mrg   tree operand;
    407  1.1  mrg 
    408  1.1  mrg   /* USE corresponds to some operand in STMT.  If there is no data
    409  1.1  mrg      reference in STMT, then any operand that corresponds to USE
    410  1.1  mrg      is not indexing an array.  */
    411  1.1  mrg   if (!STMT_VINFO_DATA_REF (stmt_info))
    412  1.1  mrg     return true;
    413  1.1  mrg 
    414  1.1  mrg   /* STMT has a data_ref. FORNOW this means that its of one of
    415  1.1  mrg      the following forms:
    416  1.1  mrg      -1- ARRAY_REF = var
    417  1.1  mrg      -2- var = ARRAY_REF
    418  1.1  mrg      (This should have been verified in analyze_data_refs).
    419  1.1  mrg 
    420  1.1  mrg      'var' in the second case corresponds to a def, not a use,
    421  1.1  mrg      so USE cannot correspond to any operands that are not used
    422  1.1  mrg      for array indexing.
    423  1.1  mrg 
    424  1.1  mrg      Therefore, all we need to check is if STMT falls into the
    425  1.1  mrg      first case, and whether var corresponds to USE.  */
    426  1.1  mrg 
    427  1.1  mrg   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
    428  1.1  mrg   if (!assign || !gimple_assign_copy_p (assign))
    429  1.1  mrg     {
    430  1.1  mrg       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
    431  1.1  mrg       if (call && gimple_call_internal_p (call))
    432  1.1  mrg 	{
    433  1.1  mrg 	  internal_fn ifn = gimple_call_internal_fn (call);
    434  1.1  mrg 	  int mask_index = internal_fn_mask_index (ifn);
    435  1.1  mrg 	  if (mask_index >= 0
    436  1.1  mrg 	      && use == gimple_call_arg (call, mask_index))
    437  1.1  mrg 	    return true;
    438  1.1  mrg 	  int stored_value_index = internal_fn_stored_value_index (ifn);
    439  1.1  mrg 	  if (stored_value_index >= 0
    440  1.1  mrg 	      && use == gimple_call_arg (call, stored_value_index))
    441  1.1  mrg 	    return true;
    442  1.1  mrg 	  if (internal_gather_scatter_fn_p (ifn)
    443  1.1  mrg 	      && use == gimple_call_arg (call, 1))
    444  1.1  mrg 	    return true;
    445  1.1  mrg 	}
    446  1.1  mrg       return false;
    447  1.1  mrg     }
    448  1.1  mrg 
    449  1.1  mrg   if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
    450  1.1  mrg     return false;
    451  1.1  mrg   operand = gimple_assign_rhs1 (assign);
    452  1.1  mrg   if (TREE_CODE (operand) != SSA_NAME)
    453  1.1  mrg     return false;
    454  1.1  mrg 
    455  1.1  mrg   if (operand == use)
    456  1.1  mrg     return true;
    457  1.1  mrg 
    458  1.1  mrg   return false;
    459  1.1  mrg }
    460  1.1  mrg 
    461  1.1  mrg 
/*
   Function process_use.

   Inputs:
   - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
       STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
   we skip DEF_STMT cause it had already been processed.
   - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
   "relevant" will be modified accordingly.

   Return opt_result::success if everything is as expected;
   a failure result otherwise.  */

static opt_result
process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
	     enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
	     bool force)
{
  stmt_vec_info dstmt_vinfo;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
    return opt_result::success ();

  if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
    return opt_result::failure_at (stmt_vinfo->stmt,
				   "not vectorized:"
				   " unsupported use in stmt.\n");

  /* No defining stmt info means the def is external or constant;
     nothing to propagate.  */
  if (!dstmt_vinfo)
    return opt_result::success ();

  basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
  basic_block bb = gimple_bb (stmt_vinfo->stmt);

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
     We have to force the stmt live since the epilogue loop needs it to
     continue computing the reduction.  */
  if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
      return opt_result::success ();
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = dstmt_vinfo
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      /* Translate the relevance seen in the inner loop to the relevance
	 the outer-loop def should get.  */
      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = dstmt_vinfo
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      /* Translate the relevance seen in the outer loop to the relevance
	 the inner-loop def should get.  */
      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
	case vect_used_only_live:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
	   && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
				      loop_latch_edge (bb->loop_father))
	       == use))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
                         "induction value on backedge.\n");
      return opt_result::success ();
    }


  vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
  return opt_result::success ();
}
    622  1.1  mrg 
    623  1.1  mrg 
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized. For example:

     for i...
       for j...
   1.    T0 = i + j
   2.	 T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.

   Works as a fixed-point computation: seed a worklist with the stmts
   that are relevant by themselves (e.g. stores, live values), then
   repeatedly propagate relevance to the defining stmts of their
   operands.  On failure returns an opt_result describing the problem;
   FATAL (when non-NULL) is cleared for failures that are not
   necessarily fatal to all vectorization attempts.  */

opt_result
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int i;
  basic_block bb;
  bool live_p;
  enum vect_relevant relevant;

  DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");

  auto_vec<stmt_vec_info, 64> worklist;

  /* 1. Init worklist.  Scan every PHI and every real stmt in the loop
     body and seed the worklist with those that are relevant on their
     own account (vect_stmt_relevant_p decides).  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
			     phi_info->stmt);

	  if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi_info, relevant, live_p);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  /* Debug stmts are never vectorized and must not influence
	     relevance.  */
	  if (is_gimple_debug (gsi_stmt (si)))
	    continue;
	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
	  if (dump_enabled_p ())
	      dump_printf_loc (MSG_NOTE, vect_location,
			       "init: stmt relevant? %G", stmt_info->stmt);

	  if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt_vec_info stmt_vinfo = worklist.pop ();
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "worklist: examine stmt: %G", stmt_vinfo->stmt);

      /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant according to the relevance property
	 of STMT.  */
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
	 propagated as is to the DEF_STMTs of its USEs.

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the relevance to vect_used_by_reduction.
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      /* Reject uses of cycle definitions (reductions, nested cycles,
	 double reductions) that the vectorizer cannot handle.  */
      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
        {
          case vect_reduction_def:
	    gcc_assert (relevant != vect_unused_in_scope);
	    if (relevant != vect_unused_in_scope
		&& relevant != vect_used_in_scope
		&& relevant != vect_used_by_reduction
		&& relevant != vect_used_only_live)
	      return opt_result::failure_at
		(stmt_vinfo->stmt, "unsupported use of reduction.\n");
	    break;

          case vect_nested_cycle:
	    if (relevant != vect_unused_in_scope
		&& relevant != vect_used_in_outer_by_reduction
		&& relevant != vect_used_in_outer)
	      return opt_result::failure_at
		(stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
            break;

          case vect_double_reduction_def:
	    if (relevant != vect_unused_in_scope
		&& relevant != vect_used_by_reduction
		&& relevant != vect_used_only_live)
	      return opt_result::failure_at
		(stmt_vinfo->stmt, "unsupported use of double reduction.\n");
            break;

          default:
            break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
	  if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (assign);
	      tree op = gimple_assign_rhs1 (assign);

	      /* Operand 0 is the LHS; start scanning uses at operand 1.  */
	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  /* An embedded comparison in a COND_EXPR condition is not
		     an SSA operand of the assign; process its two operands
		     explicitly, then continue with the COND_EXPR's value
		     operands (2 and 3).  */
		  opt_result res
		    = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
				   loop_vinfo, relevant, &worklist, false);
		  if (!res)
		    return res;
		  res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
				     loop_vinfo, relevant, &worklist, false);
		  if (!res)
		    return res;
		  i = 2;
		}
	      for (; i < gimple_num_ops (assign); i++)
		{
		  op = gimple_op (assign, i);
                  if (TREE_CODE (op) == SSA_NAME)
		    {
		      opt_result res
			= process_use (stmt_vinfo, op, loop_vinfo, relevant,
				       &worklist, false);
		      if (!res)
			return res;
		    }
                 }
            }
	  else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (call); i++)
		{
		  tree arg = gimple_call_arg (call, i);
		  opt_result res
		    = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
				   &worklist, false);
		  if (!res)
		    return res;
		}
	    }
        }
      else
	/* Regular stmt: propagate relevance to all SSA uses.  */
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
	    opt_result res
	      = process_use (stmt_vinfo, op, loop_vinfo, relevant,
			     &worklist, false);
	    if (!res)
	      return res;
          }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
	{
	  /* The offset operand of a gather/scatter is not an SSA operand
	     of the stmt itself; fetch it and process it explicitly.
	     force_vectorize is true here since the offset must become a
	     vector.  */
	  gather_scatter_info gs_info;
	  if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
	    gcc_unreachable ();
	  opt_result res
	    = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
			   &worklist, true);
	  if (!res)
	    {
	      /* Failing to vectorize the offset does not rule out other
		 strategies; signal a non-fatal failure.  */
	      if (fatal)
		*fatal = false;
	      return res;
	    }
	}
    } /* while worklist */

  return opt_result::success ();
}
    823  1.1  mrg 
    824  1.1  mrg /* Function vect_model_simple_cost.
    825  1.1  mrg 
    826  1.1  mrg    Models cost for simple operations, i.e. those that only emit ncopies of a
    827  1.1  mrg    single op.  Right now, this does not account for multiple insns that could
    828  1.1  mrg    be generated for the single vector op.  We will handle that shortly.  */
    829  1.1  mrg 
    830  1.1  mrg static void
    831  1.1  mrg vect_model_simple_cost (vec_info *,
    832  1.1  mrg 			stmt_vec_info stmt_info, int ncopies,
    833  1.1  mrg 			enum vect_def_type *dt,
    834  1.1  mrg 			int ndts,
    835  1.1  mrg 			slp_tree node,
    836  1.1  mrg 			stmt_vector_for_cost *cost_vec,
    837  1.1  mrg 			vect_cost_for_stmt kind = vector_stmt)
    838  1.1  mrg {
    839  1.1  mrg   int inside_cost = 0, prologue_cost = 0;
    840  1.1  mrg 
    841  1.1  mrg   gcc_assert (cost_vec != NULL);
    842  1.1  mrg 
    843  1.1  mrg   /* ???  Somehow we need to fix this at the callers.  */
    844  1.1  mrg   if (node)
    845  1.1  mrg     ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
    846  1.1  mrg 
    847  1.1  mrg   if (!node)
    848  1.1  mrg     /* Cost the "broadcast" of a scalar operand in to a vector operand.
    849  1.1  mrg        Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
    850  1.1  mrg        cost model.  */
    851  1.1  mrg     for (int i = 0; i < ndts; i++)
    852  1.1  mrg       if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
    853  1.1  mrg 	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
    854  1.1  mrg 					   stmt_info, 0, vect_prologue);
    855  1.1  mrg 
    856  1.1  mrg   /* Pass the inside-of-loop statements to the target-specific cost model.  */
    857  1.1  mrg   inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
    858  1.1  mrg 				   stmt_info, 0, vect_body);
    859  1.1  mrg 
    860  1.1  mrg   if (dump_enabled_p ())
    861  1.1  mrg     dump_printf_loc (MSG_NOTE, vect_location,
    862  1.1  mrg                      "vect_model_simple_cost: inside_cost = %d, "
    863  1.1  mrg                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
    864  1.1  mrg }
    865  1.1  mrg 
    866  1.1  mrg 
    867  1.1  mrg /* Model cost for type demotion and promotion operations.  PWR is
    868  1.1  mrg    normally zero for single-step promotions and demotions.  It will be
    869  1.1  mrg    one if two-step promotion/demotion is required, and so on.  NCOPIES
    870  1.1  mrg    is the number of vector results (and thus number of instructions)
    871  1.1  mrg    for the narrowest end of the operation chain.  Each additional
    872  1.1  mrg    step doubles the number of instructions required.  If WIDEN_ARITH
    873  1.1  mrg    is true the stmt is doing widening arithmetic.  */
    874  1.1  mrg 
    875  1.1  mrg static void
    876  1.1  mrg vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
    877  1.1  mrg 				    enum vect_def_type *dt,
    878  1.1  mrg 				    unsigned int ncopies, int pwr,
    879  1.1  mrg 				    stmt_vector_for_cost *cost_vec,
    880  1.1  mrg 				    bool widen_arith)
    881  1.1  mrg {
    882  1.1  mrg   int i;
    883  1.1  mrg   int inside_cost = 0, prologue_cost = 0;
    884  1.1  mrg 
    885  1.1  mrg   for (i = 0; i < pwr + 1; i++)
    886  1.1  mrg     {
    887  1.1  mrg       inside_cost += record_stmt_cost (cost_vec, ncopies,
    888  1.1  mrg 				       widen_arith
    889  1.1  mrg 				       ? vector_stmt : vec_promote_demote,
    890  1.1  mrg 				       stmt_info, 0, vect_body);
    891  1.1  mrg       ncopies *= 2;
    892  1.1  mrg     }
    893  1.1  mrg 
    894  1.1  mrg   /* FORNOW: Assuming maximum 2 args per stmts.  */
    895  1.1  mrg   for (i = 0; i < 2; i++)
    896  1.1  mrg     if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
    897  1.1  mrg       prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
    898  1.1  mrg 					 stmt_info, 0, vect_prologue);
    899  1.1  mrg 
    900  1.1  mrg   if (dump_enabled_p ())
    901  1.1  mrg     dump_printf_loc (MSG_NOTE, vect_location,
    902  1.1  mrg                      "vect_model_promotion_demotion_cost: inside_cost = %d, "
    903  1.1  mrg                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
    904  1.1  mrg }
    905  1.1  mrg 
    906  1.1  mrg /* Returns true if the current function returns DECL.  */
    907  1.1  mrg 
    908  1.1  mrg static bool
    909  1.1  mrg cfun_returns (tree decl)
    910  1.1  mrg {
    911  1.1  mrg   edge_iterator ei;
    912  1.1  mrg   edge e;
    913  1.1  mrg   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    914  1.1  mrg     {
    915  1.1  mrg       greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
    916  1.1  mrg       if (!ret)
    917  1.1  mrg 	continue;
    918  1.1  mrg       if (gimple_return_retval (ret) == decl)
    919  1.1  mrg 	return true;
    920  1.1  mrg       /* We often end up with an aggregate copy to the result decl,
    921  1.1  mrg          handle that case as well.  First skip intermediate clobbers
    922  1.1  mrg 	 though.  */
    923  1.1  mrg       gimple *def = ret;
    924  1.1  mrg       do
    925  1.1  mrg 	{
    926  1.1  mrg 	  def = SSA_NAME_DEF_STMT (gimple_vuse (def));
    927  1.1  mrg 	}
    928  1.1  mrg       while (gimple_clobber_p (def));
    929  1.1  mrg       if (is_a <gassign *> (def)
    930  1.1  mrg 	  && gimple_assign_lhs (def) == gimple_return_retval (ret)
    931  1.1  mrg 	  && gimple_assign_rhs1 (def) == decl)
    932  1.1  mrg 	return true;
    933  1.1  mrg     }
    934  1.1  mrg   return false;
    935  1.1  mrg }
    936  1.1  mrg 
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.

   NCOPIES is the number of vector stores (overridden by the SLP vec-stmt
   count when SLP_NODE is given), MEMORY_ACCESS_TYPE the chosen store
   strategy, ALIGNMENT_SUPPORT_SCHEME/MISALIGNMENT describe how the
   target handles the access, and VLS_TYPE whether the stored value is
   invariant.  Costs are recorded into COST_VEC.  */

static void
vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
		       vect_memory_access_type memory_access_type,
		       dr_alignment_support alignment_support_scheme,
		       int misalignment,
		       vec_load_store_type vls_type, slp_tree slp_node,
		       stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  stmt_vec_info first_stmt_info = stmt_info;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* ???  Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  /* An invariant stored value is broadcast once in the prologue
     (SLP costs this elsewhere).  */
  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (!slp_node)
	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					   stmt_info, 0, vect_prologue);
    }

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses a high and low interleave or shuffle operations for each
	 needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .\n",
                         group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
			 misalignment, &inside_cost, cost_vec);

  /* Elementwise and strided-SLP stores additionally pay for extracting
     each element out of the vector before storing it.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       vec_to_scalar, stmt_info, 0, vect_body);
    }

  /* When vectorizing a store into the function result assign
     a penalty if the function returns in a multi-register location.
     In this case we assume we'll end up with having to spill the
     vector result and do piecewise loads as a conservative estimate.  */
  tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
  if (base
      && (TREE_CODE (base) == RESULT_DECL
	  || (DECL_P (base) && cfun_returns (base)))
      && !aggregate_value_p (base, cfun->decl))
    {
      rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
      /* ???  Handle PARALLEL in some way.  */
      if (REG_P (reg))
	{
	  int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
	  /* Assume that a single reg-reg move is possible and cheap,
	     do not account for vector to gp register move cost.  */
	  if (nregs > 1)
	    {
	      /* Spill.  */
	      prologue_cost += record_stmt_cost (cost_vec, ncopies,
						 vector_store,
						 stmt_info, 0, vect_epilogue);
	      /* Loads.  */
	      prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
						 scalar_load,
						 stmt_info, 0, vect_epilogue);
	    }
	}
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
   1056  1.1  mrg 
   1057  1.1  mrg 
   1058  1.1  mrg /* Calculate cost of DR's memory access.  */
   1059  1.1  mrg void
   1060  1.1  mrg vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
   1061  1.1  mrg 		     dr_alignment_support alignment_support_scheme,
   1062  1.1  mrg 		     int misalignment,
   1063  1.1  mrg 		     unsigned int *inside_cost,
   1064  1.1  mrg 		     stmt_vector_for_cost *body_cost_vec)
   1065  1.1  mrg {
   1066  1.1  mrg   switch (alignment_support_scheme)
   1067  1.1  mrg     {
   1068  1.1  mrg     case dr_aligned:
   1069  1.1  mrg       {
   1070  1.1  mrg 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
   1071  1.1  mrg 					  vector_store, stmt_info, 0,
   1072  1.1  mrg 					  vect_body);
   1073  1.1  mrg 
   1074  1.1  mrg         if (dump_enabled_p ())
   1075  1.1  mrg           dump_printf_loc (MSG_NOTE, vect_location,
   1076  1.1  mrg                            "vect_model_store_cost: aligned.\n");
   1077  1.1  mrg         break;
   1078  1.1  mrg       }
   1079  1.1  mrg 
   1080  1.1  mrg     case dr_unaligned_supported:
   1081  1.1  mrg       {
   1082  1.1  mrg         /* Here, we assign an additional cost for the unaligned store.  */
   1083  1.1  mrg 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
   1084  1.1  mrg 					  unaligned_store, stmt_info,
   1085  1.1  mrg 					  misalignment, vect_body);
   1086  1.1  mrg         if (dump_enabled_p ())
   1087  1.1  mrg           dump_printf_loc (MSG_NOTE, vect_location,
   1088  1.1  mrg                            "vect_model_store_cost: unaligned supported by "
   1089  1.1  mrg                            "hardware.\n");
   1090  1.1  mrg         break;
   1091  1.1  mrg       }
   1092  1.1  mrg 
   1093  1.1  mrg     case dr_unaligned_unsupported:
   1094  1.1  mrg       {
   1095  1.1  mrg         *inside_cost = VECT_MAX_COST;
   1096  1.1  mrg 
   1097  1.1  mrg         if (dump_enabled_p ())
   1098  1.1  mrg           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   1099  1.1  mrg                            "vect_model_store_cost: unsupported access.\n");
   1100  1.1  mrg         break;
   1101  1.1  mrg       }
   1102  1.1  mrg 
   1103  1.1  mrg     default:
   1104  1.1  mrg       gcc_unreachable ();
   1105  1.1  mrg     }
   1106  1.1  mrg }
   1107  1.1  mrg 
   1108  1.1  mrg 
/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.

   NCOPIES is the number of vector loads (overridden by the SLP vec-stmt
   count when SLP_NODE is given), VF the vectorization factor used for
   SLP permutation costing, and GS_INFO describes gather/scatter details
   when MEMORY_ACCESS_TYPE is VMAT_GATHER_SCATTER.  Costs are recorded
   into COST_VEC.  */

static void
vect_model_load_cost (vec_info *vinfo,
		      stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
		      vect_memory_access_type memory_access_type,
		      dr_alignment_support alignment_support_scheme,
		      int misalignment,
		      gather_scatter_info *gs_info,
		      slp_tree slp_node,
		      stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  gcc_assert (cost_vec);

  /* ???  Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
    {
      /* If the load is permuted then the alignment is determined by
	 the first group element not by the first scalar stmt DR.  */
      stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      /* Record the cost for the permutation.  */
      unsigned n_perms, n_loads;
      /* Analysis-only call (fourth-from-last arg true): computes the
	 number of permutes and loads without emitting code.  */
      vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
				    vf, true, &n_perms, &n_loads);
      inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
				       first_stmt_info, 0, vect_body);

      /* And adjust the number of loads performed.  This handles
	 redundancies as well as loads that are later dead.  */
      ncopies = n_loads;
    }

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  stmt_vec_info first_stmt_info = stmt_info;
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* An IFN_LOAD_LANES will load all its vector results, regardless of which
     ones we actually need.  Account for the cost of unused results.  */
  if (first_stmt_p && !slp_node && memory_access_type == VMAT_LOAD_STORE_LANES)
    {
      /* Start from the group size and subtract one per group member
	 actually present; what remains is the number of gap vectors.  */
      unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
      stmt_vec_info next_stmt_info = first_stmt_info;
      do
	{
	  gaps -= 1;
	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
	}
      while (next_stmt_info);
      if (gaps)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "vect_model_load_cost: %d unused vectors.\n",
			     gaps);
	  vect_get_load_cost (vinfo, stmt_info, ncopies * gaps,
			      alignment_support_scheme, misalignment, false,
			      &inside_cost, &prologue_cost,
			      cost_vec, cost_vec, true);
	}
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses an even and odd extract operations or shuffle operations
	 for each needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
				       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      /* ifn == IFN_LAST && !decl means the gather has no HW support and
	 is emulated with scalar loads.  */
      if (memory_access_type == VMAT_GATHER_SCATTER
	  && gs_info->ifn == IFN_LAST && !gs_info->decl)
	/* For emulated gathers N offset vector element extracts
	   (we assume the scalar scaling and ptr + offset add is consumed by
	   the load).  */
	inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
					 vec_to_scalar, stmt_info, 0,
					 vect_body);
      /* N scalar loads plus gathering them into a vector.  */
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       scalar_load, stmt_info, 0, vect_body);
    }
  else if (memory_access_type == VMAT_INVARIANT)
    {
      /* Invariant loads will ideally be hoisted and splat to a vector.  */
      prologue_cost += record_stmt_cost (cost_vec, 1,
					 scalar_load, stmt_info, 0,
					 vect_prologue);
      prologue_cost += record_stmt_cost (cost_vec, 1,
					 scalar_to_vec, stmt_info, 0,
					 vect_prologue);
    }
  else
    vect_get_load_cost (vinfo, stmt_info, ncopies,
			alignment_support_scheme, misalignment, first_stmt_p,
			&inside_cost, &prologue_cost,
			cost_vec, cost_vec, true);
  /* Strategies that assemble the vector from scalars pay one
     vec_construct per vector result.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP
      || (memory_access_type == VMAT_GATHER_SCATTER
	  && gs_info->ifn == IFN_LAST && !gs_info->decl))
    inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
				     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
   1254  1.1  mrg 
   1255  1.1  mrg 
/* Calculate cost of DR's memory access.

   STMT_INFO is the load being costed and NCOPIES the number of vector
   copies that will be generated for it.  ALIGNMENT_SUPPORT_SCHEME and
   MISALIGNMENT describe how the target copes with the access's
   (mis)alignment.  Loop-body costs are accumulated into *INSIDE_COST and
   recorded in BODY_COST_VEC; one-off setup costs are accumulated into
   *PROLOGUE_COST and recorded in PROLOGUE_COST_VEC, but only when
   RECORD_PROLOGUE_COSTS is true.  ADD_REALIGN_COST says whether the
   realignment setup of dr_explicit_realign_optimized should be charged
   here (for a group of loads it should be charged for one access
   only).  */
void
vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
		    dr_alignment_support alignment_support_scheme,
		    int misalignment,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	/* A plain vector load per copy; no extra penalty.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.
	   MISALIGNMENT is passed through so the target hook can charge
	   differently for known vs. unknown misalignment.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  misalignment, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");

        break;
      }
    case dr_explicit_realign:
      {
	/* Each copy needs two loads (the two aligned vectors spanning
	   the unaligned data) plus a permute to combine them.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
          }

	/* Steady state: one load and one realignment permute per copy.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");

        break;
      }

    case dr_unaligned_unsupported:
      {
	/* Make the access prohibitively expensive so this variant is
	   never chosen.  */
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}
   1366  1.1  mrg 
   1367  1.1  mrg /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   1368  1.1  mrg    the loop preheader for the vectorized stmt STMT_VINFO.  */
   1369  1.1  mrg 
   1370  1.1  mrg static void
   1371  1.1  mrg vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
   1372  1.1  mrg 		    gimple_stmt_iterator *gsi)
   1373  1.1  mrg {
   1374  1.1  mrg   if (gsi)
   1375  1.1  mrg     vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
   1376  1.1  mrg   else
   1377  1.1  mrg     vinfo->insert_on_entry (stmt_vinfo, new_stmt);
   1378  1.1  mrg 
   1379  1.1  mrg   if (dump_enabled_p ())
   1380  1.1  mrg     dump_printf_loc (MSG_NOTE, vect_location,
   1381  1.1  mrg 		     "created new init_stmt: %G", new_stmt);
   1382  1.1  mrg }
   1383  1.1  mrg 
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT_INFO.  */

tree
vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
		  gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push sth to a SSA name with initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      /* Only a vector TYPE can legitimately differ from VAL's type here;
	 VAL then becomes the element that is splat across the vector.  */
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* Scalar boolean value should be transformed into
	     all zeros or all ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  /* Non-constant predicate: emit VAL ? -1 : 0 so each
		     element gets a canonical mask value.  */
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else
	    {
	      gimple_seq stmts = NULL;
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
				    TREE_TYPE (type), val);
	      else
		/* ???  Condition vectorization expects us to do
		   promotion of invariant/external defs.  */
		val = gimple_convert (&stmts, TREE_TYPE (type), val);
	      /* Re-insert the conversion stmts one by one through
		 vect_init_vector_1 so they land at GSI or on the
		 preheader, in their original order.  */
	      for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
		   !gsi_end_p (gsi2); )
		{
		  init_stmt = gsi_stmt (gsi2);
		  gsi_remove (&gsi2, false);
		  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
		}
	    }
	}
      /* Splat the (possibly converted) element across the vector.  */
      val = build_vector_from_val (type, val);
    }

  /* Materialize VAL in a fresh SSA name and emit its defining stmt.  */
  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
  return new_temp;
}
   1452  1.1  mrg 
   1453  1.1  mrg 
/* Function vect_get_vec_defs_for_operand.

   OP is an operand in STMT_VINFO.  This function returns a vector of
   NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   vector invariant.  */

void
vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
			       unsigned ncopies,
			       tree op, vec<tree> *vec_oprnds, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_get_vec_defs_for_operand: %T\n", op);

  /* Classify OP; analysis is expected to have validated it already,
     hence the assert rather than an error path.  */
  stmt_vec_info def_stmt_info;
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
				      &def_stmt_info, &def_stmt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  %G", def_stmt);

  vec_oprnds->create (ncopies);
  if (dt == vect_constant_def || dt == vect_external_def)
    {
      /* Invariant or constant: build one vector def and reuse it for
	 every copy.  */
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
	vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
	/* A scalar condition feeding a vector mask needs the matching
	   boolean vector type, not a data vector type.  */
	vector_type = truth_type_for (stmt_vectype);
      else
	vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));

      gcc_assert (vector_type);
      tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
      while (ncopies--)
	vec_oprnds->quick_push (vop);
    }
  else
    {
      /* Defined inside the loop: collect the LHS of each already
	 vectorized copy of the defining stmt (or its pattern stmt).  */
      def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
      gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
      for (unsigned i = 0; i < ncopies; ++i)
	vec_oprnds->quick_push (gimple_get_lhs
				  (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
    }
}
   1515  1.1  mrg 
   1516  1.1  mrg 
   1517  1.1  mrg /* Get vectorized definitions for OP0 and OP1.  */
   1518  1.1  mrg 
   1519  1.1  mrg void
   1520  1.1  mrg vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
   1521  1.1  mrg 		   unsigned ncopies,
   1522  1.1  mrg 		   tree op0, vec<tree> *vec_oprnds0, tree vectype0,
   1523  1.1  mrg 		   tree op1, vec<tree> *vec_oprnds1, tree vectype1,
   1524  1.1  mrg 		   tree op2, vec<tree> *vec_oprnds2, tree vectype2,
   1525  1.1  mrg 		   tree op3, vec<tree> *vec_oprnds3, tree vectype3)
   1526  1.1  mrg {
   1527  1.1  mrg   if (slp_node)
   1528  1.1  mrg     {
   1529  1.1  mrg       if (op0)
   1530  1.1  mrg 	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
   1531  1.1  mrg       if (op1)
   1532  1.1  mrg 	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
   1533  1.1  mrg       if (op2)
   1534  1.1  mrg 	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
   1535  1.1  mrg       if (op3)
   1536  1.1  mrg 	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
   1537  1.1  mrg     }
   1538  1.1  mrg   else
   1539  1.1  mrg     {
   1540  1.1  mrg       if (op0)
   1541  1.1  mrg 	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
   1542  1.1  mrg 				       op0, vec_oprnds0, vectype0);
   1543  1.1  mrg       if (op1)
   1544  1.1  mrg 	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
   1545  1.1  mrg 				       op1, vec_oprnds1, vectype1);
   1546  1.1  mrg       if (op2)
   1547  1.1  mrg 	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
   1548  1.1  mrg 				       op2, vec_oprnds2, vectype2);
   1549  1.1  mrg       if (op3)
   1550  1.1  mrg 	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
   1551  1.1  mrg 				       op3, vec_oprnds3, vectype3);
   1552  1.1  mrg     }
   1553  1.1  mrg }
   1554  1.1  mrg 
/* Convenience overload of vect_get_vec_defs for callers that do not
   need to force a particular vector type on invariant operands: forwards
   with NULL_TREE for each VECTYPE argument.  */
void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
		   unsigned ncopies,
		   tree op0, vec<tree> *vec_oprnds0,
		   tree op1, vec<tree> *vec_oprnds1,
		   tree op2, vec<tree> *vec_oprnds2,
		   tree op3, vec<tree> *vec_oprnds3)
{
  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
		     op0, vec_oprnds0, NULL_TREE,
		     op1, vec_oprnds1, NULL_TREE,
		     op2, vec_oprnds2, NULL_TREE,
		     op3, vec_oprnds3, NULL_TREE);
}
   1569  1.1  mrg 
   1570  1.1  mrg /* Helper function called by vect_finish_replace_stmt and
   1571  1.1  mrg    vect_finish_stmt_generation.  Set the location of the new
   1572  1.1  mrg    statement and create and return a stmt_vec_info for it.  */
   1573  1.1  mrg 
   1574  1.1  mrg static void
   1575  1.1  mrg vect_finish_stmt_generation_1 (vec_info *,
   1576  1.1  mrg 			       stmt_vec_info stmt_info, gimple *vec_stmt)
   1577  1.1  mrg {
   1578  1.1  mrg   if (dump_enabled_p ())
   1579  1.1  mrg     dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
   1580  1.1  mrg 
   1581  1.1  mrg   if (stmt_info)
   1582  1.1  mrg     {
   1583  1.1  mrg       gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
   1584  1.1  mrg 
   1585  1.1  mrg       /* While EH edges will generally prevent vectorization, stmt might
   1586  1.1  mrg 	 e.g. be in a must-not-throw region.  Ensure newly created stmts
   1587  1.1  mrg 	 that could throw are part of the same region.  */
   1588  1.1  mrg       int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
   1589  1.1  mrg       if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
   1590  1.1  mrg 	add_stmt_to_eh_lp (vec_stmt, lp_nr);
   1591  1.1  mrg     }
   1592  1.1  mrg   else
   1593  1.1  mrg     gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
   1594  1.1  mrg }
   1595  1.1  mrg 
   1596  1.1  mrg /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
   1597  1.1  mrg    which sets the same scalar result as STMT_INFO did.  Create and return a
   1598  1.1  mrg    stmt_vec_info for VEC_STMT.  */
   1599  1.1  mrg 
   1600  1.1  mrg void
   1601  1.1  mrg vect_finish_replace_stmt (vec_info *vinfo,
   1602  1.1  mrg 			  stmt_vec_info stmt_info, gimple *vec_stmt)
   1603  1.1  mrg {
   1604  1.1  mrg   gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
   1605  1.1  mrg   gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
   1606  1.1  mrg 
   1607  1.1  mrg   gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
   1608  1.1  mrg   gsi_replace (&gsi, vec_stmt, true);
   1609  1.1  mrg 
   1610  1.1  mrg   vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
   1611  1.1  mrg }
   1612  1.1  mrg 
/* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
   before *GSI, keeping virtual SSA form up to date where possible.  */

void
vect_finish_stmt_generation (vec_info *vinfo,
			     stmt_vec_info stmt_info, gimple *vec_stmt,
			     gimple_stmt_iterator *gsi)
{
  gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      /* Inherit the virtual use from the stmt we insert before.  */
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
	{
	  tree vdef = gimple_vdef (at_stmt);
	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
	  gimple_set_modified (vec_stmt, true);
	  /* If we have an SSA vuse and insert a store, update virtual
	     SSA form to avoid triggering the renamer.  Do so only
	     if we can easily see all uses - which is what almost always
	     happens with the way vectorized stmts are inserted.  */
	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
	      && ((is_gimple_assign (vec_stmt)
		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
		  || (is_gimple_call (vec_stmt)
		      && !(gimple_call_flags (vec_stmt)
			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
	    {
	      /* Give VEC_STMT a fresh vdef and rewire AT_STMT (the only
		 visible virtual use) to consume it.  */
	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
	      gimple_set_vdef (vec_stmt, new_vdef);
	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
	    }
	}
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}
   1653  1.1  mrg 
   1654  1.1  mrg /* We want to vectorize a call to combined function CFN with function
   1655  1.1  mrg    decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   1656  1.1  mrg    as the types of all inputs.  Check whether this is possible using
   1657  1.1  mrg    an internal function, returning its code if so or IFN_LAST if not.  */
   1658  1.1  mrg 
   1659  1.1  mrg static internal_fn
   1660  1.1  mrg vectorizable_internal_function (combined_fn cfn, tree fndecl,
   1661  1.1  mrg 				tree vectype_out, tree vectype_in)
   1662  1.1  mrg {
   1663  1.1  mrg   internal_fn ifn;
   1664  1.1  mrg   if (internal_fn_p (cfn))
   1665  1.1  mrg     ifn = as_internal_fn (cfn);
   1666  1.1  mrg   else
   1667  1.1  mrg     ifn = associated_internal_fn (fndecl);
   1668  1.1  mrg   if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
   1669  1.1  mrg     {
   1670  1.1  mrg       const direct_internal_fn_info &info = direct_internal_fn (ifn);
   1671  1.1  mrg       if (info.vectorizable)
   1672  1.1  mrg 	{
   1673  1.1  mrg 	  tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
   1674  1.1  mrg 	  tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
   1675  1.1  mrg 	  if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
   1676  1.1  mrg 					      OPTIMIZE_FOR_SPEED))
   1677  1.1  mrg 	    return ifn;
   1678  1.1  mrg 	}
   1679  1.1  mrg     }
   1680  1.1  mrg   return IFN_LAST;
   1681  1.1  mrg }
   1682  1.1  mrg 
   1683  1.1  mrg 
   1684  1.1  mrg static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
   1685  1.1  mrg 				  gimple_stmt_iterator *);
   1686  1.1  mrg 
/* Check whether a load or store statement in the loop described by
   LOOP_VINFO is possible in a loop using partial vectors.  This is
   testing whether the vectorizer pass has the appropriate support,
   as well as whether the target does.

   VLS_TYPE says whether the statement is a load or store and VECTYPE
   is the type of the vector being loaded or stored.  SLP_NODE is the SLP
   node that contains the statement, or null if none.  MEMORY_ACCESS_TYPE
   says how the load or store is going to be implemented and GROUP_SIZE
   is the number of load or store statements in the containing group.
   If the access is a gather load or scatter store, GS_INFO describes
   its arguments.  If the load or store is conditional, SCALAR_MASK is the
   condition under which it occurs.

   Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
   vectors is not supported, otherwise record the required rgroup control
   types.  */

static void
check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
				      slp_tree slp_node,
				      vec_load_store_type vls_type,
				      int group_size,
				      vect_memory_access_type
				      memory_access_type,
				      gather_scatter_info *gs_info,
				      tree scalar_mask)
{
  /* Invariant loads need no special support.  */
  if (memory_access_type == VMAT_INVARIANT)
    return;

  /* Number of vector stmts (and hence rgroup controls) this access
     needs.  */
  unsigned int nvectors;
  if (slp_node)
    nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
  else
    nvectors = vect_get_num_copies (loop_vinfo, vectype);

  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
  machine_mode vecmode = TYPE_MODE (vectype);
  bool is_load = (vls_type == VLS_LOAD);
  if (memory_access_type == VMAT_LOAD_STORE_LANES)
    {
      /* Load/store-lanes needs a masked variant of the lanes
	 instruction itself.  */
      if (is_load
	  ? !vect_load_lanes_supported (vectype, group_size, true)
	  : !vect_store_lanes_supported (vectype, group_size, true))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "can't operate on partial vectors because"
			     " the target doesn't have an appropriate"
			     " load/store-lanes instruction.\n");
	  LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
	  return;
	}
      vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
			     scalar_mask);
      return;
    }

  if (memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* Gather/scatter needs the masked internal-function form.  */
      internal_fn ifn = (is_load
			 ? IFN_MASK_GATHER_LOAD
			 : IFN_MASK_SCATTER_STORE);
      if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
						   gs_info->memory_type,
						   gs_info->offset_vectype,
						   gs_info->scale))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "can't operate on partial vectors because"
			     " the target doesn't have an appropriate"
			     " gather load or scatter store instruction.\n");
	  LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
	  return;
	}
      vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
			     scalar_mask);
      return;
    }

  if (memory_access_type != VMAT_CONTIGUOUS
      && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Element X of the data must come from iteration i * VF + X of the
	 scalar loop.  We need more work to support other mappings.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't operate on partial vectors because an"
			 " access isn't contiguous.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
      return;
    }

  if (!VECTOR_MODE_P (vecmode))
    {
      /* Emulated (non-native) vectors cannot be masked or
	 length-limited.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't operate on partial vectors when emulating"
			 " vector operations.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
      return;
    }

  /* We might load more scalars than we need for permuting SLP loads.
     We checked in get_group_load_store_type that the extra elements
     don't leak into a new vector.  */
  auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
  {
    unsigned int nvectors;
    if (can_div_away_from_zero_p (size, nunits, &nvectors))
      return nvectors;
    gcc_unreachable ();
  };

  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  machine_mode mask_mode;
  bool using_partial_vectors_p = false;
  /* Preferred scheme: mask-based partial vectors.  */
  if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
      && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
    {
      nvectors = group_memory_nvectors (group_size * vf, nunits);
      vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
      using_partial_vectors_p = true;
    }

  /* Alternative scheme: length-based partial vectors (len_load/len_store).
     Note both schemes may be recorded here; the final choice is made
     elsewhere.  */
  machine_mode vmode;
  if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
    {
      nvectors = group_memory_nvectors (group_size * vf, nunits);
      vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
      /* A differing mode means the length is counted in bytes rather
	 than lanes; record the byte-per-lane factor.  */
      unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
      vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
      using_partial_vectors_p = true;
    }

  if (!using_partial_vectors_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't operate on partial vectors because the"
			 " target doesn't have the appropriate partial"
			 " vectorization load or store.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
    }
}
   1836  1.1  mrg 
   1837  1.1  mrg /* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
   1838  1.1  mrg    form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
   1839  1.1  mrg    that needs to be applied to all loads and stores in a vectorized loop.
   1840  1.1  mrg    Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
   1841  1.1  mrg    otherwise return VEC_MASK & LOOP_MASK.
   1842  1.1  mrg 
   1843  1.1  mrg    MASK_TYPE is the type of both masks.  If new statements are needed,
   1844  1.1  mrg    insert them before GSI.  */
   1845  1.1  mrg 
   1846  1.1  mrg static tree
   1847  1.1  mrg prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
   1848  1.1  mrg 		  tree vec_mask, gimple_stmt_iterator *gsi)
   1849  1.1  mrg {
   1850  1.1  mrg   gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
   1851  1.1  mrg   if (!loop_mask)
   1852  1.1  mrg     return vec_mask;
   1853  1.1  mrg 
   1854  1.1  mrg   gcc_assert (TREE_TYPE (loop_mask) == mask_type);
   1855  1.1  mrg 
   1856  1.1  mrg   if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
   1857  1.1  mrg     return vec_mask;
   1858  1.1  mrg 
   1859  1.1  mrg   tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
   1860  1.1  mrg   gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
   1861  1.1  mrg 					  vec_mask, loop_mask);
   1862  1.1  mrg 
   1863  1.1  mrg   gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
   1864  1.1  mrg   return and_res;
   1865  1.1  mrg }
   1866  1.1  mrg 
/* Determine whether we can use a gather load or scatter store to vectorize
   strided load or store STMT_INFO by truncating the current offset to a
   smaller width.  We need to be able to construct an offset vector:

     { 0, X, X*2, X*3, ... }

   without loss of precision, where X is STMT_INFO's DR_STEP.

   Return true if this is possible, describing the gather load or scatter
   store in GS_INFO.  MASKED_P is true if the load or store is conditional.  */

static bool
vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
				     loop_vec_info loop_vinfo, bool masked_p,
				     gather_scatter_info *gs_info)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  data_reference *dr = dr_info->dr;
  tree step = DR_STEP (dr);
  /* The whole scheme relies on the step being a compile-time constant,
     since the offset vector is built from multiples of it.  */
  if (TREE_CODE (step) != INTEGER_CST)
    {
      /* ??? Perhaps we could use range information here?  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "cannot truncate variable step.\n");
      return false;
    }

  /* Get the number of bits in an element.  */
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
  unsigned int element_bits = GET_MODE_BITSIZE (element_mode);

  /* Set COUNT to the upper limit on the number of elements - 1.
     Start with the maximum vectorization factor.  */
  unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;

  /* Try lowering COUNT to the number of scalar latch iterations.  */
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  widest_int max_iters;
  if (max_loop_iterations (loop, &max_iters)
      && max_iters < count)
    count = max_iters.to_shwi ();

  /* Try scales of 1 and the element size; a larger scale lets the
     offsets themselves be proportionally smaller.  */
  int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
  wi::overflow_type overflow = wi::OVF_NONE;
  for (int i = 0; i < 2; ++i)
    {
      int scale = scales[i];
      widest_int factor;
      /* SCALE must divide the step exactly, otherwise the scaled
	 offsets would not reproduce the original addresses.  */
      if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
	continue;

      /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE.  */
      widest_int range = wi::mul (count, factor, SIGNED, &overflow);
      if (overflow)
	continue;
      signop sign = range >= 0 ? UNSIGNED : SIGNED;
      unsigned int min_offset_bits = wi::min_precision (range, sign);

      /* Find the narrowest viable offset type (round up to a power of
	 two number of bits, as required for integer modes).  */
      unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
      tree offset_type = build_nonstandard_integer_type (offset_bits,
							 sign == UNSIGNED);

      /* See whether the target supports the operation with an offset
	 no narrower than OFFSET_TYPE.  */
      tree memory_type = TREE_TYPE (DR_REF (dr));
      if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
				     vectype, memory_type, offset_type, scale,
				     &gs_info->ifn, &gs_info->offset_vectype)
	  || gs_info->ifn == IFN_LAST)
	continue;

      /* Success: describe the access in GS_INFO.  */
      gs_info->decl = NULL_TREE;
      /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
	 but we don't need to store that here.  */
      gs_info->base = NULL_TREE;
      gs_info->element_type = TREE_TYPE (vectype);
      gs_info->offset = fold_convert (offset_type, step);
      gs_info->offset_dt = vect_constant_def;
      gs_info->scale = scale;
      gs_info->memory_type = memory_type;
      return true;
    }

  /* Only mention truncation if an attempted range computation actually
     overflowed; other failures are silent "not supported" cases.  */
  if (overflow && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "truncating gather/scatter offset to %d bits"
		     " might change its value.\n", element_bits);

  return false;
}
   1961  1.1  mrg 
   1962  1.1  mrg /* Return true if we can use gather/scatter internal functions to
   1963  1.1  mrg    vectorize STMT_INFO, which is a grouped or strided load or store.
   1964  1.1  mrg    MASKED_P is true if load or store is conditional.  When returning
   1965  1.1  mrg    true, fill in GS_INFO with the information required to perform the
   1966  1.1  mrg    operation.  */
   1967  1.1  mrg 
   1968  1.1  mrg static bool
   1969  1.1  mrg vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
   1970  1.1  mrg 				    loop_vec_info loop_vinfo, bool masked_p,
   1971  1.1  mrg 				    gather_scatter_info *gs_info)
   1972  1.1  mrg {
   1973  1.1  mrg   if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
   1974  1.1  mrg       || gs_info->ifn == IFN_LAST)
   1975  1.1  mrg     return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
   1976  1.1  mrg 						masked_p, gs_info);
   1977  1.1  mrg 
   1978  1.1  mrg   tree old_offset_type = TREE_TYPE (gs_info->offset);
   1979  1.1  mrg   tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
   1980  1.1  mrg 
   1981  1.1  mrg   gcc_assert (TYPE_PRECISION (new_offset_type)
   1982  1.1  mrg 	      >= TYPE_PRECISION (old_offset_type));
   1983  1.1  mrg   gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
   1984  1.1  mrg 
   1985  1.1  mrg   if (dump_enabled_p ())
   1986  1.1  mrg     dump_printf_loc (MSG_NOTE, vect_location,
   1987  1.1  mrg 		     "using gather/scatter for strided/grouped access,"
   1988  1.1  mrg 		     " scale = %d\n", gs_info->scale);
   1989  1.1  mrg 
   1990  1.1  mrg   return true;
   1991  1.1  mrg }
   1992  1.1  mrg 
   1993  1.1  mrg /* STMT_INFO is a non-strided load or store, meaning that it accesses
   1994  1.1  mrg    elements with a known constant step.  Return -1 if that step
   1995  1.1  mrg    is negative, 0 if it is zero, and 1 if it is greater than zero.  */
   1996  1.1  mrg 
   1997  1.1  mrg static int
   1998  1.1  mrg compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
   1999  1.1  mrg {
   2000  1.1  mrg   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
   2001  1.1  mrg   return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
   2002  1.1  mrg 			       size_zero_node);
   2003  1.1  mrg }
   2004  1.1  mrg 
   2005  1.1  mrg /* If the target supports a permute mask that reverses the elements in
   2006  1.1  mrg    a vector of type VECTYPE, return that mask, otherwise return null.  */
   2007  1.1  mrg 
   2008  1.1  mrg static tree
   2009  1.1  mrg perm_mask_for_reverse (tree vectype)
   2010  1.1  mrg {
   2011  1.1  mrg   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
   2012  1.1  mrg 
   2013  1.1  mrg   /* The encoding has a single stepped pattern.  */
   2014  1.1  mrg   vec_perm_builder sel (nunits, 1, 3);
   2015  1.1  mrg   for (int i = 0; i < 3; ++i)
   2016  1.1  mrg     sel.quick_push (nunits - 1 - i);
   2017  1.1  mrg 
   2018  1.1  mrg   vec_perm_indices indices (sel, 1, nunits);
   2019  1.1  mrg   if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
   2020  1.1  mrg     return NULL_TREE;
   2021  1.1  mrg   return vect_gen_perm_mask_checked (vectype, indices);
   2022  1.1  mrg }
   2023  1.1  mrg 
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT_INFO is a load or store that
   accesses consecutive elements with a negative step.  Sets *POFFSET
   to the offset to be applied to the DR for the first access.

   Returns the memory access type to use: VMAT_CONTIGUOUS_REVERSE (or
   VMAT_CONTIGUOUS_DOWN for invariant stores) when a reversed contiguous
   access is possible, otherwise falls back to VMAT_ELEMENTWISE with
   *POFFSET reset to 0.  */

static vect_memory_access_type
get_negative_load_store_type (vec_info *vinfo,
			      stmt_vec_info stmt_info, tree vectype,
			      vec_load_store_type vls_type,
			      unsigned int ncopies, poly_int64 *poffset)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  dr_alignment_support alignment_support_scheme;

  /* Only a single vector copy is handled; with multiple copies fall
     back to elementwise accesses.  */
  if (ncopies > 1)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "multiple types with negative step.\n");
      return VMAT_ELEMENTWISE;
    }

  /* For backward running DRs the first access in vectype actually is
     N-1 elements before the address of the DR.  */
  *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
	      * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));

  /* The target must support (possibly unaligned) access at the
     adjusted address.  */
  int misalignment = dr_misalignment (dr_info, vectype, *poffset);
  alignment_support_scheme
    = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
  if (alignment_support_scheme != dr_aligned
      && alignment_support_scheme != dr_unaligned_supported)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "negative step but alignment required.\n");
      *poffset = 0;
      return VMAT_ELEMENTWISE;
    }

  /* An invariant source stores the same value to every element, so no
     reversing permute is needed.  */
  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "negative step with invariant source;"
			 " no permute needed.\n");
      return VMAT_CONTIGUOUS_DOWN;
    }

  /* Otherwise we need a permute that reverses the vector elements.  */
  if (!perm_mask_for_reverse (vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "negative step and reversing not supported.\n");
      *poffset = 0;
      return VMAT_ELEMENTWISE;
    }

  return VMAT_CONTIGUOUS_REVERSE;
}
   2084  1.1  mrg 
   2085  1.1  mrg /* STMT_INFO is either a masked or unconditional store.  Return the value
   2086  1.1  mrg    being stored.  */
   2087  1.1  mrg 
   2088  1.1  mrg tree
   2089  1.1  mrg vect_get_store_rhs (stmt_vec_info stmt_info)
   2090  1.1  mrg {
   2091  1.1  mrg   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
   2092  1.1  mrg     {
   2093  1.1  mrg       gcc_assert (gimple_assign_single_p (assign));
   2094  1.1  mrg       return gimple_assign_rhs1 (assign);
   2095  1.1  mrg     }
   2096  1.1  mrg   if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
   2097  1.1  mrg     {
   2098  1.1  mrg       internal_fn ifn = gimple_call_internal_fn (call);
   2099  1.1  mrg       int index = internal_fn_stored_value_index (ifn);
   2100  1.1  mrg       gcc_assert (index >= 0);
   2101  1.1  mrg       return gimple_call_arg (call, index);
   2102  1.1  mrg     }
   2103  1.1  mrg   gcc_unreachable ();
   2104  1.1  mrg }
   2105  1.1  mrg 
/* Function VECTOR_VECTOR_COMPOSITION_TYPE

   This function returns a vector type which can be composed from NELTS
   pieces, whose type is recorded in PTYPE.  VTYPE should be a vector type,
   and has the same vector size as the return vector.  It first checks
   whether the target supports a pieces-sized vector mode for construction;
   if the target does not, it then checks for a pieces-sized scalar mode.
   It returns NULL_TREE if it fails to find an available composition.

   For example, for (vtype=V16QI, nelts=4), we can probably get:
     - V16QI with PTYPE V4QI.
     - V4SI with PTYPE SI.
     - NULL_TREE.  */

static tree
vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
{
  gcc_assert (VECTOR_TYPE_P (vtype));
  gcc_assert (known_gt (nelts, 0U));

  machine_mode vmode = TYPE_MODE (vtype);
  /* The construction below only makes sense for true vector modes.  */
  if (!VECTOR_MODE_P (vmode))
    return NULL_TREE;

  /* PBSIZE is the size in bits of each of the NELTS pieces; the vector
     size must divide evenly into NELTS pieces.  */
  poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
  unsigned int pbsize;
  if (constant_multiple_p (vbsize, nelts, &pbsize))
    {
      /* First check if vec_init optab supports construction from
	 vector pieces directly.  */
      scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
      poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
      machine_mode rmode;
      if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
	  && (convert_optab_handler (vec_init_optab, vmode, rmode)
	      != CODE_FOR_nothing))
	{
	  /* Pieces are INELTS-element subvectors of VTYPE's element type.  */
	  *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
	  return vtype;
	}

      /* Otherwise check if exists an integer type of the same piece size and
	 if vec_init optab supports construction from it directly.  */
      if (int_mode_for_size (pbsize, 0).exists (&elmode)
	  && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
	  && (convert_optab_handler (vec_init_optab, rmode, elmode)
	      != CODE_FOR_nothing))
	{
	  /* Pieces are PBSIZE-bit unsigned integers; the result is a
	     vector of NELTS such integers (same overall size as VTYPE).  */
	  *ptype = build_nonstandard_integer_type (pbsize, 1);
	  return build_vector_type (*ptype, nelts);
	}
    }

  return NULL_TREE;
}
   2161  1.1  mrg 
   2162  1.1  mrg /* A subroutine of get_load_store_type, with a subset of the same
   2163  1.1  mrg    arguments.  Handle the case where STMT_INFO is part of a grouped load
   2164  1.1  mrg    or store.
   2165  1.1  mrg 
   2166  1.1  mrg    For stores, the statements in the group are all consecutive
   2167  1.1  mrg    and there is no gap at the end.  For loads, the statements in the
   2168  1.1  mrg    group might not be consecutive; there can be gaps between statements
   2169  1.1  mrg    as well as at the end.  */
   2170  1.1  mrg 
   2171  1.1  mrg static bool
   2172  1.1  mrg get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
   2173  1.1  mrg 			   tree vectype, slp_tree slp_node,
   2174  1.1  mrg 			   bool masked_p, vec_load_store_type vls_type,
   2175  1.1  mrg 			   vect_memory_access_type *memory_access_type,
   2176  1.1  mrg 			   poly_int64 *poffset,
   2177  1.1  mrg 			   dr_alignment_support *alignment_support_scheme,
   2178  1.1  mrg 			   int *misalignment,
   2179  1.1  mrg 			   gather_scatter_info *gs_info)
   2180  1.1  mrg {
   2181  1.1  mrg   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   2182  1.1  mrg   class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
   2183  1.1  mrg   stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
   2184  1.1  mrg   dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
   2185  1.1  mrg   unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
   2186  1.1  mrg   bool single_element_p = (stmt_info == first_stmt_info
   2187  1.1  mrg 			   && !DR_GROUP_NEXT_ELEMENT (stmt_info));
   2188  1.1  mrg   unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
   2189  1.1  mrg   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
   2190  1.1  mrg 
   2191  1.1  mrg   /* True if the vectorized statements would access beyond the last
   2192  1.1  mrg      statement in the group.  */
   2193  1.1  mrg   bool overrun_p = false;
   2194  1.1  mrg 
   2195  1.1  mrg   /* True if we can cope with such overrun by peeling for gaps, so that
   2196  1.1  mrg      there is at least one final scalar iteration after the vector loop.  */
   2197  1.1  mrg   bool can_overrun_p = (!masked_p
   2198  1.1  mrg 			&& vls_type == VLS_LOAD
   2199  1.1  mrg 			&& loop_vinfo
   2200  1.1  mrg 			&& !loop->inner);
   2201  1.1  mrg 
   2202  1.1  mrg   /* There can only be a gap at the end of the group if the stride is
   2203  1.1  mrg      known at compile time.  */
   2204  1.1  mrg   gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
   2205  1.1  mrg 
   2206  1.1  mrg   /* Stores can't yet have gaps.  */
   2207  1.1  mrg   gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
   2208  1.1  mrg 
   2209  1.1  mrg   if (slp_node)
   2210  1.1  mrg     {
   2211  1.1  mrg       /* For SLP vectorization we directly vectorize a subchain
   2212  1.1  mrg 	 without permutation.  */
   2213  1.1  mrg       if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
   2214  1.1  mrg 	first_dr_info
   2215  1.1  mrg 	  = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
   2216  1.1  mrg       if (STMT_VINFO_STRIDED_P (first_stmt_info))
   2217  1.1  mrg 	{
   2218  1.1  mrg 	  /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
   2219  1.1  mrg 	     separated by the stride, until we have a complete vector.
   2220  1.1  mrg 	     Fall back to scalar accesses if that isn't possible.  */
   2221  1.1  mrg 	  if (multiple_p (nunits, group_size))
   2222  1.1  mrg 	    *memory_access_type = VMAT_STRIDED_SLP;
   2223  1.1  mrg 	  else
   2224  1.1  mrg 	    *memory_access_type = VMAT_ELEMENTWISE;
   2225  1.1  mrg 	}
   2226  1.1  mrg       else
   2227  1.1  mrg 	{
   2228  1.1  mrg 	  overrun_p = loop_vinfo && gap != 0;
   2229  1.1  mrg 	  if (overrun_p && vls_type != VLS_LOAD)
   2230  1.1  mrg 	    {
   2231  1.1  mrg 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   2232  1.1  mrg 			       "Grouped store with gaps requires"
   2233  1.1  mrg 			       " non-consecutive accesses\n");
   2234  1.1  mrg 	      return false;
   2235  1.1  mrg 	    }
   2236  1.1  mrg 	  /* An overrun is fine if the trailing elements are smaller
   2237  1.1  mrg 	     than the alignment boundary B.  Every vector access will
   2238  1.1  mrg 	     be a multiple of B and so we are guaranteed to access a
   2239  1.1  mrg 	     non-gap element in the same B-sized block.  */
   2240  1.1  mrg 	  if (overrun_p
   2241  1.1  mrg 	      && gap < (vect_known_alignment_in_bytes (first_dr_info,
   2242  1.1  mrg 						       vectype)
   2243  1.1  mrg 			/ vect_get_scalar_dr_size (first_dr_info)))
   2244  1.1  mrg 	    overrun_p = false;
   2245  1.1  mrg 
   2246  1.1  mrg 	  /* If the gap splits the vector in half and the target
   2247  1.1  mrg 	     can do half-vector operations avoid the epilogue peeling
   2248  1.1  mrg 	     by simply loading half of the vector only.  Usually
   2249  1.1  mrg 	     the construction with an upper zero half will be elided.  */
   2250  1.1  mrg 	  dr_alignment_support alss;
   2251  1.1  mrg 	  int misalign = dr_misalignment (first_dr_info, vectype);
   2252  1.1  mrg 	  tree half_vtype;
   2253  1.1  mrg 	  if (overrun_p
   2254  1.1  mrg 	      && !masked_p
   2255  1.1  mrg 	      && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
   2256  1.1  mrg 							  vectype, misalign)))
   2257  1.1  mrg 		   == dr_aligned
   2258  1.1  mrg 		  || alss == dr_unaligned_supported)
   2259  1.1  mrg 	      && known_eq (nunits, (group_size - gap) * 2)
   2260  1.1  mrg 	      && known_eq (nunits, group_size)
   2261  1.1  mrg 	      && (vector_vector_composition_type (vectype, 2, &half_vtype)
   2262  1.1  mrg 		  != NULL_TREE))
   2263  1.1  mrg 	    overrun_p = false;
   2264  1.1  mrg 
   2265  1.1  mrg 	  if (overrun_p && !can_overrun_p)
   2266  1.1  mrg 	    {
   2267  1.1  mrg 	      if (dump_enabled_p ())
   2268  1.1  mrg 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   2269  1.1  mrg 				 "Peeling for outer loop is not supported\n");
   2270  1.1  mrg 	      return false;
   2271  1.1  mrg 	    }
   2272  1.1  mrg 	  int cmp = compare_step_with_zero (vinfo, stmt_info);
   2273  1.1  mrg 	  if (cmp < 0)
   2274  1.1  mrg 	    {
   2275  1.1  mrg 	      if (single_element_p)
   2276  1.1  mrg 		/* ???  The VMAT_CONTIGUOUS_REVERSE code generation is
   2277  1.1  mrg 		   only correct for single element "interleaving" SLP.  */
   2278  1.1  mrg 		*memory_access_type = get_negative_load_store_type
   2279  1.1  mrg 			     (vinfo, stmt_info, vectype, vls_type, 1, poffset);
   2280  1.1  mrg 	      else
   2281  1.1  mrg 		{
   2282  1.1  mrg 		  /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
   2283  1.1  mrg 		     separated by the stride, until we have a complete vector.
   2284  1.1  mrg 		     Fall back to scalar accesses if that isn't possible.  */
   2285  1.1  mrg 		  if (multiple_p (nunits, group_size))
   2286  1.1  mrg 		    *memory_access_type = VMAT_STRIDED_SLP;
   2287  1.1  mrg 		  else
   2288  1.1  mrg 		    *memory_access_type = VMAT_ELEMENTWISE;
   2289  1.1  mrg 		}
   2290  1.1  mrg 	    }
   2291  1.1  mrg 	  else
   2292  1.1  mrg 	    {
   2293  1.1  mrg 	      gcc_assert (!loop_vinfo || cmp > 0);
   2294  1.1  mrg 	      *memory_access_type = VMAT_CONTIGUOUS;
   2295  1.1  mrg 	    }
   2296  1.1  mrg 
   2297  1.1  mrg 	  /* When we have a contiguous access across loop iterations
   2298  1.1  mrg 	     but the access in the loop doesn't cover the full vector
   2299  1.1  mrg 	     we can end up with no gap recorded but still excess
   2300  1.1  mrg 	     elements accessed, see PR103116.  Make sure we peel for
   2301  1.1  mrg 	     gaps if necessary and sufficient and give up if not.  */
   2302  1.1  mrg 	  if (loop_vinfo
   2303  1.1  mrg 	      && *memory_access_type == VMAT_CONTIGUOUS
   2304  1.1  mrg 	      && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
   2305  1.1  mrg 	      && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
   2306  1.1  mrg 			      nunits))
   2307  1.1  mrg 	    {
   2308  1.1  mrg 	      unsigned HOST_WIDE_INT cnunits, cvf;
   2309  1.1  mrg 	      if (!can_overrun_p
   2310  1.1  mrg 		  || !nunits.is_constant (&cnunits)
   2311  1.1  mrg 		  || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
   2312  1.1  mrg 		  /* Peeling for gaps assumes that a single scalar iteration
   2313  1.1  mrg 		     is enough to make sure the last vector iteration doesn't
   2314  1.1  mrg 		     access excess elements.
   2315  1.1  mrg 		     ???  Enhancements include peeling multiple iterations
   2316  1.1  mrg 		     or using masked loads with a static mask.  */
   2317  1.1  mrg 		  || (group_size * cvf) % cnunits + group_size < cnunits)
   2318  1.1  mrg 		{
   2319  1.1  mrg 		  if (dump_enabled_p ())
   2320  1.1  mrg 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   2321  1.1  mrg 				     "peeling for gaps insufficient for "
   2322  1.1  mrg 				     "access\n");
   2323  1.1  mrg 		  return false;
   2324  1.1  mrg 		}
   2325  1.1  mrg 	      overrun_p = true;
   2326  1.1  mrg 	    }
   2327  1.1  mrg 	}
   2328  1.1  mrg     }
   2329  1.1  mrg   else
   2330  1.1  mrg     {
   2331  1.1  mrg       /* We can always handle this case using elementwise accesses,
   2332  1.1  mrg 	 but see if something more efficient is available.  */
   2333  1.1  mrg       *memory_access_type = VMAT_ELEMENTWISE;
   2334  1.1  mrg 
   2335  1.1  mrg       /* If there is a gap at the end of the group then these optimizations
   2336  1.1  mrg 	 would access excess elements in the last iteration.  */
   2337  1.1  mrg       bool would_overrun_p = (gap != 0);
   2338  1.1  mrg       /* An overrun is fine if the trailing elements are smaller than the
   2339  1.1  mrg 	 alignment boundary B.  Every vector access will be a multiple of B
   2340  1.1  mrg 	 and so we are guaranteed to access a non-gap element in the
   2341  1.1  mrg 	 same B-sized block.  */
   2342  1.1  mrg       if (would_overrun_p
   2343  1.1  mrg 	  && !masked_p
   2344  1.1  mrg 	  && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
   2345  1.1  mrg 		    / vect_get_scalar_dr_size (first_dr_info)))
   2346  1.1  mrg 	would_overrun_p = false;
   2347  1.1  mrg 
   2348  1.1  mrg       if (!STMT_VINFO_STRIDED_P (first_stmt_info)
   2349  1.1  mrg 	  && (can_overrun_p || !would_overrun_p)
   2350  1.1  mrg 	  && compare_step_with_zero (vinfo, stmt_info) > 0)
   2351  1.1  mrg 	{
   2352  1.1  mrg 	  /* First cope with the degenerate case of a single-element
   2353  1.1  mrg 	     vector.  */
   2354  1.1  mrg 	  if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
   2355  1.1  mrg 	    ;
   2356  1.1  mrg 
   2357  1.1  mrg 	  /* Otherwise try using LOAD/STORE_LANES.  */
   2358  1.1  mrg 	  else if (vls_type == VLS_LOAD
   2359  1.1  mrg 		   ? vect_load_lanes_supported (vectype, group_size, masked_p)
   2360  1.1  mrg 		   : vect_store_lanes_supported (vectype, group_size,
   2361  1.1  mrg 						 masked_p))
   2362  1.1  mrg 	    {
   2363  1.1  mrg 	      *memory_access_type = VMAT_LOAD_STORE_LANES;
   2364  1.1  mrg 	      overrun_p = would_overrun_p;
   2365  1.1  mrg 	    }
   2366  1.1  mrg 
   2367  1.1  mrg 	  /* If that fails, try using permuting loads.  */
   2368  1.1  mrg 	  else if (vls_type == VLS_LOAD
   2369  1.1  mrg 		   ? vect_grouped_load_supported (vectype, single_element_p,
   2370  1.1  mrg 						  group_size)
   2371  1.1  mrg 		   : vect_grouped_store_supported (vectype, group_size))
   2372  1.1  mrg 	    {
   2373  1.1  mrg 	      *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
   2374  1.1  mrg 	      overrun_p = would_overrun_p;
   2375  1.1  mrg 	    }
   2376  1.1  mrg 	}
   2377  1.1  mrg 
   2378  1.1  mrg       /* As a last resort, trying using a gather load or scatter store.
   2379  1.1  mrg 
   2380  1.1  mrg 	 ??? Although the code can handle all group sizes correctly,
   2381  1.1  mrg 	 it probably isn't a win to use separate strided accesses based
   2382  1.1  mrg 	 on nearby locations.  Or, even if it's a win over scalar code,
   2383  1.1  mrg 	 it might not be a win over vectorizing at a lower VF, if that
   2384  1.1  mrg 	 allows us to use contiguous accesses.  */
   2385  1.1  mrg       if (*memory_access_type == VMAT_ELEMENTWISE
   2386  1.1  mrg 	  && single_element_p
   2387  1.1  mrg 	  && loop_vinfo
   2388  1.1  mrg 	  && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
   2389  1.1  mrg 						 masked_p, gs_info))
   2390  1.1  mrg 	*memory_access_type = VMAT_GATHER_SCATTER;
   2391  1.1  mrg     }
   2392  1.1  mrg 
   2393  1.1  mrg   if (*memory_access_type == VMAT_GATHER_SCATTER
   2394  1.1  mrg       || *memory_access_type == VMAT_ELEMENTWISE)
   2395  1.1  mrg     {
   2396  1.1  mrg       *alignment_support_scheme = dr_unaligned_supported;
   2397  1.1  mrg       *misalignment = DR_MISALIGNMENT_UNKNOWN;
   2398  1.1  mrg     }
   2399  1.1  mrg   else
   2400  1.1  mrg     {
   2401  1.1  mrg       *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
   2402  1.1  mrg       *alignment_support_scheme
   2403  1.1  mrg 	= vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
   2404  1.1  mrg 					 *misalignment);
   2405  1.1  mrg     }
   2406  1.1  mrg 
   2407  1.1  mrg   if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
   2408  1.1  mrg     {
   2409  1.1  mrg       /* STMT is the leader of the group. Check the operands of all the
   2410  1.1  mrg 	 stmts of the group.  */
   2411  1.1  mrg       stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
   2412  1.1  mrg       while (next_stmt_info)
   2413  1.1  mrg 	{
   2414  1.1  mrg 	  tree op = vect_get_store_rhs (next_stmt_info);
   2415  1.1  mrg 	  enum vect_def_type dt;
   2416  1.1  mrg 	  if (!vect_is_simple_use (op, vinfo, &dt))
   2417  1.1  mrg 	    {
   2418  1.1  mrg 	      if (dump_enabled_p ())
   2419  1.1  mrg 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   2420  1.1  mrg 				 "use not simple.\n");
   2421  1.1  mrg 	      return false;
   2422  1.1  mrg 	    }
   2423  1.1  mrg 	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
   2424  1.1  mrg 	}
   2425  1.1  mrg     }
   2426  1.1  mrg 
   2427  1.1  mrg   if (overrun_p)
   2428  1.1  mrg     {
   2429  1.1  mrg       gcc_assert (can_overrun_p);
   2430  1.1  mrg       if (dump_enabled_p ())
   2431  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   2432  1.1  mrg 			 "Data access with gaps requires scalar "
   2433  1.1  mrg 			 "epilogue loop\n");
   2434  1.1  mrg       LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
   2435  1.1  mrg     }
   2436  1.1  mrg 
   2437  1.1  mrg   return true;
   2438  1.1  mrg }
   2439  1.1  mrg 
/* Analyze load or store statement STMT_INFO of type VLS_TYPE.  Return true
   if there is a memory access type that the vectorized form can use,
   storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
   or scatters, fill in GS_INFO accordingly.  In addition
   *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
   the target does not support the alignment scheme.  *MISALIGNMENT
   is set according to the alignment of the access (including
   DR_MISALIGNMENT_UNKNOWN when it is unknown).  *POFFSET is set to
   the offset (in elements) that the generated accesses start from,
   which is nonzero only for negative-step accesses.

   SLP_NODE is nonnull if we're performing SLP rather than loop
   vectorization.
   MASKED_P is true if the statement is conditional on a vectorized mask.
   VECTYPE is the vector type that the vectorized statements will use.
   NCOPIES is the number of vector statements that will be needed.  */

static bool
get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
		     tree vectype, slp_tree slp_node,
		     bool masked_p, vec_load_store_type vls_type,
		     unsigned int ncopies,
		     vect_memory_access_type *memory_access_type,
		     poly_int64 *poffset,
		     dr_alignment_support *alignment_support_scheme,
		     int *misalignment,
		     gather_scatter_info *gs_info)
{
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  /* Default the out-parameters; the branches below overwrite them as
     appropriate.  */
  *misalignment = DR_MISALIGNMENT_UNKNOWN;
  *poffset = 0;
  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      *memory_access_type = VMAT_GATHER_SCATTER;
      /* The gather/scatter classification was made during earlier
	 analysis, so re-deriving the details here must succeed.  */
      if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
	gcc_unreachable ();
      else if (!vect_is_simple_use (gs_info->offset, vinfo,
				    &gs_info->offset_dt,
				    &gs_info->offset_vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "%s index use not simple.\n",
			     vls_type == VLS_LOAD ? "gather" : "scatter");
	  return false;
	}
      else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
	{
	  /* Neither an internal function nor a target builtin is
	     available, so the operation would have to be emulated with
	     scalar accesses.  Only loads can be emulated, and only with
	     constant vector lengths whose offset-vector length is a
	     multiple of the data-vector length.  */
	  if (vls_type != VLS_LOAD)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported emulated scatter.\n");
	      return false;
	    }
	  else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
		   || !TYPE_VECTOR_SUBPARTS
			 (gs_info->offset_vectype).is_constant ()
		   || VECTOR_BOOLEAN_TYPE_P (gs_info->offset_vectype)
		   || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
					      (gs_info->offset_vectype),
					    TYPE_VECTOR_SUBPARTS (vectype)))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported vector types for emulated "
				 "gather.\n");
	      return false;
	    }
	}
      /* Gather-scatter accesses perform only component accesses, alignment
	 is irrelevant for them.  */
      *alignment_support_scheme = dr_unaligned_supported;
    }
  else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* Delegate the choice for interleaved accesses; the helper fills
	 in all the out-parameters itself.  */
      if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
				      masked_p,
				      vls_type, memory_access_type, poffset,
				      alignment_support_scheme,
				      misalignment, gs_info))
	return false;
    }
  else if (STMT_VINFO_STRIDED_P (stmt_info))
    {
      gcc_assert (!slp_node);
      /* Prefer a strided gather/scatter when the target supports it,
	 otherwise fall back to element-by-element accesses.  */
      if (loop_vinfo
	  && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
						 masked_p, gs_info))
	*memory_access_type = VMAT_GATHER_SCATTER;
      else
	*memory_access_type = VMAT_ELEMENTWISE;
      /* Alignment is irrelevant here.  */
      *alignment_support_scheme = dr_unaligned_supported;
    }
  else
    {
      int cmp = compare_step_with_zero (vinfo, stmt_info);
      if (cmp == 0)
	{
	  /* A zero step means every iteration reads the same location;
	     only loads can be invariant.  */
	  gcc_assert (vls_type == VLS_LOAD);
	  *memory_access_type = VMAT_INVARIANT;
	  /* Invariant accesses perform only component accesses, alignment
	     is irrelevant for them.  */
	  *alignment_support_scheme = dr_unaligned_supported;
	}
      else
	{
	  if (cmp < 0)
	    *memory_access_type = get_negative_load_store_type
	       (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
	  else
	    *memory_access_type = VMAT_CONTIGUOUS;
	  /* Contiguous accesses need target support for the chosen
	     alignment; compute the misalignment at *POFFSET and query
	     the target.  */
	  *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
					   vectype, *poffset);
	  *alignment_support_scheme
	    = vect_supportable_dr_alignment (vinfo,
					     STMT_VINFO_DR_INFO (stmt_info),
					     vectype, *misalignment);
	}
    }

  /* Elementwise accesses need one scalar access per element, which is
     not expressible when the number of elements is not a compile-time
     constant.  */
  if ((*memory_access_type == VMAT_ELEMENTWISE
       || *memory_access_type == VMAT_STRIDED_SLP)
      && !nunits.is_constant ())
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Not using elementwise accesses due to variable "
			 "vectorization factor.\n");
      return false;
    }

  if (*alignment_support_scheme == dr_unaligned_unsupported)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "unsupported unaligned access\n");
      return false;
    }

  /* FIXME: At the moment the cost model seems to underestimate the
     cost of using elementwise accesses.  This check preserves the
     traditional behavior until that can be fixed.  */
  stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
  if (!first_stmt_info)
    first_stmt_info = stmt_info;
  if (*memory_access_type == VMAT_ELEMENTWISE
      && !STMT_VINFO_STRIDED_P (first_stmt_info)
      && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
	   && !DR_GROUP_NEXT_ELEMENT (stmt_info)
	   && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not falling back to elementwise accesses\n");
      return false;
    }
  return true;
}
   2598  1.1  mrg 
/* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
   conditional operation STMT_INFO.  When returning true, store the mask
   in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
   vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
   to the mask in *MASK_NODE if MASK_NODE is not NULL.  */

static bool
vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
			slp_tree slp_node, unsigned mask_index,
			tree *mask, slp_tree *mask_node,
			vect_def_type *mask_dt_out, tree *mask_vectype_out)
{
  enum vect_def_type mask_dt;
  tree mask_vectype;
  slp_tree mask_node_1;
  /* The mask operand must have a recognizable definition; this also
     fills in *MASK with the operand itself.  */
  if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
			   mask, &mask_node_1, &mask_dt, &mask_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "mask use not simple.\n");
      return false;
    }

  /* Only scalar booleans are acceptable mask values.  */
  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "mask argument is not a boolean.\n");
      return false;
    }

  /* If the caller is not prepared for adjusting an external/constant
     SLP mask vector type fail.  */
  if (slp_node
      && !mask_node
      && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "SLP mask argument is not vectorized.\n");
      return false;
    }

  /* If vect_is_simple_use did not provide a mask vector type, derive
     one from the data vector type of the statement.  */
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (!mask_vectype)
    mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));

  if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "could not find an appropriate vector mask type.\n");
      return false;
    }

  /* The mask must supply exactly one boolean per data element.  */
  if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
		TYPE_VECTOR_SUBPARTS (vectype)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "vector mask type %T"
			 " does not match vector data type %T.\n",
			 mask_vectype, vectype);

      return false;
    }

  /* All checks passed: publish the results.  */
  *mask_dt_out = mask_dt;
  *mask_vectype_out = mask_vectype;
  if (mask_node)
    *mask_node = mask_node_1;
  return true;
}
   2673  1.1  mrg 
   2674  1.1  mrg /* Return true if stored value RHS is suitable for vectorizing store
   2675  1.1  mrg    statement STMT_INFO.  When returning true, store the type of the
   2676  1.1  mrg    definition in *RHS_DT_OUT, the type of the vectorized store value in
   2677  1.1  mrg    *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT.  */
   2678  1.1  mrg 
   2679  1.1  mrg static bool
   2680  1.1  mrg vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
   2681  1.1  mrg 		      slp_tree slp_node, tree rhs,
   2682  1.1  mrg 		      vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
   2683  1.1  mrg 		      vec_load_store_type *vls_type_out)
   2684  1.1  mrg {
   2685  1.1  mrg   /* In the case this is a store from a constant make sure
   2686  1.1  mrg      native_encode_expr can handle it.  */
   2687  1.1  mrg   if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
   2688  1.1  mrg     {
   2689  1.1  mrg       if (dump_enabled_p ())
   2690  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   2691  1.1  mrg 			 "cannot encode constant as a byte sequence.\n");
   2692  1.1  mrg       return false;
   2693  1.1  mrg     }
   2694  1.1  mrg 
   2695  1.1  mrg   unsigned op_no = 0;
   2696  1.1  mrg   if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
   2697  1.1  mrg     {
   2698  1.1  mrg       if (gimple_call_internal_p (call)
   2699  1.1  mrg 	  && internal_store_fn_p (gimple_call_internal_fn (call)))
   2700  1.1  mrg 	op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
   2701  1.1  mrg     }
   2702  1.1  mrg 
   2703  1.1  mrg   enum vect_def_type rhs_dt;
   2704  1.1  mrg   tree rhs_vectype;
   2705  1.1  mrg   slp_tree slp_op;
   2706  1.1  mrg   if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
   2707  1.1  mrg 			   &rhs, &slp_op, &rhs_dt, &rhs_vectype))
   2708  1.1  mrg     {
   2709  1.1  mrg       if (dump_enabled_p ())
   2710  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   2711  1.1  mrg 			 "use not simple.\n");
   2712  1.1  mrg       return false;
   2713  1.1  mrg     }
   2714  1.1  mrg 
   2715  1.1  mrg   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   2716  1.1  mrg   if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
   2717  1.1  mrg     {
   2718  1.1  mrg       if (dump_enabled_p ())
   2719  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   2720  1.1  mrg 			 "incompatible vector types.\n");
   2721  1.1  mrg       return false;
   2722  1.1  mrg     }
   2723  1.1  mrg 
   2724  1.1  mrg   *rhs_dt_out = rhs_dt;
   2725  1.1  mrg   *rhs_vectype_out = rhs_vectype;
   2726  1.1  mrg   if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
   2727  1.1  mrg     *vls_type_out = VLS_STORE_INVARIANT;
   2728  1.1  mrg   else
   2729  1.1  mrg     *vls_type_out = VLS_STORE;
   2730  1.1  mrg   return true;
   2731  1.1  mrg }
   2732  1.1  mrg 
   2733  1.1  mrg /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
   2734  1.1  mrg    Note that we support masks with floating-point type, in which case the
   2735  1.1  mrg    floats are interpreted as a bitmask.  */
   2736  1.1  mrg 
   2737  1.1  mrg static tree
   2738  1.1  mrg vect_build_all_ones_mask (vec_info *vinfo,
   2739  1.1  mrg 			  stmt_vec_info stmt_info, tree masktype)
   2740  1.1  mrg {
   2741  1.1  mrg   if (TREE_CODE (masktype) == INTEGER_TYPE)
   2742  1.1  mrg     return build_int_cst (masktype, -1);
   2743  1.1  mrg   else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
   2744  1.1  mrg     {
   2745  1.1  mrg       tree mask = build_int_cst (TREE_TYPE (masktype), -1);
   2746  1.1  mrg       mask = build_vector_from_val (masktype, mask);
   2747  1.1  mrg       return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
   2748  1.1  mrg     }
   2749  1.1  mrg   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
   2750  1.1  mrg     {
   2751  1.1  mrg       REAL_VALUE_TYPE r;
   2752  1.1  mrg       long tmp[6];
   2753  1.1  mrg       for (int j = 0; j < 6; ++j)
   2754  1.1  mrg 	tmp[j] = -1;
   2755  1.1  mrg       real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
   2756  1.1  mrg       tree mask = build_real (TREE_TYPE (masktype), r);
   2757  1.1  mrg       mask = build_vector_from_val (masktype, mask);
   2758  1.1  mrg       return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
   2759  1.1  mrg     }
   2760  1.1  mrg   gcc_unreachable ();
   2761  1.1  mrg }
   2762  1.1  mrg 
   2763  1.1  mrg /* Build an all-zero merge value of type VECTYPE while vectorizing
   2764  1.1  mrg    STMT_INFO as a gather load.  */
   2765  1.1  mrg 
   2766  1.1  mrg static tree
   2767  1.1  mrg vect_build_zero_merge_argument (vec_info *vinfo,
   2768  1.1  mrg 				stmt_vec_info stmt_info, tree vectype)
   2769  1.1  mrg {
   2770  1.1  mrg   tree merge;
   2771  1.1  mrg   if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
   2772  1.1  mrg     merge = build_int_cst (TREE_TYPE (vectype), 0);
   2773  1.1  mrg   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
   2774  1.1  mrg     {
   2775  1.1  mrg       REAL_VALUE_TYPE r;
   2776  1.1  mrg       long tmp[6];
   2777  1.1  mrg       for (int j = 0; j < 6; ++j)
   2778  1.1  mrg 	tmp[j] = 0;
   2779  1.1  mrg       real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
   2780  1.1  mrg       merge = build_real (TREE_TYPE (vectype), r);
   2781  1.1  mrg     }
   2782  1.1  mrg   else
   2783  1.1  mrg     gcc_unreachable ();
   2784  1.1  mrg   merge = build_vector_from_val (vectype, merge);
   2785  1.1  mrg   return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
   2786  1.1  mrg }
   2787  1.1  mrg 
/* Build a gather load call while vectorizing STMT_INFO.  Insert new
   instructions before GSI and add them to VEC_STMT.  GS_INFO describes
   the gather load operation (in particular the target builtin DECL to
   call).  If the load is conditional, MASK is the unvectorized
   condition, otherwise MASK is null.  */

static void
vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
			      gimple_stmt_iterator *gsi,
			      gimple **vec_stmt,
			      gather_scatter_info *gs_info,
			      tree mask)
{
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = vect_get_num_copies (loop_vinfo, vectype);
  edge pe = loop_preheader_edge (loop);
  enum { NARROW, NONE, WIDEN } modifier;
  poly_uint64 gather_off_nunits
    = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);

  /* Pick apart the prototype of the target builtin:
     rettype fn (srctype src, ptrtype ptr, idxtype idx,
		 masktype mask, scaletype scale).  */
  tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
  tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
  tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree scaletype = TREE_VALUE (arglist);
  /* Remember the mask type the builtin actually takes; MASKTYPE below
     may be replaced by the boolean vector type we compute masks in.  */
  tree real_masktype = masktype;
  gcc_checking_assert (types_compatible_p (srctype, rettype)
		       && (!mask
			   || TREE_CODE (masktype) == INTEGER_TYPE
			   || types_compatible_p (srctype, masktype)));
  if (mask)
    masktype = truth_type_for (srctype);

  tree mask_halftype = masktype;
  tree perm_mask = NULL_TREE;
  tree mask_perm_mask = NULL_TREE;
  /* Decide how the data vector length relates to the offset vector
     length; the builtin operates on GATHER_OFF_NUNITS elements.  */
  if (known_eq (nunits, gather_off_nunits))
    modifier = NONE;
  else if (known_eq (nunits * 2, gather_off_nunits))
    {
      modifier = WIDEN;

      /* Currently widening gathers and scatters are only supported for
	 fixed-length vectors.  */
      int count = gather_off_nunits.to_constant ();
      vec_perm_builder sel (count, count, 1);
      /* Permutation selecting the high half of the offset vector, used
	 for every odd-numbered copy.  */
      for (int i = 0; i < count; ++i)
	sel.quick_push (i | (count / 2));

      vec_perm_indices indices (sel, 1, count);
      perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
					      indices);
    }
  else if (known_eq (nunits, gather_off_nunits * 2))
    {
      modifier = NARROW;

      /* Currently narrowing gathers and scatters are only supported for
	 fixed-length vectors.  */
      int count = nunits.to_constant ();
      vec_perm_builder sel (count, count, 1);
      sel.quick_grow (count);
      /* Permutation concatenating the low halves of two gather
	 results into one data vector.  */
      for (int i = 0; i < count; ++i)
	sel[i] = i < count / 2 ? i : i + count / 2;
      vec_perm_indices indices (sel, 2, count);
      perm_mask = vect_gen_perm_mask_checked (vectype, indices);

      /* Each data vector needs two gather calls.  */
      ncopies *= 2;

      if (mask && VECTOR_TYPE_P (real_masktype))
	{
	  /* Permutation selecting the high half of the mask vector for
	     odd-numbered copies.  */
	  for (int i = 0; i < count; ++i)
	    sel[i] = i | (count / 2);
	  indices.new_vector (sel, 2, count);
	  mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
	}
      else if (mask)
	/* Scalar-integer builtin mask: unpack half of the boolean mask
	   vector per copy instead of permuting.  */
	mask_halftype = truth_type_for (gs_info->offset_vectype);
    }
  else
    gcc_unreachable ();

  tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
  tree vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Make the loop-invariant base pointer available; any statements
     needed to compute it go on the preheader edge.  */
  tree ptr = fold_convert (ptrtype, gs_info->base);
  if (!is_gimple_min_invariant (ptr))
    {
      gimple_seq seq;
      ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
      basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
      gcc_assert (!new_bb);
    }

  tree scale = build_int_cst (scaletype, gs_info->scale);

  tree vec_oprnd0 = NULL_TREE;
  tree vec_mask = NULL_TREE;
  tree src_op = NULL_TREE;
  tree mask_op = NULL_TREE;
  tree prev_res = NULL_TREE;

  /* An unconditional gather uses a zero merge value and an all-ones
     mask.  */
  if (!mask)
    {
      src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
      mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
    }

  /* Get the vectorized offsets (and masks); a WIDEN modifier halves
     the number of offset vectors needed, a NARROW modifier halves the
     number of mask vectors.  */
  auto_vec<tree> vec_oprnds0;
  auto_vec<tree> vec_masks;
  vect_get_vec_defs_for_operand (vinfo, stmt_info,
				 modifier == WIDEN ? ncopies / 2 : ncopies,
				 gs_info->offset, &vec_oprnds0);
  if (mask)
    vect_get_vec_defs_for_operand (vinfo, stmt_info,
				   modifier == NARROW ? ncopies / 2 : ncopies,
				   mask, &vec_masks, masktype);
  for (int j = 0; j < ncopies; ++j)
    {
      tree op, var;
      /* Select the offset vector for this copy; for WIDEN the odd
	 copies use the permuted (high) half of the previous one.  */
      if (modifier == WIDEN && (j & 1))
	op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
				   perm_mask, stmt_info, gsi);
      else
	op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];

      /* View-convert the offset to the index type the builtin expects;
	 the element counts must already agree.  */
      if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
	{
	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
				TYPE_VECTOR_SUBPARTS (idxtype)));
	  var = vect_get_new_ssa_name (idxtype, vect_simple_var);
	  op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
	  gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	  op = var;
	}

      if (mask)
	{
	  /* Select the mask vector for this copy, permuting or
	     reusing as dictated by the modifier.  */
	  if (mask_perm_mask && (j & 1))
	    mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
					    mask_perm_mask, stmt_info, gsi);
	  else
	    {
	      if (modifier == NARROW)
		{
		  /* Two consecutive copies share one mask vector.  */
		  if ((j & 1) == 0)
		    vec_mask = vec_masks[j / 2];
		}
	      else
		vec_mask = vec_masks[j];

	      mask_op = vec_mask;
	      /* View-convert the mask to MASKTYPE if needed.  */
	      if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
		{
		  poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
		  poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
		  gcc_assert (known_eq (sub1, sub2));
		  var = vect_get_new_ssa_name (masktype, vect_simple_var);
		  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
		  gassign *new_stmt
		    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
		  mask_op = var;
		}
	    }
	  /* For a scalar-integer builtin mask in the NARROW case,
	     unpack the low or high half of the boolean mask vector.  */
	  if (modifier == NARROW && !VECTOR_TYPE_P (real_masktype))
	    {
	      var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
	      gassign *new_stmt
		= gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
						    : VEC_UNPACK_LO_EXPR,
				       mask_op);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      mask_op = var;
	    }
	  src_op = mask_op;
	}

      /* Convert the mask to the type the builtin actually takes, going
	 through a same-sized unsigned integer if the modes differ.  */
      tree mask_arg = mask_op;
      if (masktype != real_masktype)
	{
	  tree utype, optype = TREE_TYPE (mask_op);
	  if (VECTOR_TYPE_P (real_masktype)
	      || TYPE_MODE (real_masktype) == TYPE_MODE (optype))
	    utype = real_masktype;
	  else
	    utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
	  var = vect_get_new_ssa_name (utype, vect_scalar_var);
	  mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
	  gassign *new_stmt
	    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	  mask_arg = var;
	  if (!useless_type_conversion_p (real_masktype, utype))
	    {
	      gcc_assert (TYPE_PRECISION (utype)
			  <= TYPE_PRECISION (real_masktype));
	      var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
	      new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      mask_arg = var;
	    }
	  src_op = build_zero_cst (srctype);
	}
      /* Emit the actual gather call.  */
      gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
					    mask_arg, scale);

      /* View-convert the result to VECTYPE if the builtin returns a
	 different (same-count) vector type.  */
      if (!useless_type_conversion_p (vectype, rettype))
	{
	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
				TYPE_VECTOR_SUBPARTS (rettype)));
	  op = vect_get_new_ssa_name (rettype, vect_simple_var);
	  gimple_call_set_lhs (new_stmt, op);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	  var = make_ssa_name (vec_dest);
	  op = build1 (VIEW_CONVERT_EXPR, vectype, op);
	  new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	}
      else
	{
	  var = make_ssa_name (vec_dest, new_stmt);
	  gimple_call_set_lhs (new_stmt, var);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	}

      if (modifier == NARROW)
	{
	  /* Combine pairs of gather results into one data vector;
	     even-numbered copies only record their result.  */
	  if ((j & 1) == 0)
	    {
	      prev_res = var;
	      continue;
	    }
	  var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
				      stmt_info, gsi);
	  new_stmt = SSA_NAME_DEF_STMT (var);
	}

      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
    }
  *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
}
   3036  1.1  mrg 
   3037  1.1  mrg /* Prepare the base and offset in GS_INFO for vectorization.
   3038  1.1  mrg    Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
   3039  1.1  mrg    to the vectorized offset argument for the first copy of STMT_INFO.
   3040  1.1  mrg    STMT_INFO is the statement described by GS_INFO and LOOP is the
   3041  1.1  mrg    containing loop.  */
   3042  1.1  mrg 
   3043  1.1  mrg static void
   3044  1.1  mrg vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
   3045  1.1  mrg 			     class loop *loop, stmt_vec_info stmt_info,
   3046  1.1  mrg 			     slp_tree slp_node, gather_scatter_info *gs_info,
   3047  1.1  mrg 			     tree *dataref_ptr, vec<tree> *vec_offset)
   3048  1.1  mrg {
   3049  1.1  mrg   gimple_seq stmts = NULL;
   3050  1.1  mrg   *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
   3051  1.1  mrg   if (stmts != NULL)
   3052  1.1  mrg     {
   3053  1.1  mrg       basic_block new_bb;
   3054  1.1  mrg       edge pe = loop_preheader_edge (loop);
   3055  1.1  mrg       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
   3056  1.1  mrg       gcc_assert (!new_bb);
   3057  1.1  mrg     }
   3058  1.1  mrg   if (slp_node)
   3059  1.1  mrg     vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
   3060  1.1  mrg   else
   3061  1.1  mrg     {
   3062  1.1  mrg       unsigned ncopies
   3063  1.1  mrg 	= vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
   3064  1.1  mrg       vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
   3065  1.1  mrg 				     gs_info->offset, vec_offset,
   3066  1.1  mrg 				     gs_info->offset_vectype);
   3067  1.1  mrg     }
   3068  1.1  mrg }
   3069  1.1  mrg 
   3070  1.1  mrg /* Prepare to implement a grouped or strided load or store using
   3071  1.1  mrg    the gather load or scatter store operation described by GS_INFO.
   3072  1.1  mrg    STMT_INFO is the load or store statement.
   3073  1.1  mrg 
   3074  1.1  mrg    Set *DATAREF_BUMP to the amount that should be added to the base
   3075  1.1  mrg    address after each copy of the vectorized statement.  Set *VEC_OFFSET
   3076  1.1  mrg    to an invariant offset vector in which element I has the value
   3077  1.1  mrg    I * DR_STEP / SCALE.  */
   3078  1.1  mrg 
   3079  1.1  mrg static void
   3080  1.1  mrg vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
   3081  1.1  mrg 				 loop_vec_info loop_vinfo,
   3082  1.1  mrg 				 gather_scatter_info *gs_info,
   3083  1.1  mrg 				 tree *dataref_bump, tree *vec_offset)
   3084  1.1  mrg {
   3085  1.1  mrg   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
   3086  1.1  mrg   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   3087  1.1  mrg 
   3088  1.1  mrg   tree bump = size_binop (MULT_EXPR,
   3089  1.1  mrg 			  fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
   3090  1.1  mrg 			  size_int (TYPE_VECTOR_SUBPARTS (vectype)));
   3091  1.1  mrg   *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
   3092  1.1  mrg 
   3093  1.1  mrg   /* The offset given in GS_INFO can have pointer type, so use the element
   3094  1.1  mrg      type of the vector instead.  */
   3095  1.1  mrg   tree offset_type = TREE_TYPE (gs_info->offset_vectype);
   3096  1.1  mrg 
   3097  1.1  mrg   /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
   3098  1.1  mrg   tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
   3099  1.1  mrg 			  ssize_int (gs_info->scale));
   3100  1.1  mrg   step = fold_convert (offset_type, step);
   3101  1.1  mrg 
   3102  1.1  mrg   /* Create {0, X, X*2, X*3, ...}.  */
   3103  1.1  mrg   tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
   3104  1.1  mrg 			     build_zero_cst (offset_type), step);
   3105  1.1  mrg   *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
   3106  1.1  mrg }
   3107  1.1  mrg 
   3108  1.1  mrg /* Return the amount that should be added to a vector pointer to move
   3109  1.1  mrg    to the next or previous copy of AGGR_TYPE.  DR_INFO is the data reference
   3110  1.1  mrg    being vectorized and MEMORY_ACCESS_TYPE describes the type of
   3111  1.1  mrg    vectorization.  */
   3112  1.1  mrg 
   3113  1.1  mrg static tree
   3114  1.1  mrg vect_get_data_ptr_increment (vec_info *vinfo,
   3115  1.1  mrg 			     dr_vec_info *dr_info, tree aggr_type,
   3116  1.1  mrg 			     vect_memory_access_type memory_access_type)
   3117  1.1  mrg {
   3118  1.1  mrg   if (memory_access_type == VMAT_INVARIANT)
   3119  1.1  mrg     return size_zero_node;
   3120  1.1  mrg 
   3121  1.1  mrg   tree iv_step = TYPE_SIZE_UNIT (aggr_type);
   3122  1.1  mrg   tree step = vect_dr_behavior (vinfo, dr_info)->step;
   3123  1.1  mrg   if (tree_int_cst_sgn (step) == -1)
   3124  1.1  mrg     iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
   3125  1.1  mrg   return iv_step;
   3126  1.1  mrg }
   3127  1.1  mrg 
   3128  1.1  mrg /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}.  */
   3129  1.1  mrg 
   3130  1.1  mrg static bool
   3131  1.1  mrg vectorizable_bswap (vec_info *vinfo,
   3132  1.1  mrg 		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   3133  1.1  mrg 		    gimple **vec_stmt, slp_tree slp_node,
   3134  1.1  mrg 		    slp_tree *slp_op,
   3135  1.1  mrg 		    tree vectype_in, stmt_vector_for_cost *cost_vec)
   3136  1.1  mrg {
   3137  1.1  mrg   tree op, vectype;
   3138  1.1  mrg   gcall *stmt = as_a <gcall *> (stmt_info->stmt);
   3139  1.1  mrg   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   3140  1.1  mrg   unsigned ncopies;
   3141  1.1  mrg 
   3142  1.1  mrg   op = gimple_call_arg (stmt, 0);
   3143  1.1  mrg   vectype = STMT_VINFO_VECTYPE (stmt_info);
   3144  1.1  mrg   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
   3145  1.1  mrg 
   3146  1.1  mrg   /* Multiple types in SLP are handled by creating the appropriate number of
   3147  1.1  mrg      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
   3148  1.1  mrg      case of SLP.  */
   3149  1.1  mrg   if (slp_node)
   3150  1.1  mrg     ncopies = 1;
   3151  1.1  mrg   else
   3152  1.1  mrg     ncopies = vect_get_num_copies (loop_vinfo, vectype);
   3153  1.1  mrg 
   3154  1.1  mrg   gcc_assert (ncopies >= 1);
   3155  1.1  mrg 
   3156  1.1  mrg   tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
   3157  1.1  mrg   if (! char_vectype)
   3158  1.1  mrg     return false;
   3159  1.1  mrg 
   3160  1.1  mrg   poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
   3161  1.1  mrg   unsigned word_bytes;
   3162  1.1  mrg   if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
   3163  1.1  mrg     return false;
   3164  1.1  mrg 
   3165  1.1  mrg   /* The encoding uses one stepped pattern for each byte in the word.  */
   3166  1.1  mrg   vec_perm_builder elts (num_bytes, word_bytes, 3);
   3167  1.1  mrg   for (unsigned i = 0; i < 3; ++i)
   3168  1.1  mrg     for (unsigned j = 0; j < word_bytes; ++j)
   3169  1.1  mrg       elts.quick_push ((i + 1) * word_bytes - j - 1);
   3170  1.1  mrg 
   3171  1.1  mrg   vec_perm_indices indices (elts, 1, num_bytes);
   3172  1.1  mrg   if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
   3173  1.1  mrg     return false;
   3174  1.1  mrg 
   3175  1.1  mrg   if (! vec_stmt)
   3176  1.1  mrg     {
   3177  1.1  mrg       if (slp_node
   3178  1.1  mrg 	  && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
   3179  1.1  mrg 	{
   3180  1.1  mrg 	  if (dump_enabled_p ())
   3181  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   3182  1.1  mrg 			     "incompatible vector types for invariants\n");
   3183  1.1  mrg 	  return false;
   3184  1.1  mrg 	}
   3185  1.1  mrg 
   3186  1.1  mrg       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
   3187  1.1  mrg       DUMP_VECT_SCOPE ("vectorizable_bswap");
   3188  1.1  mrg       record_stmt_cost (cost_vec,
   3189  1.1  mrg 			1, vector_stmt, stmt_info, 0, vect_prologue);
   3190  1.1  mrg       record_stmt_cost (cost_vec,
   3191  1.1  mrg 			slp_node
   3192  1.1  mrg 			? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
   3193  1.1  mrg 			vec_perm, stmt_info, 0, vect_body);
   3194  1.1  mrg       return true;
   3195  1.1  mrg     }
   3196  1.1  mrg 
   3197  1.1  mrg   tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
   3198  1.1  mrg 
   3199  1.1  mrg   /* Transform.  */
   3200  1.1  mrg   vec<tree> vec_oprnds = vNULL;
   3201  1.1  mrg   vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
   3202  1.1  mrg 		     op, &vec_oprnds);
   3203  1.1  mrg   /* Arguments are ready. create the new vector stmt.  */
   3204  1.1  mrg   unsigned i;
   3205  1.1  mrg   tree vop;
   3206  1.1  mrg   FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
   3207  1.1  mrg     {
   3208  1.1  mrg       gimple *new_stmt;
   3209  1.1  mrg       tree tem = make_ssa_name (char_vectype);
   3210  1.1  mrg       new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
   3211  1.1  mrg 						   char_vectype, vop));
   3212  1.1  mrg       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   3213  1.1  mrg       tree tem2 = make_ssa_name (char_vectype);
   3214  1.1  mrg       new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
   3215  1.1  mrg 				      tem, tem, bswap_vconst);
   3216  1.1  mrg       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   3217  1.1  mrg       tem = make_ssa_name (vectype);
   3218  1.1  mrg       new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
   3219  1.1  mrg 						   vectype, tem2));
   3220  1.1  mrg       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   3221  1.1  mrg       if (slp_node)
   3222  1.1  mrg 	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
   3223  1.1  mrg       else
   3224  1.1  mrg 	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
   3225  1.1  mrg     }
   3226  1.1  mrg 
   3227  1.1  mrg   if (!slp_node)
   3228  1.1  mrg     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
   3229  1.1  mrg 
   3230  1.1  mrg   vec_oprnds.release ();
   3231  1.1  mrg   return true;
   3232  1.1  mrg }
   3233  1.1  mrg 
   3234  1.1  mrg /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
   3235  1.1  mrg    integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
   3236  1.1  mrg    in a single step.  On success, store the binary pack code in
   3237  1.1  mrg    *CONVERT_CODE.  */
   3238  1.1  mrg 
   3239  1.1  mrg static bool
   3240  1.1  mrg simple_integer_narrowing (tree vectype_out, tree vectype_in,
   3241  1.1  mrg 			  tree_code *convert_code)
   3242  1.1  mrg {
   3243  1.1  mrg   if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
   3244  1.1  mrg       || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
   3245  1.1  mrg     return false;
   3246  1.1  mrg 
   3247  1.1  mrg   tree_code code;
   3248  1.1  mrg   int multi_step_cvt = 0;
   3249  1.1  mrg   auto_vec <tree, 8> interm_types;
   3250  1.1  mrg   if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
   3251  1.1  mrg 					&code, &multi_step_cvt, &interm_types)
   3252  1.1  mrg       || multi_step_cvt)
   3253  1.1  mrg     return false;
   3254  1.1  mrg 
   3255  1.1  mrg   *convert_code = code;
   3256  1.1  mrg   return true;
   3257  1.1  mrg }
   3258  1.1  mrg 
   3259  1.1  mrg /* Function vectorizable_call.
   3260  1.1  mrg 
   3261  1.1  mrg    Check if STMT_INFO performs a function call that can be vectorized.
   3262  1.1  mrg    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   3263  1.1  mrg    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   3264  1.1  mrg    Return true if STMT_INFO is vectorizable in this way.  */
   3265  1.1  mrg 
   3266  1.1  mrg static bool
   3267  1.1  mrg vectorizable_call (vec_info *vinfo,
   3268  1.1  mrg 		   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   3269  1.1  mrg 		   gimple **vec_stmt, slp_tree slp_node,
   3270  1.1  mrg 		   stmt_vector_for_cost *cost_vec)
   3271  1.1  mrg {
   3272  1.1  mrg   gcall *stmt;
   3273  1.1  mrg   tree vec_dest;
   3274  1.1  mrg   tree scalar_dest;
   3275  1.1  mrg   tree op;
   3276  1.1  mrg   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
   3277  1.1  mrg   tree vectype_out, vectype_in;
   3278  1.1  mrg   poly_uint64 nunits_in;
   3279  1.1  mrg   poly_uint64 nunits_out;
   3280  1.1  mrg   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   3281  1.1  mrg   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
   3282  1.1  mrg   tree fndecl, new_temp, rhs_type;
   3283  1.1  mrg   enum vect_def_type dt[4]
   3284  1.1  mrg     = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
   3285  1.1  mrg 	vect_unknown_def_type };
   3286  1.1  mrg   tree vectypes[ARRAY_SIZE (dt)] = {};
   3287  1.1  mrg   slp_tree slp_op[ARRAY_SIZE (dt)] = {};
   3288  1.1  mrg   int ndts = ARRAY_SIZE (dt);
   3289  1.1  mrg   int ncopies, j;
   3290  1.1  mrg   auto_vec<tree, 8> vargs;
   3291  1.1  mrg   enum { NARROW, NONE, WIDEN } modifier;
   3292  1.1  mrg   size_t i, nargs;
   3293  1.1  mrg   tree lhs;
   3294  1.1  mrg 
   3295  1.1  mrg   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
   3296  1.1  mrg     return false;
   3297  1.1  mrg 
   3298  1.1  mrg   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
   3299  1.1  mrg       && ! vec_stmt)
   3300  1.1  mrg     return false;
   3301  1.1  mrg 
   3302  1.1  mrg   /* Is STMT_INFO a vectorizable call?   */
   3303  1.1  mrg   stmt = dyn_cast <gcall *> (stmt_info->stmt);
   3304  1.1  mrg   if (!stmt)
   3305  1.1  mrg     return false;
   3306  1.1  mrg 
   3307  1.1  mrg   if (gimple_call_internal_p (stmt)
   3308  1.1  mrg       && (internal_load_fn_p (gimple_call_internal_fn (stmt))
   3309  1.1  mrg 	  || internal_store_fn_p (gimple_call_internal_fn (stmt))))
   3310  1.1  mrg     /* Handled by vectorizable_load and vectorizable_store.  */
   3311  1.1  mrg     return false;
   3312  1.1  mrg 
   3313  1.1  mrg   if (gimple_call_lhs (stmt) == NULL_TREE
   3314  1.1  mrg       || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
   3315  1.1  mrg     return false;
   3316  1.1  mrg 
   3317  1.1  mrg   gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
   3318  1.1  mrg 
   3319  1.1  mrg   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
   3320  1.1  mrg 
   3321  1.1  mrg   /* Process function arguments.  */
   3322  1.1  mrg   rhs_type = NULL_TREE;
   3323  1.1  mrg   vectype_in = NULL_TREE;
   3324  1.1  mrg   nargs = gimple_call_num_args (stmt);
   3325  1.1  mrg 
   3326  1.1  mrg   /* Bail out if the function has more than four arguments, we do not have
   3327  1.1  mrg      interesting builtin functions to vectorize with more than two arguments
   3328  1.1  mrg      except for fma.  No arguments is also not good.  */
   3329  1.1  mrg   if (nargs == 0 || nargs > 4)
   3330  1.1  mrg     return false;
   3331  1.1  mrg 
   3332  1.1  mrg   /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic.  */
   3333  1.1  mrg   combined_fn cfn = gimple_call_combined_fn (stmt);
   3334  1.1  mrg   if (cfn == CFN_GOMP_SIMD_LANE)
   3335  1.1  mrg     {
   3336  1.1  mrg       nargs = 0;
   3337  1.1  mrg       rhs_type = unsigned_type_node;
   3338  1.1  mrg     }
   3339  1.1  mrg 
   3340  1.1  mrg   int mask_opno = -1;
   3341  1.1  mrg   if (internal_fn_p (cfn))
   3342  1.1  mrg     mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
   3343  1.1  mrg 
   3344  1.1  mrg   for (i = 0; i < nargs; i++)
   3345  1.1  mrg     {
   3346  1.1  mrg       if ((int) i == mask_opno)
   3347  1.1  mrg 	{
   3348  1.1  mrg 	  if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
   3349  1.1  mrg 				       &op, &slp_op[i], &dt[i], &vectypes[i]))
   3350  1.1  mrg 	    return false;
   3351  1.1  mrg 	  continue;
   3352  1.1  mrg 	}
   3353  1.1  mrg 
   3354  1.1  mrg       if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
   3355  1.1  mrg 			       i, &op, &slp_op[i], &dt[i], &vectypes[i]))
   3356  1.1  mrg 	{
   3357  1.1  mrg 	  if (dump_enabled_p ())
   3358  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   3359  1.1  mrg 			     "use not simple.\n");
   3360  1.1  mrg 	  return false;
   3361  1.1  mrg 	}
   3362  1.1  mrg 
   3363  1.1  mrg       /* We can only handle calls with arguments of the same type.  */
   3364  1.1  mrg       if (rhs_type
   3365  1.1  mrg 	  && !types_compatible_p (rhs_type, TREE_TYPE (op)))
   3366  1.1  mrg 	{
   3367  1.1  mrg 	  if (dump_enabled_p ())
   3368  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   3369  1.1  mrg                              "argument types differ.\n");
   3370  1.1  mrg 	  return false;
   3371  1.1  mrg 	}
   3372  1.1  mrg       if (!rhs_type)
   3373  1.1  mrg 	rhs_type = TREE_TYPE (op);
   3374  1.1  mrg 
   3375  1.1  mrg       if (!vectype_in)
   3376  1.1  mrg 	vectype_in = vectypes[i];
   3377  1.1  mrg       else if (vectypes[i]
   3378  1.1  mrg 	       && !types_compatible_p (vectypes[i], vectype_in))
   3379  1.1  mrg 	{
   3380  1.1  mrg 	  if (dump_enabled_p ())
   3381  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   3382  1.1  mrg                              "argument vector types differ.\n");
   3383  1.1  mrg 	  return false;
   3384  1.1  mrg 	}
   3385  1.1  mrg     }
   3386  1.1  mrg   /* If all arguments are external or constant defs, infer the vector type
   3387  1.1  mrg      from the scalar type.  */
   3388  1.1  mrg   if (!vectype_in)
   3389  1.1  mrg     vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
   3390  1.1  mrg   if (vec_stmt)
   3391  1.1  mrg     gcc_assert (vectype_in);
   3392  1.1  mrg   if (!vectype_in)
   3393  1.1  mrg     {
   3394  1.1  mrg       if (dump_enabled_p ())
   3395  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   3396  1.1  mrg 			 "no vectype for scalar type %T\n", rhs_type);
   3397  1.1  mrg 
   3398  1.1  mrg       return false;
   3399  1.1  mrg     }
   3400  1.1  mrg   /* FORNOW: we don't yet support mixtures of vector sizes for calls,
   3401  1.1  mrg      just mixtures of nunits.  E.g. DI->SI versions of __builtin_ctz*
   3402  1.1  mrg      are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
   3403  1.1  mrg      by a pack of the two vectors into an SI vector.  We would need
   3404  1.1  mrg      separate code to handle direct VnDI->VnSI IFN_CTZs.  */
   3405  1.1  mrg   if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
   3406  1.1  mrg     {
   3407  1.1  mrg       if (dump_enabled_p ())
   3408  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   3409  1.1  mrg 			 "mismatched vector sizes %T and %T\n",
   3410  1.1  mrg 			 vectype_in, vectype_out);
   3411  1.1  mrg       return false;
   3412  1.1  mrg     }
   3413  1.1  mrg 
   3414  1.1  mrg   if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
   3415  1.1  mrg       != VECTOR_BOOLEAN_TYPE_P (vectype_in))
   3416  1.1  mrg     {
   3417  1.1  mrg       if (dump_enabled_p ())
   3418  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   3419  1.1  mrg 			 "mixed mask and nonmask vector types\n");
   3420  1.1  mrg       return false;
   3421  1.1  mrg     }
   3422  1.1  mrg 
   3423  1.1  mrg   if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out))
   3424  1.1  mrg   {
   3425  1.1  mrg       if (dump_enabled_p ())
   3426  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   3427  1.1  mrg 			 "use emulated vector type for call\n");
   3428  1.1  mrg       return false;
   3429  1.1  mrg   }
   3430  1.1  mrg 
   3431  1.1  mrg   /* FORNOW */
   3432  1.1  mrg   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
   3433  1.1  mrg   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
   3434  1.1  mrg   if (known_eq (nunits_in * 2, nunits_out))
   3435  1.1  mrg     modifier = NARROW;
   3436  1.1  mrg   else if (known_eq (nunits_out, nunits_in))
   3437  1.1  mrg     modifier = NONE;
   3438  1.1  mrg   else if (known_eq (nunits_out * 2, nunits_in))
   3439  1.1  mrg     modifier = WIDEN;
   3440  1.1  mrg   else
   3441  1.1  mrg     return false;
   3442  1.1  mrg 
   3443  1.1  mrg   /* We only handle functions that do not read or clobber memory.  */
   3444  1.1  mrg   if (gimple_vuse (stmt))
   3445  1.1  mrg     {
   3446  1.1  mrg       if (dump_enabled_p ())
   3447  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   3448  1.1  mrg 			 "function reads from or writes to memory.\n");
   3449  1.1  mrg       return false;
   3450  1.1  mrg     }
   3451  1.1  mrg 
   3452  1.1  mrg   /* For now, we only vectorize functions if a target specific builtin
   3453  1.1  mrg      is available.  TODO -- in some cases, it might be profitable to
   3454  1.1  mrg      insert the calls for pieces of the vector, in order to be able
   3455  1.1  mrg      to vectorize other operations in the loop.  */
   3456  1.1  mrg   fndecl = NULL_TREE;
   3457  1.1  mrg   internal_fn ifn = IFN_LAST;
   3458  1.1  mrg   tree callee = gimple_call_fndecl (stmt);
   3459  1.1  mrg 
   3460  1.1  mrg   /* First try using an internal function.  */
   3461  1.1  mrg   tree_code convert_code = ERROR_MARK;
   3462  1.1  mrg   if (cfn != CFN_LAST
   3463  1.1  mrg       && (modifier == NONE
   3464  1.1  mrg 	  || (modifier == NARROW
   3465  1.1  mrg 	      && simple_integer_narrowing (vectype_out, vectype_in,
   3466  1.1  mrg 					   &convert_code))))
   3467  1.1  mrg     ifn = vectorizable_internal_function (cfn, callee, vectype_out,
   3468  1.1  mrg 					  vectype_in);
   3469  1.1  mrg 
   3470  1.1  mrg   /* If that fails, try asking for a target-specific built-in function.  */
   3471  1.1  mrg   if (ifn == IFN_LAST)
   3472  1.1  mrg     {
   3473  1.1  mrg       if (cfn != CFN_LAST)
   3474  1.1  mrg 	fndecl = targetm.vectorize.builtin_vectorized_function
   3475  1.1  mrg 	  (cfn, vectype_out, vectype_in);
   3476  1.1  mrg       else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
   3477  1.1  mrg 	fndecl = targetm.vectorize.builtin_md_vectorized_function
   3478  1.1  mrg 	  (callee, vectype_out, vectype_in);
   3479  1.1  mrg     }
   3480  1.1  mrg 
   3481  1.1  mrg   if (ifn == IFN_LAST && !fndecl)
   3482  1.1  mrg     {
   3483  1.1  mrg       if (cfn == CFN_GOMP_SIMD_LANE
   3484  1.1  mrg 	  && !slp_node
   3485  1.1  mrg 	  && loop_vinfo
   3486  1.1  mrg 	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
   3487  1.1  mrg 	  && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
   3488  1.1  mrg 	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
   3489  1.1  mrg 	     == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
   3490  1.1  mrg 	{
   3491  1.1  mrg 	  /* We can handle IFN_GOMP_SIMD_LANE by returning a
   3492  1.1  mrg 	     { 0, 1, 2, ... vf - 1 } vector.  */
   3493  1.1  mrg 	  gcc_assert (nargs == 0);
   3494  1.1  mrg 	}
   3495  1.1  mrg       else if (modifier == NONE
   3496  1.1  mrg 	       && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
   3497  1.1  mrg 		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
   3498  1.1  mrg 		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
   3499  1.1  mrg 		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
   3500  1.1  mrg 	return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
   3501  1.1  mrg 				   slp_op, vectype_in, cost_vec);
   3502  1.1  mrg       else
   3503  1.1  mrg 	{
   3504  1.1  mrg 	  if (dump_enabled_p ())
   3505  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   3506  1.1  mrg 			     "function is not vectorizable.\n");
   3507  1.1  mrg 	  return false;
   3508  1.1  mrg 	}
   3509  1.1  mrg     }
   3510  1.1  mrg 
   3511  1.1  mrg   if (slp_node)
   3512  1.1  mrg     ncopies = 1;
   3513  1.1  mrg   else if (modifier == NARROW && ifn == IFN_LAST)
   3514  1.1  mrg     ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
   3515  1.1  mrg   else
   3516  1.1  mrg     ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
   3517  1.1  mrg 
   3518  1.1  mrg   /* Sanity check: make sure that at least one copy of the vectorized stmt
   3519  1.1  mrg      needs to be generated.  */
   3520  1.1  mrg   gcc_assert (ncopies >= 1);
   3521  1.1  mrg 
   3522  1.1  mrg   int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
   3523  1.1  mrg   internal_fn cond_fn = get_conditional_internal_fn (ifn);
   3524  1.1  mrg   vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
   3525  1.1  mrg   if (!vec_stmt) /* transformation not required.  */
   3526  1.1  mrg     {
   3527  1.1  mrg       if (slp_node)
   3528  1.1  mrg 	for (i = 0; i < nargs; ++i)
   3529  1.1  mrg 	  if (!vect_maybe_update_slp_op_vectype (slp_op[i],
   3530  1.1  mrg 						 vectypes[i]
   3531  1.1  mrg 						 ? vectypes[i] : vectype_in))
   3532  1.1  mrg 	    {
   3533  1.1  mrg 	      if (dump_enabled_p ())
   3534  1.1  mrg 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   3535  1.1  mrg 				 "incompatible vector types for invariants\n");
   3536  1.1  mrg 	      return false;
   3537  1.1  mrg 	    }
   3538  1.1  mrg       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
   3539  1.1  mrg       DUMP_VECT_SCOPE ("vectorizable_call");
   3540  1.1  mrg       vect_model_simple_cost (vinfo, stmt_info,
   3541  1.1  mrg 			      ncopies, dt, ndts, slp_node, cost_vec);
   3542  1.1  mrg       if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
   3543  1.1  mrg 	record_stmt_cost (cost_vec, ncopies / 2,
   3544  1.1  mrg 			  vec_promote_demote, stmt_info, 0, vect_body);
   3545  1.1  mrg 
   3546  1.1  mrg       if (loop_vinfo
   3547  1.1  mrg 	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
   3548  1.1  mrg 	  && (reduc_idx >= 0 || mask_opno >= 0))
   3549  1.1  mrg 	{
   3550  1.1  mrg 	  if (reduc_idx >= 0
   3551  1.1  mrg 	      && (cond_fn == IFN_LAST
   3552  1.1  mrg 		  || !direct_internal_fn_supported_p (cond_fn, vectype_out,
   3553  1.1  mrg 						      OPTIMIZE_FOR_SPEED)))
   3554  1.1  mrg 	    {
   3555  1.1  mrg 	      if (dump_enabled_p ())
   3556  1.1  mrg 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   3557  1.1  mrg 				 "can't use a fully-masked loop because no"
   3558  1.1  mrg 				 " conditional operation is available.\n");
   3559  1.1  mrg 	      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
   3560  1.1  mrg 	    }
   3561  1.1  mrg 	  else
   3562  1.1  mrg 	    {
   3563  1.1  mrg 	      unsigned int nvectors
   3564  1.1  mrg 		= (slp_node
   3565  1.1  mrg 		   ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
   3566  1.1  mrg 		   : ncopies);
   3567  1.1  mrg 	      tree scalar_mask = NULL_TREE;
   3568  1.1  mrg 	      if (mask_opno >= 0)
   3569  1.1  mrg 		scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
   3570  1.1  mrg 	      vect_record_loop_mask (loop_vinfo, masks, nvectors,
   3571  1.1  mrg 				     vectype_out, scalar_mask);
   3572  1.1  mrg 	    }
   3573  1.1  mrg 	}
   3574  1.1  mrg       return true;
   3575  1.1  mrg     }
   3576  1.1  mrg 
   3577  1.1  mrg   /* Transform.  */
   3578  1.1  mrg 
   3579  1.1  mrg   if (dump_enabled_p ())
   3580  1.1  mrg     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
   3581  1.1  mrg 
   3582  1.1  mrg   /* Handle def.  */
   3583  1.1  mrg   scalar_dest = gimple_call_lhs (stmt);
   3584  1.1  mrg   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
   3585  1.1  mrg 
   3586  1.1  mrg   bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
   3587  1.1  mrg   unsigned int vect_nargs = nargs;
   3588  1.1  mrg   if (masked_loop_p && reduc_idx >= 0)
   3589  1.1  mrg     {
   3590  1.1  mrg       ifn = cond_fn;
   3591  1.1  mrg       vect_nargs += 2;
   3592  1.1  mrg     }
   3593  1.1  mrg 
   3594  1.1  mrg   if (modifier == NONE || ifn != IFN_LAST)
   3595  1.1  mrg     {
   3596  1.1  mrg       tree prev_res = NULL_TREE;
   3597  1.1  mrg       vargs.safe_grow (vect_nargs, true);
   3598  1.1  mrg       auto_vec<vec<tree> > vec_defs (nargs);
   3599  1.1  mrg       for (j = 0; j < ncopies; ++j)
   3600  1.1  mrg 	{
   3601  1.1  mrg 	  /* Build argument list for the vectorized call.  */
   3602  1.1  mrg 	  if (slp_node)
   3603  1.1  mrg 	    {
   3604  1.1  mrg 	      vec<tree> vec_oprnds0;
   3605  1.1  mrg 
   3606  1.1  mrg 	      vect_get_slp_defs (vinfo, slp_node, &vec_defs);
   3607  1.1  mrg 	      vec_oprnds0 = vec_defs[0];
   3608  1.1  mrg 
   3609  1.1  mrg 	      /* Arguments are ready.  Create the new vector stmt.  */
   3610  1.1  mrg 	      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
   3611  1.1  mrg 		{
   3612  1.1  mrg 		  int varg = 0;
   3613  1.1  mrg 		  if (masked_loop_p && reduc_idx >= 0)
   3614  1.1  mrg 		    {
   3615  1.1  mrg 		      unsigned int vec_num = vec_oprnds0.length ();
   3616  1.1  mrg 		      /* Always true for SLP.  */
   3617  1.1  mrg 		      gcc_assert (ncopies == 1);
   3618  1.1  mrg 		      vargs[varg++] = vect_get_loop_mask (gsi, masks, vec_num,
   3619  1.1  mrg 							  vectype_out, i);
   3620  1.1  mrg 		    }
   3621  1.1  mrg 		  size_t k;
   3622  1.1  mrg 		  for (k = 0; k < nargs; k++)
   3623  1.1  mrg 		    {
   3624  1.1  mrg 		      vec<tree> vec_oprndsk = vec_defs[k];
   3625  1.1  mrg 		      vargs[varg++] = vec_oprndsk[i];
   3626  1.1  mrg 		    }
   3627  1.1  mrg 		  if (masked_loop_p && reduc_idx >= 0)
   3628  1.1  mrg 		    vargs[varg++] = vargs[reduc_idx + 1];
   3629  1.1  mrg 		  gimple *new_stmt;
   3630  1.1  mrg 		  if (modifier == NARROW)
   3631  1.1  mrg 		    {
   3632  1.1  mrg 		      /* We don't define any narrowing conditional functions
   3633  1.1  mrg 			 at present.  */
   3634  1.1  mrg 		      gcc_assert (mask_opno < 0);
   3635  1.1  mrg 		      tree half_res = make_ssa_name (vectype_in);
   3636  1.1  mrg 		      gcall *call
   3637  1.1  mrg 			= gimple_build_call_internal_vec (ifn, vargs);
   3638  1.1  mrg 		      gimple_call_set_lhs (call, half_res);
   3639  1.1  mrg 		      gimple_call_set_nothrow (call, true);
   3640  1.1  mrg 		      vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
   3641  1.1  mrg 		      if ((i & 1) == 0)
   3642  1.1  mrg 			{
   3643  1.1  mrg 			  prev_res = half_res;
   3644  1.1  mrg 			  continue;
   3645  1.1  mrg 			}
   3646  1.1  mrg 		      new_temp = make_ssa_name (vec_dest);
   3647  1.1  mrg 		      new_stmt = gimple_build_assign (new_temp, convert_code,
   3648  1.1  mrg 						      prev_res, half_res);
   3649  1.1  mrg 		      vect_finish_stmt_generation (vinfo, stmt_info,
   3650  1.1  mrg 						   new_stmt, gsi);
   3651  1.1  mrg 		    }
   3652  1.1  mrg 		  else
   3653  1.1  mrg 		    {
   3654  1.1  mrg 		      if (mask_opno >= 0 && masked_loop_p)
   3655  1.1  mrg 			{
   3656  1.1  mrg 			  unsigned int vec_num = vec_oprnds0.length ();
   3657  1.1  mrg 			  /* Always true for SLP.  */
   3658  1.1  mrg 			  gcc_assert (ncopies == 1);
   3659  1.1  mrg 			  tree mask = vect_get_loop_mask (gsi, masks, vec_num,
   3660  1.1  mrg 							  vectype_out, i);
   3661  1.1  mrg 			  vargs[mask_opno] = prepare_vec_mask
   3662  1.1  mrg 			    (loop_vinfo, TREE_TYPE (mask), mask,
   3663  1.1  mrg 			     vargs[mask_opno], gsi);
   3664  1.1  mrg 			}
   3665  1.1  mrg 
   3666  1.1  mrg 		      gcall *call;
   3667  1.1  mrg 		      if (ifn != IFN_LAST)
   3668  1.1  mrg 			call = gimple_build_call_internal_vec (ifn, vargs);
   3669  1.1  mrg 		      else
   3670  1.1  mrg 			call = gimple_build_call_vec (fndecl, vargs);
   3671  1.1  mrg 		      new_temp = make_ssa_name (vec_dest, call);
   3672  1.1  mrg 		      gimple_call_set_lhs (call, new_temp);
   3673  1.1  mrg 		      gimple_call_set_nothrow (call, true);
   3674  1.1  mrg 		      vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
   3675  1.1  mrg 		      new_stmt = call;
   3676  1.1  mrg 		    }
   3677  1.1  mrg 		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
   3678  1.1  mrg 		}
   3679  1.1  mrg 	      continue;
   3680  1.1  mrg 	    }
   3681  1.1  mrg 
   3682  1.1  mrg 	  int varg = 0;
   3683  1.1  mrg 	  if (masked_loop_p && reduc_idx >= 0)
   3684  1.1  mrg 	    vargs[varg++] = vect_get_loop_mask (gsi, masks, ncopies,
   3685  1.1  mrg 						vectype_out, j);
   3686  1.1  mrg 	  for (i = 0; i < nargs; i++)
   3687  1.1  mrg 	    {
   3688  1.1  mrg 	      op = gimple_call_arg (stmt, i);
   3689  1.1  mrg 	      if (j == 0)
   3690  1.1  mrg 		{
   3691  1.1  mrg 		  vec_defs.quick_push (vNULL);
   3692  1.1  mrg 		  vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
   3693  1.1  mrg 						 op, &vec_defs[i],
   3694  1.1  mrg 						 vectypes[i]);
   3695  1.1  mrg 		}
   3696  1.1  mrg 	      vargs[varg++] = vec_defs[i][j];
   3697  1.1  mrg 	    }
   3698  1.1  mrg 	  if (masked_loop_p && reduc_idx >= 0)
   3699  1.1  mrg 	    vargs[varg++] = vargs[reduc_idx + 1];
   3700  1.1  mrg 
   3701  1.1  mrg 	  if (mask_opno >= 0 && masked_loop_p)
   3702  1.1  mrg 	    {
   3703  1.1  mrg 	      tree mask = vect_get_loop_mask (gsi, masks, ncopies,
   3704  1.1  mrg 					      vectype_out, j);
   3705  1.1  mrg 	      vargs[mask_opno]
   3706  1.1  mrg 		= prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
   3707  1.1  mrg 				    vargs[mask_opno], gsi);
   3708  1.1  mrg 	    }
   3709  1.1  mrg 
   3710  1.1  mrg 	  gimple *new_stmt;
   3711  1.1  mrg 	  if (cfn == CFN_GOMP_SIMD_LANE)
   3712  1.1  mrg 	    {
   3713  1.1  mrg 	      tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
   3714  1.1  mrg 	      tree new_var
   3715  1.1  mrg 		= vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
   3716  1.1  mrg 	      gimple *init_stmt = gimple_build_assign (new_var, cst);
   3717  1.1  mrg 	      vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
   3718  1.1  mrg 	      new_temp = make_ssa_name (vec_dest);
   3719  1.1  mrg 	      new_stmt = gimple_build_assign (new_temp, new_var);
   3720  1.1  mrg 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   3721  1.1  mrg 	    }
   3722  1.1  mrg 	  else if (modifier == NARROW)
   3723  1.1  mrg 	    {
   3724  1.1  mrg 	      /* We don't define any narrowing conditional functions at
   3725  1.1  mrg 		 present.  */
   3726  1.1  mrg 	      gcc_assert (mask_opno < 0);
   3727  1.1  mrg 	      tree half_res = make_ssa_name (vectype_in);
   3728  1.1  mrg 	      gcall *call = gimple_build_call_internal_vec (ifn, vargs);
   3729  1.1  mrg 	      gimple_call_set_lhs (call, half_res);
   3730  1.1  mrg 	      gimple_call_set_nothrow (call, true);
   3731  1.1  mrg 	      vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
   3732  1.1  mrg 	      if ((j & 1) == 0)
   3733  1.1  mrg 		{
   3734  1.1  mrg 		  prev_res = half_res;
   3735  1.1  mrg 		  continue;
   3736  1.1  mrg 		}
   3737  1.1  mrg 	      new_temp = make_ssa_name (vec_dest);
   3738  1.1  mrg 	      new_stmt = gimple_build_assign (new_temp, convert_code,
   3739  1.1  mrg 					      prev_res, half_res);
   3740  1.1  mrg 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   3741  1.1  mrg 	    }
   3742  1.1  mrg 	  else
   3743  1.1  mrg 	    {
   3744  1.1  mrg 	      gcall *call;
   3745  1.1  mrg 	      if (ifn != IFN_LAST)
   3746  1.1  mrg 		call = gimple_build_call_internal_vec (ifn, vargs);
   3747  1.1  mrg 	      else
   3748  1.1  mrg 		call = gimple_build_call_vec (fndecl, vargs);
   3749  1.1  mrg 	      new_temp = make_ssa_name (vec_dest, call);
   3750  1.1  mrg 	      gimple_call_set_lhs (call, new_temp);
   3751  1.1  mrg 	      gimple_call_set_nothrow (call, true);
   3752  1.1  mrg 	      vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
   3753  1.1  mrg 	      new_stmt = call;
   3754  1.1  mrg 	    }
   3755  1.1  mrg 
   3756  1.1  mrg 	  if (j == (modifier == NARROW ? 1 : 0))
   3757  1.1  mrg 	    *vec_stmt = new_stmt;
   3758  1.1  mrg 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
   3759  1.1  mrg 	}
   3760  1.1  mrg       for (i = 0; i < nargs; i++)
   3761  1.1  mrg 	{
   3762  1.1  mrg 	  vec<tree> vec_oprndsi = vec_defs[i];
   3763  1.1  mrg 	  vec_oprndsi.release ();
   3764  1.1  mrg 	}
   3765  1.1  mrg     }
   3766  1.1  mrg   else if (modifier == NARROW)
   3767  1.1  mrg     {
   3768  1.1  mrg       auto_vec<vec<tree> > vec_defs (nargs);
   3769  1.1  mrg       /* We don't define any narrowing conditional functions at present.  */
   3770  1.1  mrg       gcc_assert (mask_opno < 0);
   3771  1.1  mrg       for (j = 0; j < ncopies; ++j)
   3772  1.1  mrg 	{
   3773  1.1  mrg 	  /* Build argument list for the vectorized call.  */
   3774  1.1  mrg 	  if (j == 0)
   3775  1.1  mrg 	    vargs.create (nargs * 2);
   3776  1.1  mrg 	  else
   3777  1.1  mrg 	    vargs.truncate (0);
   3778  1.1  mrg 
   3779  1.1  mrg 	  if (slp_node)
   3780  1.1  mrg 	    {
   3781  1.1  mrg 	      vec<tree> vec_oprnds0;
   3782  1.1  mrg 
   3783  1.1  mrg 	      vect_get_slp_defs (vinfo, slp_node, &vec_defs);
   3784  1.1  mrg 	      vec_oprnds0 = vec_defs[0];
   3785  1.1  mrg 
   3786  1.1  mrg 	      /* Arguments are ready.  Create the new vector stmt.  */
   3787  1.1  mrg 	      for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
   3788  1.1  mrg 		{
   3789  1.1  mrg 		  size_t k;
   3790  1.1  mrg 		  vargs.truncate (0);
   3791  1.1  mrg 		  for (k = 0; k < nargs; k++)
   3792  1.1  mrg 		    {
   3793  1.1  mrg 		      vec<tree> vec_oprndsk = vec_defs[k];
   3794  1.1  mrg 		      vargs.quick_push (vec_oprndsk[i]);
   3795  1.1  mrg 		      vargs.quick_push (vec_oprndsk[i + 1]);
   3796  1.1  mrg 		    }
   3797  1.1  mrg 		  gcall *call;
   3798  1.1  mrg 		  if (ifn != IFN_LAST)
   3799  1.1  mrg 		    call = gimple_build_call_internal_vec (ifn, vargs);
   3800  1.1  mrg 		  else
   3801  1.1  mrg 		    call = gimple_build_call_vec (fndecl, vargs);
   3802  1.1  mrg 		  new_temp = make_ssa_name (vec_dest, call);
   3803  1.1  mrg 		  gimple_call_set_lhs (call, new_temp);
   3804  1.1  mrg 		  gimple_call_set_nothrow (call, true);
   3805  1.1  mrg 		  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
   3806  1.1  mrg 		  SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
   3807  1.1  mrg 		}
   3808  1.1  mrg 	      continue;
   3809  1.1  mrg 	    }
   3810  1.1  mrg 
   3811  1.1  mrg 	  for (i = 0; i < nargs; i++)
   3812  1.1  mrg 	    {
   3813  1.1  mrg 	      op = gimple_call_arg (stmt, i);
   3814  1.1  mrg 	      if (j == 0)
   3815  1.1  mrg 		{
   3816  1.1  mrg 		  vec_defs.quick_push (vNULL);
   3817  1.1  mrg 		  vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
   3818  1.1  mrg 						 op, &vec_defs[i], vectypes[i]);
   3819  1.1  mrg 		}
   3820  1.1  mrg 	      vec_oprnd0 = vec_defs[i][2*j];
   3821  1.1  mrg 	      vec_oprnd1 = vec_defs[i][2*j+1];
   3822  1.1  mrg 
   3823  1.1  mrg 	      vargs.quick_push (vec_oprnd0);
   3824  1.1  mrg 	      vargs.quick_push (vec_oprnd1);
   3825  1.1  mrg 	    }
   3826  1.1  mrg 
   3827  1.1  mrg 	  gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
   3828  1.1  mrg 	  new_temp = make_ssa_name (vec_dest, new_stmt);
   3829  1.1  mrg 	  gimple_call_set_lhs (new_stmt, new_temp);
   3830  1.1  mrg 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   3831  1.1  mrg 
   3832  1.1  mrg 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
   3833  1.1  mrg 	}
   3834  1.1  mrg 
   3835  1.1  mrg       if (!slp_node)
   3836  1.1  mrg 	*vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
   3837  1.1  mrg 
   3838  1.1  mrg       for (i = 0; i < nargs; i++)
   3839  1.1  mrg 	{
   3840  1.1  mrg 	  vec<tree> vec_oprndsi = vec_defs[i];
   3841  1.1  mrg 	  vec_oprndsi.release ();
   3842  1.1  mrg 	}
   3843  1.1  mrg     }
   3844  1.1  mrg   else
   3845  1.1  mrg     /* No current target implements this case.  */
   3846  1.1  mrg     return false;
   3847  1.1  mrg 
   3848  1.1  mrg   vargs.release ();
   3849  1.1  mrg 
   3850  1.1  mrg   /* The call in STMT might prevent it from being removed in dce.
   3851  1.1  mrg      We however cannot remove it here, due to the way the ssa name
   3852  1.1  mrg      it defines is mapped to the new definition.  So just replace
   3853  1.1  mrg      rhs of the statement with something harmless.  */
   3854  1.1  mrg 
   3855  1.1  mrg   if (slp_node)
   3856  1.1  mrg     return true;
   3857  1.1  mrg 
   3858  1.1  mrg   stmt_info = vect_orig_stmt (stmt_info);
   3859  1.1  mrg   lhs = gimple_get_lhs (stmt_info->stmt);
   3860  1.1  mrg 
   3861  1.1  mrg   gassign *new_stmt
   3862  1.1  mrg     = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
   3863  1.1  mrg   vinfo->replace_stmt (gsi, stmt_info, new_stmt);
   3864  1.1  mrg 
   3865  1.1  mrg   return true;
   3866  1.1  mrg }
   3867  1.1  mrg 
   3868  1.1  mrg 
/* Per-argument analysis results used when vectorizing a call via a
   SIMD clone of the callee.  One of these is filled in for each scalar
   call argument.  */

struct simd_call_arg_info
{
  /* Vector type for the argument; NULL_TREE when the definition is a
     constant or external (loop-invariant) def.  */
  tree vectype;
  /* The scalar operand, possibly adjusted (e.g. the linear base after
     peeling-for-alignment bias is applied).  */
  tree op;
  /* Step of a linear argument per scalar iteration; 0 if the argument
     is not known to be linear.  */
  HOST_WIDE_INT linear_step;
  /* Vectorizer definition type of the argument.  */
  enum vect_def_type dt;
  /* Known pointer alignment in bytes, or 0 if unknown/not a pointer.  */
  unsigned int align;
  /* True if the argument is linear within a simd lane only (see
     vect_simd_lane_linear), not across the whole loop.  */
  bool simd_lane_linear;
};
   3878  1.1  mrg 
/* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
   is linear within simd lane (but not within whole loop), note it in
   *ARGINFO.  */

static void
vect_simd_lane_linear (tree op, class loop *loop,
		       struct simd_call_arg_info *arginfo)
{
  gimple *def_stmt = SSA_NAME_DEF_STMT (op);

  /* Only handle OP defined as BASE p+ OFFSET with an invariant BASE;
     anything else cannot be recognized as simd-lane-linear here.  */
  if (!is_gimple_assign (def_stmt)
      || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
      || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
    return;

  tree base = gimple_assign_rhs1 (def_stmt);
  HOST_WIDE_INT linear_step = 0;
  tree v = gimple_assign_rhs2 (def_stmt);
  /* Walk the definition chain of the offset V, accumulating at most one
     multiplicative step and folding constant additions into BASE, until
     we either give up or reach a GOMP_SIMD_LANE call.  */
  while (TREE_CODE (v) == SSA_NAME)
    {
      tree t;
      def_stmt = SSA_NAME_DEF_STMT (v);
      if (is_gimple_assign (def_stmt))
	switch (gimple_assign_rhs_code (def_stmt))
	  {
	  case PLUS_EXPR:
	    /* V = V' + CST: fold the constant addend into BASE, but only
	       before a step has been recorded (the addend would otherwise
	       need scaling by the step).  */
	    t = gimple_assign_rhs2 (def_stmt);
	    if (linear_step || TREE_CODE (t) != INTEGER_CST)
	      return;
	    base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
	    v = gimple_assign_rhs1 (def_stmt);
	    continue;
	  case MULT_EXPR:
	    /* V = V' * CST: record CST as the linear step; only a single
	       multiplication is supported, and a zero step is useless.  */
	    t = gimple_assign_rhs2 (def_stmt);
	    if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
	      return;
	    linear_step = tree_to_shwi (t);
	    v = gimple_assign_rhs1 (def_stmt);
	    continue;
	  CASE_CONVERT:
	    /* Look through integer conversions, but reject narrowing ones
	       (result precision smaller than operand precision), which
	       could change the value.  */
	    t = gimple_assign_rhs1 (def_stmt);
	    if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
		|| (TYPE_PRECISION (TREE_TYPE (v))
		    < TYPE_PRECISION (TREE_TYPE (t))))
	      return;
	    if (!linear_step)
	      linear_step = 1;
	    v = t;
	    continue;
	  default:
	    return;
	  }
      else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
	       && loop->simduid
	       && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
	       && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
		   == loop->simduid))
	{
	  /* Reached .GOMP_SIMD_LANE (simduid) for this loop: OP is
	     BASE + lane * LINEAR_STEP, i.e. linear within a simd lane.
	     Record the findings in *ARGINFO.  */
	  if (!linear_step)
	    linear_step = 1;
	  arginfo->linear_step = linear_step;
	  arginfo->op = base;
	  arginfo->simd_lane_linear = true;
	  return;
	}
    }
}
   3946  1.1  mrg 
   3947  1.1  mrg /* Return the number of elements in vector type VECTYPE, which is associated
   3948  1.1  mrg    with a SIMD clone.  At present these vectors always have a constant
   3949  1.1  mrg    length.  */
   3950  1.1  mrg 
   3951  1.1  mrg static unsigned HOST_WIDE_INT
   3952  1.1  mrg simd_clone_subparts (tree vectype)
   3953  1.1  mrg {
   3954  1.1  mrg   return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
   3955  1.1  mrg }
   3956  1.1  mrg 
   3957  1.1  mrg /* Function vectorizable_simd_clone_call.
   3958  1.1  mrg 
   3959  1.1  mrg    Check if STMT_INFO performs a function call that can be vectorized
   3960  1.1  mrg    by calling a simd clone of the function.
   3961  1.1  mrg    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   3962  1.1  mrg    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   3963  1.1  mrg    Return true if STMT_INFO is vectorizable in this way.  */
   3964  1.1  mrg 
   3965  1.1  mrg static bool
   3966  1.1  mrg vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
   3967  1.1  mrg 			      gimple_stmt_iterator *gsi,
   3968  1.1  mrg 			      gimple **vec_stmt, slp_tree slp_node,
   3969  1.1  mrg 			      stmt_vector_for_cost *)
   3970  1.1  mrg {
   3971  1.1  mrg   tree vec_dest;
   3972  1.1  mrg   tree scalar_dest;
   3973  1.1  mrg   tree op, type;
   3974  1.1  mrg   tree vec_oprnd0 = NULL_TREE;
   3975  1.1  mrg   tree vectype;
   3976  1.1  mrg   poly_uint64 nunits;
   3977  1.1  mrg   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   3978  1.1  mrg   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
   3979  1.1  mrg   class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
   3980  1.1  mrg   tree fndecl, new_temp;
   3981  1.1  mrg   int ncopies, j;
   3982  1.1  mrg   auto_vec<simd_call_arg_info> arginfo;
   3983  1.1  mrg   vec<tree> vargs = vNULL;
   3984  1.1  mrg   size_t i, nargs;
   3985  1.1  mrg   tree lhs, rtype, ratype;
   3986  1.1  mrg   vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
   3987  1.1  mrg 
   3988  1.1  mrg   /* Is STMT a vectorizable call?   */
   3989  1.1  mrg   gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
   3990  1.1  mrg   if (!stmt)
   3991  1.1  mrg     return false;
   3992  1.1  mrg 
   3993  1.1  mrg   fndecl = gimple_call_fndecl (stmt);
   3994  1.1  mrg   if (fndecl == NULL_TREE)
   3995  1.1  mrg     return false;
   3996  1.1  mrg 
   3997  1.1  mrg   struct cgraph_node *node = cgraph_node::get (fndecl);
   3998  1.1  mrg   if (node == NULL || node->simd_clones == NULL)
   3999  1.1  mrg     return false;
   4000  1.1  mrg 
   4001  1.1  mrg   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
   4002  1.1  mrg     return false;
   4003  1.1  mrg 
   4004  1.1  mrg   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
   4005  1.1  mrg       && ! vec_stmt)
   4006  1.1  mrg     return false;
   4007  1.1  mrg 
   4008  1.1  mrg   if (gimple_call_lhs (stmt)
   4009  1.1  mrg       && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
   4010  1.1  mrg     return false;
   4011  1.1  mrg 
   4012  1.1  mrg   gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
   4013  1.1  mrg 
   4014  1.1  mrg   vectype = STMT_VINFO_VECTYPE (stmt_info);
   4015  1.1  mrg 
   4016  1.1  mrg   if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
   4017  1.1  mrg     return false;
   4018  1.1  mrg 
   4019  1.1  mrg   /* FORNOW */
   4020  1.1  mrg   if (slp_node)
   4021  1.1  mrg     return false;
   4022  1.1  mrg 
   4023  1.1  mrg   /* Process function arguments.  */
   4024  1.1  mrg   nargs = gimple_call_num_args (stmt);
   4025  1.1  mrg 
   4026  1.1  mrg   /* Bail out if the function has zero arguments.  */
   4027  1.1  mrg   if (nargs == 0)
   4028  1.1  mrg     return false;
   4029  1.1  mrg 
   4030  1.1  mrg   arginfo.reserve (nargs, true);
   4031  1.1  mrg 
   4032  1.1  mrg   for (i = 0; i < nargs; i++)
   4033  1.1  mrg     {
   4034  1.1  mrg       simd_call_arg_info thisarginfo;
   4035  1.1  mrg       affine_iv iv;
   4036  1.1  mrg 
   4037  1.1  mrg       thisarginfo.linear_step = 0;
   4038  1.1  mrg       thisarginfo.align = 0;
   4039  1.1  mrg       thisarginfo.op = NULL_TREE;
   4040  1.1  mrg       thisarginfo.simd_lane_linear = false;
   4041  1.1  mrg 
   4042  1.1  mrg       op = gimple_call_arg (stmt, i);
   4043  1.1  mrg       if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
   4044  1.1  mrg 			       &thisarginfo.vectype)
   4045  1.1  mrg 	  || thisarginfo.dt == vect_uninitialized_def)
   4046  1.1  mrg 	{
   4047  1.1  mrg 	  if (dump_enabled_p ())
   4048  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   4049  1.1  mrg 			     "use not simple.\n");
   4050  1.1  mrg 	  return false;
   4051  1.1  mrg 	}
   4052  1.1  mrg 
   4053  1.1  mrg       if (thisarginfo.dt == vect_constant_def
   4054  1.1  mrg 	  || thisarginfo.dt == vect_external_def)
   4055  1.1  mrg 	gcc_assert (thisarginfo.vectype == NULL_TREE);
   4056  1.1  mrg       else
   4057  1.1  mrg 	{
   4058  1.1  mrg 	  gcc_assert (thisarginfo.vectype != NULL_TREE);
   4059  1.1  mrg 	  if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
   4060  1.1  mrg 	    {
   4061  1.1  mrg 	      if (dump_enabled_p ())
   4062  1.1  mrg 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   4063  1.1  mrg 				 "vector mask arguments are not supported\n");
   4064  1.1  mrg 	      return false;
   4065  1.1  mrg 	    }
   4066  1.1  mrg 	}
   4067  1.1  mrg 
   4068  1.1  mrg       /* For linear arguments, the analyze phase should have saved
   4069  1.1  mrg 	 the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
   4070  1.1  mrg       if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
   4071  1.1  mrg 	  && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
   4072  1.1  mrg 	{
   4073  1.1  mrg 	  gcc_assert (vec_stmt);
   4074  1.1  mrg 	  thisarginfo.linear_step
   4075  1.1  mrg 	    = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
   4076  1.1  mrg 	  thisarginfo.op
   4077  1.1  mrg 	    = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
   4078  1.1  mrg 	  thisarginfo.simd_lane_linear
   4079  1.1  mrg 	    = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
   4080  1.1  mrg 	       == boolean_true_node);
   4081  1.1  mrg 	  /* If loop has been peeled for alignment, we need to adjust it.  */
   4082  1.1  mrg 	  tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
   4083  1.1  mrg 	  tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
   4084  1.1  mrg 	  if (n1 != n2 && !thisarginfo.simd_lane_linear)
   4085  1.1  mrg 	    {
   4086  1.1  mrg 	      tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
   4087  1.1  mrg 	      tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
   4088  1.1  mrg 	      tree opt = TREE_TYPE (thisarginfo.op);
   4089  1.1  mrg 	      bias = fold_convert (TREE_TYPE (step), bias);
   4090  1.1  mrg 	      bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
   4091  1.1  mrg 	      thisarginfo.op
   4092  1.1  mrg 		= fold_build2 (POINTER_TYPE_P (opt)
   4093  1.1  mrg 			       ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
   4094  1.1  mrg 			       thisarginfo.op, bias);
   4095  1.1  mrg 	    }
   4096  1.1  mrg 	}
   4097  1.1  mrg       else if (!vec_stmt
   4098  1.1  mrg 	       && thisarginfo.dt != vect_constant_def
   4099  1.1  mrg 	       && thisarginfo.dt != vect_external_def
   4100  1.1  mrg 	       && loop_vinfo
   4101  1.1  mrg 	       && TREE_CODE (op) == SSA_NAME
   4102  1.1  mrg 	       && simple_iv (loop, loop_containing_stmt (stmt), op,
   4103  1.1  mrg 			     &iv, false)
   4104  1.1  mrg 	       && tree_fits_shwi_p (iv.step))
   4105  1.1  mrg 	{
   4106  1.1  mrg 	  thisarginfo.linear_step = tree_to_shwi (iv.step);
   4107  1.1  mrg 	  thisarginfo.op = iv.base;
   4108  1.1  mrg 	}
   4109  1.1  mrg       else if ((thisarginfo.dt == vect_constant_def
   4110  1.1  mrg 		|| thisarginfo.dt == vect_external_def)
   4111  1.1  mrg 	       && POINTER_TYPE_P (TREE_TYPE (op)))
   4112  1.1  mrg 	thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
   4113  1.1  mrg       /* Addresses of array elements indexed by GOMP_SIMD_LANE are
   4114  1.1  mrg 	 linear too.  */
   4115  1.1  mrg       if (POINTER_TYPE_P (TREE_TYPE (op))
   4116  1.1  mrg 	  && !thisarginfo.linear_step
   4117  1.1  mrg 	  && !vec_stmt
   4118  1.1  mrg 	  && thisarginfo.dt != vect_constant_def
   4119  1.1  mrg 	  && thisarginfo.dt != vect_external_def
   4120  1.1  mrg 	  && loop_vinfo
   4121  1.1  mrg 	  && !slp_node
   4122  1.1  mrg 	  && TREE_CODE (op) == SSA_NAME)
   4123  1.1  mrg 	vect_simd_lane_linear (op, loop, &thisarginfo);
   4124  1.1  mrg 
   4125  1.1  mrg       arginfo.quick_push (thisarginfo);
   4126  1.1  mrg     }
   4127  1.1  mrg 
   4128  1.1  mrg   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
   4129  1.1  mrg   if (!vf.is_constant ())
   4130  1.1  mrg     {
   4131  1.1  mrg       if (dump_enabled_p ())
   4132  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   4133  1.1  mrg 			 "not considering SIMD clones; not yet supported"
   4134  1.1  mrg 			 " for variable-width vectors.\n");
   4135  1.1  mrg       return false;
   4136  1.1  mrg     }
   4137  1.1  mrg 
   4138  1.1  mrg   unsigned int badness = 0;
   4139  1.1  mrg   struct cgraph_node *bestn = NULL;
   4140  1.1  mrg   if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
   4141  1.1  mrg     bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
   4142  1.1  mrg   else
   4143  1.1  mrg     for (struct cgraph_node *n = node->simd_clones; n != NULL;
   4144  1.1  mrg 	 n = n->simdclone->next_clone)
   4145  1.1  mrg       {
   4146  1.1  mrg 	unsigned int this_badness = 0;
   4147  1.1  mrg 	unsigned int num_calls;
   4148  1.1  mrg 	if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
   4149  1.1  mrg 	    || n->simdclone->nargs != nargs)
   4150  1.1  mrg 	  continue;
   4151  1.1  mrg 	if (num_calls != 1)
   4152  1.1  mrg 	  this_badness += exact_log2 (num_calls) * 4096;
   4153  1.1  mrg 	if (n->simdclone->inbranch)
   4154  1.1  mrg 	  this_badness += 8192;
   4155  1.1  mrg 	int target_badness = targetm.simd_clone.usable (n);
   4156  1.1  mrg 	if (target_badness < 0)
   4157  1.1  mrg 	  continue;
   4158  1.1  mrg 	this_badness += target_badness * 512;
   4159  1.1  mrg 	/* FORNOW: Have to add code to add the mask argument.  */
   4160  1.1  mrg 	if (n->simdclone->inbranch)
   4161  1.1  mrg 	  continue;
   4162  1.1  mrg 	for (i = 0; i < nargs; i++)
   4163  1.1  mrg 	  {
   4164  1.1  mrg 	    switch (n->simdclone->args[i].arg_type)
   4165  1.1  mrg 	      {
   4166  1.1  mrg 	      case SIMD_CLONE_ARG_TYPE_VECTOR:
   4167  1.1  mrg 		if (!useless_type_conversion_p
   4168  1.1  mrg 			(n->simdclone->args[i].orig_type,
   4169  1.1  mrg 			 TREE_TYPE (gimple_call_arg (stmt, i))))
   4170  1.1  mrg 		  i = -1;
   4171  1.1  mrg 		else if (arginfo[i].dt == vect_constant_def
   4172  1.1  mrg 			 || arginfo[i].dt == vect_external_def
   4173  1.1  mrg 			 || arginfo[i].linear_step)
   4174  1.1  mrg 		  this_badness += 64;
   4175  1.1  mrg 		break;
   4176  1.1  mrg 	      case SIMD_CLONE_ARG_TYPE_UNIFORM:
   4177  1.1  mrg 		if (arginfo[i].dt != vect_constant_def
   4178  1.1  mrg 		    && arginfo[i].dt != vect_external_def)
   4179  1.1  mrg 		  i = -1;
   4180  1.1  mrg 		break;
   4181  1.1  mrg 	      case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
   4182  1.1  mrg 	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
   4183  1.1  mrg 		if (arginfo[i].dt == vect_constant_def
   4184  1.1  mrg 		    || arginfo[i].dt == vect_external_def
   4185  1.1  mrg 		    || (arginfo[i].linear_step
   4186  1.1  mrg 			!= n->simdclone->args[i].linear_step))
   4187  1.1  mrg 		  i = -1;
   4188  1.1  mrg 		break;
   4189  1.1  mrg 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
   4190  1.1  mrg 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
   4191  1.1  mrg 	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
   4192  1.1  mrg 	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
   4193  1.1  mrg 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
   4194  1.1  mrg 	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
   4195  1.1  mrg 		/* FORNOW */
   4196  1.1  mrg 		i = -1;
   4197  1.1  mrg 		break;
   4198  1.1  mrg 	      case SIMD_CLONE_ARG_TYPE_MASK:
   4199  1.1  mrg 		gcc_unreachable ();
   4200  1.1  mrg 	      }
   4201  1.1  mrg 	    if (i == (size_t) -1)
   4202  1.1  mrg 	      break;
   4203  1.1  mrg 	    if (n->simdclone->args[i].alignment > arginfo[i].align)
   4204  1.1  mrg 	      {
   4205  1.1  mrg 		i = -1;
   4206  1.1  mrg 		break;
   4207  1.1  mrg 	      }
   4208  1.1  mrg 	    if (arginfo[i].align)
   4209  1.1  mrg 	      this_badness += (exact_log2 (arginfo[i].align)
   4210  1.1  mrg 			       - exact_log2 (n->simdclone->args[i].alignment));
   4211  1.1  mrg 	  }
   4212  1.1  mrg 	if (i == (size_t) -1)
   4213  1.1  mrg 	  continue;
   4214  1.1  mrg 	if (bestn == NULL || this_badness < badness)
   4215  1.1  mrg 	  {
   4216  1.1  mrg 	    bestn = n;
   4217  1.1  mrg 	    badness = this_badness;
   4218  1.1  mrg 	  }
   4219  1.1  mrg       }
   4220  1.1  mrg 
   4221  1.1  mrg   if (bestn == NULL)
   4222  1.1  mrg     return false;
   4223  1.1  mrg 
   4224  1.1  mrg   for (i = 0; i < nargs; i++)
   4225  1.1  mrg     if ((arginfo[i].dt == vect_constant_def
   4226  1.1  mrg 	 || arginfo[i].dt == vect_external_def)
   4227  1.1  mrg 	&& bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
   4228  1.1  mrg       {
   4229  1.1  mrg 	tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
   4230  1.1  mrg 	arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
   4231  1.1  mrg 							  slp_node);
   4232  1.1  mrg 	if (arginfo[i].vectype == NULL
   4233  1.1  mrg 	    || !constant_multiple_p (bestn->simdclone->simdlen,
   4234  1.1  mrg 				     simd_clone_subparts (arginfo[i].vectype)))
   4235  1.1  mrg 	  return false;
   4236  1.1  mrg       }
   4237  1.1  mrg 
   4238  1.1  mrg   fndecl = bestn->decl;
   4239  1.1  mrg   nunits = bestn->simdclone->simdlen;
   4240  1.1  mrg   ncopies = vector_unroll_factor (vf, nunits);
   4241  1.1  mrg 
   4242  1.1  mrg   /* If the function isn't const, only allow it in simd loops where user
   4243  1.1  mrg      has asserted that at least nunits consecutive iterations can be
   4244  1.1  mrg      performed using SIMD instructions.  */
   4245  1.1  mrg   if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
   4246  1.1  mrg       && gimple_vuse (stmt))
   4247  1.1  mrg     return false;
   4248  1.1  mrg 
   4249  1.1  mrg   /* Sanity check: make sure that at least one copy of the vectorized stmt
   4250  1.1  mrg      needs to be generated.  */
   4251  1.1  mrg   gcc_assert (ncopies >= 1);
   4252  1.1  mrg 
   4253  1.1  mrg   if (!vec_stmt) /* transformation not required.  */
   4254  1.1  mrg     {
   4255  1.1  mrg       STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
   4256  1.1  mrg       for (i = 0; i < nargs; i++)
   4257  1.1  mrg 	if ((bestn->simdclone->args[i].arg_type
   4258  1.1  mrg 	     == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
   4259  1.1  mrg 	    || (bestn->simdclone->args[i].arg_type
   4260  1.1  mrg 		== SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
   4261  1.1  mrg 	  {
   4262  1.1  mrg 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
   4263  1.1  mrg 									+ 1,
   4264  1.1  mrg 								      true);
   4265  1.1  mrg 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
   4266  1.1  mrg 	    tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
   4267  1.1  mrg 		       ? size_type_node : TREE_TYPE (arginfo[i].op);
   4268  1.1  mrg 	    tree ls = build_int_cst (lst, arginfo[i].linear_step);
   4269  1.1  mrg 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
   4270  1.1  mrg 	    tree sll = arginfo[i].simd_lane_linear
   4271  1.1  mrg 		       ? boolean_true_node : boolean_false_node;
   4272  1.1  mrg 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
   4273  1.1  mrg 	  }
   4274  1.1  mrg       STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
   4275  1.1  mrg       DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
   4276  1.1  mrg /*      vect_model_simple_cost (vinfo, stmt_info, ncopies,
   4277  1.1  mrg 				dt, slp_node, cost_vec); */
   4278  1.1  mrg       return true;
   4279  1.1  mrg     }
   4280  1.1  mrg 
   4281  1.1  mrg   /* Transform.  */
   4282  1.1  mrg 
   4283  1.1  mrg   if (dump_enabled_p ())
   4284  1.1  mrg     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
   4285  1.1  mrg 
   4286  1.1  mrg   /* Handle def.  */
   4287  1.1  mrg   scalar_dest = gimple_call_lhs (stmt);
   4288  1.1  mrg   vec_dest = NULL_TREE;
   4289  1.1  mrg   rtype = NULL_TREE;
   4290  1.1  mrg   ratype = NULL_TREE;
   4291  1.1  mrg   if (scalar_dest)
   4292  1.1  mrg     {
   4293  1.1  mrg       vec_dest = vect_create_destination_var (scalar_dest, vectype);
   4294  1.1  mrg       rtype = TREE_TYPE (TREE_TYPE (fndecl));
   4295  1.1  mrg       if (TREE_CODE (rtype) == ARRAY_TYPE)
   4296  1.1  mrg 	{
   4297  1.1  mrg 	  ratype = rtype;
   4298  1.1  mrg 	  rtype = TREE_TYPE (ratype);
   4299  1.1  mrg 	}
   4300  1.1  mrg     }
   4301  1.1  mrg 
   4302  1.1  mrg   auto_vec<vec<tree> > vec_oprnds;
   4303  1.1  mrg   auto_vec<unsigned> vec_oprnds_i;
   4304  1.1  mrg   vec_oprnds.safe_grow_cleared (nargs, true);
   4305  1.1  mrg   vec_oprnds_i.safe_grow_cleared (nargs, true);
   4306  1.1  mrg   for (j = 0; j < ncopies; ++j)
   4307  1.1  mrg     {
   4308  1.1  mrg       /* Build argument list for the vectorized call.  */
   4309  1.1  mrg       if (j == 0)
   4310  1.1  mrg 	vargs.create (nargs);
   4311  1.1  mrg       else
   4312  1.1  mrg 	vargs.truncate (0);
   4313  1.1  mrg 
   4314  1.1  mrg       for (i = 0; i < nargs; i++)
   4315  1.1  mrg 	{
   4316  1.1  mrg 	  unsigned int k, l, m, o;
   4317  1.1  mrg 	  tree atype;
   4318  1.1  mrg 	  op = gimple_call_arg (stmt, i);
   4319  1.1  mrg 	  switch (bestn->simdclone->args[i].arg_type)
   4320  1.1  mrg 	    {
   4321  1.1  mrg 	    case SIMD_CLONE_ARG_TYPE_VECTOR:
   4322  1.1  mrg 	      atype = bestn->simdclone->args[i].vector_type;
   4323  1.1  mrg 	      o = vector_unroll_factor (nunits,
   4324  1.1  mrg 					simd_clone_subparts (atype));
   4325  1.1  mrg 	      for (m = j * o; m < (j + 1) * o; m++)
   4326  1.1  mrg 		{
   4327  1.1  mrg 		  if (simd_clone_subparts (atype)
   4328  1.1  mrg 		      < simd_clone_subparts (arginfo[i].vectype))
   4329  1.1  mrg 		    {
   4330  1.1  mrg 		      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
   4331  1.1  mrg 		      k = (simd_clone_subparts (arginfo[i].vectype)
   4332  1.1  mrg 			   / simd_clone_subparts (atype));
   4333  1.1  mrg 		      gcc_assert ((k & (k - 1)) == 0);
   4334  1.1  mrg 		      if (m == 0)
   4335  1.1  mrg 			{
   4336  1.1  mrg 			  vect_get_vec_defs_for_operand (vinfo, stmt_info,
   4337  1.1  mrg 							 ncopies * o / k, op,
   4338  1.1  mrg 							 &vec_oprnds[i]);
   4339  1.1  mrg 			  vec_oprnds_i[i] = 0;
   4340  1.1  mrg 			  vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
   4341  1.1  mrg 			}
   4342  1.1  mrg 		      else
   4343  1.1  mrg 			{
   4344  1.1  mrg 			  vec_oprnd0 = arginfo[i].op;
   4345  1.1  mrg 			  if ((m & (k - 1)) == 0)
   4346  1.1  mrg 			    vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
   4347  1.1  mrg 			}
   4348  1.1  mrg 		      arginfo[i].op = vec_oprnd0;
   4349  1.1  mrg 		      vec_oprnd0
   4350  1.1  mrg 			= build3 (BIT_FIELD_REF, atype, vec_oprnd0,
   4351  1.1  mrg 				  bitsize_int (prec),
   4352  1.1  mrg 				  bitsize_int ((m & (k - 1)) * prec));
   4353  1.1  mrg 		      gassign *new_stmt
   4354  1.1  mrg 			= gimple_build_assign (make_ssa_name (atype),
   4355  1.1  mrg 					       vec_oprnd0);
   4356  1.1  mrg 		      vect_finish_stmt_generation (vinfo, stmt_info,
   4357  1.1  mrg 						   new_stmt, gsi);
   4358  1.1  mrg 		      vargs.safe_push (gimple_assign_lhs (new_stmt));
   4359  1.1  mrg 		    }
   4360  1.1  mrg 		  else
   4361  1.1  mrg 		    {
   4362  1.1  mrg 		      k = (simd_clone_subparts (atype)
   4363  1.1  mrg 			   / simd_clone_subparts (arginfo[i].vectype));
   4364  1.1  mrg 		      gcc_assert ((k & (k - 1)) == 0);
   4365  1.1  mrg 		      vec<constructor_elt, va_gc> *ctor_elts;
   4366  1.1  mrg 		      if (k != 1)
   4367  1.1  mrg 			vec_alloc (ctor_elts, k);
   4368  1.1  mrg 		      else
   4369  1.1  mrg 			ctor_elts = NULL;
   4370  1.1  mrg 		      for (l = 0; l < k; l++)
   4371  1.1  mrg 			{
   4372  1.1  mrg 			  if (m == 0 && l == 0)
   4373  1.1  mrg 			    {
   4374  1.1  mrg 			      vect_get_vec_defs_for_operand (vinfo, stmt_info,
   4375  1.1  mrg 							     k * o * ncopies,
   4376  1.1  mrg 							     op,
   4377  1.1  mrg 							     &vec_oprnds[i]);
   4378  1.1  mrg 			      vec_oprnds_i[i] = 0;
   4379  1.1  mrg 			      vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
   4380  1.1  mrg 			    }
   4381  1.1  mrg 			  else
   4382  1.1  mrg 			    vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
   4383  1.1  mrg 			  arginfo[i].op = vec_oprnd0;
   4384  1.1  mrg 			  if (k == 1)
   4385  1.1  mrg 			    break;
   4386  1.1  mrg 			  CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
   4387  1.1  mrg 						  vec_oprnd0);
   4388  1.1  mrg 			}
   4389  1.1  mrg 		      if (k == 1)
   4390  1.1  mrg 			if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
   4391  1.1  mrg 						       atype))
   4392  1.1  mrg 			  {
   4393  1.1  mrg 			    vec_oprnd0
   4394  1.1  mrg 			      = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
   4395  1.1  mrg 			    gassign *new_stmt
   4396  1.1  mrg 			      = gimple_build_assign (make_ssa_name (atype),
   4397  1.1  mrg 						     vec_oprnd0);
   4398  1.1  mrg 			    vect_finish_stmt_generation (vinfo, stmt_info,
   4399  1.1  mrg 							 new_stmt, gsi);
   4400  1.1  mrg 			    vargs.safe_push (gimple_assign_lhs (new_stmt));
   4401  1.1  mrg 			  }
   4402  1.1  mrg 			else
   4403  1.1  mrg 			  vargs.safe_push (vec_oprnd0);
   4404  1.1  mrg 		      else
   4405  1.1  mrg 			{
   4406  1.1  mrg 			  vec_oprnd0 = build_constructor (atype, ctor_elts);
   4407  1.1  mrg 			  gassign *new_stmt
   4408  1.1  mrg 			    = gimple_build_assign (make_ssa_name (atype),
   4409  1.1  mrg 						   vec_oprnd0);
   4410  1.1  mrg 			  vect_finish_stmt_generation (vinfo, stmt_info,
   4411  1.1  mrg 						       new_stmt, gsi);
   4412  1.1  mrg 			  vargs.safe_push (gimple_assign_lhs (new_stmt));
   4413  1.1  mrg 			}
   4414  1.1  mrg 		    }
   4415  1.1  mrg 		}
   4416  1.1  mrg 	      break;
   4417  1.1  mrg 	    case SIMD_CLONE_ARG_TYPE_UNIFORM:
   4418  1.1  mrg 	      vargs.safe_push (op);
   4419  1.1  mrg 	      break;
   4420  1.1  mrg 	    case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
   4421  1.1  mrg 	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
   4422  1.1  mrg 	      if (j == 0)
   4423  1.1  mrg 		{
   4424  1.1  mrg 		  gimple_seq stmts;
   4425  1.1  mrg 		  arginfo[i].op
   4426  1.1  mrg 		    = force_gimple_operand (unshare_expr (arginfo[i].op),
   4427  1.1  mrg 					    &stmts, true, NULL_TREE);
   4428  1.1  mrg 		  if (stmts != NULL)
   4429  1.1  mrg 		    {
   4430  1.1  mrg 		      basic_block new_bb;
   4431  1.1  mrg 		      edge pe = loop_preheader_edge (loop);
   4432  1.1  mrg 		      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
   4433  1.1  mrg 		      gcc_assert (!new_bb);
   4434  1.1  mrg 		    }
   4435  1.1  mrg 		  if (arginfo[i].simd_lane_linear)
   4436  1.1  mrg 		    {
   4437  1.1  mrg 		      vargs.safe_push (arginfo[i].op);
   4438  1.1  mrg 		      break;
   4439  1.1  mrg 		    }
   4440  1.1  mrg 		  tree phi_res = copy_ssa_name (op);
   4441  1.1  mrg 		  gphi *new_phi = create_phi_node (phi_res, loop->header);
   4442  1.1  mrg 		  add_phi_arg (new_phi, arginfo[i].op,
   4443  1.1  mrg 			       loop_preheader_edge (loop), UNKNOWN_LOCATION);
   4444  1.1  mrg 		  enum tree_code code
   4445  1.1  mrg 		    = POINTER_TYPE_P (TREE_TYPE (op))
   4446  1.1  mrg 		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
   4447  1.1  mrg 		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
   4448  1.1  mrg 			      ? sizetype : TREE_TYPE (op);
   4449  1.1  mrg 		  poly_widest_int cst
   4450  1.1  mrg 		    = wi::mul (bestn->simdclone->args[i].linear_step,
   4451  1.1  mrg 			       ncopies * nunits);
   4452  1.1  mrg 		  tree tcst = wide_int_to_tree (type, cst);
   4453  1.1  mrg 		  tree phi_arg = copy_ssa_name (op);
   4454  1.1  mrg 		  gassign *new_stmt
   4455  1.1  mrg 		    = gimple_build_assign (phi_arg, code, phi_res, tcst);
   4456  1.1  mrg 		  gimple_stmt_iterator si = gsi_after_labels (loop->header);
   4457  1.1  mrg 		  gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
   4458  1.1  mrg 		  add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
   4459  1.1  mrg 			       UNKNOWN_LOCATION);
   4460  1.1  mrg 		  arginfo[i].op = phi_res;
   4461  1.1  mrg 		  vargs.safe_push (phi_res);
   4462  1.1  mrg 		}
   4463  1.1  mrg 	      else
   4464  1.1  mrg 		{
   4465  1.1  mrg 		  enum tree_code code
   4466  1.1  mrg 		    = POINTER_TYPE_P (TREE_TYPE (op))
   4467  1.1  mrg 		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
   4468  1.1  mrg 		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
   4469  1.1  mrg 			      ? sizetype : TREE_TYPE (op);
   4470  1.1  mrg 		  poly_widest_int cst
   4471  1.1  mrg 		    = wi::mul (bestn->simdclone->args[i].linear_step,
   4472  1.1  mrg 			       j * nunits);
   4473  1.1  mrg 		  tree tcst = wide_int_to_tree (type, cst);
   4474  1.1  mrg 		  new_temp = make_ssa_name (TREE_TYPE (op));
   4475  1.1  mrg 		  gassign *new_stmt
   4476  1.1  mrg 		    = gimple_build_assign (new_temp, code,
   4477  1.1  mrg 					   arginfo[i].op, tcst);
   4478  1.1  mrg 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   4479  1.1  mrg 		  vargs.safe_push (new_temp);
   4480  1.1  mrg 		}
   4481  1.1  mrg 	      break;
   4482  1.1  mrg 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
   4483  1.1  mrg 	    case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
   4484  1.1  mrg 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
   4485  1.1  mrg 	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
   4486  1.1  mrg 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
   4487  1.1  mrg 	    case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
   4488  1.1  mrg 	    default:
   4489  1.1  mrg 	      gcc_unreachable ();
   4490  1.1  mrg 	    }
   4491  1.1  mrg 	}
   4492  1.1  mrg 
   4493  1.1  mrg       gcall *new_call = gimple_build_call_vec (fndecl, vargs);
   4494  1.1  mrg       if (vec_dest)
   4495  1.1  mrg 	{
   4496  1.1  mrg 	  gcc_assert (ratype
   4497  1.1  mrg 		      || known_eq (simd_clone_subparts (rtype), nunits));
   4498  1.1  mrg 	  if (ratype)
   4499  1.1  mrg 	    new_temp = create_tmp_var (ratype);
   4500  1.1  mrg 	  else if (useless_type_conversion_p (vectype, rtype))
   4501  1.1  mrg 	    new_temp = make_ssa_name (vec_dest, new_call);
   4502  1.1  mrg 	  else
   4503  1.1  mrg 	    new_temp = make_ssa_name (rtype, new_call);
   4504  1.1  mrg 	  gimple_call_set_lhs (new_call, new_temp);
   4505  1.1  mrg 	}
   4506  1.1  mrg       vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
   4507  1.1  mrg       gimple *new_stmt = new_call;
   4508  1.1  mrg 
   4509  1.1  mrg       if (vec_dest)
   4510  1.1  mrg 	{
   4511  1.1  mrg 	  if (!multiple_p (simd_clone_subparts (vectype), nunits))
   4512  1.1  mrg 	    {
   4513  1.1  mrg 	      unsigned int k, l;
   4514  1.1  mrg 	      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
   4515  1.1  mrg 	      poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
   4516  1.1  mrg 	      k = vector_unroll_factor (nunits,
   4517  1.1  mrg 					simd_clone_subparts (vectype));
   4518  1.1  mrg 	      gcc_assert ((k & (k - 1)) == 0);
   4519  1.1  mrg 	      for (l = 0; l < k; l++)
   4520  1.1  mrg 		{
   4521  1.1  mrg 		  tree t;
   4522  1.1  mrg 		  if (ratype)
   4523  1.1  mrg 		    {
   4524  1.1  mrg 		      t = build_fold_addr_expr (new_temp);
   4525  1.1  mrg 		      t = build2 (MEM_REF, vectype, t,
   4526  1.1  mrg 				  build_int_cst (TREE_TYPE (t), l * bytes));
   4527  1.1  mrg 		    }
   4528  1.1  mrg 		  else
   4529  1.1  mrg 		    t = build3 (BIT_FIELD_REF, vectype, new_temp,
   4530  1.1  mrg 				bitsize_int (prec), bitsize_int (l * prec));
   4531  1.1  mrg 		  new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
   4532  1.1  mrg 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   4533  1.1  mrg 
   4534  1.1  mrg 		  if (j == 0 && l == 0)
   4535  1.1  mrg 		    *vec_stmt = new_stmt;
   4536  1.1  mrg 		  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
   4537  1.1  mrg 		}
   4538  1.1  mrg 
   4539  1.1  mrg 	      if (ratype)
   4540  1.1  mrg 		vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
   4541  1.1  mrg 	      continue;
   4542  1.1  mrg 	    }
   4543  1.1  mrg 	  else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
   4544  1.1  mrg 	    {
   4545  1.1  mrg 	      unsigned int k = (simd_clone_subparts (vectype)
   4546  1.1  mrg 				/ simd_clone_subparts (rtype));
   4547  1.1  mrg 	      gcc_assert ((k & (k - 1)) == 0);
   4548  1.1  mrg 	      if ((j & (k - 1)) == 0)
   4549  1.1  mrg 		vec_alloc (ret_ctor_elts, k);
   4550  1.1  mrg 	      if (ratype)
   4551  1.1  mrg 		{
   4552  1.1  mrg 		  unsigned int m, o;
   4553  1.1  mrg 		  o = vector_unroll_factor (nunits,
   4554  1.1  mrg 					    simd_clone_subparts (rtype));
   4555  1.1  mrg 		  for (m = 0; m < o; m++)
   4556  1.1  mrg 		    {
   4557  1.1  mrg 		      tree tem = build4 (ARRAY_REF, rtype, new_temp,
   4558  1.1  mrg 					 size_int (m), NULL_TREE, NULL_TREE);
   4559  1.1  mrg 		      new_stmt = gimple_build_assign (make_ssa_name (rtype),
   4560  1.1  mrg 						      tem);
   4561  1.1  mrg 		      vect_finish_stmt_generation (vinfo, stmt_info,
   4562  1.1  mrg 						   new_stmt, gsi);
   4563  1.1  mrg 		      CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
   4564  1.1  mrg 					      gimple_assign_lhs (new_stmt));
   4565  1.1  mrg 		    }
   4566  1.1  mrg 		  vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
   4567  1.1  mrg 		}
   4568  1.1  mrg 	      else
   4569  1.1  mrg 		CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
   4570  1.1  mrg 	      if ((j & (k - 1)) != k - 1)
   4571  1.1  mrg 		continue;
   4572  1.1  mrg 	      vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
   4573  1.1  mrg 	      new_stmt
   4574  1.1  mrg 		= gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
   4575  1.1  mrg 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   4576  1.1  mrg 
   4577  1.1  mrg 	      if ((unsigned) j == k - 1)
   4578  1.1  mrg 		*vec_stmt = new_stmt;
   4579  1.1  mrg 	      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
   4580  1.1  mrg 	      continue;
   4581  1.1  mrg 	    }
   4582  1.1  mrg 	  else if (ratype)
   4583  1.1  mrg 	    {
   4584  1.1  mrg 	      tree t = build_fold_addr_expr (new_temp);
   4585  1.1  mrg 	      t = build2 (MEM_REF, vectype, t,
   4586  1.1  mrg 			  build_int_cst (TREE_TYPE (t), 0));
   4587  1.1  mrg 	      new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
   4588  1.1  mrg 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   4589  1.1  mrg 	      vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
   4590  1.1  mrg 	    }
   4591  1.1  mrg 	  else if (!useless_type_conversion_p (vectype, rtype))
   4592  1.1  mrg 	    {
   4593  1.1  mrg 	      vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
   4594  1.1  mrg 	      new_stmt
   4595  1.1  mrg 		= gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
   4596  1.1  mrg 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   4597  1.1  mrg 	    }
   4598  1.1  mrg 	}
   4599  1.1  mrg 
   4600  1.1  mrg       if (j == 0)
   4601  1.1  mrg 	*vec_stmt = new_stmt;
   4602  1.1  mrg       STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
   4603  1.1  mrg     }
   4604  1.1  mrg 
   4605  1.1  mrg   for (i = 0; i < nargs; ++i)
   4606  1.1  mrg     {
   4607  1.1  mrg       vec<tree> oprndsi = vec_oprnds[i];
   4608  1.1  mrg       oprndsi.release ();
   4609  1.1  mrg     }
   4610  1.1  mrg   vargs.release ();
   4611  1.1  mrg 
   4612  1.1  mrg   /* The call in STMT might prevent it from being removed in dce.
   4613  1.1  mrg      We however cannot remove it here, due to the way the ssa name
   4614  1.1  mrg      it defines is mapped to the new definition.  So just replace
   4615  1.1  mrg      rhs of the statement with something harmless.  */
   4616  1.1  mrg 
   4617  1.1  mrg   if (slp_node)
   4618  1.1  mrg     return true;
   4619  1.1  mrg 
   4620  1.1  mrg   gimple *new_stmt;
   4621  1.1  mrg   if (scalar_dest)
   4622  1.1  mrg     {
   4623  1.1  mrg       type = TREE_TYPE (scalar_dest);
   4624  1.1  mrg       lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
   4625  1.1  mrg       new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
   4626  1.1  mrg     }
   4627  1.1  mrg   else
   4628  1.1  mrg     new_stmt = gimple_build_nop ();
   4629  1.1  mrg   vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
   4630  1.1  mrg   unlink_stmt_vdef (stmt);
   4631  1.1  mrg 
   4632  1.1  mrg   return true;
   4633  1.1  mrg }
   4634  1.1  mrg 
   4635  1.1  mrg 
   4636  1.1  mrg /* Function vect_gen_widened_results_half
   4637  1.1  mrg 
   4638  1.1  mrg    Create a vector stmt whose code, type, number of arguments, and result
   4639  1.1  mrg    variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   4640  1.1  mrg    VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
   4641  1.1  mrg    In the case that CODE is a CALL_EXPR, this means that a call to DECL
   4642  1.1  mrg    needs to be created (DECL is a function-decl of a target-builtin).
   4643  1.1  mrg    STMT_INFO is the original scalar stmt that we are vectorizing.  */
   4644  1.1  mrg 
   4645  1.1  mrg static gimple *
   4646  1.1  mrg vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
   4647  1.1  mrg                                tree vec_oprnd0, tree vec_oprnd1, int op_type,
   4648  1.1  mrg 			       tree vec_dest, gimple_stmt_iterator *gsi,
   4649  1.1  mrg 			       stmt_vec_info stmt_info)
   4650  1.1  mrg {
   4651  1.1  mrg   gimple *new_stmt;
   4652  1.1  mrg   tree new_temp;
   4653  1.1  mrg 
   4654  1.1  mrg   /* Generate half of the widened result:  */
   4655  1.1  mrg   gcc_assert (op_type == TREE_CODE_LENGTH (code));
   4656  1.1  mrg   if (op_type != binary_op)
   4657  1.1  mrg     vec_oprnd1 = NULL;
   4658  1.1  mrg   new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
   4659  1.1  mrg   new_temp = make_ssa_name (vec_dest, new_stmt);
   4660  1.1  mrg   gimple_assign_set_lhs (new_stmt, new_temp);
   4661  1.1  mrg   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   4662  1.1  mrg 
   4663  1.1  mrg   return new_stmt;
   4664  1.1  mrg }
   4665  1.1  mrg 
   4666  1.1  mrg 
   4667  1.1  mrg /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   4668  1.1  mrg    For multi-step conversions store the resulting vectors and call the function
   4669  1.1  mrg    recursively.  */
   4670  1.1  mrg 
   4671  1.1  mrg static void
   4672  1.1  mrg vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
   4673  1.1  mrg 				       int multi_step_cvt,
   4674  1.1  mrg 				       stmt_vec_info stmt_info,
   4675  1.1  mrg 				       vec<tree> &vec_dsts,
   4676  1.1  mrg 				       gimple_stmt_iterator *gsi,
   4677  1.1  mrg 				       slp_tree slp_node, enum tree_code code)
   4678  1.1  mrg {
   4679  1.1  mrg   unsigned int i;
   4680  1.1  mrg   tree vop0, vop1, new_tmp, vec_dest;
   4681  1.1  mrg 
   4682  1.1  mrg   vec_dest = vec_dsts.pop ();
   4683  1.1  mrg 
   4684  1.1  mrg   for (i = 0; i < vec_oprnds->length (); i += 2)
   4685  1.1  mrg     {
   4686  1.1  mrg       /* Create demotion operation.  */
   4687  1.1  mrg       vop0 = (*vec_oprnds)[i];
   4688  1.1  mrg       vop1 = (*vec_oprnds)[i + 1];
   4689  1.1  mrg       gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
   4690  1.1  mrg       new_tmp = make_ssa_name (vec_dest, new_stmt);
   4691  1.1  mrg       gimple_assign_set_lhs (new_stmt, new_tmp);
   4692  1.1  mrg       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   4693  1.1  mrg 
   4694  1.1  mrg       if (multi_step_cvt)
   4695  1.1  mrg 	/* Store the resulting vector for next recursive call.  */
   4696  1.1  mrg 	(*vec_oprnds)[i/2] = new_tmp;
   4697  1.1  mrg       else
   4698  1.1  mrg 	{
   4699  1.1  mrg 	  /* This is the last step of the conversion sequence. Store the
   4700  1.1  mrg 	     vectors in SLP_NODE or in vector info of the scalar statement
   4701  1.1  mrg 	     (or in STMT_VINFO_RELATED_STMT chain).  */
   4702  1.1  mrg 	  if (slp_node)
   4703  1.1  mrg 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
   4704  1.1  mrg 	  else
   4705  1.1  mrg 	    STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
   4706  1.1  mrg 	}
   4707  1.1  mrg     }
   4708  1.1  mrg 
   4709  1.1  mrg   /* For multi-step demotion operations we first generate demotion operations
   4710  1.1  mrg      from the source type to the intermediate types, and then combine the
   4711  1.1  mrg      results (stored in VEC_OPRNDS) in demotion operation to the destination
   4712  1.1  mrg      type.  */
   4713  1.1  mrg   if (multi_step_cvt)
   4714  1.1  mrg     {
   4715  1.1  mrg       /* At each level of recursion we have half of the operands we had at the
   4716  1.1  mrg 	 previous level.  */
   4717  1.1  mrg       vec_oprnds->truncate ((i+1)/2);
   4718  1.1  mrg       vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
   4719  1.1  mrg 					     multi_step_cvt - 1,
   4720  1.1  mrg 					     stmt_info, vec_dsts, gsi,
   4721  1.1  mrg 					     slp_node, VEC_PACK_TRUNC_EXPR);
   4722  1.1  mrg     }
   4723  1.1  mrg 
   4724  1.1  mrg   vec_dsts.quick_push (vec_dest);
   4725  1.1  mrg }
   4726  1.1  mrg 
   4727  1.1  mrg 
   4728  1.1  mrg /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   4729  1.1  mrg    and VEC_OPRNDS1, for a binary operation associated with scalar statement
   4730  1.1  mrg    STMT_INFO.  For multi-step conversions store the resulting vectors and
   4731  1.1  mrg    call the function recursively.  */
   4732  1.1  mrg 
   4733  1.1  mrg static void
   4734  1.1  mrg vect_create_vectorized_promotion_stmts (vec_info *vinfo,
   4735  1.1  mrg 					vec<tree> *vec_oprnds0,
   4736  1.1  mrg 					vec<tree> *vec_oprnds1,
   4737  1.1  mrg 					stmt_vec_info stmt_info, tree vec_dest,
   4738  1.1  mrg 					gimple_stmt_iterator *gsi,
   4739  1.1  mrg 					enum tree_code code1,
   4740  1.1  mrg 					enum tree_code code2, int op_type)
   4741  1.1  mrg {
   4742  1.1  mrg   int i;
   4743  1.1  mrg   tree vop0, vop1, new_tmp1, new_tmp2;
   4744  1.1  mrg   gimple *new_stmt1, *new_stmt2;
   4745  1.1  mrg   vec<tree> vec_tmp = vNULL;
   4746  1.1  mrg 
   4747  1.1  mrg   vec_tmp.create (vec_oprnds0->length () * 2);
   4748  1.1  mrg   FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
   4749  1.1  mrg     {
   4750  1.1  mrg       if (op_type == binary_op)
   4751  1.1  mrg 	vop1 = (*vec_oprnds1)[i];
   4752  1.1  mrg       else
   4753  1.1  mrg 	vop1 = NULL_TREE;
   4754  1.1  mrg 
   4755  1.1  mrg       /* Generate the two halves of promotion operation.  */
   4756  1.1  mrg       new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
   4757  1.1  mrg 						 op_type, vec_dest, gsi,
   4758  1.1  mrg 						 stmt_info);
   4759  1.1  mrg       new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
   4760  1.1  mrg 						 op_type, vec_dest, gsi,
   4761  1.1  mrg 						 stmt_info);
   4762  1.1  mrg       if (is_gimple_call (new_stmt1))
   4763  1.1  mrg 	{
   4764  1.1  mrg 	  new_tmp1 = gimple_call_lhs (new_stmt1);
   4765  1.1  mrg 	  new_tmp2 = gimple_call_lhs (new_stmt2);
   4766  1.1  mrg 	}
   4767  1.1  mrg       else
   4768  1.1  mrg 	{
   4769  1.1  mrg 	  new_tmp1 = gimple_assign_lhs (new_stmt1);
   4770  1.1  mrg 	  new_tmp2 = gimple_assign_lhs (new_stmt2);
   4771  1.1  mrg 	}
   4772  1.1  mrg 
   4773  1.1  mrg       /* Store the results for the next step.  */
   4774  1.1  mrg       vec_tmp.quick_push (new_tmp1);
   4775  1.1  mrg       vec_tmp.quick_push (new_tmp2);
   4776  1.1  mrg     }
   4777  1.1  mrg 
   4778  1.1  mrg   vec_oprnds0->release ();
   4779  1.1  mrg   *vec_oprnds0 = vec_tmp;
   4780  1.1  mrg }
   4781  1.1  mrg 
   4782  1.1  mrg /* Create vectorized promotion stmts for widening stmts using only half the
   4783  1.1  mrg    potential vector size for input.  */
   4784  1.1  mrg static void
   4785  1.1  mrg vect_create_half_widening_stmts (vec_info *vinfo,
   4786  1.1  mrg 					vec<tree> *vec_oprnds0,
   4787  1.1  mrg 					vec<tree> *vec_oprnds1,
   4788  1.1  mrg 					stmt_vec_info stmt_info, tree vec_dest,
   4789  1.1  mrg 					gimple_stmt_iterator *gsi,
   4790  1.1  mrg 					enum tree_code code1,
   4791  1.1  mrg 					int op_type)
   4792  1.1  mrg {
   4793  1.1  mrg   int i;
   4794  1.1  mrg   tree vop0, vop1;
   4795  1.1  mrg   gimple *new_stmt1;
   4796  1.1  mrg   gimple *new_stmt2;
   4797  1.1  mrg   gimple *new_stmt3;
   4798  1.1  mrg   vec<tree> vec_tmp = vNULL;
   4799  1.1  mrg 
   4800  1.1  mrg   vec_tmp.create (vec_oprnds0->length ());
   4801  1.1  mrg   FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
   4802  1.1  mrg     {
   4803  1.1  mrg       tree new_tmp1, new_tmp2, new_tmp3, out_type;
   4804  1.1  mrg 
   4805  1.1  mrg       gcc_assert (op_type == binary_op);
   4806  1.1  mrg       vop1 = (*vec_oprnds1)[i];
   4807  1.1  mrg 
   4808  1.1  mrg       /* Widen the first vector input.  */
   4809  1.1  mrg       out_type = TREE_TYPE (vec_dest);
   4810  1.1  mrg       new_tmp1 = make_ssa_name (out_type);
   4811  1.1  mrg       new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
   4812  1.1  mrg       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
   4813  1.1  mrg       if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
   4814  1.1  mrg 	{
   4815  1.1  mrg 	  /* Widen the second vector input.  */
   4816  1.1  mrg 	  new_tmp2 = make_ssa_name (out_type);
   4817  1.1  mrg 	  new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
   4818  1.1  mrg 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
   4819  1.1  mrg 	  /* Perform the operation.  With both vector inputs widened.  */
   4820  1.1  mrg 	  new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, new_tmp2);
   4821  1.1  mrg 	}
   4822  1.1  mrg       else
   4823  1.1  mrg 	{
   4824  1.1  mrg 	  /* Perform the operation.  With the single vector input widened.  */
   4825  1.1  mrg 	  new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, vop1);
   4826  1.1  mrg       }
   4827  1.1  mrg 
   4828  1.1  mrg       new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
   4829  1.1  mrg       gimple_assign_set_lhs (new_stmt3, new_tmp3);
   4830  1.1  mrg       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
   4831  1.1  mrg 
   4832  1.1  mrg       /* Store the results for the next step.  */
   4833  1.1  mrg       vec_tmp.quick_push (new_tmp3);
   4834  1.1  mrg     }
   4835  1.1  mrg 
   4836  1.1  mrg   vec_oprnds0->release ();
   4837  1.1  mrg   *vec_oprnds0 = vec_tmp;
   4838  1.1  mrg }
   4839  1.1  mrg 
   4840  1.1  mrg 
   4841  1.1  mrg /* Check if STMT_INFO performs a conversion operation that can be vectorized.
   4842  1.1  mrg    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   4843  1.1  mrg    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   4844  1.1  mrg    Return true if STMT_INFO is vectorizable in this way.  */
   4845  1.1  mrg 
   4846  1.1  mrg static bool
   4847  1.1  mrg vectorizable_conversion (vec_info *vinfo,
   4848  1.1  mrg 			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   4849  1.1  mrg 			 gimple **vec_stmt, slp_tree slp_node,
   4850  1.1  mrg 			 stmt_vector_for_cost *cost_vec)
   4851  1.1  mrg {
   4852  1.1  mrg   tree vec_dest;
   4853  1.1  mrg   tree scalar_dest;
   4854  1.1  mrg   tree op0, op1 = NULL_TREE;
   4855  1.1  mrg   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   4856  1.1  mrg   enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
   4857  1.1  mrg   enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
   4858  1.1  mrg   tree new_temp;
   4859  1.1  mrg   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
   4860  1.1  mrg   int ndts = 2;
   4861  1.1  mrg   poly_uint64 nunits_in;
   4862  1.1  mrg   poly_uint64 nunits_out;
   4863  1.1  mrg   tree vectype_out, vectype_in;
   4864  1.1  mrg   int ncopies, i;
   4865  1.1  mrg   tree lhs_type, rhs_type;
   4866  1.1  mrg   enum { NARROW, NONE, WIDEN } modifier;
   4867  1.1  mrg   vec<tree> vec_oprnds0 = vNULL;
   4868  1.1  mrg   vec<tree> vec_oprnds1 = vNULL;
   4869  1.1  mrg   tree vop0;
   4870  1.1  mrg   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
   4871  1.1  mrg   int multi_step_cvt = 0;
   4872  1.1  mrg   vec<tree> interm_types = vNULL;
   4873  1.1  mrg   tree intermediate_type, cvt_type = NULL_TREE;
   4874  1.1  mrg   int op_type;
   4875  1.1  mrg   unsigned short fltsz;
   4876  1.1  mrg 
   4877  1.1  mrg   /* Is STMT a vectorizable conversion?   */
   4878  1.1  mrg 
   4879  1.1  mrg   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
   4880  1.1  mrg     return false;
   4881  1.1  mrg 
   4882  1.1  mrg   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
   4883  1.1  mrg       && ! vec_stmt)
   4884  1.1  mrg     return false;
   4885  1.1  mrg 
   4886  1.1  mrg   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
   4887  1.1  mrg   if (!stmt)
   4888  1.1  mrg     return false;
   4889  1.1  mrg 
   4890  1.1  mrg   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
   4891  1.1  mrg     return false;
   4892  1.1  mrg 
   4893  1.1  mrg   code = gimple_assign_rhs_code (stmt);
   4894  1.1  mrg   if (!CONVERT_EXPR_CODE_P (code)
   4895  1.1  mrg       && code != FIX_TRUNC_EXPR
   4896  1.1  mrg       && code != FLOAT_EXPR
   4897  1.1  mrg       && code != WIDEN_PLUS_EXPR
   4898  1.1  mrg       && code != WIDEN_MINUS_EXPR
   4899  1.1  mrg       && code != WIDEN_MULT_EXPR
   4900  1.1  mrg       && code != WIDEN_LSHIFT_EXPR)
   4901  1.1  mrg     return false;
   4902  1.1  mrg 
   4903  1.1  mrg   bool widen_arith = (code == WIDEN_PLUS_EXPR
   4904  1.1  mrg 		      || code == WIDEN_MINUS_EXPR
   4905  1.1  mrg 		      || code == WIDEN_MULT_EXPR
   4906  1.1  mrg 		      || code == WIDEN_LSHIFT_EXPR);
   4907  1.1  mrg   op_type = TREE_CODE_LENGTH (code);
   4908  1.1  mrg 
   4909  1.1  mrg   /* Check types of lhs and rhs.  */
   4910  1.1  mrg   scalar_dest = gimple_assign_lhs (stmt);
   4911  1.1  mrg   lhs_type = TREE_TYPE (scalar_dest);
   4912  1.1  mrg   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
   4913  1.1  mrg 
   4914  1.1  mrg   /* Check the operands of the operation.  */
   4915  1.1  mrg   slp_tree slp_op0, slp_op1 = NULL;
   4916  1.1  mrg   if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
   4917  1.1  mrg 			   0, &op0, &slp_op0, &dt[0], &vectype_in))
   4918  1.1  mrg     {
   4919  1.1  mrg       if (dump_enabled_p ())
   4920  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   4921  1.1  mrg 			 "use not simple.\n");
   4922  1.1  mrg       return false;
   4923  1.1  mrg     }
   4924  1.1  mrg 
   4925  1.1  mrg   rhs_type = TREE_TYPE (op0);
   4926  1.1  mrg   if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
   4927  1.1  mrg       && !((INTEGRAL_TYPE_P (lhs_type)
   4928  1.1  mrg 	    && INTEGRAL_TYPE_P (rhs_type))
   4929  1.1  mrg 	   || (SCALAR_FLOAT_TYPE_P (lhs_type)
   4930  1.1  mrg 	       && SCALAR_FLOAT_TYPE_P (rhs_type))))
   4931  1.1  mrg     return false;
   4932  1.1  mrg 
   4933  1.1  mrg   if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
   4934  1.1  mrg       && ((INTEGRAL_TYPE_P (lhs_type)
   4935  1.1  mrg 	   && !type_has_mode_precision_p (lhs_type))
   4936  1.1  mrg 	  || (INTEGRAL_TYPE_P (rhs_type)
   4937  1.1  mrg 	      && !type_has_mode_precision_p (rhs_type))))
   4938  1.1  mrg     {
   4939  1.1  mrg       if (dump_enabled_p ())
   4940  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   4941  1.1  mrg                          "type conversion to/from bit-precision unsupported."
   4942  1.1  mrg                          "\n");
   4943  1.1  mrg       return false;
   4944  1.1  mrg     }
   4945  1.1  mrg 
   4946  1.1  mrg   if (op_type == binary_op)
   4947  1.1  mrg     {
   4948  1.1  mrg       gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR
   4949  1.1  mrg 		  || code == WIDEN_PLUS_EXPR || code == WIDEN_MINUS_EXPR);
   4950  1.1  mrg 
   4951  1.1  mrg       op1 = gimple_assign_rhs2 (stmt);
   4952  1.1  mrg       tree vectype1_in;
   4953  1.1  mrg       if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
   4954  1.1  mrg 			       &op1, &slp_op1, &dt[1], &vectype1_in))
   4955  1.1  mrg 	{
   4956  1.1  mrg           if (dump_enabled_p ())
   4957  1.1  mrg             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   4958  1.1  mrg                              "use not simple.\n");
   4959  1.1  mrg 	  return false;
   4960  1.1  mrg 	}
   4961  1.1  mrg       /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
   4962  1.1  mrg 	 OP1.  */
   4963  1.1  mrg       if (!vectype_in)
   4964  1.1  mrg 	vectype_in = vectype1_in;
   4965  1.1  mrg     }
   4966  1.1  mrg 
   4967  1.1  mrg   /* If op0 is an external or constant def, infer the vector type
   4968  1.1  mrg      from the scalar type.  */
   4969  1.1  mrg   if (!vectype_in)
   4970  1.1  mrg     vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
   4971  1.1  mrg   if (vec_stmt)
   4972  1.1  mrg     gcc_assert (vectype_in);
   4973  1.1  mrg   if (!vectype_in)
   4974  1.1  mrg     {
   4975  1.1  mrg       if (dump_enabled_p ())
   4976  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   4977  1.1  mrg 			 "no vectype for scalar type %T\n", rhs_type);
   4978  1.1  mrg 
   4979  1.1  mrg       return false;
   4980  1.1  mrg     }
   4981  1.1  mrg 
   4982  1.1  mrg   if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
   4983  1.1  mrg       && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
   4984  1.1  mrg     {
   4985  1.1  mrg       if (dump_enabled_p ())
   4986  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   4987  1.1  mrg 			 "can't convert between boolean and non "
   4988  1.1  mrg 			 "boolean vectors %T\n", rhs_type);
   4989  1.1  mrg 
   4990  1.1  mrg       return false;
   4991  1.1  mrg     }
   4992  1.1  mrg 
   4993  1.1  mrg   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
   4994  1.1  mrg   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
   4995  1.1  mrg   if (known_eq (nunits_out, nunits_in))
   4996  1.1  mrg     if (widen_arith)
   4997  1.1  mrg       modifier = WIDEN;
   4998  1.1  mrg     else
   4999  1.1  mrg       modifier = NONE;
   5000  1.1  mrg   else if (multiple_p (nunits_out, nunits_in))
   5001  1.1  mrg     modifier = NARROW;
   5002  1.1  mrg   else
   5003  1.1  mrg     {
   5004  1.1  mrg       gcc_checking_assert (multiple_p (nunits_in, nunits_out));
   5005  1.1  mrg       modifier = WIDEN;
   5006  1.1  mrg     }
   5007  1.1  mrg 
   5008  1.1  mrg   /* Multiple types in SLP are handled by creating the appropriate number of
   5009  1.1  mrg      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
   5010  1.1  mrg      case of SLP.  */
   5011  1.1  mrg   if (slp_node)
   5012  1.1  mrg     ncopies = 1;
   5013  1.1  mrg   else if (modifier == NARROW)
   5014  1.1  mrg     ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
   5015  1.1  mrg   else
   5016  1.1  mrg     ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
   5017  1.1  mrg 
   5018  1.1  mrg   /* Sanity check: make sure that at least one copy of the vectorized stmt
   5019  1.1  mrg      needs to be generated.  */
   5020  1.1  mrg   gcc_assert (ncopies >= 1);
   5021  1.1  mrg 
   5022  1.1  mrg   bool found_mode = false;
   5023  1.1  mrg   scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
   5024  1.1  mrg   scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
   5025  1.1  mrg   opt_scalar_mode rhs_mode_iter;
   5026  1.1  mrg 
   5027  1.1  mrg   /* Supportable by target?  */
   5028  1.1  mrg   switch (modifier)
   5029  1.1  mrg     {
   5030  1.1  mrg     case NONE:
   5031  1.1  mrg       if (code != FIX_TRUNC_EXPR
   5032  1.1  mrg 	  && code != FLOAT_EXPR
   5033  1.1  mrg 	  && !CONVERT_EXPR_CODE_P (code))
   5034  1.1  mrg 	return false;
   5035  1.1  mrg       if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
   5036  1.1  mrg 	break;
   5037  1.1  mrg       /* FALLTHRU */
   5038  1.1  mrg     unsupported:
   5039  1.1  mrg       if (dump_enabled_p ())
   5040  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   5041  1.1  mrg                          "conversion not supported by target.\n");
   5042  1.1  mrg       return false;
   5043  1.1  mrg 
   5044  1.1  mrg     case WIDEN:
   5045  1.1  mrg       if (known_eq (nunits_in, nunits_out))
   5046  1.1  mrg 	{
   5047  1.1  mrg 	  if (!supportable_half_widening_operation (code, vectype_out,
   5048  1.1  mrg 						   vectype_in, &code1))
   5049  1.1  mrg 	    goto unsupported;
   5050  1.1  mrg 	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
   5051  1.1  mrg 	  break;
   5052  1.1  mrg 	}
   5053  1.1  mrg       if (supportable_widening_operation (vinfo, code, stmt_info,
   5054  1.1  mrg 					       vectype_out, vectype_in, &code1,
   5055  1.1  mrg 					       &code2, &multi_step_cvt,
   5056  1.1  mrg 					       &interm_types))
   5057  1.1  mrg 	{
   5058  1.1  mrg 	  /* Binary widening operation can only be supported directly by the
   5059  1.1  mrg 	     architecture.  */
   5060  1.1  mrg 	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
   5061  1.1  mrg 	  break;
   5062  1.1  mrg 	}
   5063  1.1  mrg 
   5064  1.1  mrg       if (code != FLOAT_EXPR
   5065  1.1  mrg 	  || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
   5066  1.1  mrg 	goto unsupported;
   5067  1.1  mrg 
   5068  1.1  mrg       fltsz = GET_MODE_SIZE (lhs_mode);
   5069  1.1  mrg       FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
   5070  1.1  mrg 	{
   5071  1.1  mrg 	  rhs_mode = rhs_mode_iter.require ();
   5072  1.1  mrg 	  if (GET_MODE_SIZE (rhs_mode) > fltsz)
   5073  1.1  mrg 	    break;
   5074  1.1  mrg 
   5075  1.1  mrg 	  cvt_type
   5076  1.1  mrg 	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
   5077  1.1  mrg 	  cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
   5078  1.1  mrg 	  if (cvt_type == NULL_TREE)
   5079  1.1  mrg 	    goto unsupported;
   5080  1.1  mrg 
   5081  1.1  mrg 	  if (GET_MODE_SIZE (rhs_mode) == fltsz)
   5082  1.1  mrg 	    {
   5083  1.1  mrg 	      if (!supportable_convert_operation (code, vectype_out,
   5084  1.1  mrg 						  cvt_type, &codecvt1))
   5085  1.1  mrg 		goto unsupported;
   5086  1.1  mrg 	    }
   5087  1.1  mrg 	  else if (!supportable_widening_operation (vinfo, code, stmt_info,
   5088  1.1  mrg 						    vectype_out, cvt_type,
   5089  1.1  mrg 						    &codecvt1, &codecvt2,
   5090  1.1  mrg 						    &multi_step_cvt,
   5091  1.1  mrg 						    &interm_types))
   5092  1.1  mrg 	    continue;
   5093  1.1  mrg 	  else
   5094  1.1  mrg 	    gcc_assert (multi_step_cvt == 0);
   5095  1.1  mrg 
   5096  1.1  mrg 	  if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
   5097  1.1  mrg 					      cvt_type,
   5098  1.1  mrg 					      vectype_in, &code1, &code2,
   5099  1.1  mrg 					      &multi_step_cvt, &interm_types))
   5100  1.1  mrg 	    {
   5101  1.1  mrg 	      found_mode = true;
   5102  1.1  mrg 	      break;
   5103  1.1  mrg 	    }
   5104  1.1  mrg 	}
   5105  1.1  mrg 
   5106  1.1  mrg       if (!found_mode)
   5107  1.1  mrg 	goto unsupported;
   5108  1.1  mrg 
   5109  1.1  mrg       if (GET_MODE_SIZE (rhs_mode) == fltsz)
   5110  1.1  mrg 	codecvt2 = ERROR_MARK;
   5111  1.1  mrg       else
   5112  1.1  mrg 	{
   5113  1.1  mrg 	  multi_step_cvt++;
   5114  1.1  mrg 	  interm_types.safe_push (cvt_type);
   5115  1.1  mrg 	  cvt_type = NULL_TREE;
   5116  1.1  mrg 	}
   5117  1.1  mrg       break;
   5118  1.1  mrg 
   5119  1.1  mrg     case NARROW:
   5120  1.1  mrg       gcc_assert (op_type == unary_op);
   5121  1.1  mrg       if (supportable_narrowing_operation (code, vectype_out, vectype_in,
   5122  1.1  mrg 					   &code1, &multi_step_cvt,
   5123  1.1  mrg 					   &interm_types))
   5124  1.1  mrg 	break;
   5125  1.1  mrg 
   5126  1.1  mrg       if (code != FIX_TRUNC_EXPR
   5127  1.1  mrg 	  || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
   5128  1.1  mrg 	goto unsupported;
   5129  1.1  mrg 
   5130  1.1  mrg       cvt_type
   5131  1.1  mrg 	= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
   5132  1.1  mrg       cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
   5133  1.1  mrg       if (cvt_type == NULL_TREE)
   5134  1.1  mrg 	goto unsupported;
   5135  1.1  mrg       if (!supportable_convert_operation (code, cvt_type, vectype_in,
   5136  1.1  mrg 					  &codecvt1))
   5137  1.1  mrg 	goto unsupported;
   5138  1.1  mrg       if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
   5139  1.1  mrg 					   &code1, &multi_step_cvt,
   5140  1.1  mrg 					   &interm_types))
   5141  1.1  mrg 	break;
   5142  1.1  mrg       goto unsupported;
   5143  1.1  mrg 
   5144  1.1  mrg     default:
   5145  1.1  mrg       gcc_unreachable ();
   5146  1.1  mrg     }
   5147  1.1  mrg 
   5148  1.1  mrg   if (!vec_stmt)		/* transformation not required.  */
   5149  1.1  mrg     {
   5150  1.1  mrg       if (slp_node
   5151  1.1  mrg 	  && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
   5152  1.1  mrg 	      || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
   5153  1.1  mrg 	{
   5154  1.1  mrg 	  if (dump_enabled_p ())
   5155  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   5156  1.1  mrg 			     "incompatible vector types for invariants\n");
   5157  1.1  mrg 	  return false;
   5158  1.1  mrg 	}
   5159  1.1  mrg       DUMP_VECT_SCOPE ("vectorizable_conversion");
   5160  1.1  mrg       if (modifier == NONE)
   5161  1.1  mrg         {
   5162  1.1  mrg 	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
   5163  1.1  mrg 	  vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
   5164  1.1  mrg 				  cost_vec);
   5165  1.1  mrg 	}
   5166  1.1  mrg       else if (modifier == NARROW)
   5167  1.1  mrg 	{
   5168  1.1  mrg 	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
   5169  1.1  mrg 	  /* The final packing step produces one vector result per copy.  */
   5170  1.1  mrg 	  unsigned int nvectors
   5171  1.1  mrg 	    = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
   5172  1.1  mrg 	  vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
   5173  1.1  mrg 					      multi_step_cvt, cost_vec,
   5174  1.1  mrg 					      widen_arith);
   5175  1.1  mrg 	}
   5176  1.1  mrg       else
   5177  1.1  mrg 	{
   5178  1.1  mrg 	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
   5179  1.1  mrg 	  /* The initial unpacking step produces two vector results
   5180  1.1  mrg 	     per copy.  MULTI_STEP_CVT is 0 for a single conversion,
   5181  1.1  mrg 	     so >> MULTI_STEP_CVT divides by 2^(number of steps - 1).  */
   5182  1.1  mrg 	  unsigned int nvectors
   5183  1.1  mrg 	    = (slp_node
   5184  1.1  mrg 	       ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
   5185  1.1  mrg 	       : ncopies * 2);
   5186  1.1  mrg 	  vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
   5187  1.1  mrg 					      multi_step_cvt, cost_vec,
   5188  1.1  mrg 					      widen_arith);
   5189  1.1  mrg 	}
   5190  1.1  mrg       interm_types.release ();
   5191  1.1  mrg       return true;
   5192  1.1  mrg     }
   5193  1.1  mrg 
   5194  1.1  mrg   /* Transform.  */
   5195  1.1  mrg   if (dump_enabled_p ())
   5196  1.1  mrg     dump_printf_loc (MSG_NOTE, vect_location,
   5197  1.1  mrg                      "transform conversion. ncopies = %d.\n", ncopies);
   5198  1.1  mrg 
   5199  1.1  mrg   if (op_type == binary_op)
   5200  1.1  mrg     {
   5201  1.1  mrg       if (CONSTANT_CLASS_P (op0))
   5202  1.1  mrg 	op0 = fold_convert (TREE_TYPE (op1), op0);
   5203  1.1  mrg       else if (CONSTANT_CLASS_P (op1))
   5204  1.1  mrg 	op1 = fold_convert (TREE_TYPE (op0), op1);
   5205  1.1  mrg     }
   5206  1.1  mrg 
   5207  1.1  mrg   /* In case of multi-step conversion, we first generate conversion operations
   5208  1.1  mrg      to the intermediate types, and then from that types to the final one.
   5209  1.1  mrg      We create vector destinations for the intermediate type (TYPES) received
   5210  1.1  mrg      from supportable_*_operation, and store them in the correct order
   5211  1.1  mrg      for future use in vect_create_vectorized_*_stmts ().  */
   5212  1.1  mrg   auto_vec<tree> vec_dsts (multi_step_cvt + 1);
   5213  1.1  mrg   vec_dest = vect_create_destination_var (scalar_dest,
   5214  1.1  mrg 					  (cvt_type && modifier == WIDEN)
   5215  1.1  mrg 					  ? cvt_type : vectype_out);
   5216  1.1  mrg   vec_dsts.quick_push (vec_dest);
   5217  1.1  mrg 
   5218  1.1  mrg   if (multi_step_cvt)
   5219  1.1  mrg     {
   5220  1.1  mrg       for (i = interm_types.length () - 1;
   5221  1.1  mrg 	   interm_types.iterate (i, &intermediate_type); i--)
   5222  1.1  mrg 	{
   5223  1.1  mrg 	  vec_dest = vect_create_destination_var (scalar_dest,
   5224  1.1  mrg 						  intermediate_type);
   5225  1.1  mrg 	  vec_dsts.quick_push (vec_dest);
   5226  1.1  mrg 	}
   5227  1.1  mrg     }
   5228  1.1  mrg 
   5229  1.1  mrg   if (cvt_type)
   5230  1.1  mrg     vec_dest = vect_create_destination_var (scalar_dest,
   5231  1.1  mrg 					    modifier == WIDEN
   5232  1.1  mrg 					    ? vectype_out : cvt_type);
   5233  1.1  mrg 
   5234  1.1  mrg   int ninputs = 1;
   5235  1.1  mrg   if (!slp_node)
   5236  1.1  mrg     {
   5237  1.1  mrg       if (modifier == WIDEN)
   5238  1.1  mrg 	;
   5239  1.1  mrg       else if (modifier == NARROW)
   5240  1.1  mrg 	{
   5241  1.1  mrg 	  if (multi_step_cvt)
   5242  1.1  mrg 	    ninputs = vect_pow2 (multi_step_cvt);
   5243  1.1  mrg 	  ninputs *= 2;
   5244  1.1  mrg 	}
   5245  1.1  mrg     }
   5246  1.1  mrg 
   5247  1.1  mrg   switch (modifier)
   5248  1.1  mrg     {
   5249  1.1  mrg     case NONE:
   5250  1.1  mrg       vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
   5251  1.1  mrg 			 op0, &vec_oprnds0);
   5252  1.1  mrg       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
   5253  1.1  mrg 	{
   5254  1.1  mrg 	  /* Arguments are ready, create the new vector stmt.  */
   5255  1.1  mrg 	  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
   5256  1.1  mrg 	  gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
   5257  1.1  mrg 	  new_temp = make_ssa_name (vec_dest, new_stmt);
   5258  1.1  mrg 	  gimple_assign_set_lhs (new_stmt, new_temp);
   5259  1.1  mrg 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   5260  1.1  mrg 
   5261  1.1  mrg 	  if (slp_node)
   5262  1.1  mrg 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
   5263  1.1  mrg 	  else
   5264  1.1  mrg 	    STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
   5265  1.1  mrg 	}
   5266  1.1  mrg       break;
   5267  1.1  mrg 
   5268  1.1  mrg     case WIDEN:
   5269  1.1  mrg       /* In case the vectorization factor (VF) is bigger than the number
   5270  1.1  mrg 	 of elements that we can fit in a vectype (nunits), we have to
   5271  1.1  mrg 	 generate more than one vector stmt - i.e - we need to "unroll"
   5272  1.1  mrg 	 the vector stmt by a factor VF/nunits.  */
   5273  1.1  mrg       vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
   5274  1.1  mrg 			 op0, &vec_oprnds0,
   5275  1.1  mrg 			 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
   5276  1.1  mrg 			 &vec_oprnds1);
   5277  1.1  mrg       if (code == WIDEN_LSHIFT_EXPR)
   5278  1.1  mrg 	{
   5279  1.1  mrg 	  int oprnds_size = vec_oprnds0.length ();
   5280  1.1  mrg 	  vec_oprnds1.create (oprnds_size);
   5281  1.1  mrg 	  for (i = 0; i < oprnds_size; ++i)
   5282  1.1  mrg 	    vec_oprnds1.quick_push (op1);
   5283  1.1  mrg 	}
   5284  1.1  mrg       /* Arguments are ready.  Create the new vector stmts.  */
   5285  1.1  mrg       for (i = multi_step_cvt; i >= 0; i--)
   5286  1.1  mrg 	{
   5287  1.1  mrg 	  tree this_dest = vec_dsts[i];
   5288  1.1  mrg 	  enum tree_code c1 = code1, c2 = code2;
   5289  1.1  mrg 	  if (i == 0 && codecvt2 != ERROR_MARK)
   5290  1.1  mrg 	    {
   5291  1.1  mrg 	      c1 = codecvt1;
   5292  1.1  mrg 	      c2 = codecvt2;
   5293  1.1  mrg 	    }
   5294  1.1  mrg 	  if (known_eq (nunits_out, nunits_in))
   5295  1.1  mrg 	    vect_create_half_widening_stmts (vinfo, &vec_oprnds0,
   5296  1.1  mrg 						    &vec_oprnds1, stmt_info,
   5297  1.1  mrg 						    this_dest, gsi,
   5298  1.1  mrg 						    c1, op_type);
   5299  1.1  mrg 	  else
   5300  1.1  mrg 	    vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
   5301  1.1  mrg 						    &vec_oprnds1, stmt_info,
   5302  1.1  mrg 						    this_dest, gsi,
   5303  1.1  mrg 						    c1, c2, op_type);
   5304  1.1  mrg 	}
   5305  1.1  mrg 
   5306  1.1  mrg       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
   5307  1.1  mrg 	{
   5308  1.1  mrg 	  gimple *new_stmt;
   5309  1.1  mrg 	  if (cvt_type)
   5310  1.1  mrg 	    {
   5311  1.1  mrg 	      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
   5312  1.1  mrg 	      new_temp = make_ssa_name (vec_dest);
   5313  1.1  mrg 	      new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
   5314  1.1  mrg 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   5315  1.1  mrg 	    }
   5316  1.1  mrg 	  else
   5317  1.1  mrg 	    new_stmt = SSA_NAME_DEF_STMT (vop0);
   5318  1.1  mrg 
   5319  1.1  mrg 	  if (slp_node)
   5320  1.1  mrg 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
   5321  1.1  mrg 	  else
   5322  1.1  mrg 	    STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
   5323  1.1  mrg 	}
   5324  1.1  mrg       break;
   5325  1.1  mrg 
   5326  1.1  mrg     case NARROW:
   5327  1.1  mrg       /* In case the vectorization factor (VF) is bigger than the number
   5328  1.1  mrg 	 of elements that we can fit in a vectype (nunits), we have to
   5329  1.1  mrg 	 generate more than one vector stmt - i.e - we need to "unroll"
   5330  1.1  mrg 	 the vector stmt by a factor VF/nunits.  */
   5331  1.1  mrg       vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
   5332  1.1  mrg 			 op0, &vec_oprnds0);
   5333  1.1  mrg       /* Arguments are ready.  Create the new vector stmts.  */
   5334  1.1  mrg       if (cvt_type)
   5335  1.1  mrg 	FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
   5336  1.1  mrg 	  {
   5337  1.1  mrg 	    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
   5338  1.1  mrg 	    new_temp = make_ssa_name (vec_dest);
   5339  1.1  mrg 	    gassign *new_stmt
   5340  1.1  mrg 	      = gimple_build_assign (new_temp, codecvt1, vop0);
   5341  1.1  mrg 	    vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   5342  1.1  mrg 	    vec_oprnds0[i] = new_temp;
   5343  1.1  mrg 	  }
   5344  1.1  mrg 
   5345  1.1  mrg       vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
   5346  1.1  mrg 					     multi_step_cvt,
   5347  1.1  mrg 					     stmt_info, vec_dsts, gsi,
   5348  1.1  mrg 					     slp_node, code1);
   5349  1.1  mrg       break;
   5350  1.1  mrg     }
   5351  1.1  mrg   if (!slp_node)
   5352  1.1  mrg     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
   5353  1.1  mrg 
   5354  1.1  mrg   vec_oprnds0.release ();
   5355  1.1  mrg   vec_oprnds1.release ();
   5356  1.1  mrg   interm_types.release ();
   5357  1.1  mrg 
   5358  1.1  mrg   return true;
   5359  1.1  mrg }
   5360  1.1  mrg 
   5361  1.1  mrg /* Return true if we can assume from the scalar form of STMT_INFO that
   5362  1.1  mrg    neither the scalar nor the vector forms will generate code.  STMT_INFO
   5363  1.1  mrg    is known not to involve a data reference.  */
   5364  1.1  mrg 
   5365  1.1  mrg bool
   5366  1.1  mrg vect_nop_conversion_p (stmt_vec_info stmt_info)
   5367  1.1  mrg {
   5368  1.1  mrg   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
   5369  1.1  mrg   if (!stmt)
   5370  1.1  mrg     return false;
   5371  1.1  mrg 
   5372  1.1  mrg   tree lhs = gimple_assign_lhs (stmt);
   5373  1.1  mrg   tree_code code = gimple_assign_rhs_code (stmt);
   5374  1.1  mrg   tree rhs = gimple_assign_rhs1 (stmt);
   5375  1.1  mrg 
   5376  1.1  mrg   if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
   5377  1.1  mrg     return true;
   5378  1.1  mrg 
   5379  1.1  mrg   if (CONVERT_EXPR_CODE_P (code))
   5380  1.1  mrg     return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
   5381  1.1  mrg 
   5382  1.1  mrg   return false;
   5383  1.1  mrg }
   5384  1.1  mrg 
   5385  1.1  mrg /* Function vectorizable_assignment.
   5386  1.1  mrg 
   5387  1.1  mrg    Check if STMT_INFO performs an assignment (copy) that can be vectorized.
   5388  1.1  mrg    If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
   5389  1.1  mrg    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   5390  1.1  mrg    Return true if STMT_INFO is vectorizable in this way.  */
   5391  1.1  mrg 
   5392  1.1  mrg static bool
   5393  1.1  mrg vectorizable_assignment (vec_info *vinfo,
   5394  1.1  mrg 			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   5395  1.1  mrg 			 gimple **vec_stmt, slp_tree slp_node,
   5396  1.1  mrg 			 stmt_vector_for_cost *cost_vec)
   5397  1.1  mrg {
   5398  1.1  mrg   tree vec_dest;
   5399  1.1  mrg   tree scalar_dest;
   5400  1.1  mrg   tree op;
   5401  1.1  mrg   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   5402  1.1  mrg   tree new_temp;
   5403  1.1  mrg   enum vect_def_type dt[1] = {vect_unknown_def_type};
   5404  1.1  mrg   int ndts = 1;
   5405  1.1  mrg   int ncopies;
   5406  1.1  mrg   int i;
   5407  1.1  mrg   vec<tree> vec_oprnds = vNULL;
   5408  1.1  mrg   tree vop;
   5409  1.1  mrg   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
   5410  1.1  mrg   enum tree_code code;
   5411  1.1  mrg   tree vectype_in;
   5412  1.1  mrg 
   5413  1.1  mrg   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
   5414  1.1  mrg     return false;
   5415  1.1  mrg 
   5416  1.1  mrg   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
   5417  1.1  mrg       && ! vec_stmt)
   5418  1.1  mrg     return false;
   5419  1.1  mrg 
   5420  1.1  mrg   /* Is vectorizable assignment?  */
   5421  1.1  mrg   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
   5422  1.1  mrg   if (!stmt)
   5423  1.1  mrg     return false;
   5424  1.1  mrg 
   5425  1.1  mrg   scalar_dest = gimple_assign_lhs (stmt);
   5426  1.1  mrg   if (TREE_CODE (scalar_dest) != SSA_NAME)
   5427  1.1  mrg     return false;
   5428  1.1  mrg 
   5429  1.1  mrg   if (STMT_VINFO_DATA_REF (stmt_info))
   5430  1.1  mrg     return false;
   5431  1.1  mrg 
   5432  1.1  mrg   code = gimple_assign_rhs_code (stmt);
   5433  1.1  mrg   if (!(gimple_assign_single_p (stmt)
   5434  1.1  mrg 	|| code == PAREN_EXPR
   5435  1.1  mrg 	|| CONVERT_EXPR_CODE_P (code)))
   5436  1.1  mrg     return false;
   5437  1.1  mrg 
   5438  1.1  mrg   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   5439  1.1  mrg   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
   5440  1.1  mrg 
   5441  1.1  mrg   /* Multiple types in SLP are handled by creating the appropriate number of
   5442  1.1  mrg      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
   5443  1.1  mrg      case of SLP.  */
   5444  1.1  mrg   if (slp_node)
   5445  1.1  mrg     ncopies = 1;
   5446  1.1  mrg   else
   5447  1.1  mrg     ncopies = vect_get_num_copies (loop_vinfo, vectype);
   5448  1.1  mrg 
   5449  1.1  mrg   gcc_assert (ncopies >= 1);
   5450  1.1  mrg 
   5451  1.1  mrg   slp_tree slp_op;
   5452  1.1  mrg   if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
   5453  1.1  mrg 			   &dt[0], &vectype_in))
   5454  1.1  mrg     {
   5455  1.1  mrg       if (dump_enabled_p ())
   5456  1.1  mrg         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   5457  1.1  mrg                          "use not simple.\n");
   5458  1.1  mrg       return false;
   5459  1.1  mrg     }
   5460  1.1  mrg   if (!vectype_in)
   5461  1.1  mrg     vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
   5462  1.1  mrg 
   5463  1.1  mrg   /* We can handle NOP_EXPR conversions that do not change the number
   5464  1.1  mrg      of elements or the vector size.  */
   5465  1.1  mrg   if ((CONVERT_EXPR_CODE_P (code)
   5466  1.1  mrg        || code == VIEW_CONVERT_EXPR)
   5467  1.1  mrg       && (!vectype_in
   5468  1.1  mrg 	  || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
   5469  1.1  mrg 	  || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
   5470  1.1  mrg 		       GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
   5471  1.1  mrg     return false;
   5472  1.1  mrg 
   5473  1.1  mrg   if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
   5474  1.1  mrg     {
   5475  1.1  mrg       if (dump_enabled_p ())
   5476  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   5477  1.1  mrg 			 "can't convert between boolean and non "
   5478  1.1  mrg 			 "boolean vectors %T\n", TREE_TYPE (op));
   5479  1.1  mrg 
   5480  1.1  mrg       return false;
   5481  1.1  mrg     }
   5482  1.1  mrg 
   5483  1.1  mrg   /* We do not handle bit-precision changes.  */
   5484  1.1  mrg   if ((CONVERT_EXPR_CODE_P (code)
   5485  1.1  mrg        || code == VIEW_CONVERT_EXPR)
   5486  1.1  mrg       && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
   5487  1.1  mrg       && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
   5488  1.1  mrg 	  || !type_has_mode_precision_p (TREE_TYPE (op)))
   5489  1.1  mrg       /* But a conversion that does not change the bit-pattern is ok.  */
   5490  1.1  mrg       && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
   5491  1.1  mrg 	    > TYPE_PRECISION (TREE_TYPE (op)))
   5492  1.1  mrg 	   && TYPE_UNSIGNED (TREE_TYPE (op))))
   5493  1.1  mrg     {
   5494  1.1  mrg       if (dump_enabled_p ())
   5495  1.1  mrg         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   5496  1.1  mrg                          "type conversion to/from bit-precision "
   5497  1.1  mrg                          "unsupported.\n");
   5498  1.1  mrg       return false;
   5499  1.1  mrg     }
   5500  1.1  mrg 
   5501  1.1  mrg   if (!vec_stmt) /* transformation not required.  */
   5502  1.1  mrg     {
   5503  1.1  mrg       if (slp_node
   5504  1.1  mrg 	  && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
   5505  1.1  mrg 	{
   5506  1.1  mrg 	  if (dump_enabled_p ())
   5507  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   5508  1.1  mrg 			     "incompatible vector types for invariants\n");
   5509  1.1  mrg 	  return false;
   5510  1.1  mrg 	}
   5511  1.1  mrg       STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
   5512  1.1  mrg       DUMP_VECT_SCOPE ("vectorizable_assignment");
   5513  1.1  mrg       if (!vect_nop_conversion_p (stmt_info))
   5514  1.1  mrg 	vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
   5515  1.1  mrg 				cost_vec);
   5516  1.1  mrg       return true;
   5517  1.1  mrg     }
   5518  1.1  mrg 
   5519  1.1  mrg   /* Transform.  */
   5520  1.1  mrg   if (dump_enabled_p ())
   5521  1.1  mrg     dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
   5522  1.1  mrg 
   5523  1.1  mrg   /* Handle def.  */
   5524  1.1  mrg   vec_dest = vect_create_destination_var (scalar_dest, vectype);
   5525  1.1  mrg 
   5526  1.1  mrg   /* Handle use.  */
   5527  1.1  mrg   vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
   5528  1.1  mrg 
   5529  1.1  mrg   /* Arguments are ready. create the new vector stmt.  */
   5530  1.1  mrg   FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
   5531  1.1  mrg     {
   5532  1.1  mrg       if (CONVERT_EXPR_CODE_P (code)
   5533  1.1  mrg 	  || code == VIEW_CONVERT_EXPR)
   5534  1.1  mrg 	vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
   5535  1.1  mrg       gassign *new_stmt = gimple_build_assign (vec_dest, vop);
   5536  1.1  mrg       new_temp = make_ssa_name (vec_dest, new_stmt);
   5537  1.1  mrg       gimple_assign_set_lhs (new_stmt, new_temp);
   5538  1.1  mrg       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   5539  1.1  mrg       if (slp_node)
   5540  1.1  mrg 	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
   5541  1.1  mrg       else
   5542  1.1  mrg 	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
   5543  1.1  mrg     }
   5544  1.1  mrg   if (!slp_node)
   5545  1.1  mrg     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
   5546  1.1  mrg 
   5547  1.1  mrg   vec_oprnds.release ();
   5548  1.1  mrg   return true;
   5549  1.1  mrg }
   5550  1.1  mrg 
   5551  1.1  mrg 
   5552  1.1  mrg /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   5553  1.1  mrg    either as shift by a scalar or by a vector.  */
   5554  1.1  mrg 
   5555  1.1  mrg bool
   5556  1.1  mrg vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
   5557  1.1  mrg {
   5558  1.1  mrg 
   5559  1.1  mrg   machine_mode vec_mode;
   5560  1.1  mrg   optab optab;
   5561  1.1  mrg   int icode;
   5562  1.1  mrg   tree vectype;
   5563  1.1  mrg 
   5564  1.1  mrg   vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
   5565  1.1  mrg   if (!vectype)
   5566  1.1  mrg     return false;
   5567  1.1  mrg 
   5568  1.1  mrg   optab = optab_for_tree_code (code, vectype, optab_scalar);
   5569  1.1  mrg   if (!optab
   5570  1.1  mrg       || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
   5571  1.1  mrg     {
   5572  1.1  mrg       optab = optab_for_tree_code (code, vectype, optab_vector);
   5573  1.1  mrg       if (!optab
   5574  1.1  mrg           || (optab_handler (optab, TYPE_MODE (vectype))
   5575  1.1  mrg                       == CODE_FOR_nothing))
   5576  1.1  mrg         return false;
   5577  1.1  mrg     }
   5578  1.1  mrg 
   5579  1.1  mrg   vec_mode = TYPE_MODE (vectype);
   5580  1.1  mrg   icode = (int) optab_handler (optab, vec_mode);
   5581  1.1  mrg   if (icode == CODE_FOR_nothing)
   5582  1.1  mrg     return false;
   5583  1.1  mrg 
   5584  1.1  mrg   return true;
   5585  1.1  mrg }
   5586  1.1  mrg 
   5587  1.1  mrg 
   5588  1.1  mrg /* Function vectorizable_shift.
   5589  1.1  mrg 
   5590  1.1  mrg    Check if STMT_INFO performs a shift operation that can be vectorized.
   5591  1.1  mrg    If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
   5592  1.1  mrg    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   5593  1.1  mrg    Return true if STMT_INFO is vectorizable in this way.  */
   5594  1.1  mrg 
   5595  1.1  mrg static bool
   5596  1.1  mrg vectorizable_shift (vec_info *vinfo,
   5597  1.1  mrg 		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   5598  1.1  mrg 		    gimple **vec_stmt, slp_tree slp_node,
   5599  1.1  mrg 		    stmt_vector_for_cost *cost_vec)
   5600  1.1  mrg {
   5601  1.1  mrg   tree vec_dest;
   5602  1.1  mrg   tree scalar_dest;
   5603  1.1  mrg   tree op0, op1 = NULL;
   5604  1.1  mrg   tree vec_oprnd1 = NULL_TREE;
   5605  1.1  mrg   tree vectype;
   5606  1.1  mrg   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   5607  1.1  mrg   enum tree_code code;
   5608  1.1  mrg   machine_mode vec_mode;
   5609  1.1  mrg   tree new_temp;
   5610  1.1  mrg   optab optab;
   5611  1.1  mrg   int icode;
   5612  1.1  mrg   machine_mode optab_op2_mode;
   5613  1.1  mrg   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
   5614  1.1  mrg   int ndts = 2;
   5615  1.1  mrg   poly_uint64 nunits_in;
   5616  1.1  mrg   poly_uint64 nunits_out;
   5617  1.1  mrg   tree vectype_out;
   5618  1.1  mrg   tree op1_vectype;
   5619  1.1  mrg   int ncopies;
   5620  1.1  mrg   int i;
   5621  1.1  mrg   vec<tree> vec_oprnds0 = vNULL;
   5622  1.1  mrg   vec<tree> vec_oprnds1 = vNULL;
   5623  1.1  mrg   tree vop0, vop1;
   5624  1.1  mrg   unsigned int k;
   5625  1.1  mrg   bool scalar_shift_arg = true;
   5626  1.1  mrg   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
   5627  1.1  mrg   bool incompatible_op1_vectype_p = false;
   5628  1.1  mrg 
   5629  1.1  mrg   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
   5630  1.1  mrg     return false;
   5631  1.1  mrg 
   5632  1.1  mrg   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
   5633  1.1  mrg       && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
   5634  1.1  mrg       && ! vec_stmt)
   5635  1.1  mrg     return false;
   5636  1.1  mrg 
   5637  1.1  mrg   /* Is STMT a vectorizable binary/unary operation?   */
   5638  1.1  mrg   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
   5639  1.1  mrg   if (!stmt)
   5640  1.1  mrg     return false;
   5641  1.1  mrg 
   5642  1.1  mrg   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
   5643  1.1  mrg     return false;
   5644  1.1  mrg 
   5645  1.1  mrg   code = gimple_assign_rhs_code (stmt);
   5646  1.1  mrg 
   5647  1.1  mrg   if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
   5648  1.1  mrg       || code == RROTATE_EXPR))
   5649  1.1  mrg     return false;
   5650  1.1  mrg 
   5651  1.1  mrg   scalar_dest = gimple_assign_lhs (stmt);
   5652  1.1  mrg   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
   5653  1.1  mrg   if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
   5654  1.1  mrg     {
   5655  1.1  mrg       if (dump_enabled_p ())
   5656  1.1  mrg         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   5657  1.1  mrg                          "bit-precision shifts not supported.\n");
   5658  1.1  mrg       return false;
   5659  1.1  mrg     }
   5660  1.1  mrg 
   5661  1.1  mrg   slp_tree slp_op0;
   5662  1.1  mrg   if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
   5663  1.1  mrg 			   0, &op0, &slp_op0, &dt[0], &vectype))
   5664  1.1  mrg     {
   5665  1.1  mrg       if (dump_enabled_p ())
   5666  1.1  mrg         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   5667  1.1  mrg                          "use not simple.\n");
   5668  1.1  mrg       return false;
   5669  1.1  mrg     }
   5670  1.1  mrg   /* If op0 is an external or constant def, infer the vector type
   5671  1.1  mrg      from the scalar type.  */
   5672  1.1  mrg   if (!vectype)
   5673  1.1  mrg     vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
   5674  1.1  mrg   if (vec_stmt)
   5675  1.1  mrg     gcc_assert (vectype);
   5676  1.1  mrg   if (!vectype)
   5677  1.1  mrg     {
   5678  1.1  mrg       if (dump_enabled_p ())
   5679  1.1  mrg         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   5680  1.1  mrg                          "no vectype for scalar type\n");
   5681  1.1  mrg       return false;
   5682  1.1  mrg     }
   5683  1.1  mrg 
   5684  1.1  mrg   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
   5685  1.1  mrg   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
   5686  1.1  mrg   if (maybe_ne (nunits_out, nunits_in))
   5687  1.1  mrg     return false;
   5688  1.1  mrg 
   5689  1.1  mrg   stmt_vec_info op1_def_stmt_info;
   5690  1.1  mrg   slp_tree slp_op1;
   5691  1.1  mrg   if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
   5692  1.1  mrg 			   &dt[1], &op1_vectype, &op1_def_stmt_info))
   5693  1.1  mrg     {
   5694  1.1  mrg       if (dump_enabled_p ())
   5695  1.1  mrg         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   5696  1.1  mrg                          "use not simple.\n");
   5697  1.1  mrg       return false;
   5698  1.1  mrg     }
   5699  1.1  mrg 
   5700  1.1  mrg   /* Multiple types in SLP are handled by creating the appropriate number of
   5701  1.1  mrg      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
   5702  1.1  mrg      case of SLP.  */
   5703  1.1  mrg   if (slp_node)
   5704  1.1  mrg     ncopies = 1;
   5705  1.1  mrg   else
   5706  1.1  mrg     ncopies = vect_get_num_copies (loop_vinfo, vectype);
   5707  1.1  mrg 
   5708  1.1  mrg   gcc_assert (ncopies >= 1);
   5709  1.1  mrg 
   5710  1.1  mrg   /* Determine whether the shift amount is a vector, or scalar.  If the
   5711  1.1  mrg      shift/rotate amount is a vector, use the vector/vector shift optabs.  */
   5712  1.1  mrg 
   5713  1.1  mrg   if ((dt[1] == vect_internal_def
   5714  1.1  mrg        || dt[1] == vect_induction_def
   5715  1.1  mrg        || dt[1] == vect_nested_cycle)
   5716  1.1  mrg       && !slp_node)
   5717  1.1  mrg     scalar_shift_arg = false;
   5718  1.1  mrg   else if (dt[1] == vect_constant_def
   5719  1.1  mrg 	   || dt[1] == vect_external_def
   5720  1.1  mrg 	   || dt[1] == vect_internal_def)
   5721  1.1  mrg     {
   5722  1.1  mrg       /* In SLP, need to check whether the shift count is the same,
   5723  1.1  mrg 	 in loops if it is a constant or invariant, it is always
   5724  1.1  mrg 	 a scalar shift.  */
   5725  1.1  mrg       if (slp_node)
   5726  1.1  mrg 	{
   5727  1.1  mrg 	  vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
   5728  1.1  mrg 	  stmt_vec_info slpstmt_info;
   5729  1.1  mrg 
   5730  1.1  mrg 	  FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
   5731  1.1  mrg 	    {
   5732  1.1  mrg 	      gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
   5733  1.1  mrg 	      if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
   5734  1.1  mrg 		scalar_shift_arg = false;
   5735  1.1  mrg 	    }
   5736  1.1  mrg 
   5737  1.1  mrg 	  /* For internal SLP defs we have to make sure we see scalar stmts
   5738  1.1  mrg 	     for all vector elements.
   5739  1.1  mrg 	     ???  For different vectors we could resort to a different
   5740  1.1  mrg 	     scalar shift operand but code-generation below simply always
   5741  1.1  mrg 	     takes the first.  */
   5742  1.1  mrg 	  if (dt[1] == vect_internal_def
   5743  1.1  mrg 	      && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
   5744  1.1  mrg 			   stmts.length ()))
   5745  1.1  mrg 	    scalar_shift_arg = false;
   5746  1.1  mrg 	}
   5747  1.1  mrg 
   5748  1.1  mrg       /* If the shift amount is computed by a pattern stmt we cannot
   5749  1.1  mrg          use the scalar amount directly thus give up and use a vector
   5750  1.1  mrg 	 shift.  */
   5751  1.1  mrg       if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
   5752  1.1  mrg 	scalar_shift_arg = false;
   5753  1.1  mrg     }
   5754  1.1  mrg   else
   5755  1.1  mrg     {
   5756  1.1  mrg       if (dump_enabled_p ())
   5757  1.1  mrg         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   5758  1.1  mrg                          "operand mode requires invariant argument.\n");
   5759  1.1  mrg       return false;
   5760  1.1  mrg     }
   5761  1.1  mrg 
   5762  1.1  mrg   /* Vector shifted by vector.  */
   5763  1.1  mrg   bool was_scalar_shift_arg = scalar_shift_arg;
   5764  1.1  mrg   if (!scalar_shift_arg)
   5765  1.1  mrg     {
   5766  1.1  mrg       optab = optab_for_tree_code (code, vectype, optab_vector);
   5767  1.1  mrg       if (dump_enabled_p ())
   5768  1.1  mrg         dump_printf_loc (MSG_NOTE, vect_location,
   5769  1.1  mrg                          "vector/vector shift/rotate found.\n");
   5770  1.1  mrg 
   5771  1.1  mrg       if (!op1_vectype)
   5772  1.1  mrg 	op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
   5773  1.1  mrg 						   slp_op1);
   5774  1.1  mrg       incompatible_op1_vectype_p
   5775  1.1  mrg 	= (op1_vectype == NULL_TREE
   5776  1.1  mrg 	   || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
   5777  1.1  mrg 			TYPE_VECTOR_SUBPARTS (vectype))
   5778  1.1  mrg 	   || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
   5779  1.1  mrg       if (incompatible_op1_vectype_p
   5780  1.1  mrg 	  && (!slp_node
   5781  1.1  mrg 	      || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
   5782  1.1  mrg 	      || slp_op1->refcnt != 1))
   5783  1.1  mrg 	{
   5784  1.1  mrg 	  if (dump_enabled_p ())
   5785  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   5786  1.1  mrg                              "unusable type for last operand in"
   5787  1.1  mrg                              " vector/vector shift/rotate.\n");
   5788  1.1  mrg 	  return false;
   5789  1.1  mrg 	}
   5790  1.1  mrg     }
   5791  1.1  mrg   /* See if the machine has a vector shifted by scalar insn and if not
   5792  1.1  mrg      then see if it has a vector shifted by vector insn.  */
   5793  1.1  mrg   else
   5794  1.1  mrg     {
   5795  1.1  mrg       optab = optab_for_tree_code (code, vectype, optab_scalar);
   5796  1.1  mrg       if (optab
   5797  1.1  mrg           && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
   5798  1.1  mrg         {
   5799  1.1  mrg           if (dump_enabled_p ())
   5800  1.1  mrg             dump_printf_loc (MSG_NOTE, vect_location,
   5801  1.1  mrg                              "vector/scalar shift/rotate found.\n");
   5802  1.1  mrg         }
   5803  1.1  mrg       else
   5804  1.1  mrg         {
   5805  1.1  mrg           optab = optab_for_tree_code (code, vectype, optab_vector);
   5806  1.1  mrg           if (optab
   5807  1.1  mrg                && (optab_handler (optab, TYPE_MODE (vectype))
   5808  1.1  mrg                       != CODE_FOR_nothing))
   5809  1.1  mrg             {
   5810  1.1  mrg 	      scalar_shift_arg = false;
   5811  1.1  mrg 
   5812  1.1  mrg               if (dump_enabled_p ())
   5813  1.1  mrg                 dump_printf_loc (MSG_NOTE, vect_location,
   5814  1.1  mrg                                  "vector/vector shift/rotate found.\n");
   5815  1.1  mrg 
   5816  1.1  mrg 	      if (!op1_vectype)
   5817  1.1  mrg 		op1_vectype = get_vectype_for_scalar_type (vinfo,
   5818  1.1  mrg 							   TREE_TYPE (op1),
   5819  1.1  mrg 							   slp_op1);
   5820  1.1  mrg 
   5821  1.1  mrg               /* Unlike the other binary operators, shifts/rotates have
   5822  1.1  mrg                  the rhs being int, instead of the same type as the lhs,
   5823  1.1  mrg                  so make sure the scalar is the right type if we are
   5824  1.1  mrg 		 dealing with vectors of long long/long/short/char.  */
   5825  1.1  mrg 	      incompatible_op1_vectype_p
   5826  1.1  mrg 		= (!op1_vectype
   5827  1.1  mrg 		   || !tree_nop_conversion_p (TREE_TYPE (vectype),
   5828  1.1  mrg 					      TREE_TYPE (op1)));
   5829  1.1  mrg 	      if (incompatible_op1_vectype_p
   5830  1.1  mrg 		  && dt[1] == vect_internal_def)
   5831  1.1  mrg 		{
   5832  1.1  mrg 		  if (dump_enabled_p ())
   5833  1.1  mrg 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   5834  1.1  mrg 				     "unusable type for last operand in"
   5835  1.1  mrg 				     " vector/vector shift/rotate.\n");
   5836  1.1  mrg 		  return false;
   5837  1.1  mrg 		}
   5838  1.1  mrg             }
   5839  1.1  mrg         }
   5840  1.1  mrg     }
   5841  1.1  mrg 
   5842  1.1  mrg   /* Supportable by target?  */
   5843  1.1  mrg   if (!optab)
   5844  1.1  mrg     {
   5845  1.1  mrg       if (dump_enabled_p ())
   5846  1.1  mrg         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   5847  1.1  mrg                          "no optab.\n");
   5848  1.1  mrg       return false;
   5849  1.1  mrg     }
   5850  1.1  mrg   vec_mode = TYPE_MODE (vectype);
   5851  1.1  mrg   icode = (int) optab_handler (optab, vec_mode);
   5852  1.1  mrg   if (icode == CODE_FOR_nothing)
   5853  1.1  mrg     {
   5854  1.1  mrg       if (dump_enabled_p ())
   5855  1.1  mrg         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   5856  1.1  mrg                          "op not supported by target.\n");
   5857  1.1  mrg       return false;
   5858  1.1  mrg     }
   5859  1.1  mrg   /* vector lowering cannot optimize vector shifts using word arithmetic.  */
   5860  1.1  mrg   if (vect_emulated_vector_p (vectype))
   5861  1.1  mrg     return false;
   5862  1.1  mrg 
   5863  1.1  mrg   if (!vec_stmt) /* transformation not required.  */
   5864  1.1  mrg     {
   5865  1.1  mrg       if (slp_node
   5866  1.1  mrg 	  && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
   5867  1.1  mrg 	      || ((!scalar_shift_arg || dt[1] == vect_internal_def)
   5868  1.1  mrg 		  && (!incompatible_op1_vectype_p
   5869  1.1  mrg 		      || dt[1] == vect_constant_def)
   5870  1.1  mrg 		  && !vect_maybe_update_slp_op_vectype
   5871  1.1  mrg 			(slp_op1,
   5872  1.1  mrg 			 incompatible_op1_vectype_p ? vectype : op1_vectype))))
   5873  1.1  mrg 	{
   5874  1.1  mrg 	  if (dump_enabled_p ())
   5875  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   5876  1.1  mrg 			     "incompatible vector types for invariants\n");
   5877  1.1  mrg 	  return false;
   5878  1.1  mrg 	}
   5879  1.1  mrg       /* Now adjust the constant shift amount in place.  */
   5880  1.1  mrg       if (slp_node
   5881  1.1  mrg 	  && incompatible_op1_vectype_p
   5882  1.1  mrg 	  && dt[1] == vect_constant_def)
   5883  1.1  mrg 	{
   5884  1.1  mrg 	  for (unsigned i = 0;
   5885  1.1  mrg 	       i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
   5886  1.1  mrg 	    {
   5887  1.1  mrg 	      SLP_TREE_SCALAR_OPS (slp_op1)[i]
   5888  1.1  mrg 		= fold_convert (TREE_TYPE (vectype),
   5889  1.1  mrg 				SLP_TREE_SCALAR_OPS (slp_op1)[i]);
   5890  1.1  mrg 	      gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
   5891  1.1  mrg 			   == INTEGER_CST));
   5892  1.1  mrg 	    }
   5893  1.1  mrg 	}
   5894  1.1  mrg       STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
   5895  1.1  mrg       DUMP_VECT_SCOPE ("vectorizable_shift");
   5896  1.1  mrg       vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
   5897  1.1  mrg 			      scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
   5898  1.1  mrg       return true;
   5899  1.1  mrg     }
   5900  1.1  mrg 
   5901  1.1  mrg   /* Transform.  */
   5902  1.1  mrg 
   5903  1.1  mrg   if (dump_enabled_p ())
   5904  1.1  mrg     dump_printf_loc (MSG_NOTE, vect_location,
   5905  1.1  mrg                      "transform binary/unary operation.\n");
   5906  1.1  mrg 
   5907  1.1  mrg   if (incompatible_op1_vectype_p && !slp_node)
   5908  1.1  mrg     {
   5909  1.1  mrg       gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
   5910  1.1  mrg       op1 = fold_convert (TREE_TYPE (vectype), op1);
   5911  1.1  mrg       if (dt[1] != vect_constant_def)
   5912  1.1  mrg 	op1 = vect_init_vector (vinfo, stmt_info, op1,
   5913  1.1  mrg 				TREE_TYPE (vectype), NULL);
   5914  1.1  mrg     }
   5915  1.1  mrg 
   5916  1.1  mrg   /* Handle def.  */
   5917  1.1  mrg   vec_dest = vect_create_destination_var (scalar_dest, vectype);
   5918  1.1  mrg 
   5919  1.1  mrg   if (scalar_shift_arg && dt[1] != vect_internal_def)
   5920  1.1  mrg     {
   5921  1.1  mrg       /* Vector shl and shr insn patterns can be defined with scalar
   5922  1.1  mrg 	 operand 2 (shift operand).  In this case, use constant or loop
   5923  1.1  mrg 	 invariant op1 directly, without extending it to vector mode
   5924  1.1  mrg 	 first.  */
   5925  1.1  mrg       optab_op2_mode = insn_data[icode].operand[2].mode;
   5926  1.1  mrg       if (!VECTOR_MODE_P (optab_op2_mode))
   5927  1.1  mrg 	{
   5928  1.1  mrg 	  if (dump_enabled_p ())
   5929  1.1  mrg 	    dump_printf_loc (MSG_NOTE, vect_location,
   5930  1.1  mrg 			     "operand 1 using scalar mode.\n");
   5931  1.1  mrg 	  vec_oprnd1 = op1;
   5932  1.1  mrg 	  vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
   5933  1.1  mrg 	  vec_oprnds1.quick_push (vec_oprnd1);
   5934  1.1  mrg 	      /* Store vec_oprnd1 for every vector stmt to be created.
   5935  1.1  mrg 		 We check during the analysis that all the shift arguments
   5936  1.1  mrg 		 are the same.
   5937  1.1  mrg 		 TODO: Allow different constants for different vector
   5938  1.1  mrg 		 stmts generated for an SLP instance.  */
   5939  1.1  mrg 	  for (k = 0;
   5940  1.1  mrg 	       k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
   5941  1.1  mrg 	    vec_oprnds1.quick_push (vec_oprnd1);
   5942  1.1  mrg 	}
   5943  1.1  mrg     }
   5944  1.1  mrg   else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
   5945  1.1  mrg     {
   5946  1.1  mrg       if (was_scalar_shift_arg)
   5947  1.1  mrg 	{
   5948  1.1  mrg 	  /* If the argument was the same in all lanes create
   5949  1.1  mrg 	     the correctly typed vector shift amount directly.  */
   5950  1.1  mrg 	  op1 = fold_convert (TREE_TYPE (vectype), op1);
   5951  1.1  mrg 	  op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
   5952  1.1  mrg 				  !loop_vinfo ? gsi : NULL);
   5953  1.1  mrg 	  vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
   5954  1.1  mrg 					 !loop_vinfo ? gsi : NULL);
   5955  1.1  mrg 	  vec_oprnds1.create (slp_node->vec_stmts_size);
   5956  1.1  mrg 	  for (k = 0; k < slp_node->vec_stmts_size; k++)
   5957  1.1  mrg 	    vec_oprnds1.quick_push (vec_oprnd1);
   5958  1.1  mrg 	}
   5959  1.1  mrg       else if (dt[1] == vect_constant_def)
   5960  1.1  mrg 	/* The constant shift amount has been adjusted in place.  */
   5961  1.1  mrg 	;
   5962  1.1  mrg       else
   5963  1.1  mrg 	gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
   5964  1.1  mrg     }
   5965  1.1  mrg 
   5966  1.1  mrg   /* vec_oprnd1 is available if operand 1 should be of a scalar-type
   5967  1.1  mrg      (a special case for certain kind of vector shifts); otherwise,
   5968  1.1  mrg      operand 1 should be of a vector type (the usual case).  */
   5969  1.1  mrg   vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
   5970  1.1  mrg 		     op0, &vec_oprnds0,
   5971  1.1  mrg 		     vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
   5972  1.1  mrg 
   5973  1.1  mrg   /* Arguments are ready.  Create the new vector stmt.  */
   5974  1.1  mrg   FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
   5975  1.1  mrg     {
   5976  1.1  mrg       /* For internal defs where we need to use a scalar shift arg
   5977  1.1  mrg 	 extract the first lane.  */
   5978  1.1  mrg       if (scalar_shift_arg && dt[1] == vect_internal_def)
   5979  1.1  mrg 	{
   5980  1.1  mrg 	  vop1 = vec_oprnds1[0];
   5981  1.1  mrg 	  new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
   5982  1.1  mrg 	  gassign *new_stmt
   5983  1.1  mrg 	    = gimple_build_assign (new_temp,
   5984  1.1  mrg 				   build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
   5985  1.1  mrg 					   vop1,
   5986  1.1  mrg 					   TYPE_SIZE (TREE_TYPE (new_temp)),
   5987  1.1  mrg 					   bitsize_zero_node));
   5988  1.1  mrg 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   5989  1.1  mrg 	  vop1 = new_temp;
   5990  1.1  mrg 	}
   5991  1.1  mrg       else
   5992  1.1  mrg 	vop1 = vec_oprnds1[i];
   5993  1.1  mrg       gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
   5994  1.1  mrg       new_temp = make_ssa_name (vec_dest, new_stmt);
   5995  1.1  mrg       gimple_assign_set_lhs (new_stmt, new_temp);
   5996  1.1  mrg       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   5997  1.1  mrg       if (slp_node)
   5998  1.1  mrg 	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
   5999  1.1  mrg       else
   6000  1.1  mrg 	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
   6001  1.1  mrg     }
   6002  1.1  mrg 
   6003  1.1  mrg   if (!slp_node)
   6004  1.1  mrg     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
   6005  1.1  mrg 
   6006  1.1  mrg   vec_oprnds0.release ();
   6007  1.1  mrg   vec_oprnds1.release ();
   6008  1.1  mrg 
   6009  1.1  mrg   return true;
   6010  1.1  mrg }
   6011  1.1  mrg 
   6012  1.1  mrg 
   6013  1.1  mrg /* Function vectorizable_operation.
   6014  1.1  mrg 
   6015  1.1  mrg    Check if STMT_INFO performs a binary, unary or ternary operation that can
   6016  1.1  mrg    be vectorized.
   6017  1.1  mrg    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   6018  1.1  mrg    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   6019  1.1  mrg    Return true if STMT_INFO is vectorizable in this way.  */
   6020  1.1  mrg 
   6021  1.1  mrg static bool
   6022  1.1  mrg vectorizable_operation (vec_info *vinfo,
   6023  1.1  mrg 			stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   6024  1.1  mrg 			gimple **vec_stmt, slp_tree slp_node,
   6025  1.1  mrg 			stmt_vector_for_cost *cost_vec)
   6026  1.1  mrg {
   6027  1.1  mrg   tree vec_dest;
   6028  1.1  mrg   tree scalar_dest;
   6029  1.1  mrg   tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
   6030  1.1  mrg   tree vectype;
   6031  1.1  mrg   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   6032  1.1  mrg   enum tree_code code, orig_code;
   6033  1.1  mrg   machine_mode vec_mode;
   6034  1.1  mrg   tree new_temp;
   6035  1.1  mrg   int op_type;
   6036  1.1  mrg   optab optab;
   6037  1.1  mrg   bool target_support_p;
   6038  1.1  mrg   enum vect_def_type dt[3]
   6039  1.1  mrg     = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
   6040  1.1  mrg   int ndts = 3;
   6041  1.1  mrg   poly_uint64 nunits_in;
   6042  1.1  mrg   poly_uint64 nunits_out;
   6043  1.1  mrg   tree vectype_out;
   6044  1.1  mrg   int ncopies, vec_num;
   6045  1.1  mrg   int i;
   6046  1.1  mrg   vec<tree> vec_oprnds0 = vNULL;
   6047  1.1  mrg   vec<tree> vec_oprnds1 = vNULL;
   6048  1.1  mrg   vec<tree> vec_oprnds2 = vNULL;
   6049  1.1  mrg   tree vop0, vop1, vop2;
   6050  1.1  mrg   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
   6051  1.1  mrg 
   6052  1.1  mrg   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
   6053  1.1  mrg     return false;
   6054  1.1  mrg 
   6055  1.1  mrg   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
   6056  1.1  mrg       && ! vec_stmt)
   6057  1.1  mrg     return false;
   6058  1.1  mrg 
   6059  1.1  mrg   /* Is STMT a vectorizable binary/unary operation?   */
   6060  1.1  mrg   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
   6061  1.1  mrg   if (!stmt)
   6062  1.1  mrg     return false;
   6063  1.1  mrg 
   6064  1.1  mrg   /* Loads and stores are handled in vectorizable_{load,store}.  */
   6065  1.1  mrg   if (STMT_VINFO_DATA_REF (stmt_info))
   6066  1.1  mrg     return false;
   6067  1.1  mrg 
   6068  1.1  mrg   orig_code = code = gimple_assign_rhs_code (stmt);
   6069  1.1  mrg 
   6070  1.1  mrg   /* Shifts are handled in vectorizable_shift.  */
   6071  1.1  mrg   if (code == LSHIFT_EXPR
   6072  1.1  mrg       || code == RSHIFT_EXPR
   6073  1.1  mrg       || code == LROTATE_EXPR
   6074  1.1  mrg       || code == RROTATE_EXPR)
   6075  1.1  mrg    return false;
   6076  1.1  mrg 
   6077  1.1  mrg   /* Comparisons are handled in vectorizable_comparison.  */
   6078  1.1  mrg   if (TREE_CODE_CLASS (code) == tcc_comparison)
   6079  1.1  mrg     return false;
   6080  1.1  mrg 
   6081  1.1  mrg   /* Conditions are handled in vectorizable_condition.  */
   6082  1.1  mrg   if (code == COND_EXPR)
   6083  1.1  mrg     return false;
   6084  1.1  mrg 
   6085  1.1  mrg   /* For pointer addition and subtraction, we should use the normal
   6086  1.1  mrg      plus and minus for the vector operation.  */
   6087  1.1  mrg   if (code == POINTER_PLUS_EXPR)
   6088  1.1  mrg     code = PLUS_EXPR;
   6089  1.1  mrg   if (code == POINTER_DIFF_EXPR)
   6090  1.1  mrg     code = MINUS_EXPR;
   6091  1.1  mrg 
   6092  1.1  mrg   /* Support only unary or binary operations.  */
   6093  1.1  mrg   op_type = TREE_CODE_LENGTH (code);
   6094  1.1  mrg   if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
   6095  1.1  mrg     {
   6096  1.1  mrg       if (dump_enabled_p ())
   6097  1.1  mrg         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6098  1.1  mrg                          "num. args = %d (not unary/binary/ternary op).\n",
   6099  1.1  mrg                          op_type);
   6100  1.1  mrg       return false;
   6101  1.1  mrg     }
   6102  1.1  mrg 
   6103  1.1  mrg   scalar_dest = gimple_assign_lhs (stmt);
   6104  1.1  mrg   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
   6105  1.1  mrg 
   6106  1.1  mrg   /* Most operations cannot handle bit-precision types without extra
   6107  1.1  mrg      truncations.  */
   6108  1.1  mrg   bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
   6109  1.1  mrg   if (!mask_op_p
   6110  1.1  mrg       && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
   6111  1.1  mrg       /* Exception are bitwise binary operations.  */
   6112  1.1  mrg       && code != BIT_IOR_EXPR
   6113  1.1  mrg       && code != BIT_XOR_EXPR
   6114  1.1  mrg       && code != BIT_AND_EXPR)
   6115  1.1  mrg     {
   6116  1.1  mrg       if (dump_enabled_p ())
   6117  1.1  mrg         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6118  1.1  mrg                          "bit-precision arithmetic not supported.\n");
   6119  1.1  mrg       return false;
   6120  1.1  mrg     }
   6121  1.1  mrg 
   6122  1.1  mrg   slp_tree slp_op0;
   6123  1.1  mrg   if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
   6124  1.1  mrg 			   0, &op0, &slp_op0, &dt[0], &vectype))
   6125  1.1  mrg     {
   6126  1.1  mrg       if (dump_enabled_p ())
   6127  1.1  mrg         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6128  1.1  mrg                          "use not simple.\n");
   6129  1.1  mrg       return false;
   6130  1.1  mrg     }
   6131  1.1  mrg   bool is_invariant = (dt[0] == vect_external_def
   6132  1.1  mrg 		       || dt[0] == vect_constant_def);
   6133  1.1  mrg   /* If op0 is an external or constant def, infer the vector type
   6134  1.1  mrg      from the scalar type.  */
   6135  1.1  mrg   if (!vectype)
   6136  1.1  mrg     {
   6137  1.1  mrg       /* For boolean type we cannot determine vectype by
   6138  1.1  mrg 	 invariant value (don't know whether it is a vector
   6139  1.1  mrg 	 of booleans or vector of integers).  We use output
   6140  1.1  mrg 	 vectype because operations on boolean don't change
   6141  1.1  mrg 	 type.  */
   6142  1.1  mrg       if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
   6143  1.1  mrg 	{
   6144  1.1  mrg 	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
   6145  1.1  mrg 	    {
   6146  1.1  mrg 	      if (dump_enabled_p ())
   6147  1.1  mrg 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6148  1.1  mrg 				 "not supported operation on bool value.\n");
   6149  1.1  mrg 	      return false;
   6150  1.1  mrg 	    }
   6151  1.1  mrg 	  vectype = vectype_out;
   6152  1.1  mrg 	}
   6153  1.1  mrg       else
   6154  1.1  mrg 	vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
   6155  1.1  mrg 					       slp_node);
   6156  1.1  mrg     }
   6157  1.1  mrg   if (vec_stmt)
   6158  1.1  mrg     gcc_assert (vectype);
   6159  1.1  mrg   if (!vectype)
   6160  1.1  mrg     {
   6161  1.1  mrg       if (dump_enabled_p ())
   6162  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6163  1.1  mrg 			 "no vectype for scalar type %T\n",
   6164  1.1  mrg 			 TREE_TYPE (op0));
   6165  1.1  mrg 
   6166  1.1  mrg       return false;
   6167  1.1  mrg     }
   6168  1.1  mrg 
   6169  1.1  mrg   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
   6170  1.1  mrg   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
   6171  1.1  mrg   if (maybe_ne (nunits_out, nunits_in))
   6172  1.1  mrg     return false;
   6173  1.1  mrg 
   6174  1.1  mrg   tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
   6175  1.1  mrg   slp_tree slp_op1 = NULL, slp_op2 = NULL;
   6176  1.1  mrg   if (op_type == binary_op || op_type == ternary_op)
   6177  1.1  mrg     {
   6178  1.1  mrg       if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
   6179  1.1  mrg 			       1, &op1, &slp_op1, &dt[1], &vectype2))
   6180  1.1  mrg 	{
   6181  1.1  mrg 	  if (dump_enabled_p ())
   6182  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6183  1.1  mrg                              "use not simple.\n");
   6184  1.1  mrg 	  return false;
   6185  1.1  mrg 	}
   6186  1.1  mrg       is_invariant &= (dt[1] == vect_external_def
   6187  1.1  mrg 		       || dt[1] == vect_constant_def);
   6188  1.1  mrg       if (vectype2
   6189  1.1  mrg 	  && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2)))
   6190  1.1  mrg 	return false;
   6191  1.1  mrg     }
   6192  1.1  mrg   if (op_type == ternary_op)
   6193  1.1  mrg     {
   6194  1.1  mrg       if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
   6195  1.1  mrg 			       2, &op2, &slp_op2, &dt[2], &vectype3))
   6196  1.1  mrg 	{
   6197  1.1  mrg 	  if (dump_enabled_p ())
   6198  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6199  1.1  mrg                              "use not simple.\n");
   6200  1.1  mrg 	  return false;
   6201  1.1  mrg 	}
   6202  1.1  mrg       is_invariant &= (dt[2] == vect_external_def
   6203  1.1  mrg 		       || dt[2] == vect_constant_def);
   6204  1.1  mrg       if (vectype3
   6205  1.1  mrg 	  && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3)))
   6206  1.1  mrg 	return false;
   6207  1.1  mrg     }
   6208  1.1  mrg 
   6209  1.1  mrg   /* Multiple types in SLP are handled by creating the appropriate number of
   6210  1.1  mrg      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
   6211  1.1  mrg      case of SLP.  */
   6212  1.1  mrg   if (slp_node)
   6213  1.1  mrg     {
   6214  1.1  mrg       ncopies = 1;
   6215  1.1  mrg       vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
   6216  1.1  mrg     }
   6217  1.1  mrg   else
   6218  1.1  mrg     {
   6219  1.1  mrg       ncopies = vect_get_num_copies (loop_vinfo, vectype);
   6220  1.1  mrg       vec_num = 1;
   6221  1.1  mrg     }
   6222  1.1  mrg 
   6223  1.1  mrg   gcc_assert (ncopies >= 1);
   6224  1.1  mrg 
   6225  1.1  mrg   /* Reject attempts to combine mask types with nonmask types, e.g. if
   6226  1.1  mrg      we have an AND between a (nonmask) boolean loaded from memory and
   6227  1.1  mrg      a (mask) boolean result of a comparison.
   6228  1.1  mrg 
   6229  1.1  mrg      TODO: We could easily fix these cases up using pattern statements.  */
   6230  1.1  mrg   if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
   6231  1.1  mrg       || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
   6232  1.1  mrg       || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
   6233  1.1  mrg     {
   6234  1.1  mrg       if (dump_enabled_p ())
   6235  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6236  1.1  mrg 			 "mixed mask and nonmask vector types\n");
   6237  1.1  mrg       return false;
   6238  1.1  mrg     }
   6239  1.1  mrg 
   6240  1.1  mrg   /* Supportable by target?  */
   6241  1.1  mrg 
   6242  1.1  mrg   vec_mode = TYPE_MODE (vectype);
   6243  1.1  mrg   if (code == MULT_HIGHPART_EXPR)
   6244  1.1  mrg     target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
   6245  1.1  mrg   else
   6246  1.1  mrg     {
   6247  1.1  mrg       optab = optab_for_tree_code (code, vectype, optab_default);
   6248  1.1  mrg       if (!optab)
   6249  1.1  mrg 	{
   6250  1.1  mrg           if (dump_enabled_p ())
   6251  1.1  mrg             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6252  1.1  mrg                              "no optab.\n");
   6253  1.1  mrg 	  return false;
   6254  1.1  mrg 	}
   6255  1.1  mrg       target_support_p = (optab_handler (optab, vec_mode)
   6256  1.1  mrg 			  != CODE_FOR_nothing);
   6257  1.1  mrg     }
   6258  1.1  mrg 
   6259  1.1  mrg   bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
   6260  1.1  mrg   if (!target_support_p)
   6261  1.1  mrg     {
   6262  1.1  mrg       if (dump_enabled_p ())
   6263  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6264  1.1  mrg                          "op not supported by target.\n");
   6265  1.1  mrg       /* Check only during analysis.  */
   6266  1.1  mrg       if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
   6267  1.1  mrg 	  || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
   6268  1.1  mrg         return false;
   6269  1.1  mrg       if (dump_enabled_p ())
   6270  1.1  mrg 	dump_printf_loc (MSG_NOTE, vect_location,
   6271  1.1  mrg                          "proceeding using word mode.\n");
   6272  1.1  mrg       using_emulated_vectors_p = true;
   6273  1.1  mrg     }
   6274  1.1  mrg 
   6275  1.1  mrg   if (using_emulated_vectors_p
   6276  1.1  mrg       && !vect_can_vectorize_without_simd_p (code))
   6277  1.1  mrg     {
   6278  1.1  mrg       if (dump_enabled_p ())
   6279  1.1  mrg 	dump_printf (MSG_NOTE, "using word mode not possible.\n");
   6280  1.1  mrg       return false;
   6281  1.1  mrg     }
   6282  1.1  mrg 
   6283  1.1  mrg   /* ???  We should instead expand the operations here, instead of
   6284  1.1  mrg      relying on vector lowering which has this hard cap on the number
   6285  1.1  mrg      of vector elements below it performs elementwise operations.  */
   6286  1.1  mrg   if (using_emulated_vectors_p
   6287  1.1  mrg       && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
   6288  1.1  mrg       && ((BITS_PER_WORD / vector_element_bits (vectype)) < 4
   6289  1.1  mrg 	  || maybe_lt (nunits_out, 4U)))
   6290  1.1  mrg     {
   6291  1.1  mrg       if (dump_enabled_p ())
   6292  1.1  mrg 	dump_printf (MSG_NOTE, "not using word mode for +- and less than "
   6293  1.1  mrg 		     "four vector elements\n");
   6294  1.1  mrg       return false;
   6295  1.1  mrg     }
   6296  1.1  mrg 
   6297  1.1  mrg   int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
   6298  1.1  mrg   vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
   6299  1.1  mrg   internal_fn cond_fn = get_conditional_internal_fn (code);
   6300  1.1  mrg 
   6301  1.1  mrg   /* If operating on inactive elements could generate spurious traps,
   6302  1.1  mrg      we need to restrict the operation to active lanes.  Note that this
   6303  1.1  mrg      specifically doesn't apply to unhoisted invariants, since they
   6304  1.1  mrg      operate on the same value for every lane.
   6305  1.1  mrg 
   6306  1.1  mrg      Similarly, if this operation is part of a reduction, a fully-masked
   6307  1.1  mrg      loop should only change the active lanes of the reduction chain,
   6308  1.1  mrg      keeping the inactive lanes as-is.  */
   6309  1.1  mrg   bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
   6310  1.1  mrg 			    || reduc_idx >= 0);
   6311  1.1  mrg 
   6312  1.1  mrg   if (!vec_stmt) /* transformation not required.  */
   6313  1.1  mrg     {
   6314  1.1  mrg       if (loop_vinfo
   6315  1.1  mrg 	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
   6316  1.1  mrg 	  && mask_out_inactive)
   6317  1.1  mrg 	{
   6318  1.1  mrg 	  if (cond_fn == IFN_LAST
   6319  1.1  mrg 	      || !direct_internal_fn_supported_p (cond_fn, vectype,
   6320  1.1  mrg 						  OPTIMIZE_FOR_SPEED))
   6321  1.1  mrg 	    {
   6322  1.1  mrg 	      if (dump_enabled_p ())
   6323  1.1  mrg 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6324  1.1  mrg 				 "can't use a fully-masked loop because no"
   6325  1.1  mrg 				 " conditional operation is available.\n");
   6326  1.1  mrg 	      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
   6327  1.1  mrg 	    }
   6328  1.1  mrg 	  else
   6329  1.1  mrg 	    vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
   6330  1.1  mrg 				   vectype, NULL);
   6331  1.1  mrg 	}
   6332  1.1  mrg 
   6333  1.1  mrg       /* Put types on constant and invariant SLP children.  */
   6334  1.1  mrg       if (slp_node
   6335  1.1  mrg 	  && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
   6336  1.1  mrg 	      || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
   6337  1.1  mrg 	      || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
   6338  1.1  mrg 	{
   6339  1.1  mrg 	  if (dump_enabled_p ())
   6340  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6341  1.1  mrg 			     "incompatible vector types for invariants\n");
   6342  1.1  mrg 	  return false;
   6343  1.1  mrg 	}
   6344  1.1  mrg 
   6345  1.1  mrg       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
   6346  1.1  mrg       DUMP_VECT_SCOPE ("vectorizable_operation");
   6347  1.1  mrg       vect_model_simple_cost (vinfo, stmt_info,
   6348  1.1  mrg 			      ncopies, dt, ndts, slp_node, cost_vec);
   6349  1.1  mrg       if (using_emulated_vectors_p)
   6350  1.1  mrg 	{
   6351  1.1  mrg 	  /* The above vect_model_simple_cost call handles constants
   6352  1.1  mrg 	     in the prologue and (mis-)costs one of the stmts as
   6353  1.1  mrg 	     vector stmt.  See tree-vect-generic.cc:do_plus_minus/do_negate
   6354  1.1  mrg 	     for the actual lowering that will be applied.  */
   6355  1.1  mrg 	  unsigned n
   6356  1.1  mrg 	    = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
   6357  1.1  mrg 	  switch (code)
   6358  1.1  mrg 	    {
   6359  1.1  mrg 	    case PLUS_EXPR:
   6360  1.1  mrg 	      n *= 5;
   6361  1.1  mrg 	      break;
   6362  1.1  mrg 	    case MINUS_EXPR:
   6363  1.1  mrg 	      n *= 6;
   6364  1.1  mrg 	      break;
   6365  1.1  mrg 	    case NEGATE_EXPR:
   6366  1.1  mrg 	      n *= 4;
   6367  1.1  mrg 	      break;
   6368  1.1  mrg 	    default:;
   6369  1.1  mrg 	    }
   6370  1.1  mrg 	  record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info, 0, vect_body);
   6371  1.1  mrg 	}
   6372  1.1  mrg       return true;
   6373  1.1  mrg     }
   6374  1.1  mrg 
   6375  1.1  mrg   /* Transform.  */
   6376  1.1  mrg 
   6377  1.1  mrg   if (dump_enabled_p ())
   6378  1.1  mrg     dump_printf_loc (MSG_NOTE, vect_location,
   6379  1.1  mrg                      "transform binary/unary operation.\n");
   6380  1.1  mrg 
   6381  1.1  mrg   bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
   6382  1.1  mrg 
   6383  1.1  mrg   /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
   6384  1.1  mrg      vectors with unsigned elements, but the result is signed.  So, we
   6385  1.1  mrg      need to compute the MINUS_EXPR into vectype temporary and
   6386  1.1  mrg      VIEW_CONVERT_EXPR it into the final vectype_out result.  */
   6387  1.1  mrg   tree vec_cvt_dest = NULL_TREE;
   6388  1.1  mrg   if (orig_code == POINTER_DIFF_EXPR)
   6389  1.1  mrg     {
   6390  1.1  mrg       vec_dest = vect_create_destination_var (scalar_dest, vectype);
   6391  1.1  mrg       vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
   6392  1.1  mrg     }
   6393  1.1  mrg   /* Handle def.  */
   6394  1.1  mrg   else
   6395  1.1  mrg     vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
   6396  1.1  mrg 
   6397  1.1  mrg   /* In case the vectorization factor (VF) is bigger than the number
   6398  1.1  mrg      of elements that we can fit in a vectype (nunits), we have to generate
   6399  1.1  mrg      more than one vector stmt - i.e - we need to "unroll" the
   6400  1.1  mrg      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
   6401  1.1  mrg      from one copy of the vector stmt to the next, in the field
   6402  1.1  mrg      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
   6403  1.1  mrg      stages to find the correct vector defs to be used when vectorizing
   6404  1.1  mrg      stmts that use the defs of the current stmt.  The example below
   6405  1.1  mrg      illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
   6406  1.1  mrg      we need to create 4 vectorized stmts):
   6407  1.1  mrg 
   6408  1.1  mrg      before vectorization:
   6409  1.1  mrg                                 RELATED_STMT    VEC_STMT
   6410  1.1  mrg         S1:     x = memref      -               -
   6411  1.1  mrg         S2:     z = x + 1       -               -
   6412  1.1  mrg 
   6413  1.1  mrg      step 1: vectorize stmt S1 (done in vectorizable_load. See more details
   6414  1.1  mrg              there):
   6415  1.1  mrg                                 RELATED_STMT    VEC_STMT
   6416  1.1  mrg         VS1_0:  vx0 = memref0   VS1_1           -
   6417  1.1  mrg         VS1_1:  vx1 = memref1   VS1_2           -
   6418  1.1  mrg         VS1_2:  vx2 = memref2   VS1_3           -
   6419  1.1  mrg         VS1_3:  vx3 = memref3   -               -
   6420  1.1  mrg         S1:     x = load        -               VS1_0
   6421  1.1  mrg         S2:     z = x + 1       -               -
   6422  1.1  mrg 
   6423  1.1  mrg      step2: vectorize stmt S2 (done here):
   6424  1.1  mrg         To vectorize stmt S2 we first need to find the relevant vector
   6425  1.1  mrg         def for the first operand 'x'.  This is, as usual, obtained from
   6426  1.1  mrg         the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
   6427  1.1  mrg         that defines 'x' (S1).  This way we find the stmt VS1_0, and the
   6428  1.1  mrg         relevant vector def 'vx0'.  Having found 'vx0' we can generate
   6429  1.1  mrg         the vector stmt VS2_0, and as usual, record it in the
   6430  1.1  mrg         STMT_VINFO_VEC_STMT of stmt S2.
   6431  1.1  mrg         When creating the second copy (VS2_1), we obtain the relevant vector
   6432  1.1  mrg         def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
   6433  1.1  mrg         stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
   6434  1.1  mrg         vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
   6435  1.1  mrg         pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
   6436  1.1  mrg         Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
   6437  1.1  mrg         chain of stmts and pointers:
   6438  1.1  mrg                                 RELATED_STMT    VEC_STMT
   6439  1.1  mrg         VS1_0:  vx0 = memref0   VS1_1           -
   6440  1.1  mrg         VS1_1:  vx1 = memref1   VS1_2           -
   6441  1.1  mrg         VS1_2:  vx2 = memref2   VS1_3           -
   6442  1.1  mrg         VS1_3:  vx3 = memref3   -               -
   6443  1.1  mrg         S1:     x = load        -               VS1_0
   6444  1.1  mrg         VS2_0:  vz0 = vx0 + v1  VS2_1           -
   6445  1.1  mrg         VS2_1:  vz1 = vx1 + v1  VS2_2           -
   6446  1.1  mrg         VS2_2:  vz2 = vx2 + v1  VS2_3           -
   6447  1.1  mrg         VS2_3:  vz3 = vx3 + v1  -               -
   6448  1.1  mrg         S2:     z = x + 1       -               VS2_0  */
   6449  1.1  mrg 
   6450  1.1  mrg   vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
   6451  1.1  mrg 		     op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
   6452  1.1  mrg   /* Arguments are ready.  Create the new vector stmt.  */
   6453  1.1  mrg   FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
   6454  1.1  mrg     {
   6455  1.1  mrg       gimple *new_stmt = NULL;
   6456  1.1  mrg       vop1 = ((op_type == binary_op || op_type == ternary_op)
   6457  1.1  mrg 	      ? vec_oprnds1[i] : NULL_TREE);
   6458  1.1  mrg       vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
   6459  1.1  mrg       if (masked_loop_p && mask_out_inactive)
   6460  1.1  mrg 	{
   6461  1.1  mrg 	  tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
   6462  1.1  mrg 					  vectype, i);
   6463  1.1  mrg 	  auto_vec<tree> vops (5);
   6464  1.1  mrg 	  vops.quick_push (mask);
   6465  1.1  mrg 	  vops.quick_push (vop0);
   6466  1.1  mrg 	  if (vop1)
   6467  1.1  mrg 	    vops.quick_push (vop1);
   6468  1.1  mrg 	  if (vop2)
   6469  1.1  mrg 	    vops.quick_push (vop2);
   6470  1.1  mrg 	  if (reduc_idx >= 0)
   6471  1.1  mrg 	    {
   6472  1.1  mrg 	      /* Perform the operation on active elements only and take
   6473  1.1  mrg 		 inactive elements from the reduction chain input.  */
   6474  1.1  mrg 	      gcc_assert (!vop2);
   6475  1.1  mrg 	      vops.quick_push (reduc_idx == 1 ? vop1 : vop0);
   6476  1.1  mrg 	    }
   6477  1.1  mrg 	  else
   6478  1.1  mrg 	    {
   6479  1.1  mrg 	      auto else_value = targetm.preferred_else_value
   6480  1.1  mrg 		(cond_fn, vectype, vops.length () - 1, &vops[1]);
   6481  1.1  mrg 	      vops.quick_push (else_value);
   6482  1.1  mrg 	    }
   6483  1.1  mrg 	  gcall *call = gimple_build_call_internal_vec (cond_fn, vops);
   6484  1.1  mrg 	  new_temp = make_ssa_name (vec_dest, call);
   6485  1.1  mrg 	  gimple_call_set_lhs (call, new_temp);
   6486  1.1  mrg 	  gimple_call_set_nothrow (call, true);
   6487  1.1  mrg 	  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
   6488  1.1  mrg 	  new_stmt = call;
   6489  1.1  mrg 	}
   6490  1.1  mrg       else
   6491  1.1  mrg 	{
   6492  1.1  mrg 	  tree mask = NULL_TREE;
   6493  1.1  mrg 	  /* When combining two masks check if either of them is elsewhere
   6494  1.1  mrg 	     combined with a loop mask, if that's the case we can mark that the
   6495  1.1  mrg 	     new combined mask doesn't need to be combined with a loop mask.  */
   6496  1.1  mrg 	  if (masked_loop_p
   6497  1.1  mrg 	      && code == BIT_AND_EXPR
   6498  1.1  mrg 	      && VECTOR_BOOLEAN_TYPE_P (vectype))
   6499  1.1  mrg 	    {
   6500  1.1  mrg 	      if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
   6501  1.1  mrg 								 ncopies}))
   6502  1.1  mrg 		{
   6503  1.1  mrg 		  mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
   6504  1.1  mrg 					     vectype, i);
   6505  1.1  mrg 
   6506  1.1  mrg 		  vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
   6507  1.1  mrg 					   vop0, gsi);
   6508  1.1  mrg 		}
   6509  1.1  mrg 
   6510  1.1  mrg 	      if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
   6511  1.1  mrg 								 ncopies }))
   6512  1.1  mrg 		{
   6513  1.1  mrg 		  mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
   6514  1.1  mrg 					     vectype, i);
   6515  1.1  mrg 
   6516  1.1  mrg 		  vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
   6517  1.1  mrg 					   vop1, gsi);
   6518  1.1  mrg 		}
   6519  1.1  mrg 	    }
   6520  1.1  mrg 
   6521  1.1  mrg 	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
   6522  1.1  mrg 	  new_temp = make_ssa_name (vec_dest, new_stmt);
   6523  1.1  mrg 	  gimple_assign_set_lhs (new_stmt, new_temp);
   6524  1.1  mrg 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   6525  1.1  mrg 	  if (using_emulated_vectors_p)
   6526  1.1  mrg 	    suppress_warning (new_stmt, OPT_Wvector_operation_performance);
   6527  1.1  mrg 
   6528  1.1  mrg 	  /* Enter the combined value into the vector cond hash so we don't
   6529  1.1  mrg 	     AND it with a loop mask again.  */
   6530  1.1  mrg 	  if (mask)
   6531  1.1  mrg 	    loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
   6532  1.1  mrg 
   6533  1.1  mrg 	  if (vec_cvt_dest)
   6534  1.1  mrg 	    {
   6535  1.1  mrg 	      new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
   6536  1.1  mrg 	      new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
   6537  1.1  mrg 					      new_temp);
   6538  1.1  mrg 	      new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
   6539  1.1  mrg 	      gimple_assign_set_lhs (new_stmt, new_temp);
   6540  1.1  mrg 	      vect_finish_stmt_generation (vinfo, stmt_info,
   6541  1.1  mrg 					   new_stmt, gsi);
   6542  1.1  mrg 	    }
   6543  1.1  mrg 	}
   6544  1.1  mrg       if (slp_node)
   6545  1.1  mrg 	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
   6546  1.1  mrg       else
   6547  1.1  mrg 	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
   6548  1.1  mrg     }
   6549  1.1  mrg 
   6550  1.1  mrg   if (!slp_node)
   6551  1.1  mrg     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
   6552  1.1  mrg 
   6553  1.1  mrg   vec_oprnds0.release ();
   6554  1.1  mrg   vec_oprnds1.release ();
   6555  1.1  mrg   vec_oprnds2.release ();
   6556  1.1  mrg 
   6557  1.1  mrg   return true;
   6558  1.1  mrg }
   6559  1.1  mrg 
   6560  1.1  mrg /* A helper function to ensure data reference DR_INFO's base alignment.  */
   6561  1.1  mrg 
   6562  1.1  mrg static void
   6563  1.1  mrg ensure_base_align (dr_vec_info *dr_info)
   6564  1.1  mrg {
   6565  1.1  mrg   /* Alignment is only analyzed for the first element of a DR group,
   6566  1.1  mrg      use that to look at base alignment we need to enforce.  */
   6567  1.1  mrg   if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
   6568  1.1  mrg     dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
   6569  1.1  mrg 
   6570  1.1  mrg   gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
   6571  1.1  mrg 
   6572  1.1  mrg   if (dr_info->base_misaligned)
   6573  1.1  mrg     {
   6574  1.1  mrg       tree base_decl = dr_info->base_decl;
   6575  1.1  mrg 
   6576  1.1  mrg       // We should only be able to increase the alignment of a base object if
   6577  1.1  mrg       // we know what its new alignment should be at compile time.
   6578  1.1  mrg       unsigned HOST_WIDE_INT align_base_to =
   6579  1.1  mrg 	DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
   6580  1.1  mrg 
   6581  1.1  mrg       if (decl_in_symtab_p (base_decl))
   6582  1.1  mrg 	symtab_node::get (base_decl)->increase_alignment (align_base_to);
   6583  1.1  mrg       else if (DECL_ALIGN (base_decl) < align_base_to)
   6584  1.1  mrg 	{
   6585  1.1  mrg 	  SET_DECL_ALIGN (base_decl, align_base_to);
   6586  1.1  mrg           DECL_USER_ALIGN (base_decl) = 1;
   6587  1.1  mrg 	}
   6588  1.1  mrg       dr_info->base_misaligned = false;
   6589  1.1  mrg     }
   6590  1.1  mrg }
   6591  1.1  mrg 
   6592  1.1  mrg 
   6593  1.1  mrg /* Function get_group_alias_ptr_type.
   6594  1.1  mrg 
   6595  1.1  mrg    Return the alias type for the group starting at FIRST_STMT_INFO.  */
   6596  1.1  mrg 
   6597  1.1  mrg static tree
   6598  1.1  mrg get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
   6599  1.1  mrg {
   6600  1.1  mrg   struct data_reference *first_dr, *next_dr;
   6601  1.1  mrg 
   6602  1.1  mrg   first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
   6603  1.1  mrg   stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
   6604  1.1  mrg   while (next_stmt_info)
   6605  1.1  mrg     {
   6606  1.1  mrg       next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
   6607  1.1  mrg       if (get_alias_set (DR_REF (first_dr))
   6608  1.1  mrg 	  != get_alias_set (DR_REF (next_dr)))
   6609  1.1  mrg 	{
   6610  1.1  mrg 	  if (dump_enabled_p ())
   6611  1.1  mrg 	    dump_printf_loc (MSG_NOTE, vect_location,
   6612  1.1  mrg 			     "conflicting alias set types.\n");
   6613  1.1  mrg 	  return ptr_type_node;
   6614  1.1  mrg 	}
   6615  1.1  mrg       next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
   6616  1.1  mrg     }
   6617  1.1  mrg   return reference_alias_ptr_type (DR_REF (first_dr));
   6618  1.1  mrg }
   6619  1.1  mrg 
   6620  1.1  mrg 
   6621  1.1  mrg /* Function scan_operand_equal_p.
   6622  1.1  mrg 
   6623  1.1  mrg    Helper function for check_scan_store.  Compare two references
   6624  1.1  mrg    with .GOMP_SIMD_LANE bases.  */
   6625  1.1  mrg 
static bool
scan_operand_equal_p (tree ref1, tree ref2)
{
  tree ref[2] = { ref1, ref2 };
  poly_int64 bitsize[2], bitpos[2];
  tree offset[2], base[2];
  /* Decompose both references into base, offset and bit position.  */
  for (int i = 0; i < 2; ++i)
    {
      machine_mode mode;
      int unsignedp, reversep, volatilep = 0;
      base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
      				     &offset[i], &mode, &unsignedp,
      				     &reversep, &volatilep);
      /* Reverse storage order, volatile accesses and non-zero bit
	 positions are not handled.  */
      if (reversep || volatilep || maybe_ne (bitpos[i], 0))
	return false;
      /* Look through a MEM_REF whose address is defined by
	   ptr = &base p+ lane;
	 (the shape produced for .GOMP_SIMD_LANE based accesses) and
	 canonicalize it to base[i] == base, offset[i] == lane so that
	 two such references become directly comparable.  */
      if (TREE_CODE (base[i]) == MEM_REF
	  && offset[i] == NULL_TREE
	  && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
	  if (is_gimple_assign (def_stmt)
	      && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
	      && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
	      && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
	    {
	      /* Only a zero MEM_REF offset is supported.  */
	      if (maybe_ne (mem_ref_offset (base[i]), 0))
		return false;
	      base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
	      offset[i] = gimple_assign_rhs2 (def_stmt);
	    }
	}
    }

  if (!operand_equal_p (base[0], base[1], 0))
    return false;
  if (maybe_ne (bitsize[0], bitsize[1]))
    return false;
  if (offset[0] != offset[1])
    {
      /* If only one reference has an offset, they can't match.  */
      if (!offset[0] || !offset[1])
	return false;
      if (!operand_equal_p (offset[0], offset[1], 0))
	{
	  /* The offsets aren't textually equal; try to decompose each
	     into inner_offset * step, strip non-narrowing conversions
	     from the inner offset, and compare those parts instead.  */
	  tree step[2];
	  for (int i = 0; i < 2; ++i)
	    {
	      step[i] = integer_one_node;
	      /* Recognize offset = inner * INTEGER_CST either as an SSA
		 definition or as a MULT_EXPR tree.  */
	      if (TREE_CODE (offset[i]) == SSA_NAME)
		{
		  gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
		  if (is_gimple_assign (def_stmt)
		      && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
		      && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
			  == INTEGER_CST))
		    {
		      step[i] = gimple_assign_rhs2 (def_stmt);
		      offset[i] = gimple_assign_rhs1 (def_stmt);
		    }
		}
	      else if (TREE_CODE (offset[i]) == MULT_EXPR)
		{
		  step[i] = TREE_OPERAND (offset[i], 1);
		  offset[i] = TREE_OPERAND (offset[i], 0);
		}
	      /* Strip an integral conversion that does not narrow, i.e.
		 one that cannot change the compared value.  */
	      tree rhs1 = NULL_TREE;
	      if (TREE_CODE (offset[i]) == SSA_NAME)
		{
		  gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
		  if (gimple_assign_cast_p (def_stmt))
		    rhs1 = gimple_assign_rhs1 (def_stmt);
		}
	      else if (CONVERT_EXPR_P (offset[i]))
		rhs1 = TREE_OPERAND (offset[i], 0);
	      if (rhs1
		  && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
		  && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
		  && (TYPE_PRECISION (TREE_TYPE (offset[i]))
		      >= TYPE_PRECISION (TREE_TYPE (rhs1))))
		offset[i] = rhs1;
	    }
	  if (!operand_equal_p (offset[0], offset[1], 0)
	      || !operand_equal_p (step[0], step[1], 0))
	    return false;
	}
    }
  return true;
}
   6713  1.1  mrg 
   6714  1.1  mrg 
/* Kind of operation to perform at one step of a vectorized scan store;
   computed per step by scan_store_can_perm_p below.  */
enum scan_store_kind {
  /* Normal permutation.  */
  scan_store_kind_perm,

  /* Whole vector left shift permutation with zero init.  */
  scan_store_kind_lshift_zero,

  /* Whole vector left shift permutation and VEC_COND_EXPR.  */
  scan_store_kind_lshift_cond
};
   6725  1.1  mrg 
/* Function scan_store_can_perm_p.

   Verify if we can perform the needed permutations or whole vector shifts.
   Return -1 on failure, otherwise exact log2 of vectype's nunits.
   USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
   to do at each step.  */
   6732  1.1  mrg 
static int
scan_store_can_perm_p (tree vectype, tree init,
		       vec<enum scan_store_kind> *use_whole_vector = NULL)
{
  enum machine_mode vec_mode = TYPE_MODE (vectype);
  unsigned HOST_WIDE_INT nunits;
  /* Only constant, power-of-two element counts are handled.  */
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
    return -1;
  int units_log2 = exact_log2 (nunits);
  if (units_log2 <= 0)
    return -1;

  int i;
  enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
  /* Check the units_log2 scan steps plus one extra iteration
     (i == units_log2) for the final broadcast of the last element.  */
  for (i = 0; i <= units_log2; ++i)
    {
      unsigned HOST_WIDE_INT j, k;
      enum scan_store_kind kind = scan_store_kind_perm;
      vec_perm_builder sel (nunits, nunits, 1);
      sel.quick_grow (nunits);
      if (i == units_log2)
	{
	  /* Single-input permutation splatting the last element.  */
	  for (j = 0; j < nunits; ++j)
	    sel[j] = nunits - 1;
	}
      else
	{
	  /* Two-input permutation { 0, ..., 2^i - 1, nunits, nunits + 1,
	     ... }: keep the first 2^i elements from the first input and
	     take the rest from the start of the second input.  */
	  for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
	    sel[j] = j;
	  for (k = 0; j < nunits; ++j, ++k)
	    sel[j] = nunits + k;
	}
      vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
      if (!can_vec_perm_const_p (vec_mode, indices))
	{
	  /* The final broadcast has no fallback.  */
	  if (i == units_log2)
	    return -1;

	  if (whole_vector_shift_kind == scan_store_kind_perm)
	    {
	      /* Fall back to a whole vector left shift, if the target
		 supports one for this mode.  */
	      if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
		return -1;
	      whole_vector_shift_kind = scan_store_kind_lshift_zero;
	      /* Whole vector shifts shift in zeros, so if init is all zero
		 constant, there is no need to do anything further.  */
	      if ((TREE_CODE (init) != INTEGER_CST
		   && TREE_CODE (init) != REAL_CST)
		  || !initializer_zerop (init))
		{
		  /* Otherwise a VEC_COND_EXPR is additionally needed to
		     blend in the init value.  */
		  tree masktype = truth_type_for (vectype);
		  if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
		    return -1;
		  whole_vector_shift_kind = scan_store_kind_lshift_cond;
		}
	    }
	  kind = whole_vector_shift_kind;
	}
      if (use_whole_vector)
	{
	  /* Populate USE_WHOLE_VECTOR lazily: it stays empty while all
	     steps so far were plain permutations; on the first non-perm
	     step, backfill the earlier steps with scan_store_kind_perm
	     and push kinds for every step from then on.  */
	  if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
	    use_whole_vector->safe_grow_cleared (i, true);
	  if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
	    use_whole_vector->safe_push (kind);
	}
    }

  return units_log2;
}
   6801  1.1  mrg 
   6802  1.1  mrg 
   6803  1.1  mrg /* Function check_scan_store.
   6804  1.1  mrg 
   6805  1.1  mrg    Check magic stores for #pragma omp scan {in,ex}clusive reductions.  */
   6806  1.1  mrg 
   6807  1.1  mrg static bool
   6808  1.1  mrg check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
   6809  1.1  mrg 		  enum vect_def_type rhs_dt, bool slp, tree mask,
   6810  1.1  mrg 		  vect_memory_access_type memory_access_type)
   6811  1.1  mrg {
   6812  1.1  mrg   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   6813  1.1  mrg   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
   6814  1.1  mrg   tree ref_type;
   6815  1.1  mrg 
   6816  1.1  mrg   gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
   6817  1.1  mrg   if (slp
   6818  1.1  mrg       || mask
   6819  1.1  mrg       || memory_access_type != VMAT_CONTIGUOUS
   6820  1.1  mrg       || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
   6821  1.1  mrg       || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
   6822  1.1  mrg       || loop_vinfo == NULL
   6823  1.1  mrg       || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
   6824  1.1  mrg       || STMT_VINFO_GROUPED_ACCESS (stmt_info)
   6825  1.1  mrg       || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
   6826  1.1  mrg       || !integer_zerop (DR_INIT (dr_info->dr))
   6827  1.1  mrg       || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
   6828  1.1  mrg       || !alias_sets_conflict_p (get_alias_set (vectype),
   6829  1.1  mrg 				 get_alias_set (TREE_TYPE (ref_type))))
   6830  1.1  mrg     {
   6831  1.1  mrg       if (dump_enabled_p ())
   6832  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6833  1.1  mrg 			 "unsupported OpenMP scan store.\n");
   6834  1.1  mrg       return false;
   6835  1.1  mrg     }
   6836  1.1  mrg 
   6837  1.1  mrg   /* We need to pattern match code built by OpenMP lowering and simplified
   6838  1.1  mrg      by following optimizations into something we can handle.
   6839  1.1  mrg      #pragma omp simd reduction(inscan,+:r)
   6840  1.1  mrg      for (...)
   6841  1.1  mrg        {
   6842  1.1  mrg 	 r += something ();
   6843  1.1  mrg 	 #pragma omp scan inclusive (r)
   6844  1.1  mrg 	 use (r);
   6845  1.1  mrg        }
   6846  1.1  mrg      shall have body with:
   6847  1.1  mrg        // Initialization for input phase, store the reduction initializer:
   6848  1.1  mrg        _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
   6849  1.1  mrg        _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
   6850  1.1  mrg        D.2042[_21] = 0;
   6851  1.1  mrg        // Actual input phase:
   6852  1.1  mrg        ...
   6853  1.1  mrg        r.0_5 = D.2042[_20];
   6854  1.1  mrg        _6 = _4 + r.0_5;
   6855  1.1  mrg        D.2042[_20] = _6;
   6856  1.1  mrg        // Initialization for scan phase:
   6857  1.1  mrg        _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
   6858  1.1  mrg        _26 = D.2043[_25];
   6859  1.1  mrg        _27 = D.2042[_25];
   6860  1.1  mrg        _28 = _26 + _27;
   6861  1.1  mrg        D.2043[_25] = _28;
   6862  1.1  mrg        D.2042[_25] = _28;
   6863  1.1  mrg        // Actual scan phase:
   6864  1.1  mrg        ...
   6865  1.1  mrg        r.1_8 = D.2042[_20];
   6866  1.1  mrg        ...
   6867  1.1  mrg      The "omp simd array" variable D.2042 holds the privatized copy used
   6868  1.1  mrg      inside of the loop and D.2043 is another one that holds copies of
   6869  1.1  mrg      the current original list item.  The separate GOMP_SIMD_LANE ifn
   6870  1.1  mrg      kinds are there in order to allow optimizing the initializer store
   6871  1.1  mrg      and combiner sequence, e.g. if it is originally some C++ish user
   6872  1.1  mrg      defined reduction, but allow the vectorizer to pattern recognize it
   6873  1.1  mrg      and turn into the appropriate vectorized scan.
   6874  1.1  mrg 
   6875  1.1  mrg      For exclusive scan, this is slightly different:
   6876  1.1  mrg      #pragma omp simd reduction(inscan,+:r)
   6877  1.1  mrg      for (...)
   6878  1.1  mrg        {
   6879  1.1  mrg 	 use (r);
   6880  1.1  mrg 	 #pragma omp scan exclusive (r)
   6881  1.1  mrg 	 r += something ();
   6882  1.1  mrg        }
   6883  1.1  mrg      shall have body with:
   6884  1.1  mrg        // Initialization for input phase, store the reduction initializer:
   6885  1.1  mrg        _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
   6886  1.1  mrg        _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
   6887  1.1  mrg        D.2042[_21] = 0;
   6888  1.1  mrg        // Actual input phase:
   6889  1.1  mrg        ...
   6890  1.1  mrg        r.0_5 = D.2042[_20];
   6891  1.1  mrg        _6 = _4 + r.0_5;
   6892  1.1  mrg        D.2042[_20] = _6;
   6893  1.1  mrg        // Initialization for scan phase:
   6894  1.1  mrg        _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
   6895  1.1  mrg        _26 = D.2043[_25];
   6896  1.1  mrg        D.2044[_25] = _26;
   6897  1.1  mrg        _27 = D.2042[_25];
   6898  1.1  mrg        _28 = _26 + _27;
   6899  1.1  mrg        D.2043[_25] = _28;
   6900  1.1  mrg        // Actual scan phase:
   6901  1.1  mrg        ...
   6902  1.1  mrg        r.1_8 = D.2044[_20];
   6903  1.1  mrg        ...  */
   6904  1.1  mrg 
   6905  1.1  mrg   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
   6906  1.1  mrg     {
   6907  1.1  mrg       /* Match the D.2042[_21] = 0; store above.  Just require that
   6908  1.1  mrg 	 it is a constant or external definition store.  */
   6909  1.1  mrg       if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
   6910  1.1  mrg 	{
   6911  1.1  mrg 	 fail_init:
   6912  1.1  mrg 	  if (dump_enabled_p ())
   6913  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6914  1.1  mrg 			     "unsupported OpenMP scan initializer store.\n");
   6915  1.1  mrg 	  return false;
   6916  1.1  mrg 	}
   6917  1.1  mrg 
   6918  1.1  mrg       if (! loop_vinfo->scan_map)
   6919  1.1  mrg 	loop_vinfo->scan_map = new hash_map<tree, tree>;
   6920  1.1  mrg       tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
   6921  1.1  mrg       tree &cached = loop_vinfo->scan_map->get_or_insert (var);
   6922  1.1  mrg       if (cached)
   6923  1.1  mrg 	goto fail_init;
   6924  1.1  mrg       cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
   6925  1.1  mrg 
   6926  1.1  mrg       /* These stores can be vectorized normally.  */
   6927  1.1  mrg       return true;
   6928  1.1  mrg     }
   6929  1.1  mrg 
   6930  1.1  mrg   if (rhs_dt != vect_internal_def)
   6931  1.1  mrg     {
   6932  1.1  mrg      fail:
   6933  1.1  mrg       if (dump_enabled_p ())
   6934  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   6935  1.1  mrg 			 "unsupported OpenMP scan combiner pattern.\n");
   6936  1.1  mrg       return false;
   6937  1.1  mrg     }
   6938  1.1  mrg 
   6939  1.1  mrg   gimple *stmt = STMT_VINFO_STMT (stmt_info);
   6940  1.1  mrg   tree rhs = gimple_assign_rhs1 (stmt);
   6941  1.1  mrg   if (TREE_CODE (rhs) != SSA_NAME)
   6942  1.1  mrg     goto fail;
   6943  1.1  mrg 
   6944  1.1  mrg   gimple *other_store_stmt = NULL;
   6945  1.1  mrg   tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
   6946  1.1  mrg   bool inscan_var_store
   6947  1.1  mrg     = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
   6948  1.1  mrg 
   6949  1.1  mrg   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
   6950  1.1  mrg     {
   6951  1.1  mrg       if (!inscan_var_store)
   6952  1.1  mrg 	{
   6953  1.1  mrg 	  use_operand_p use_p;
   6954  1.1  mrg 	  imm_use_iterator iter;
   6955  1.1  mrg 	  FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
   6956  1.1  mrg 	    {
   6957  1.1  mrg 	      gimple *use_stmt = USE_STMT (use_p);
   6958  1.1  mrg 	      if (use_stmt == stmt || is_gimple_debug (use_stmt))
   6959  1.1  mrg 		continue;
   6960  1.1  mrg 	      if (gimple_bb (use_stmt) != gimple_bb (stmt)
   6961  1.1  mrg 		  || !is_gimple_assign (use_stmt)
   6962  1.1  mrg 		  || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
   6963  1.1  mrg 		  || other_store_stmt
   6964  1.1  mrg 		  || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
   6965  1.1  mrg 		goto fail;
   6966  1.1  mrg 	      other_store_stmt = use_stmt;
   6967  1.1  mrg 	    }
   6968  1.1  mrg 	  if (other_store_stmt == NULL)
   6969  1.1  mrg 	    goto fail;
   6970  1.1  mrg 	  rhs = gimple_assign_lhs (other_store_stmt);
   6971  1.1  mrg 	  if (!single_imm_use (rhs, &use_p, &other_store_stmt))
   6972  1.1  mrg 	    goto fail;
   6973  1.1  mrg 	}
   6974  1.1  mrg     }
   6975  1.1  mrg   else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
   6976  1.1  mrg     {
   6977  1.1  mrg       use_operand_p use_p;
   6978  1.1  mrg       imm_use_iterator iter;
   6979  1.1  mrg       FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
   6980  1.1  mrg 	{
   6981  1.1  mrg 	  gimple *use_stmt = USE_STMT (use_p);
   6982  1.1  mrg 	  if (use_stmt == stmt || is_gimple_debug (use_stmt))
   6983  1.1  mrg 	    continue;
   6984  1.1  mrg 	  if (other_store_stmt)
   6985  1.1  mrg 	    goto fail;
   6986  1.1  mrg 	  other_store_stmt = use_stmt;
   6987  1.1  mrg 	}
   6988  1.1  mrg     }
   6989  1.1  mrg   else
   6990  1.1  mrg     goto fail;
   6991  1.1  mrg 
   6992  1.1  mrg   gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
   6993  1.1  mrg   if (gimple_bb (def_stmt) != gimple_bb (stmt)
   6994  1.1  mrg       || !is_gimple_assign (def_stmt)
   6995  1.1  mrg       || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
   6996  1.1  mrg     goto fail;
   6997  1.1  mrg 
   6998  1.1  mrg   enum tree_code code = gimple_assign_rhs_code (def_stmt);
   6999  1.1  mrg   /* For pointer addition, we should use the normal plus for the vector
   7000  1.1  mrg      operation.  */
   7001  1.1  mrg   switch (code)
   7002  1.1  mrg     {
   7003  1.1  mrg     case POINTER_PLUS_EXPR:
   7004  1.1  mrg       code = PLUS_EXPR;
   7005  1.1  mrg       break;
   7006  1.1  mrg     case MULT_HIGHPART_EXPR:
   7007  1.1  mrg       goto fail;
   7008  1.1  mrg     default:
   7009  1.1  mrg       break;
   7010  1.1  mrg     }
   7011  1.1  mrg   if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
   7012  1.1  mrg     goto fail;
   7013  1.1  mrg 
   7014  1.1  mrg   tree rhs1 = gimple_assign_rhs1 (def_stmt);
   7015  1.1  mrg   tree rhs2 = gimple_assign_rhs2 (def_stmt);
   7016  1.1  mrg   if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
   7017  1.1  mrg     goto fail;
   7018  1.1  mrg 
   7019  1.1  mrg   gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
   7020  1.1  mrg   gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
   7021  1.1  mrg   if (gimple_bb (load1_stmt) != gimple_bb (stmt)
   7022  1.1  mrg       || !gimple_assign_load_p (load1_stmt)
   7023  1.1  mrg       || gimple_bb (load2_stmt) != gimple_bb (stmt)
   7024  1.1  mrg       || !gimple_assign_load_p (load2_stmt))
   7025  1.1  mrg     goto fail;
   7026  1.1  mrg 
   7027  1.1  mrg   stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
   7028  1.1  mrg   stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
   7029  1.1  mrg   if (load1_stmt_info == NULL
   7030  1.1  mrg       || load2_stmt_info == NULL
   7031  1.1  mrg       || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
   7032  1.1  mrg 	  != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
   7033  1.1  mrg       || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
   7034  1.1  mrg 	  != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
   7035  1.1  mrg     goto fail;
   7036  1.1  mrg 
   7037  1.1  mrg   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
   7038  1.1  mrg     {
   7039  1.1  mrg       dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
   7040  1.1  mrg       if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
   7041  1.1  mrg 	  || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
   7042  1.1  mrg 	goto fail;
   7043  1.1  mrg       tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
   7044  1.1  mrg       tree lrhs;
   7045  1.1  mrg       if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
   7046  1.1  mrg 	lrhs = rhs1;
   7047  1.1  mrg       else
   7048  1.1  mrg 	lrhs = rhs2;
   7049  1.1  mrg       use_operand_p use_p;
   7050  1.1  mrg       imm_use_iterator iter;
   7051  1.1  mrg       FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
   7052  1.1  mrg 	{
   7053  1.1  mrg 	  gimple *use_stmt = USE_STMT (use_p);
   7054  1.1  mrg 	  if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
   7055  1.1  mrg 	    continue;
   7056  1.1  mrg 	  if (other_store_stmt)
   7057  1.1  mrg 	    goto fail;
   7058  1.1  mrg 	  other_store_stmt = use_stmt;
   7059  1.1  mrg 	}
   7060  1.1  mrg     }
   7061  1.1  mrg 
   7062  1.1  mrg   if (other_store_stmt == NULL)
   7063  1.1  mrg     goto fail;
   7064  1.1  mrg   if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
   7065  1.1  mrg       || !gimple_store_p (other_store_stmt))
   7066  1.1  mrg     goto fail;
   7067  1.1  mrg 
   7068  1.1  mrg   stmt_vec_info other_store_stmt_info
   7069  1.1  mrg     = loop_vinfo->lookup_stmt (other_store_stmt);
   7070  1.1  mrg   if (other_store_stmt_info == NULL
   7071  1.1  mrg       || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
   7072  1.1  mrg 	  != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
   7073  1.1  mrg     goto fail;
   7074  1.1  mrg 
   7075  1.1  mrg   gimple *stmt1 = stmt;
   7076  1.1  mrg   gimple *stmt2 = other_store_stmt;
   7077  1.1  mrg   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
   7078  1.1  mrg     std::swap (stmt1, stmt2);
   7079  1.1  mrg   if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
   7080  1.1  mrg 			    gimple_assign_rhs1 (load2_stmt)))
   7081  1.1  mrg     {
   7082  1.1  mrg       std::swap (rhs1, rhs2);
   7083  1.1  mrg       std::swap (load1_stmt, load2_stmt);
   7084  1.1  mrg       std::swap (load1_stmt_info, load2_stmt_info);
   7085  1.1  mrg     }
   7086  1.1  mrg   if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
   7087  1.1  mrg 			     gimple_assign_rhs1 (load1_stmt)))
   7088  1.1  mrg     goto fail;
   7089  1.1  mrg 
   7090  1.1  mrg   tree var3 = NULL_TREE;
   7091  1.1  mrg   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
   7092  1.1  mrg       && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
   7093  1.1  mrg 				gimple_assign_rhs1 (load2_stmt)))
   7094  1.1  mrg     goto fail;
   7095  1.1  mrg   else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
   7096  1.1  mrg     {
   7097  1.1  mrg       dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
   7098  1.1  mrg       if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
   7099  1.1  mrg 	  || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
   7100  1.1  mrg 	goto fail;
   7101  1.1  mrg       var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
   7102  1.1  mrg       if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
   7103  1.1  mrg 	  || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
   7104  1.1  mrg 	  || lookup_attribute ("omp simd inscan exclusive",
   7105  1.1  mrg 			       DECL_ATTRIBUTES (var3)))
   7106  1.1  mrg 	goto fail;
   7107  1.1  mrg     }
   7108  1.1  mrg 
   7109  1.1  mrg   dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
   7110  1.1  mrg   if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
   7111  1.1  mrg       || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
   7112  1.1  mrg     goto fail;
   7113  1.1  mrg 
   7114  1.1  mrg   tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
   7115  1.1  mrg   tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
   7116  1.1  mrg   if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
   7117  1.1  mrg       || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
   7118  1.1  mrg       || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
   7119  1.1  mrg 	 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
   7120  1.1  mrg     goto fail;
   7121  1.1  mrg 
   7122  1.1  mrg   if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
   7123  1.1  mrg     std::swap (var1, var2);
   7124  1.1  mrg 
   7125  1.1  mrg   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
   7126  1.1  mrg     {
   7127  1.1  mrg       if (!lookup_attribute ("omp simd inscan exclusive",
   7128  1.1  mrg 			     DECL_ATTRIBUTES (var1)))
   7129  1.1  mrg 	goto fail;
   7130  1.1  mrg       var1 = var3;
   7131  1.1  mrg     }
   7132  1.1  mrg 
   7133  1.1  mrg   if (loop_vinfo->scan_map == NULL)
   7134  1.1  mrg     goto fail;
   7135  1.1  mrg   tree *init = loop_vinfo->scan_map->get (var1);
   7136  1.1  mrg   if (init == NULL)
   7137  1.1  mrg     goto fail;
   7138  1.1  mrg 
   7139  1.1  mrg   /* The IL is as expected, now check if we can actually vectorize it.
   7140  1.1  mrg      Inclusive scan:
   7141  1.1  mrg        _26 = D.2043[_25];
   7142  1.1  mrg        _27 = D.2042[_25];
   7143  1.1  mrg        _28 = _26 + _27;
   7144  1.1  mrg        D.2043[_25] = _28;
   7145  1.1  mrg        D.2042[_25] = _28;
   7146  1.1  mrg      should be vectorized as (where _40 is the vectorized rhs
   7147  1.1  mrg      from the D.2042[_21] = 0; store):
   7148  1.1  mrg        _30 = MEM <vector(8) int> [(int *)&D.2043];
   7149  1.1  mrg        _31 = MEM <vector(8) int> [(int *)&D.2042];
   7150  1.1  mrg        _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
   7151  1.1  mrg        _33 = _31 + _32;
   7152  1.1  mrg        // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
   7153  1.1  mrg        _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
   7154  1.1  mrg        _35 = _33 + _34;
   7155  1.1  mrg        // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
   7156  1.1  mrg        //         _31[1]+.._31[4], ... _31[4]+.._31[7] };
   7157  1.1  mrg        _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
   7158  1.1  mrg        _37 = _35 + _36;
   7159  1.1  mrg        // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
   7160  1.1  mrg        //         _31[0]+.._31[4], ... _31[0]+.._31[7] };
   7161  1.1  mrg        _38 = _30 + _37;
   7162  1.1  mrg        _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
   7163  1.1  mrg        MEM <vector(8) int> [(int *)&D.2043] = _39;
   7164  1.1  mrg        MEM <vector(8) int> [(int *)&D.2042] = _38;
   7165  1.1  mrg      Exclusive scan:
   7166  1.1  mrg        _26 = D.2043[_25];
   7167  1.1  mrg        D.2044[_25] = _26;
   7168  1.1  mrg        _27 = D.2042[_25];
   7169  1.1  mrg        _28 = _26 + _27;
   7170  1.1  mrg        D.2043[_25] = _28;
   7171  1.1  mrg      should be vectorized as (where _40 is the vectorized rhs
   7172  1.1  mrg      from the D.2042[_21] = 0; store):
   7173  1.1  mrg        _30 = MEM <vector(8) int> [(int *)&D.2043];
   7174  1.1  mrg        _31 = MEM <vector(8) int> [(int *)&D.2042];
   7175  1.1  mrg        _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
   7176  1.1  mrg        _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
   7177  1.1  mrg        _34 = _32 + _33;
   7178  1.1  mrg        // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
   7179  1.1  mrg        //         _31[3]+_31[4], ... _31[5]+.._31[6] };
   7180  1.1  mrg        _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
   7181  1.1  mrg        _36 = _34 + _35;
   7182  1.1  mrg        // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
   7183  1.1  mrg        //         _31[1]+.._31[4], ... _31[3]+.._31[6] };
   7184  1.1  mrg        _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
   7185  1.1  mrg        _38 = _36 + _37;
   7186  1.1  mrg        // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
   7187  1.1  mrg        //         _31[0]+.._31[4], ... _31[0]+.._31[6] };
   7188  1.1  mrg        _39 = _30 + _38;
   7189  1.1  mrg        _50 = _31 + _39;
   7190  1.1  mrg        _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
   7191  1.1  mrg        MEM <vector(8) int> [(int *)&D.2044] = _39;
   7192  1.1  mrg        MEM <vector(8) int> [(int *)&D.2042] = _51;  */
   7193  1.1  mrg   enum machine_mode vec_mode = TYPE_MODE (vectype);
   7194  1.1  mrg   optab optab = optab_for_tree_code (code, vectype, optab_default);
   7195  1.1  mrg   if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
   7196  1.1  mrg     goto fail;
   7197  1.1  mrg 
   7198  1.1  mrg   int units_log2 = scan_store_can_perm_p (vectype, *init);
   7199  1.1  mrg   if (units_log2 == -1)
   7200  1.1  mrg     goto fail;
   7201  1.1  mrg 
   7202  1.1  mrg   return true;
   7203  1.1  mrg }
   7204  1.1  mrg 
   7205  1.1  mrg 
   7206  1.1  mrg /* Function vectorizable_scan_store.
   7207  1.1  mrg 
    7208  1.1  mrg    Helper of vectorizable_store, arguments like on vectorizable_store.
   7209  1.1  mrg    Handle only the transformation, checking is done in check_scan_store.  */
   7210  1.1  mrg 
    7211  1.1  mrg static bool
    7212  1.1  mrg vectorizable_scan_store (vec_info *vinfo,
    7213  1.1  mrg 			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
    7214  1.1  mrg 			 gimple **vec_stmt, int ncopies)
    7215  1.1  mrg {
    7216  1.1  mrg   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    7217  1.1  mrg   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
    7218  1.1  mrg   tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
    7219  1.1  mrg   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    7220  1.1  mrg 
    7221  1.1  mrg   if (dump_enabled_p ())
    7222  1.1  mrg     dump_printf_loc (MSG_NOTE, vect_location,
    7223  1.1  mrg 		     "transform scan store. ncopies = %d\n", ncopies);
    7224  1.1  mrg 
    7225  1.1  mrg   gimple *stmt = STMT_VINFO_STMT (stmt_info);
    7226  1.1  mrg   tree rhs = gimple_assign_rhs1 (stmt);
    7227  1.1  mrg   gcc_assert (TREE_CODE (rhs) == SSA_NAME);
    7228  1.1  mrg 
                       /* Determine whether this store writes the "omp simd inscan"
                          variable (the scan result array) or the plain input
                          array; the two are handled differently below.  */
    7229  1.1  mrg   tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
    7230  1.1  mrg   bool inscan_var_store
    7231  1.1  mrg     = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
    7232  1.1  mrg 
                       /* For exclusive scan (simd lane access kind 4) stores to the
                          non-inscan variable, the stored SSA name is not the combiner
                          result itself; follow its single relevant immediate use to
                          reach the combiner, mirroring the shape check_scan_store
                          accepted.  */
    7233  1.1  mrg   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
    7234  1.1  mrg     {
    7235  1.1  mrg       use_operand_p use_p;
    7236  1.1  mrg       imm_use_iterator iter;
    7237  1.1  mrg       FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
    7238  1.1  mrg 	{
    7239  1.1  mrg 	  gimple *use_stmt = USE_STMT (use_p);
    7240  1.1  mrg 	  if (use_stmt == stmt || is_gimple_debug (use_stmt))
    7241  1.1  mrg 	    continue;
    7242  1.1  mrg 	  rhs = gimple_assign_lhs (use_stmt);
    7243  1.1  mrg 	  break;
    7244  1.1  mrg 	}
    7245  1.1  mrg     }
    7246  1.1  mrg 
                       /* The combiner: a commutative binary operation on two loads
                          (POINTER_PLUS_EXPR is vectorized as plain PLUS_EXPR).  The
                          asserts restate invariants check_scan_store established.  */
    7247  1.1  mrg   gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
    7248  1.1  mrg   enum tree_code code = gimple_assign_rhs_code (def_stmt);
    7249  1.1  mrg   if (code == POINTER_PLUS_EXPR)
    7250  1.1  mrg     code = PLUS_EXPR;
    7251  1.1  mrg   gcc_assert (TREE_CODE_LENGTH (code) == binary_op
    7252  1.1  mrg 	      && commutative_tree_code (code));
    7253  1.1  mrg   tree rhs1 = gimple_assign_rhs1 (def_stmt);
    7254  1.1  mrg   tree rhs2 = gimple_assign_rhs2 (def_stmt);
    7255  1.1  mrg   gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
    7256  1.1  mrg   gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
    7257  1.1  mrg   gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
    7258  1.1  mrg   stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
    7259  1.1  mrg   stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
    7260  1.1  mrg   dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
    7261  1.1  mrg   dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
    7262  1.1  mrg   tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
    7263  1.1  mrg   tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
    7264  1.1  mrg 
                       /* Canonicalize so that rhs1/var1/load1_* refer to the load
                          from the non-inscan (input) array.  */
    7265  1.1  mrg   if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
    7266  1.1  mrg     {
    7267  1.1  mrg       std::swap (rhs1, rhs2);
    7268  1.1  mrg       std::swap (var1, var2);
    7269  1.1  mrg       std::swap (load1_dr_info, load2_dr_info);
    7270  1.1  mrg     }
    7271  1.1  mrg 
                       /* Initializer value recorded by check_scan_store for the
                          "D.2042[_21] = 0;" style store; used as the first operand
                          of each permutation.  */
    7272  1.1  mrg   tree *init = loop_vinfo->scan_map->get (var1);
    7273  1.1  mrg   gcc_assert (init);
    7274  1.1  mrg 
    7275  1.1  mrg   unsigned HOST_WIDE_INT nunits;
    7276  1.1  mrg   if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
    7277  1.1  mrg     gcc_unreachable ();
                       /* Build the log2(nunits) shift permutations used by the
                          prefix-sum steps plus one final permutation
                          (perms[units_log2]) that broadcasts the last lane; see the
                          example IL in check_scan_store's comment.  */
    7278  1.1  mrg   auto_vec<enum scan_store_kind, 16> use_whole_vector;
    7279  1.1  mrg   int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
    7280  1.1  mrg   gcc_assert (units_log2 > 0);
    7281  1.1  mrg   auto_vec<tree, 16> perms;
    7282  1.1  mrg   perms.quick_grow (units_log2 + 1);
    7283  1.1  mrg   tree zero_vec = NULL_TREE, masktype = NULL_TREE;
    7284  1.1  mrg   for (int i = 0; i <= units_log2; ++i)
    7285  1.1  mrg     {
    7286  1.1  mrg       unsigned HOST_WIDE_INT j, k;
    7287  1.1  mrg       vec_perm_builder sel (nunits, nunits, 1);
    7288  1.1  mrg       sel.quick_grow (nunits);
    7289  1.1  mrg       if (i == units_log2)
    7290  1.1  mrg 	for (j = 0; j < nunits; ++j)
    7291  1.1  mrg 	  sel[j] = nunits - 1;
    7292  1.1  mrg       else
    7293  1.1  mrg 	{
                           /* Step i keeps the first 2^i lanes from the first (init)
                              operand and shifts the rest in from the second.  */
    7294  1.1  mrg 	  for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
    7295  1.1  mrg 	    sel[j] = j;
    7296  1.1  mrg 	  for (k = 0; j < nunits; ++j, ++k)
    7297  1.1  mrg 	    sel[j] = nunits + k;
    7298  1.1  mrg 	}
    7299  1.1  mrg       vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
                       /* If the target can't do this permutation directly,
                          scan_store_can_perm_p reported an alternative whole-vector
                          strategy; then build the mask unchecked and prepare a zero
                          vector (and a mask type for the VEC_COND variant).  */
    7300  1.1  mrg       if (!use_whole_vector.is_empty ()
    7301  1.1  mrg 	  && use_whole_vector[i] != scan_store_kind_perm)
    7302  1.1  mrg 	{
    7303  1.1  mrg 	  if (zero_vec == NULL_TREE)
    7304  1.1  mrg 	    zero_vec = build_zero_cst (vectype);
    7305  1.1  mrg 	  if (masktype == NULL_TREE
    7306  1.1  mrg 	      && use_whole_vector[i] == scan_store_kind_lshift_cond)
    7307  1.1  mrg 	    masktype = truth_type_for (vectype);
    7308  1.1  mrg 	  perms[i] = vect_gen_perm_mask_any (vectype, indices);
    7309  1.1  mrg 	}
    7310  1.1  mrg       else
    7311  1.1  mrg 	perms[i] = vect_gen_perm_mask_checked (vectype, indices);
    7312  1.1  mrg     }
    7313  1.1  mrg 
                       /* Generate the vectorized scan, one iteration per copy.  */
    7314  1.1  mrg   tree vec_oprnd1 = NULL_TREE;
    7315  1.1  mrg   tree vec_oprnd2 = NULL_TREE;
    7316  1.1  mrg   tree vec_oprnd3 = NULL_TREE;
    7317  1.1  mrg   tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
    7318  1.1  mrg   tree dataref_offset = build_int_cst (ref_type, 0);
    7319  1.1  mrg   tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
    7320  1.1  mrg 					   vectype, VMAT_CONTIGUOUS);
    7321  1.1  mrg   tree ldataref_ptr = NULL_TREE;
    7322  1.1  mrg   tree orig = NULL_TREE;
                       /* Exclusive scan store to the input array: the rhs1 operand
                          is re-loaded from memory each copy instead of taking a
                          vectorized SSA def.  */
    7323  1.1  mrg   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
    7324  1.1  mrg     ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
    7325  1.1  mrg   auto_vec<tree> vec_oprnds1;
    7326  1.1  mrg   auto_vec<tree> vec_oprnds2;
    7327  1.1  mrg   auto_vec<tree> vec_oprnds3;
    7328  1.1  mrg   vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
    7329  1.1  mrg 		     *init, &vec_oprnds1,
    7330  1.1  mrg 		     ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
    7331  1.1  mrg 		     rhs2, &vec_oprnds3);
    7332  1.1  mrg   for (int j = 0; j < ncopies; j++)
    7333  1.1  mrg     {
    7334  1.1  mrg       vec_oprnd1 = vec_oprnds1[j];
    7335  1.1  mrg       if (ldataref_ptr == NULL)
    7336  1.1  mrg 	vec_oprnd2 = vec_oprnds2[j];
    7337  1.1  mrg       vec_oprnd3 = vec_oprnds3[j];
    7338  1.1  mrg       if (j == 0)
    7339  1.1  mrg 	orig = vec_oprnd3;
    7340  1.1  mrg       else if (!inscan_var_store)
    7341  1.1  mrg 	dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
    7342  1.1  mrg 
    7343  1.1  mrg       if (ldataref_ptr)
    7344  1.1  mrg 	{
                           /* Emit the explicit vector load of the rhs1 operand for
                              the exclusive-scan case.  */
    7345  1.1  mrg 	  vec_oprnd2 = make_ssa_name (vectype);
    7346  1.1  mrg 	  tree data_ref = fold_build2 (MEM_REF, vectype,
    7347  1.1  mrg 				       unshare_expr (ldataref_ptr),
    7348  1.1  mrg 				       dataref_offset);
    7349  1.1  mrg 	  vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
    7350  1.1  mrg 	  gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
    7351  1.1  mrg 	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
    7352  1.1  mrg 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
    7353  1.1  mrg 	  *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
    7354  1.1  mrg 	}
    7355  1.1  mrg 
                       /* Prefix-sum ladder: units_log2 permute+combine steps.  */
    7356  1.1  mrg       tree v = vec_oprnd2;
    7357  1.1  mrg       for (int i = 0; i < units_log2; ++i)
    7358  1.1  mrg 	{
    7359  1.1  mrg 	  tree new_temp = make_ssa_name (vectype);
    7360  1.1  mrg 	  gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
    7361  1.1  mrg 					   (zero_vec
    7362  1.1  mrg 					    && (use_whole_vector[i]
    7363  1.1  mrg 						!= scan_store_kind_perm))
    7364  1.1  mrg 					   ? zero_vec : vec_oprnd1, v,
    7365  1.1  mrg 					   perms[i]);
    7366  1.1  mrg 	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
    7367  1.1  mrg 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
    7368  1.1  mrg 	  *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
    7369  1.1  mrg 
    7370  1.1  mrg 	  if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
    7371  1.1  mrg 	    {
    7372  1.1  mrg 	      /* Whole vector shift shifted in zero bits, but if *init
    7373  1.1  mrg 		 is not initializer_zerop, we need to replace those elements
    7374  1.1  mrg 		 with elements from vec_oprnd1.  */
    7375  1.1  mrg 	      tree_vector_builder vb (masktype, nunits, 1);
    7376  1.1  mrg 	      for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
    7377  1.1  mrg 		vb.quick_push (k < (HOST_WIDE_INT_1U << i)
    7378  1.1  mrg 			       ? boolean_false_node : boolean_true_node);
    7379  1.1  mrg 
    7380  1.1  mrg 	      tree new_temp2 = make_ssa_name (vectype);
    7381  1.1  mrg 	      g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
    7382  1.1  mrg 				       new_temp, vec_oprnd1);
    7383  1.1  mrg 	      vect_finish_stmt_generation (vinfo, stmt_info,
    7384  1.1  mrg 							   g, gsi);
    7385  1.1  mrg 	      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
    7386  1.1  mrg 	      new_temp = new_temp2;
    7387  1.1  mrg 	    }
    7388  1.1  mrg 
    7389  1.1  mrg 	  /* For exclusive scan, perform the perms[i] permutation once
    7390  1.1  mrg 	     more.  */
    7391  1.1  mrg 	  if (i == 0
    7392  1.1  mrg 	      && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
    7393  1.1  mrg 	      && v == vec_oprnd2)
    7394  1.1  mrg 	    {
    7395  1.1  mrg 	      v = new_temp;
    7396  1.1  mrg 	      --i;
    7397  1.1  mrg 	      continue;
    7398  1.1  mrg 	    }
    7399  1.1  mrg 
    7400  1.1  mrg 	  tree new_temp2 = make_ssa_name (vectype);
    7401  1.1  mrg 	  g = gimple_build_assign (new_temp2, code, v, new_temp);
    7402  1.1  mrg 	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
    7403  1.1  mrg 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
    7404  1.1  mrg 
    7405  1.1  mrg 	  v = new_temp2;
    7406  1.1  mrg 	}
    7407  1.1  mrg 
                       /* Combine with ORIG, the carry from the previous copy (the
                          rhs2 vector def for the first copy).  */
    7408  1.1  mrg       tree new_temp = make_ssa_name (vectype);
    7409  1.1  mrg       gimple *g = gimple_build_assign (new_temp, code, orig, v);
    7410  1.1  mrg       vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
    7411  1.1  mrg       STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
    7412  1.1  mrg 
    7413  1.1  mrg       tree last_perm_arg = new_temp;
    7414  1.1  mrg       /* For exclusive scan, new_temp computed above is the exclusive scan
    7415  1.1  mrg 	 prefix sum.  Turn it into inclusive prefix sum for the broadcast
    7416  1.1  mrg 	 of the last element into orig.  */
    7417  1.1  mrg       if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
    7418  1.1  mrg 	{
    7419  1.1  mrg 	  last_perm_arg = make_ssa_name (vectype);
    7420  1.1  mrg 	  g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
    7421  1.1  mrg 	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
    7422  1.1  mrg 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
    7423  1.1  mrg 	}
    7424  1.1  mrg 
                       /* Broadcast the last lane (perms[units_log2]) to form the
                          carry-in for the next copy.  */
    7425  1.1  mrg       orig = make_ssa_name (vectype);
    7426  1.1  mrg       g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
    7427  1.1  mrg 			       last_perm_arg, perms[units_log2]);
    7428  1.1  mrg       vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
    7429  1.1  mrg       STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
    7430  1.1  mrg 
                       /* Non-inscan variable: store the per-copy scan result in
                          place.  */
    7431  1.1  mrg       if (!inscan_var_store)
    7432  1.1  mrg 	{
    7433  1.1  mrg 	  tree data_ref = fold_build2 (MEM_REF, vectype,
    7434  1.1  mrg 				       unshare_expr (dataref_ptr),
    7435  1.1  mrg 				       dataref_offset);
    7436  1.1  mrg 	  vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
    7437  1.1  mrg 	  g = gimple_build_assign (data_ref, new_temp);
    7438  1.1  mrg 	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
    7439  1.1  mrg 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
    7440  1.1  mrg 	}
    7441  1.1  mrg     }
    7442  1.1  mrg 
                       /* Inscan variable: the stores are emitted only after all
                          copies have been processed, writing the final broadcast
                          value ORIG to each copy's slot.  */
    7443  1.1  mrg   if (inscan_var_store)
    7444  1.1  mrg     for (int j = 0; j < ncopies; j++)
    7445  1.1  mrg       {
    7446  1.1  mrg 	if (j != 0)
    7447  1.1  mrg 	  dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
    7448  1.1  mrg 
    7449  1.1  mrg 	tree data_ref = fold_build2 (MEM_REF, vectype,
    7450  1.1  mrg 				     unshare_expr (dataref_ptr),
    7451  1.1  mrg 				     dataref_offset);
    7452  1.1  mrg 	vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
    7453  1.1  mrg 	gimple *g = gimple_build_assign (data_ref, orig);
    7454  1.1  mrg 	vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
    7455  1.1  mrg 	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
    7456  1.1  mrg       }
    7457  1.1  mrg   return true;
    7458  1.1  mrg }
   7459  1.1  mrg 
   7460  1.1  mrg 
   7461  1.1  mrg /* Function vectorizable_store.
   7462  1.1  mrg 
   7463  1.1  mrg    Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
   7464  1.1  mrg    that can be vectorized.
   7465  1.1  mrg    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   7466  1.1  mrg    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   7467  1.1  mrg    Return true if STMT_INFO is vectorizable in this way.  */
   7468  1.1  mrg 
   7469  1.1  mrg static bool
   7470  1.1  mrg vectorizable_store (vec_info *vinfo,
   7471  1.1  mrg 		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   7472  1.1  mrg 		    gimple **vec_stmt, slp_tree slp_node,
   7473  1.1  mrg 		    stmt_vector_for_cost *cost_vec)
   7474  1.1  mrg {
   7475  1.1  mrg   tree data_ref;
   7476  1.1  mrg   tree op;
   7477  1.1  mrg   tree vec_oprnd = NULL_TREE;
   7478  1.1  mrg   tree elem_type;
   7479  1.1  mrg   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   7480  1.1  mrg   class loop *loop = NULL;
   7481  1.1  mrg   machine_mode vec_mode;
   7482  1.1  mrg   tree dummy;
   7483  1.1  mrg   enum vect_def_type rhs_dt = vect_unknown_def_type;
   7484  1.1  mrg   enum vect_def_type mask_dt = vect_unknown_def_type;
   7485  1.1  mrg   tree dataref_ptr = NULL_TREE;
   7486  1.1  mrg   tree dataref_offset = NULL_TREE;
   7487  1.1  mrg   gimple *ptr_incr = NULL;
   7488  1.1  mrg   int ncopies;
   7489  1.1  mrg   int j;
   7490  1.1  mrg   stmt_vec_info first_stmt_info;
   7491  1.1  mrg   bool grouped_store;
   7492  1.1  mrg   unsigned int group_size, i;
   7493  1.1  mrg   vec<tree> oprnds = vNULL;
   7494  1.1  mrg   vec<tree> result_chain = vNULL;
   7495  1.1  mrg   vec<tree> vec_oprnds = vNULL;
   7496  1.1  mrg   bool slp = (slp_node != NULL);
   7497  1.1  mrg   unsigned int vec_num;
   7498  1.1  mrg   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
   7499  1.1  mrg   tree aggr_type;
   7500  1.1  mrg   gather_scatter_info gs_info;
   7501  1.1  mrg   poly_uint64 vf;
   7502  1.1  mrg   vec_load_store_type vls_type;
   7503  1.1  mrg   tree ref_type;
   7504  1.1  mrg 
   7505  1.1  mrg   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
   7506  1.1  mrg     return false;
   7507  1.1  mrg 
   7508  1.1  mrg   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
   7509  1.1  mrg       && ! vec_stmt)
   7510  1.1  mrg     return false;
   7511  1.1  mrg 
   7512  1.1  mrg   /* Is vectorizable store? */
   7513  1.1  mrg 
   7514  1.1  mrg   tree mask = NULL_TREE, mask_vectype = NULL_TREE;
   7515  1.1  mrg   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
   7516  1.1  mrg     {
   7517  1.1  mrg       tree scalar_dest = gimple_assign_lhs (assign);
   7518  1.1  mrg       if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
   7519  1.1  mrg 	  && is_pattern_stmt_p (stmt_info))
   7520  1.1  mrg 	scalar_dest = TREE_OPERAND (scalar_dest, 0);
   7521  1.1  mrg       if (TREE_CODE (scalar_dest) != ARRAY_REF
   7522  1.1  mrg 	  && TREE_CODE (scalar_dest) != BIT_FIELD_REF
   7523  1.1  mrg 	  && TREE_CODE (scalar_dest) != INDIRECT_REF
   7524  1.1  mrg 	  && TREE_CODE (scalar_dest) != COMPONENT_REF
   7525  1.1  mrg 	  && TREE_CODE (scalar_dest) != IMAGPART_EXPR
   7526  1.1  mrg 	  && TREE_CODE (scalar_dest) != REALPART_EXPR
   7527  1.1  mrg 	  && TREE_CODE (scalar_dest) != MEM_REF)
   7528  1.1  mrg 	return false;
   7529  1.1  mrg     }
   7530  1.1  mrg   else
   7531  1.1  mrg     {
   7532  1.1  mrg       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
   7533  1.1  mrg       if (!call || !gimple_call_internal_p (call))
   7534  1.1  mrg 	return false;
   7535  1.1  mrg 
   7536  1.1  mrg       internal_fn ifn = gimple_call_internal_fn (call);
   7537  1.1  mrg       if (!internal_store_fn_p (ifn))
   7538  1.1  mrg 	return false;
   7539  1.1  mrg 
   7540  1.1  mrg       if (slp_node != NULL)
   7541  1.1  mrg 	{
   7542  1.1  mrg 	  if (dump_enabled_p ())
   7543  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   7544  1.1  mrg 			     "SLP of masked stores not supported.\n");
   7545  1.1  mrg 	  return false;
   7546  1.1  mrg 	}
   7547  1.1  mrg 
   7548  1.1  mrg       int mask_index = internal_fn_mask_index (ifn);
   7549  1.1  mrg       if (mask_index >= 0
   7550  1.1  mrg 	  && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
   7551  1.1  mrg 				      &mask, NULL, &mask_dt, &mask_vectype))
   7552  1.1  mrg 	return false;
   7553  1.1  mrg     }
   7554  1.1  mrg 
   7555  1.1  mrg   op = vect_get_store_rhs (stmt_info);
   7556  1.1  mrg 
   7557  1.1  mrg   /* Cannot have hybrid store SLP -- that would mean storing to the
   7558  1.1  mrg      same location twice.  */
   7559  1.1  mrg   gcc_assert (slp == PURE_SLP_STMT (stmt_info));
   7560  1.1  mrg 
   7561  1.1  mrg   tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
   7562  1.1  mrg   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
   7563  1.1  mrg 
   7564  1.1  mrg   if (loop_vinfo)
   7565  1.1  mrg     {
   7566  1.1  mrg       loop = LOOP_VINFO_LOOP (loop_vinfo);
   7567  1.1  mrg       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
   7568  1.1  mrg     }
   7569  1.1  mrg   else
   7570  1.1  mrg     vf = 1;
   7571  1.1  mrg 
   7572  1.1  mrg   /* Multiple types in SLP are handled by creating the appropriate number of
   7573  1.1  mrg      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
   7574  1.1  mrg      case of SLP.  */
   7575  1.1  mrg   if (slp)
   7576  1.1  mrg     ncopies = 1;
   7577  1.1  mrg   else
   7578  1.1  mrg     ncopies = vect_get_num_copies (loop_vinfo, vectype);
   7579  1.1  mrg 
   7580  1.1  mrg   gcc_assert (ncopies >= 1);
   7581  1.1  mrg 
   7582  1.1  mrg   /* FORNOW.  This restriction should be relaxed.  */
   7583  1.1  mrg   if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
   7584  1.1  mrg     {
   7585  1.1  mrg       if (dump_enabled_p ())
   7586  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   7587  1.1  mrg 			 "multiple types in nested loop.\n");
   7588  1.1  mrg       return false;
   7589  1.1  mrg     }
   7590  1.1  mrg 
   7591  1.1  mrg   if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
   7592  1.1  mrg 			     op, &rhs_dt, &rhs_vectype, &vls_type))
   7593  1.1  mrg     return false;
   7594  1.1  mrg 
   7595  1.1  mrg   elem_type = TREE_TYPE (vectype);
   7596  1.1  mrg   vec_mode = TYPE_MODE (vectype);
   7597  1.1  mrg 
   7598  1.1  mrg   if (!STMT_VINFO_DATA_REF (stmt_info))
   7599  1.1  mrg     return false;
   7600  1.1  mrg 
   7601  1.1  mrg   vect_memory_access_type memory_access_type;
   7602  1.1  mrg   enum dr_alignment_support alignment_support_scheme;
   7603  1.1  mrg   int misalignment;
   7604  1.1  mrg   poly_int64 poffset;
   7605  1.1  mrg   if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
   7606  1.1  mrg 			    ncopies, &memory_access_type, &poffset,
   7607  1.1  mrg 			    &alignment_support_scheme, &misalignment, &gs_info))
   7608  1.1  mrg     return false;
   7609  1.1  mrg 
   7610  1.1  mrg   if (mask)
   7611  1.1  mrg     {
   7612  1.1  mrg       if (memory_access_type == VMAT_CONTIGUOUS)
   7613  1.1  mrg 	{
   7614  1.1  mrg 	  if (!VECTOR_MODE_P (vec_mode)
   7615  1.1  mrg 	      || !can_vec_mask_load_store_p (vec_mode,
   7616  1.1  mrg 					     TYPE_MODE (mask_vectype), false))
   7617  1.1  mrg 	    return false;
   7618  1.1  mrg 	}
   7619  1.1  mrg       else if (memory_access_type != VMAT_LOAD_STORE_LANES
   7620  1.1  mrg 	       && (memory_access_type != VMAT_GATHER_SCATTER
   7621  1.1  mrg 		   || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
   7622  1.1  mrg 	{
   7623  1.1  mrg 	  if (dump_enabled_p ())
   7624  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   7625  1.1  mrg 			     "unsupported access type for masked store.\n");
   7626  1.1  mrg 	  return false;
   7627  1.1  mrg 	}
   7628  1.1  mrg     }
   7629  1.1  mrg   else
   7630  1.1  mrg     {
   7631  1.1  mrg       /* FORNOW. In some cases can vectorize even if data-type not supported
   7632  1.1  mrg 	 (e.g. - array initialization with 0).  */
   7633  1.1  mrg       if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
   7634  1.1  mrg 	return false;
   7635  1.1  mrg     }
   7636  1.1  mrg 
   7637  1.1  mrg   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
   7638  1.1  mrg   grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
   7639  1.1  mrg 		   && memory_access_type != VMAT_GATHER_SCATTER
   7640  1.1  mrg 		   && (slp || memory_access_type != VMAT_CONTIGUOUS));
   7641  1.1  mrg   if (grouped_store)
   7642  1.1  mrg     {
   7643  1.1  mrg       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
   7644  1.1  mrg       first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
   7645  1.1  mrg       group_size = DR_GROUP_SIZE (first_stmt_info);
   7646  1.1  mrg     }
   7647  1.1  mrg   else
   7648  1.1  mrg     {
   7649  1.1  mrg       first_stmt_info = stmt_info;
   7650  1.1  mrg       first_dr_info = dr_info;
   7651  1.1  mrg       group_size = vec_num = 1;
   7652  1.1  mrg     }
   7653  1.1  mrg 
   7654  1.1  mrg   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
   7655  1.1  mrg     {
   7656  1.1  mrg       if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
   7657  1.1  mrg 			     memory_access_type))
   7658  1.1  mrg 	return false;
   7659  1.1  mrg     }
   7660  1.1  mrg 
   7661  1.1  mrg   if (!vec_stmt) /* transformation not required.  */
   7662  1.1  mrg     {
   7663  1.1  mrg       STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
   7664  1.1  mrg 
   7665  1.1  mrg       if (loop_vinfo
   7666  1.1  mrg 	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
   7667  1.1  mrg 	check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
   7668  1.1  mrg 					      vls_type, group_size,
   7669  1.1  mrg 					      memory_access_type, &gs_info,
   7670  1.1  mrg 					      mask);
   7671  1.1  mrg 
   7672  1.1  mrg       if (slp_node
   7673  1.1  mrg 	  && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
   7674  1.1  mrg 						vectype))
   7675  1.1  mrg 	{
   7676  1.1  mrg 	  if (dump_enabled_p ())
   7677  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   7678  1.1  mrg 			     "incompatible vector types for invariants\n");
   7679  1.1  mrg 	  return false;
   7680  1.1  mrg 	}
   7681  1.1  mrg 
   7682  1.1  mrg       if (dump_enabled_p ()
   7683  1.1  mrg 	  && memory_access_type != VMAT_ELEMENTWISE
   7684  1.1  mrg 	  && memory_access_type != VMAT_GATHER_SCATTER
   7685  1.1  mrg 	  && alignment_support_scheme != dr_aligned)
   7686  1.1  mrg 	dump_printf_loc (MSG_NOTE, vect_location,
   7687  1.1  mrg 			 "Vectorizing an unaligned access.\n");
   7688  1.1  mrg 
   7689  1.1  mrg       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
   7690  1.1  mrg       vect_model_store_cost (vinfo, stmt_info, ncopies,
   7691  1.1  mrg 			     memory_access_type, alignment_support_scheme,
   7692  1.1  mrg 			     misalignment, vls_type, slp_node, cost_vec);
   7693  1.1  mrg       return true;
   7694  1.1  mrg     }
   7695  1.1  mrg   gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
   7696  1.1  mrg 
   7697  1.1  mrg   /* Transform.  */
   7698  1.1  mrg 
   7699  1.1  mrg   ensure_base_align (dr_info);
   7700  1.1  mrg 
   7701  1.1  mrg   if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
   7702  1.1  mrg     {
   7703  1.1  mrg       tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
   7704  1.1  mrg       tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
   7705  1.1  mrg       tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
   7706  1.1  mrg       tree ptr, var, scale, vec_mask;
   7707  1.1  mrg       tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
   7708  1.1  mrg       tree mask_halfvectype = mask_vectype;
   7709  1.1  mrg       edge pe = loop_preheader_edge (loop);
   7710  1.1  mrg       gimple_seq seq;
   7711  1.1  mrg       basic_block new_bb;
   7712  1.1  mrg       enum { NARROW, NONE, WIDEN } modifier;
   7713  1.1  mrg       poly_uint64 scatter_off_nunits
   7714  1.1  mrg 	= TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
   7715  1.1  mrg 
   7716  1.1  mrg       if (known_eq (nunits, scatter_off_nunits))
   7717  1.1  mrg 	modifier = NONE;
   7718  1.1  mrg       else if (known_eq (nunits * 2, scatter_off_nunits))
   7719  1.1  mrg 	{
   7720  1.1  mrg 	  modifier = WIDEN;
   7721  1.1  mrg 
   7722  1.1  mrg 	  /* Currently gathers and scatters are only supported for
   7723  1.1  mrg 	     fixed-length vectors.  */
   7724  1.1  mrg 	  unsigned int count = scatter_off_nunits.to_constant ();
   7725  1.1  mrg 	  vec_perm_builder sel (count, count, 1);
   7726  1.1  mrg 	  for (i = 0; i < (unsigned int) count; ++i)
   7727  1.1  mrg 	    sel.quick_push (i | (count / 2));
   7728  1.1  mrg 
   7729  1.1  mrg 	  vec_perm_indices indices (sel, 1, count);
   7730  1.1  mrg 	  perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
   7731  1.1  mrg 						  indices);
   7732  1.1  mrg 	  gcc_assert (perm_mask != NULL_TREE);
   7733  1.1  mrg 	}
   7734  1.1  mrg       else if (known_eq (nunits, scatter_off_nunits * 2))
   7735  1.1  mrg 	{
   7736  1.1  mrg 	  modifier = NARROW;
   7737  1.1  mrg 
   7738  1.1  mrg 	  /* Currently gathers and scatters are only supported for
   7739  1.1  mrg 	     fixed-length vectors.  */
   7740  1.1  mrg 	  unsigned int count = nunits.to_constant ();
   7741  1.1  mrg 	  vec_perm_builder sel (count, count, 1);
   7742  1.1  mrg 	  for (i = 0; i < (unsigned int) count; ++i)
   7743  1.1  mrg 	    sel.quick_push (i | (count / 2));
   7744  1.1  mrg 
   7745  1.1  mrg 	  vec_perm_indices indices (sel, 2, count);
   7746  1.1  mrg 	  perm_mask = vect_gen_perm_mask_checked (vectype, indices);
   7747  1.1  mrg 	  gcc_assert (perm_mask != NULL_TREE);
   7748  1.1  mrg 	  ncopies *= 2;
   7749  1.1  mrg 
   7750  1.1  mrg 	  if (mask)
   7751  1.1  mrg 	    mask_halfvectype = truth_type_for (gs_info.offset_vectype);
   7752  1.1  mrg 	}
   7753  1.1  mrg       else
   7754  1.1  mrg 	gcc_unreachable ();
   7755  1.1  mrg 
   7756  1.1  mrg       rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
   7757  1.1  mrg       ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
   7758  1.1  mrg       masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
   7759  1.1  mrg       idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
   7760  1.1  mrg       srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
   7761  1.1  mrg       scaletype = TREE_VALUE (arglist);
   7762  1.1  mrg 
   7763  1.1  mrg       gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
   7764  1.1  mrg 			   && TREE_CODE (rettype) == VOID_TYPE);
   7765  1.1  mrg 
   7766  1.1  mrg       ptr = fold_convert (ptrtype, gs_info.base);
   7767  1.1  mrg       if (!is_gimple_min_invariant (ptr))
   7768  1.1  mrg 	{
   7769  1.1  mrg 	  ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
   7770  1.1  mrg 	  new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
   7771  1.1  mrg 	  gcc_assert (!new_bb);
   7772  1.1  mrg 	}
   7773  1.1  mrg 
   7774  1.1  mrg       if (mask == NULL_TREE)
   7775  1.1  mrg 	{
   7776  1.1  mrg 	  mask_arg = build_int_cst (masktype, -1);
   7777  1.1  mrg 	  mask_arg = vect_init_vector (vinfo, stmt_info,
   7778  1.1  mrg 				       mask_arg, masktype, NULL);
   7779  1.1  mrg 	}
   7780  1.1  mrg 
   7781  1.1  mrg       scale = build_int_cst (scaletype, gs_info.scale);
   7782  1.1  mrg 
   7783  1.1  mrg       auto_vec<tree> vec_oprnds0;
   7784  1.1  mrg       auto_vec<tree> vec_oprnds1;
   7785  1.1  mrg       auto_vec<tree> vec_masks;
   7786  1.1  mrg       if (mask)
   7787  1.1  mrg 	{
   7788  1.1  mrg 	  tree mask_vectype = truth_type_for (vectype);
   7789  1.1  mrg 	  vect_get_vec_defs_for_operand (vinfo, stmt_info,
   7790  1.1  mrg 					 modifier == NARROW
   7791  1.1  mrg 					 ? ncopies / 2 : ncopies,
   7792  1.1  mrg 					 mask, &vec_masks, mask_vectype);
   7793  1.1  mrg 	}
   7794  1.1  mrg       vect_get_vec_defs_for_operand (vinfo, stmt_info,
   7795  1.1  mrg 				     modifier == WIDEN
   7796  1.1  mrg 				     ? ncopies / 2 : ncopies,
   7797  1.1  mrg 				     gs_info.offset, &vec_oprnds0);
   7798  1.1  mrg       vect_get_vec_defs_for_operand (vinfo, stmt_info,
   7799  1.1  mrg 				     modifier == NARROW
   7800  1.1  mrg 				     ? ncopies / 2 : ncopies,
   7801  1.1  mrg 				     op, &vec_oprnds1);
   7802  1.1  mrg       for (j = 0; j < ncopies; ++j)
   7803  1.1  mrg 	{
   7804  1.1  mrg 	  if (modifier == WIDEN)
   7805  1.1  mrg 	    {
   7806  1.1  mrg 	      if (j & 1)
   7807  1.1  mrg 		op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
   7808  1.1  mrg 					   perm_mask, stmt_info, gsi);
   7809  1.1  mrg 	      else
   7810  1.1  mrg 		op = vec_oprnd0 = vec_oprnds0[j / 2];
   7811  1.1  mrg 	      src = vec_oprnd1 = vec_oprnds1[j];
   7812  1.1  mrg 	      if (mask)
   7813  1.1  mrg 		mask_op = vec_mask = vec_masks[j];
   7814  1.1  mrg 	    }
   7815  1.1  mrg 	  else if (modifier == NARROW)
   7816  1.1  mrg 	    {
   7817  1.1  mrg 	      if (j & 1)
   7818  1.1  mrg 		src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
   7819  1.1  mrg 					    perm_mask, stmt_info, gsi);
   7820  1.1  mrg 	      else
   7821  1.1  mrg 		src = vec_oprnd1 = vec_oprnds1[j / 2];
   7822  1.1  mrg 	      op = vec_oprnd0 = vec_oprnds0[j];
   7823  1.1  mrg 	      if (mask)
   7824  1.1  mrg 		mask_op = vec_mask = vec_masks[j / 2];
   7825  1.1  mrg 	    }
   7826  1.1  mrg 	  else
   7827  1.1  mrg 	    {
   7828  1.1  mrg 	      op = vec_oprnd0 = vec_oprnds0[j];
   7829  1.1  mrg 	      src = vec_oprnd1 = vec_oprnds1[j];
   7830  1.1  mrg 	      if (mask)
   7831  1.1  mrg 		mask_op = vec_mask = vec_masks[j];
   7832  1.1  mrg 	    }
   7833  1.1  mrg 
   7834  1.1  mrg 	  if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
   7835  1.1  mrg 	    {
   7836  1.1  mrg 	      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
   7837  1.1  mrg 				    TYPE_VECTOR_SUBPARTS (srctype)));
   7838  1.1  mrg 	      var = vect_get_new_ssa_name (srctype, vect_simple_var);
   7839  1.1  mrg 	      src = build1 (VIEW_CONVERT_EXPR, srctype, src);
   7840  1.1  mrg 	      gassign *new_stmt
   7841  1.1  mrg 		= gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
   7842  1.1  mrg 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   7843  1.1  mrg 	      src = var;
   7844  1.1  mrg 	    }
   7845  1.1  mrg 
   7846  1.1  mrg 	  if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
   7847  1.1  mrg 	    {
   7848  1.1  mrg 	      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
   7849  1.1  mrg 				    TYPE_VECTOR_SUBPARTS (idxtype)));
   7850  1.1  mrg 	      var = vect_get_new_ssa_name (idxtype, vect_simple_var);
   7851  1.1  mrg 	      op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
   7852  1.1  mrg 	      gassign *new_stmt
   7853  1.1  mrg 		= gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
   7854  1.1  mrg 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   7855  1.1  mrg 	      op = var;
   7856  1.1  mrg 	    }
   7857  1.1  mrg 
   7858  1.1  mrg 	  if (mask)
   7859  1.1  mrg 	    {
   7860  1.1  mrg 	      tree utype;
   7861  1.1  mrg 	      mask_arg = mask_op;
   7862  1.1  mrg 	      if (modifier == NARROW)
   7863  1.1  mrg 		{
   7864  1.1  mrg 		  var = vect_get_new_ssa_name (mask_halfvectype,
   7865  1.1  mrg 					       vect_simple_var);
   7866  1.1  mrg 		  gassign *new_stmt
   7867  1.1  mrg 		    = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
   7868  1.1  mrg 							: VEC_UNPACK_LO_EXPR,
   7869  1.1  mrg 					   mask_op);
   7870  1.1  mrg 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   7871  1.1  mrg 		  mask_arg = var;
   7872  1.1  mrg 		}
   7873  1.1  mrg 	      tree optype = TREE_TYPE (mask_arg);
   7874  1.1  mrg 	      if (TYPE_MODE (masktype) == TYPE_MODE (optype))
   7875  1.1  mrg 		utype = masktype;
   7876  1.1  mrg 	      else
   7877  1.1  mrg 		utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
   7878  1.1  mrg 	      var = vect_get_new_ssa_name (utype, vect_scalar_var);
   7879  1.1  mrg 	      mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
   7880  1.1  mrg 	      gassign *new_stmt
   7881  1.1  mrg 		= gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
   7882  1.1  mrg 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   7883  1.1  mrg 	      mask_arg = var;
   7884  1.1  mrg 	      if (!useless_type_conversion_p (masktype, utype))
   7885  1.1  mrg 		{
   7886  1.1  mrg 		  gcc_assert (TYPE_PRECISION (utype)
   7887  1.1  mrg 			      <= TYPE_PRECISION (masktype));
   7888  1.1  mrg 		  var = vect_get_new_ssa_name (masktype, vect_scalar_var);
   7889  1.1  mrg 		  new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
   7890  1.1  mrg 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   7891  1.1  mrg 		  mask_arg = var;
   7892  1.1  mrg 		}
   7893  1.1  mrg 	    }
   7894  1.1  mrg 
   7895  1.1  mrg 	  gcall *new_stmt
   7896  1.1  mrg 	    = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
   7897  1.1  mrg 	   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   7898  1.1  mrg 
   7899  1.1  mrg 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
   7900  1.1  mrg 	}
   7901  1.1  mrg       *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
   7902  1.1  mrg       return true;
   7903  1.1  mrg     }
   7904  1.1  mrg   else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
   7905  1.1  mrg     return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
   7906  1.1  mrg 
   7907  1.1  mrg   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
   7908  1.1  mrg     DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
   7909  1.1  mrg 
   7910  1.1  mrg   if (grouped_store)
   7911  1.1  mrg     {
   7912  1.1  mrg       /* FORNOW */
   7913  1.1  mrg       gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
   7914  1.1  mrg 
   7915  1.1  mrg       /* We vectorize all the stmts of the interleaving group when we
   7916  1.1  mrg 	 reach the last stmt in the group.  */
   7917  1.1  mrg       if (DR_GROUP_STORE_COUNT (first_stmt_info)
   7918  1.1  mrg 	  < DR_GROUP_SIZE (first_stmt_info)
   7919  1.1  mrg 	  && !slp)
   7920  1.1  mrg 	{
   7921  1.1  mrg 	  *vec_stmt = NULL;
   7922  1.1  mrg 	  return true;
   7923  1.1  mrg 	}
   7924  1.1  mrg 
   7925  1.1  mrg       if (slp)
   7926  1.1  mrg         {
   7927  1.1  mrg           grouped_store = false;
   7928  1.1  mrg           /* VEC_NUM is the number of vect stmts to be created for this
   7929  1.1  mrg              group.  */
   7930  1.1  mrg           vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
   7931  1.1  mrg 	  first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
   7932  1.1  mrg 	  gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
   7933  1.1  mrg 		      == first_stmt_info);
   7934  1.1  mrg 	  first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
   7935  1.1  mrg 	  op = vect_get_store_rhs (first_stmt_info);
   7936  1.1  mrg         }
   7937  1.1  mrg       else
   7938  1.1  mrg         /* VEC_NUM is the number of vect stmts to be created for this
   7939  1.1  mrg            group.  */
   7940  1.1  mrg 	vec_num = group_size;
   7941  1.1  mrg 
   7942  1.1  mrg       ref_type = get_group_alias_ptr_type (first_stmt_info);
   7943  1.1  mrg     }
   7944  1.1  mrg   else
   7945  1.1  mrg     ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
   7946  1.1  mrg 
   7947  1.1  mrg   if (dump_enabled_p ())
   7948  1.1  mrg     dump_printf_loc (MSG_NOTE, vect_location,
   7949  1.1  mrg                      "transform store. ncopies = %d\n", ncopies);
   7950  1.1  mrg 
   7951  1.1  mrg   if (memory_access_type == VMAT_ELEMENTWISE
   7952  1.1  mrg       || memory_access_type == VMAT_STRIDED_SLP)
   7953  1.1  mrg     {
   7954  1.1  mrg       gimple_stmt_iterator incr_gsi;
   7955  1.1  mrg       bool insert_after;
   7956  1.1  mrg       gimple *incr;
   7957  1.1  mrg       tree offvar;
   7958  1.1  mrg       tree ivstep;
   7959  1.1  mrg       tree running_off;
   7960  1.1  mrg       tree stride_base, stride_step, alias_off;
   7961  1.1  mrg       tree vec_oprnd;
   7962  1.1  mrg       tree dr_offset;
   7963  1.1  mrg       unsigned int g;
   7964  1.1  mrg       /* Checked by get_load_store_type.  */
   7965  1.1  mrg       unsigned int const_nunits = nunits.to_constant ();
   7966  1.1  mrg 
   7967  1.1  mrg       gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
   7968  1.1  mrg       gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
   7969  1.1  mrg 
   7970  1.1  mrg       dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
   7971  1.1  mrg       stride_base
   7972  1.1  mrg 	= fold_build_pointer_plus
   7973  1.1  mrg 	    (DR_BASE_ADDRESS (first_dr_info->dr),
   7974  1.1  mrg 	     size_binop (PLUS_EXPR,
   7975  1.1  mrg 			 convert_to_ptrofftype (dr_offset),
   7976  1.1  mrg 			 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
   7977  1.1  mrg       stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
   7978  1.1  mrg 
   7979  1.1  mrg       /* For a store with loop-invariant (but other than power-of-2)
   7980  1.1  mrg          stride (i.e. not a grouped access) like so:
   7981  1.1  mrg 
   7982  1.1  mrg 	   for (i = 0; i < n; i += stride)
   7983  1.1  mrg 	     array[i] = ...;
   7984  1.1  mrg 
   7985  1.1  mrg 	 we generate a new induction variable and new stores from
   7986  1.1  mrg 	 the components of the (vectorized) rhs:
   7987  1.1  mrg 
   7988  1.1  mrg 	   for (j = 0; ; j += VF*stride)
   7989  1.1  mrg 	     vectemp = ...;
   7990  1.1  mrg 	     tmp1 = vectemp[0];
   7991  1.1  mrg 	     array[j] = tmp1;
   7992  1.1  mrg 	     tmp2 = vectemp[1];
   7993  1.1  mrg 	     array[j + stride] = tmp2;
   7994  1.1  mrg 	     ...
   7995  1.1  mrg          */
   7996  1.1  mrg 
   7997  1.1  mrg       unsigned nstores = const_nunits;
   7998  1.1  mrg       unsigned lnel = 1;
   7999  1.1  mrg       tree ltype = elem_type;
   8000  1.1  mrg       tree lvectype = vectype;
   8001  1.1  mrg       if (slp)
   8002  1.1  mrg 	{
   8003  1.1  mrg 	  if (group_size < const_nunits
   8004  1.1  mrg 	      && const_nunits % group_size == 0)
   8005  1.1  mrg 	    {
   8006  1.1  mrg 	      nstores = const_nunits / group_size;
   8007  1.1  mrg 	      lnel = group_size;
   8008  1.1  mrg 	      ltype = build_vector_type (elem_type, group_size);
   8009  1.1  mrg 	      lvectype = vectype;
   8010  1.1  mrg 
   8011  1.1  mrg 	      /* First check if vec_extract optab doesn't support extraction
   8012  1.1  mrg 		 of vector elts directly.  */
   8013  1.1  mrg 	      scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
   8014  1.1  mrg 	      machine_mode vmode;
   8015  1.1  mrg 	      if (!VECTOR_MODE_P (TYPE_MODE (vectype))
   8016  1.1  mrg 		  || !related_vector_mode (TYPE_MODE (vectype), elmode,
   8017  1.1  mrg 					   group_size).exists (&vmode)
   8018  1.1  mrg 		  || (convert_optab_handler (vec_extract_optab,
   8019  1.1  mrg 					     TYPE_MODE (vectype), vmode)
   8020  1.1  mrg 		      == CODE_FOR_nothing))
   8021  1.1  mrg 		{
   8022  1.1  mrg 		  /* Try to avoid emitting an extract of vector elements
   8023  1.1  mrg 		     by performing the extracts using an integer type of the
   8024  1.1  mrg 		     same size, extracting from a vector of those and then
   8025  1.1  mrg 		     re-interpreting it as the original vector type if
   8026  1.1  mrg 		     supported.  */
   8027  1.1  mrg 		  unsigned lsize
   8028  1.1  mrg 		    = group_size * GET_MODE_BITSIZE (elmode);
   8029  1.1  mrg 		  unsigned int lnunits = const_nunits / group_size;
   8030  1.1  mrg 		  /* If we can't construct such a vector fall back to
   8031  1.1  mrg 		     element extracts from the original vector type and
   8032  1.1  mrg 		     element size stores.  */
   8033  1.1  mrg 		  if (int_mode_for_size (lsize, 0).exists (&elmode)
   8034  1.1  mrg 		      && VECTOR_MODE_P (TYPE_MODE (vectype))
   8035  1.1  mrg 		      && related_vector_mode (TYPE_MODE (vectype), elmode,
   8036  1.1  mrg 					      lnunits).exists (&vmode)
   8037  1.1  mrg 		      && (convert_optab_handler (vec_extract_optab,
   8038  1.1  mrg 						 vmode, elmode)
   8039  1.1  mrg 			  != CODE_FOR_nothing))
   8040  1.1  mrg 		    {
   8041  1.1  mrg 		      nstores = lnunits;
   8042  1.1  mrg 		      lnel = group_size;
   8043  1.1  mrg 		      ltype = build_nonstandard_integer_type (lsize, 1);
   8044  1.1  mrg 		      lvectype = build_vector_type (ltype, nstores);
   8045  1.1  mrg 		    }
   8046  1.1  mrg 		  /* Else fall back to vector extraction anyway.
   8047  1.1  mrg 		     Fewer stores are more important than avoiding spilling
   8048  1.1  mrg 		     of the vector we extract from.  Compared to the
   8049  1.1  mrg 		     construction case in vectorizable_load no store-forwarding
   8050  1.1  mrg 		     issue exists here for reasonable archs.  */
   8051  1.1  mrg 		}
   8052  1.1  mrg 	    }
   8053  1.1  mrg 	  else if (group_size >= const_nunits
   8054  1.1  mrg 		   && group_size % const_nunits == 0)
   8055  1.1  mrg 	    {
   8056  1.1  mrg 	      nstores = 1;
   8057  1.1  mrg 	      lnel = const_nunits;
   8058  1.1  mrg 	      ltype = vectype;
   8059  1.1  mrg 	      lvectype = vectype;
   8060  1.1  mrg 	    }
   8061  1.1  mrg 	  ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
   8062  1.1  mrg 	  ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
   8063  1.1  mrg 	}
   8064  1.1  mrg 
   8065  1.1  mrg       ivstep = stride_step;
   8066  1.1  mrg       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
   8067  1.1  mrg 			    build_int_cst (TREE_TYPE (ivstep), vf));
   8068  1.1  mrg 
   8069  1.1  mrg       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
   8070  1.1  mrg 
   8071  1.1  mrg       stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
   8072  1.1  mrg       ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
   8073  1.1  mrg       create_iv (stride_base, ivstep, NULL,
   8074  1.1  mrg 		 loop, &incr_gsi, insert_after,
   8075  1.1  mrg 		 &offvar, NULL);
   8076  1.1  mrg       incr = gsi_stmt (incr_gsi);
   8077  1.1  mrg 
   8078  1.1  mrg       stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
   8079  1.1  mrg 
   8080  1.1  mrg       alias_off = build_int_cst (ref_type, 0);
   8081  1.1  mrg       stmt_vec_info next_stmt_info = first_stmt_info;
   8082  1.1  mrg       for (g = 0; g < group_size; g++)
   8083  1.1  mrg 	{
   8084  1.1  mrg 	  running_off = offvar;
   8085  1.1  mrg 	  if (g)
   8086  1.1  mrg 	    {
   8087  1.1  mrg 	      tree size = TYPE_SIZE_UNIT (ltype);
   8088  1.1  mrg 	      tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
   8089  1.1  mrg 				      size);
   8090  1.1  mrg 	      tree newoff = copy_ssa_name (running_off, NULL);
   8091  1.1  mrg 	      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
   8092  1.1  mrg 					  running_off, pos);
   8093  1.1  mrg 	      vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
   8094  1.1  mrg 	      running_off = newoff;
   8095  1.1  mrg 	    }
   8096  1.1  mrg 	  if (!slp)
   8097  1.1  mrg 	    op = vect_get_store_rhs (next_stmt_info);
   8098  1.1  mrg 	  vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
   8099  1.1  mrg 			     op, &vec_oprnds);
   8100  1.1  mrg 	  unsigned int group_el = 0;
   8101  1.1  mrg 	  unsigned HOST_WIDE_INT
   8102  1.1  mrg 	    elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
   8103  1.1  mrg 	  for (j = 0; j < ncopies; j++)
   8104  1.1  mrg 	    {
   8105  1.1  mrg 	      vec_oprnd = vec_oprnds[j];
   8106  1.1  mrg 	      /* Pun the vector to extract from if necessary.  */
   8107  1.1  mrg 	      if (lvectype != vectype)
   8108  1.1  mrg 		{
   8109  1.1  mrg 		  tree tem = make_ssa_name (lvectype);
   8110  1.1  mrg 		  gimple *pun
   8111  1.1  mrg 		    = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
   8112  1.1  mrg 							lvectype, vec_oprnd));
   8113  1.1  mrg 		  vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
   8114  1.1  mrg 		  vec_oprnd = tem;
   8115  1.1  mrg 		}
   8116  1.1  mrg 	      for (i = 0; i < nstores; i++)
   8117  1.1  mrg 		{
   8118  1.1  mrg 		  tree newref, newoff;
   8119  1.1  mrg 		  gimple *incr, *assign;
   8120  1.1  mrg 		  tree size = TYPE_SIZE (ltype);
   8121  1.1  mrg 		  /* Extract the i'th component.  */
   8122  1.1  mrg 		  tree pos = fold_build2 (MULT_EXPR, bitsizetype,
   8123  1.1  mrg 					  bitsize_int (i), size);
   8124  1.1  mrg 		  tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
   8125  1.1  mrg 					   size, pos);
   8126  1.1  mrg 
   8127  1.1  mrg 		  elem = force_gimple_operand_gsi (gsi, elem, true,
   8128  1.1  mrg 						   NULL_TREE, true,
   8129  1.1  mrg 						   GSI_SAME_STMT);
   8130  1.1  mrg 
   8131  1.1  mrg 		  tree this_off = build_int_cst (TREE_TYPE (alias_off),
   8132  1.1  mrg 						 group_el * elsz);
   8133  1.1  mrg 		  newref = build2 (MEM_REF, ltype,
   8134  1.1  mrg 				   running_off, this_off);
   8135  1.1  mrg 		  vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
   8136  1.1  mrg 
   8137  1.1  mrg 		  /* And store it to *running_off.  */
   8138  1.1  mrg 		  assign = gimple_build_assign (newref, elem);
   8139  1.1  mrg 		  vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
   8140  1.1  mrg 
   8141  1.1  mrg 		  group_el += lnel;
   8142  1.1  mrg 		  if (! slp
   8143  1.1  mrg 		      || group_el == group_size)
   8144  1.1  mrg 		    {
   8145  1.1  mrg 		      newoff = copy_ssa_name (running_off, NULL);
   8146  1.1  mrg 		      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
   8147  1.1  mrg 						  running_off, stride_step);
   8148  1.1  mrg 		      vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
   8149  1.1  mrg 
   8150  1.1  mrg 		      running_off = newoff;
   8151  1.1  mrg 		      group_el = 0;
   8152  1.1  mrg 		    }
   8153  1.1  mrg 		  if (g == group_size - 1
   8154  1.1  mrg 		      && !slp)
   8155  1.1  mrg 		    {
   8156  1.1  mrg 		      if (j == 0 && i == 0)
   8157  1.1  mrg 			*vec_stmt = assign;
   8158  1.1  mrg 		      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
   8159  1.1  mrg 		    }
   8160  1.1  mrg 		}
   8161  1.1  mrg 	    }
   8162  1.1  mrg 	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
   8163  1.1  mrg 	  vec_oprnds.release ();
   8164  1.1  mrg 	  if (slp)
   8165  1.1  mrg 	    break;
   8166  1.1  mrg 	}
   8167  1.1  mrg 
   8168  1.1  mrg       return true;
   8169  1.1  mrg     }
   8170  1.1  mrg 
   8171  1.1  mrg   auto_vec<tree> dr_chain (group_size);
   8172  1.1  mrg   oprnds.create (group_size);
   8173  1.1  mrg 
   8174  1.1  mrg   gcc_assert (alignment_support_scheme);
   8175  1.1  mrg   vec_loop_masks *loop_masks
   8176  1.1  mrg     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
   8177  1.1  mrg        ? &LOOP_VINFO_MASKS (loop_vinfo)
   8178  1.1  mrg        : NULL);
   8179  1.1  mrg   vec_loop_lens *loop_lens
   8180  1.1  mrg     = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
   8181  1.1  mrg        ? &LOOP_VINFO_LENS (loop_vinfo)
   8182  1.1  mrg        : NULL);
   8183  1.1  mrg 
   8184  1.1  mrg   /* Shouldn't go with length-based approach if fully masked.  */
   8185  1.1  mrg   gcc_assert (!loop_lens || !loop_masks);
   8186  1.1  mrg 
   8187  1.1  mrg   /* Targets with store-lane instructions must not require explicit
   8188  1.1  mrg      realignment.  vect_supportable_dr_alignment always returns either
   8189  1.1  mrg      dr_aligned or dr_unaligned_supported for masked operations.  */
   8190  1.1  mrg   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
   8191  1.1  mrg 	       && !mask
   8192  1.1  mrg 	       && !loop_masks)
   8193  1.1  mrg 	      || alignment_support_scheme == dr_aligned
   8194  1.1  mrg 	      || alignment_support_scheme == dr_unaligned_supported);
   8195  1.1  mrg 
   8196  1.1  mrg   tree offset = NULL_TREE;
   8197  1.1  mrg   if (!known_eq (poffset, 0))
   8198  1.1  mrg     offset = size_int (poffset);
   8199  1.1  mrg 
   8200  1.1  mrg   tree bump;
   8201  1.1  mrg   tree vec_offset = NULL_TREE;
   8202  1.1  mrg   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
   8203  1.1  mrg     {
   8204  1.1  mrg       aggr_type = NULL_TREE;
   8205  1.1  mrg       bump = NULL_TREE;
   8206  1.1  mrg     }
   8207  1.1  mrg   else if (memory_access_type == VMAT_GATHER_SCATTER)
   8208  1.1  mrg     {
   8209  1.1  mrg       aggr_type = elem_type;
   8210  1.1  mrg       vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
   8211  1.1  mrg 				       &bump, &vec_offset);
   8212  1.1  mrg     }
   8213  1.1  mrg   else
   8214  1.1  mrg     {
   8215  1.1  mrg       if (memory_access_type == VMAT_LOAD_STORE_LANES)
   8216  1.1  mrg 	aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
   8217  1.1  mrg       else
   8218  1.1  mrg 	aggr_type = vectype;
   8219  1.1  mrg       bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
   8220  1.1  mrg 					  memory_access_type);
   8221  1.1  mrg     }
   8222  1.1  mrg 
   8223  1.1  mrg   if (mask)
   8224  1.1  mrg     LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
   8225  1.1  mrg 
   8226  1.1  mrg   /* In case the vectorization factor (VF) is bigger than the number
   8227  1.1  mrg      of elements that we can fit in a vectype (nunits), we have to generate
   8228  1.1  mrg      more than one vector stmt - i.e - we need to "unroll" the
   8229  1.1  mrg      vector stmt by a factor VF/nunits.  */
   8230  1.1  mrg 
   8231  1.1  mrg   /* In case of interleaving (non-unit grouped access):
   8232  1.1  mrg 
   8233  1.1  mrg         S1:  &base + 2 = x2
   8234  1.1  mrg         S2:  &base = x0
   8235  1.1  mrg         S3:  &base + 1 = x1
   8236  1.1  mrg         S4:  &base + 3 = x3
   8237  1.1  mrg 
   8238  1.1  mrg      We create vectorized stores starting from base address (the access of the
   8239  1.1  mrg      first stmt in the chain (S2 in the above example), when the last store stmt
   8240  1.1  mrg      of the chain (S4) is reached:
   8241  1.1  mrg 
   8242  1.1  mrg         VS1: &base = vx2
   8243  1.1  mrg 	VS2: &base + vec_size*1 = vx0
   8244  1.1  mrg 	VS3: &base + vec_size*2 = vx1
   8245  1.1  mrg 	VS4: &base + vec_size*3 = vx3
   8246  1.1  mrg 
   8247  1.1  mrg      Then permutation statements are generated:
   8248  1.1  mrg 
   8249  1.1  mrg 	VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
   8250  1.1  mrg 	VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
   8251  1.1  mrg 	...
   8252  1.1  mrg 
   8253  1.1  mrg      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
   8254  1.1  mrg      (the order of the data-refs in the output of vect_permute_store_chain
   8255  1.1  mrg      corresponds to the order of scalar stmts in the interleaving chain - see
   8256  1.1  mrg      the documentation of vect_permute_store_chain()).
   8257  1.1  mrg 
   8258  1.1  mrg      In case of both multiple types and interleaving, above vector stores and
   8259  1.1  mrg      permutation stmts are created for every copy.  The result vector stmts are
   8260  1.1  mrg      put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
   8261  1.1  mrg      STMT_VINFO_RELATED_STMT for the next copies.
   8262  1.1  mrg   */
   8263  1.1  mrg 
   8264  1.1  mrg   auto_vec<tree> vec_masks;
   8265  1.1  mrg   tree vec_mask = NULL;
   8266  1.1  mrg   auto_vec<tree> vec_offsets;
   8267  1.1  mrg   auto_vec<vec<tree> > gvec_oprnds;
   8268  1.1  mrg   gvec_oprnds.safe_grow_cleared (group_size, true);
   8269  1.1  mrg   for (j = 0; j < ncopies; j++)
   8270  1.1  mrg     {
   8271  1.1  mrg       gimple *new_stmt;
   8272  1.1  mrg       if (j == 0)
   8273  1.1  mrg 	{
   8274  1.1  mrg           if (slp)
   8275  1.1  mrg             {
   8276  1.1  mrg 	      /* Get vectorized arguments for SLP_NODE.  */
   8277  1.1  mrg 	      vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
   8278  1.1  mrg 				 op, &vec_oprnds);
   8279  1.1  mrg               vec_oprnd = vec_oprnds[0];
   8280  1.1  mrg             }
   8281  1.1  mrg           else
   8282  1.1  mrg             {
   8283  1.1  mrg 	      /* For interleaved stores we collect vectorized defs for all the
   8284  1.1  mrg 		 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
   8285  1.1  mrg 		 used as an input to vect_permute_store_chain().
   8286  1.1  mrg 
   8287  1.1  mrg 		 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
   8288  1.1  mrg 		 and OPRNDS are of size 1.  */
   8289  1.1  mrg 	      stmt_vec_info next_stmt_info = first_stmt_info;
   8290  1.1  mrg 	      for (i = 0; i < group_size; i++)
   8291  1.1  mrg 		{
   8292  1.1  mrg 		  /* Since gaps are not supported for interleaved stores,
   8293  1.1  mrg 		     DR_GROUP_SIZE is the exact number of stmts in the chain.
   8294  1.1  mrg 		     Therefore, NEXT_STMT_INFO can't be NULL_TREE.  In case
   8295  1.1  mrg 		     that there is no interleaving, DR_GROUP_SIZE is 1,
   8296  1.1  mrg 		     and only one iteration of the loop will be executed.  */
   8297  1.1  mrg 		  op = vect_get_store_rhs (next_stmt_info);
   8298  1.1  mrg 		  vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
   8299  1.1  mrg 						 ncopies, op, &gvec_oprnds[i]);
   8300  1.1  mrg 		  vec_oprnd = gvec_oprnds[i][0];
   8301  1.1  mrg 		  dr_chain.quick_push (gvec_oprnds[i][0]);
   8302  1.1  mrg 		  oprnds.quick_push (gvec_oprnds[i][0]);
   8303  1.1  mrg 		  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
   8304  1.1  mrg 		}
   8305  1.1  mrg 	      if (mask)
   8306  1.1  mrg 		{
   8307  1.1  mrg 		  vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
   8308  1.1  mrg 						 mask, &vec_masks, mask_vectype);
   8309  1.1  mrg 		  vec_mask = vec_masks[0];
   8310  1.1  mrg 		}
   8311  1.1  mrg 	    }
   8312  1.1  mrg 
   8313  1.1  mrg 	  /* We should have catched mismatched types earlier.  */
   8314  1.1  mrg 	  gcc_assert (useless_type_conversion_p (vectype,
   8315  1.1  mrg 						 TREE_TYPE (vec_oprnd)));
   8316  1.1  mrg 	  bool simd_lane_access_p
   8317  1.1  mrg 	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
   8318  1.1  mrg 	  if (simd_lane_access_p
   8319  1.1  mrg 	      && !loop_masks
   8320  1.1  mrg 	      && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
   8321  1.1  mrg 	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
   8322  1.1  mrg 	      && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
   8323  1.1  mrg 	      && integer_zerop (DR_INIT (first_dr_info->dr))
   8324  1.1  mrg 	      && alias_sets_conflict_p (get_alias_set (aggr_type),
   8325  1.1  mrg 					get_alias_set (TREE_TYPE (ref_type))))
   8326  1.1  mrg 	    {
   8327  1.1  mrg 	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
   8328  1.1  mrg 	      dataref_offset = build_int_cst (ref_type, 0);
   8329  1.1  mrg 	    }
   8330  1.1  mrg 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
   8331  1.1  mrg 	    {
   8332  1.1  mrg 	      vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
   8333  1.1  mrg 					   slp_node, &gs_info, &dataref_ptr,
   8334  1.1  mrg 					   &vec_offsets);
   8335  1.1  mrg 	      vec_offset = vec_offsets[0];
   8336  1.1  mrg 	    }
   8337  1.1  mrg 	  else
   8338  1.1  mrg 	    dataref_ptr
   8339  1.1  mrg 	      = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
   8340  1.1  mrg 					  simd_lane_access_p ? loop : NULL,
   8341  1.1  mrg 					  offset, &dummy, gsi, &ptr_incr,
   8342  1.1  mrg 					  simd_lane_access_p, bump);
   8343  1.1  mrg 	}
   8344  1.1  mrg       else
   8345  1.1  mrg 	{
   8346  1.1  mrg 	  /* For interleaved stores we created vectorized defs for all the
   8347  1.1  mrg 	     defs stored in OPRNDS in the previous iteration (previous copy).
   8348  1.1  mrg 	     DR_CHAIN is then used as an input to vect_permute_store_chain().
   8349  1.1  mrg 	     If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
   8350  1.1  mrg 	     OPRNDS are of size 1.  */
   8351  1.1  mrg 	  for (i = 0; i < group_size; i++)
   8352  1.1  mrg 	    {
   8353  1.1  mrg 	      vec_oprnd = gvec_oprnds[i][j];
   8354  1.1  mrg 	      dr_chain[i] = gvec_oprnds[i][j];
   8355  1.1  mrg 	      oprnds[i] = gvec_oprnds[i][j];
   8356  1.1  mrg 	    }
   8357  1.1  mrg 	  if (mask)
   8358  1.1  mrg 	    vec_mask = vec_masks[j];
   8359  1.1  mrg 	  if (dataref_offset)
   8360  1.1  mrg 	    dataref_offset
   8361  1.1  mrg 	      = int_const_binop (PLUS_EXPR, dataref_offset, bump);
   8362  1.1  mrg 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
   8363  1.1  mrg 	    vec_offset = vec_offsets[j];
   8364  1.1  mrg 	  else
   8365  1.1  mrg 	    dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
   8366  1.1  mrg 					   stmt_info, bump);
   8367  1.1  mrg 	}
   8368  1.1  mrg 
   8369  1.1  mrg       if (memory_access_type == VMAT_LOAD_STORE_LANES)
   8370  1.1  mrg 	{
   8371  1.1  mrg 	  tree vec_array;
   8372  1.1  mrg 
   8373  1.1  mrg 	  /* Get an array into which we can store the individual vectors.  */
   8374  1.1  mrg 	  vec_array = create_vector_array (vectype, vec_num);
   8375  1.1  mrg 
   8376  1.1  mrg 	  /* Invalidate the current contents of VEC_ARRAY.  This should
   8377  1.1  mrg 	     become an RTL clobber too, which prevents the vector registers
   8378  1.1  mrg 	     from being upward-exposed.  */
   8379  1.1  mrg 	  vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
   8380  1.1  mrg 
   8381  1.1  mrg 	  /* Store the individual vectors into the array.  */
   8382  1.1  mrg 	  for (i = 0; i < vec_num; i++)
   8383  1.1  mrg 	    {
   8384  1.1  mrg 	      vec_oprnd = dr_chain[i];
   8385  1.1  mrg 	      write_vector_array (vinfo, stmt_info,
   8386  1.1  mrg 				  gsi, vec_oprnd, vec_array, i);
   8387  1.1  mrg 	    }
   8388  1.1  mrg 
   8389  1.1  mrg 	  tree final_mask = NULL;
   8390  1.1  mrg 	  if (loop_masks)
   8391  1.1  mrg 	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
   8392  1.1  mrg 					     vectype, j);
   8393  1.1  mrg 	  if (vec_mask)
   8394  1.1  mrg 	    final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
   8395  1.1  mrg 					   final_mask, vec_mask, gsi);
   8396  1.1  mrg 
   8397  1.1  mrg 	  gcall *call;
   8398  1.1  mrg 	  if (final_mask)
   8399  1.1  mrg 	    {
   8400  1.1  mrg 	      /* Emit:
   8401  1.1  mrg 		   MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
   8402  1.1  mrg 				     VEC_ARRAY).  */
   8403  1.1  mrg 	      unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
   8404  1.1  mrg 	      tree alias_ptr = build_int_cst (ref_type, align);
   8405  1.1  mrg 	      call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
   8406  1.1  mrg 						 dataref_ptr, alias_ptr,
   8407  1.1  mrg 						 final_mask, vec_array);
   8408  1.1  mrg 	    }
   8409  1.1  mrg 	  else
   8410  1.1  mrg 	    {
   8411  1.1  mrg 	      /* Emit:
   8412  1.1  mrg 		   MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
   8413  1.1  mrg 	      data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
   8414  1.1  mrg 	      call = gimple_build_call_internal (IFN_STORE_LANES, 1,
   8415  1.1  mrg 						 vec_array);
   8416  1.1  mrg 	      gimple_call_set_lhs (call, data_ref);
   8417  1.1  mrg 	    }
   8418  1.1  mrg 	  gimple_call_set_nothrow (call, true);
   8419  1.1  mrg 	  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
   8420  1.1  mrg 	  new_stmt = call;
   8421  1.1  mrg 
   8422  1.1  mrg 	  /* Record that VEC_ARRAY is now dead.  */
   8423  1.1  mrg 	  vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
   8424  1.1  mrg 	}
   8425  1.1  mrg       else
   8426  1.1  mrg 	{
   8427  1.1  mrg 	  new_stmt = NULL;
   8428  1.1  mrg 	  if (grouped_store)
   8429  1.1  mrg 	    {
   8430  1.1  mrg 	      if (j == 0)
   8431  1.1  mrg 		result_chain.create (group_size);
   8432  1.1  mrg 	      /* Permute.  */
   8433  1.1  mrg 	      vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
   8434  1.1  mrg 					gsi, &result_chain);
   8435  1.1  mrg 	    }
   8436  1.1  mrg 
   8437  1.1  mrg 	  stmt_vec_info next_stmt_info = first_stmt_info;
   8438  1.1  mrg 	  for (i = 0; i < vec_num; i++)
   8439  1.1  mrg 	    {
   8440  1.1  mrg 	      unsigned misalign;
   8441  1.1  mrg 	      unsigned HOST_WIDE_INT align;
   8442  1.1  mrg 
   8443  1.1  mrg 	      tree final_mask = NULL_TREE;
   8444  1.1  mrg 	      if (loop_masks)
   8445  1.1  mrg 		final_mask = vect_get_loop_mask (gsi, loop_masks,
   8446  1.1  mrg 						 vec_num * ncopies,
   8447  1.1  mrg 						 vectype, vec_num * j + i);
   8448  1.1  mrg 	      if (vec_mask)
   8449  1.1  mrg 		final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
   8450  1.1  mrg 					       final_mask, vec_mask, gsi);
   8451  1.1  mrg 
   8452  1.1  mrg 	      if (memory_access_type == VMAT_GATHER_SCATTER)
   8453  1.1  mrg 		{
   8454  1.1  mrg 		  tree scale = size_int (gs_info.scale);
   8455  1.1  mrg 		  gcall *call;
   8456  1.1  mrg 		  if (final_mask)
   8457  1.1  mrg 		    call = gimple_build_call_internal
   8458  1.1  mrg 		      (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
   8459  1.1  mrg 		       scale, vec_oprnd, final_mask);
   8460  1.1  mrg 		  else
   8461  1.1  mrg 		    call = gimple_build_call_internal
   8462  1.1  mrg 		      (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
   8463  1.1  mrg 		       scale, vec_oprnd);
   8464  1.1  mrg 		  gimple_call_set_nothrow (call, true);
   8465  1.1  mrg 		  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
   8466  1.1  mrg 		  new_stmt = call;
   8467  1.1  mrg 		  break;
   8468  1.1  mrg 		}
   8469  1.1  mrg 
   8470  1.1  mrg 	      if (i > 0)
   8471  1.1  mrg 		/* Bump the vector pointer.  */
   8472  1.1  mrg 		dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
   8473  1.1  mrg 					       gsi, stmt_info, bump);
   8474  1.1  mrg 
   8475  1.1  mrg 	      if (slp)
   8476  1.1  mrg 		vec_oprnd = vec_oprnds[i];
   8477  1.1  mrg 	      else if (grouped_store)
   8478  1.1  mrg 		/* For grouped stores vectorized defs are interleaved in
   8479  1.1  mrg 		   vect_permute_store_chain().  */
   8480  1.1  mrg 		vec_oprnd = result_chain[i];
   8481  1.1  mrg 
   8482  1.1  mrg 	      align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
   8483  1.1  mrg 	      if (alignment_support_scheme == dr_aligned)
   8484  1.1  mrg 		misalign = 0;
   8485  1.1  mrg 	      else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
   8486  1.1  mrg 		{
   8487  1.1  mrg 		  align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
   8488  1.1  mrg 		  misalign = 0;
   8489  1.1  mrg 		}
   8490  1.1  mrg 	      else
   8491  1.1  mrg 		misalign = misalignment;
   8492  1.1  mrg 	      if (dataref_offset == NULL_TREE
   8493  1.1  mrg 		  && TREE_CODE (dataref_ptr) == SSA_NAME)
   8494  1.1  mrg 		set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
   8495  1.1  mrg 					misalign);
   8496  1.1  mrg 	      align = least_bit_hwi (misalign | align);
   8497  1.1  mrg 
   8498  1.1  mrg 	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
   8499  1.1  mrg 		{
   8500  1.1  mrg 		  tree perm_mask = perm_mask_for_reverse (vectype);
   8501  1.1  mrg 		  tree perm_dest = vect_create_destination_var
   8502  1.1  mrg 		    (vect_get_store_rhs (stmt_info), vectype);
   8503  1.1  mrg 		  tree new_temp = make_ssa_name (perm_dest);
   8504  1.1  mrg 
   8505  1.1  mrg 		  /* Generate the permute statement.  */
   8506  1.1  mrg 		  gimple *perm_stmt
   8507  1.1  mrg 		    = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
   8508  1.1  mrg 					   vec_oprnd, perm_mask);
   8509  1.1  mrg 		  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
   8510  1.1  mrg 
   8511  1.1  mrg 		  perm_stmt = SSA_NAME_DEF_STMT (new_temp);
   8512  1.1  mrg 		  vec_oprnd = new_temp;
   8513  1.1  mrg 		}
   8514  1.1  mrg 
   8515  1.1  mrg 	      /* Arguments are ready.  Create the new vector stmt.  */
   8516  1.1  mrg 	      if (final_mask)
   8517  1.1  mrg 		{
   8518  1.1  mrg 		  tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
   8519  1.1  mrg 		  gcall *call
   8520  1.1  mrg 		    = gimple_build_call_internal (IFN_MASK_STORE, 4,
   8521  1.1  mrg 						  dataref_ptr, ptr,
   8522  1.1  mrg 						  final_mask, vec_oprnd);
   8523  1.1  mrg 		  gimple_call_set_nothrow (call, true);
   8524  1.1  mrg 		  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
   8525  1.1  mrg 		  new_stmt = call;
   8526  1.1  mrg 		}
   8527  1.1  mrg 	      else if (loop_lens)
   8528  1.1  mrg 		{
   8529  1.1  mrg 		  tree final_len
   8530  1.1  mrg 		    = vect_get_loop_len (loop_vinfo, loop_lens,
   8531  1.1  mrg 					 vec_num * ncopies, vec_num * j + i);
   8532  1.1  mrg 		  tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
   8533  1.1  mrg 		  machine_mode vmode = TYPE_MODE (vectype);
   8534  1.1  mrg 		  opt_machine_mode new_ovmode
   8535  1.1  mrg 		    = get_len_load_store_mode (vmode, false);
   8536  1.1  mrg 		  machine_mode new_vmode = new_ovmode.require ();
   8537  1.1  mrg 		  /* Need conversion if it's wrapped with VnQI.  */
   8538  1.1  mrg 		  if (vmode != new_vmode)
   8539  1.1  mrg 		    {
   8540  1.1  mrg 		      tree new_vtype
   8541  1.1  mrg 			= build_vector_type_for_mode (unsigned_intQI_type_node,
   8542  1.1  mrg 						      new_vmode);
   8543  1.1  mrg 		      tree var
   8544  1.1  mrg 			= vect_get_new_ssa_name (new_vtype, vect_simple_var);
   8545  1.1  mrg 		      vec_oprnd
   8546  1.1  mrg 			= build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
   8547  1.1  mrg 		      gassign *new_stmt
   8548  1.1  mrg 			= gimple_build_assign (var, VIEW_CONVERT_EXPR,
   8549  1.1  mrg 					       vec_oprnd);
   8550  1.1  mrg 		      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
   8551  1.1  mrg 						   gsi);
   8552  1.1  mrg 		      vec_oprnd = var;
   8553  1.1  mrg 		    }
   8554  1.1  mrg 
   8555  1.1  mrg 		  signed char biasval =
   8556  1.1  mrg 		    LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
   8557  1.1  mrg 
   8558  1.1  mrg 		  tree bias = build_int_cst (intQI_type_node, biasval);
   8559  1.1  mrg 		  gcall *call
   8560  1.1  mrg 		    = gimple_build_call_internal (IFN_LEN_STORE, 5, dataref_ptr,
   8561  1.1  mrg 						  ptr, final_len, vec_oprnd,
   8562  1.1  mrg 						  bias);
   8563  1.1  mrg 		  gimple_call_set_nothrow (call, true);
   8564  1.1  mrg 		  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
   8565  1.1  mrg 		  new_stmt = call;
   8566  1.1  mrg 		}
   8567  1.1  mrg 	      else
   8568  1.1  mrg 		{
   8569  1.1  mrg 		  data_ref = fold_build2 (MEM_REF, vectype,
   8570  1.1  mrg 					  dataref_ptr,
   8571  1.1  mrg 					  dataref_offset
   8572  1.1  mrg 					  ? dataref_offset
   8573  1.1  mrg 					  : build_int_cst (ref_type, 0));
   8574  1.1  mrg 		  if (alignment_support_scheme == dr_aligned)
   8575  1.1  mrg 		    ;
   8576  1.1  mrg 		  else
   8577  1.1  mrg 		    TREE_TYPE (data_ref)
   8578  1.1  mrg 		      = build_aligned_type (TREE_TYPE (data_ref),
   8579  1.1  mrg 					    align * BITS_PER_UNIT);
   8580  1.1  mrg 		  vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
   8581  1.1  mrg 		  new_stmt = gimple_build_assign (data_ref, vec_oprnd);
   8582  1.1  mrg 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   8583  1.1  mrg 		}
   8584  1.1  mrg 
   8585  1.1  mrg 	      if (slp)
   8586  1.1  mrg 		continue;
   8587  1.1  mrg 
   8588  1.1  mrg 	      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
   8589  1.1  mrg 	      if (!next_stmt_info)
   8590  1.1  mrg 		break;
   8591  1.1  mrg 	    }
   8592  1.1  mrg 	}
   8593  1.1  mrg       if (!slp)
   8594  1.1  mrg 	{
   8595  1.1  mrg 	  if (j == 0)
   8596  1.1  mrg 	    *vec_stmt = new_stmt;
   8597  1.1  mrg 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
   8598  1.1  mrg 	}
   8599  1.1  mrg     }
   8600  1.1  mrg 
   8601  1.1  mrg   for (i = 0; i < group_size; ++i)
   8602  1.1  mrg     {
   8603  1.1  mrg       vec<tree> oprndsi = gvec_oprnds[i];
   8604  1.1  mrg       oprndsi.release ();
   8605  1.1  mrg     }
   8606  1.1  mrg   oprnds.release ();
   8607  1.1  mrg   result_chain.release ();
   8608  1.1  mrg   vec_oprnds.release ();
   8609  1.1  mrg 
   8610  1.1  mrg   return true;
   8611  1.1  mrg }
   8612  1.1  mrg 
   8613  1.1  mrg /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
   8614  1.1  mrg    VECTOR_CST mask.  No checks are made that the target platform supports the
   8615  1.1  mrg    mask, so callers may wish to test can_vec_perm_const_p separately, or use
   8616  1.1  mrg    vect_gen_perm_mask_checked.  */
   8617  1.1  mrg 
   8618  1.1  mrg tree
   8619  1.1  mrg vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
   8620  1.1  mrg {
   8621  1.1  mrg   tree mask_type;
   8622  1.1  mrg 
   8623  1.1  mrg   poly_uint64 nunits = sel.length ();
   8624  1.1  mrg   gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
   8625  1.1  mrg 
   8626  1.1  mrg   mask_type = build_vector_type (ssizetype, nunits);
   8627  1.1  mrg   return vec_perm_indices_to_tree (mask_type, sel);
   8628  1.1  mrg }
   8629  1.1  mrg 
   8630  1.1  mrg /* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_const_p,
   8631  1.1  mrg    i.e. that the target supports the pattern _for arbitrary input vectors_.  */
   8632  1.1  mrg 
   8633  1.1  mrg tree
   8634  1.1  mrg vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
   8635  1.1  mrg {
   8636  1.1  mrg   gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
   8637  1.1  mrg   return vect_gen_perm_mask_any (vectype, sel);
   8638  1.1  mrg }
   8639  1.1  mrg 
   8640  1.1  mrg /* Given a vector variable X and Y, that was generated for the scalar
   8641  1.1  mrg    STMT_INFO, generate instructions to permute the vector elements of X and Y
   8642  1.1  mrg    using permutation mask MASK_VEC, insert them at *GSI and return the
   8643  1.1  mrg    permuted vector variable.  */
   8644  1.1  mrg 
   8645  1.1  mrg static tree
   8646  1.1  mrg permute_vec_elements (vec_info *vinfo,
   8647  1.1  mrg 		      tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
   8648  1.1  mrg 		      gimple_stmt_iterator *gsi)
   8649  1.1  mrg {
   8650  1.1  mrg   tree vectype = TREE_TYPE (x);
   8651  1.1  mrg   tree perm_dest, data_ref;
   8652  1.1  mrg   gimple *perm_stmt;
   8653  1.1  mrg 
   8654  1.1  mrg   tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
   8655  1.1  mrg   if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
   8656  1.1  mrg     perm_dest = vect_create_destination_var (scalar_dest, vectype);
   8657  1.1  mrg   else
   8658  1.1  mrg     perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
   8659  1.1  mrg   data_ref = make_ssa_name (perm_dest);
   8660  1.1  mrg 
   8661  1.1  mrg   /* Generate the permute statement.  */
   8662  1.1  mrg   perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
   8663  1.1  mrg   vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
   8664  1.1  mrg 
   8665  1.1  mrg   return data_ref;
   8666  1.1  mrg }
   8667  1.1  mrg 
/* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
   inserting them on the loops preheader edge.  Returns true if we
   were successful in doing so (and thus STMT_INFO can be moved then),
   otherwise returns false.  */

static bool
hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
{
  ssa_op_iter i;
  tree op;
  bool any = false;

  /* First pass: verify, without modifying anything, that every definition
     inside LOOP that feeds an SSA use of STMT_INFO can legally be hoisted.
     ANY records whether at least one such in-loop definition exists.  */
  FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	{
	  /* Make sure we don't need to recurse.  While we could do
	     so in simple cases when there are more complex use webs
	     we don't have an easy way to preserve stmt order to fulfil
	     dependencies within them.  */
	  tree op2;
	  ssa_op_iter i2;
	  /* PHIs cannot be moved to the preheader edge; give up.  */
	  if (gimple_code (def_stmt) == GIMPLE_PHI)
	    return false;
	  /* Every operand of the candidate definition must itself be
	     defined outside LOOP, so hoisting it needs no recursion.  */
	  FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
	    {
	      gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
	      if (!gimple_nop_p (def_stmt2)
		  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
		return false;
	    }
	  any = true;
	}
    }

  /* No use of STMT_INFO is defined inside LOOP: nothing to hoist.  */
  if (!any)
    return true;

  /* Second pass: move each in-loop definition onto the loop preheader
     edge.  The checks above guarantee the moved stmts only use values
     defined outside LOOP, so they do not depend on each other and any
     emission order is fine.  */
  FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
	  gsi_remove (&gsi, false);
	  gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
	}
    }

  return true;
}
   8722  1.1  mrg 
   8723  1.1  mrg /* vectorizable_load.
   8724  1.1  mrg 
   8725  1.1  mrg    Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
   8726  1.1  mrg    that can be vectorized.
   8727  1.1  mrg    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   8728  1.1  mrg    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   8729  1.1  mrg    Return true if STMT_INFO is vectorizable in this way.  */
   8730  1.1  mrg 
   8731  1.1  mrg static bool
   8732  1.1  mrg vectorizable_load (vec_info *vinfo,
   8733  1.1  mrg 		   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   8734  1.1  mrg 		   gimple **vec_stmt, slp_tree slp_node,
   8735  1.1  mrg 		   stmt_vector_for_cost *cost_vec)
   8736  1.1  mrg {
   8737  1.1  mrg   tree scalar_dest;
   8738  1.1  mrg   tree vec_dest = NULL;
   8739  1.1  mrg   tree data_ref = NULL;
   8740  1.1  mrg   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   8741  1.1  mrg   class loop *loop = NULL;
   8742  1.1  mrg   class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
   8743  1.1  mrg   bool nested_in_vect_loop = false;
   8744  1.1  mrg   tree elem_type;
   8745  1.1  mrg   tree new_temp;
   8746  1.1  mrg   machine_mode mode;
   8747  1.1  mrg   tree dummy;
   8748  1.1  mrg   tree dataref_ptr = NULL_TREE;
   8749  1.1  mrg   tree dataref_offset = NULL_TREE;
   8750  1.1  mrg   gimple *ptr_incr = NULL;
   8751  1.1  mrg   int ncopies;
   8752  1.1  mrg   int i, j;
   8753  1.1  mrg   unsigned int group_size;
   8754  1.1  mrg   poly_uint64 group_gap_adj;
   8755  1.1  mrg   tree msq = NULL_TREE, lsq;
   8756  1.1  mrg   tree realignment_token = NULL_TREE;
   8757  1.1  mrg   gphi *phi = NULL;
   8758  1.1  mrg   vec<tree> dr_chain = vNULL;
   8759  1.1  mrg   bool grouped_load = false;
   8760  1.1  mrg   stmt_vec_info first_stmt_info;
   8761  1.1  mrg   stmt_vec_info first_stmt_info_for_drptr = NULL;
   8762  1.1  mrg   bool compute_in_loop = false;
   8763  1.1  mrg   class loop *at_loop;
   8764  1.1  mrg   int vec_num;
   8765  1.1  mrg   bool slp = (slp_node != NULL);
   8766  1.1  mrg   bool slp_perm = false;
   8767  1.1  mrg   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
   8768  1.1  mrg   poly_uint64 vf;
   8769  1.1  mrg   tree aggr_type;
   8770  1.1  mrg   gather_scatter_info gs_info;
   8771  1.1  mrg   tree ref_type;
   8772  1.1  mrg   enum vect_def_type mask_dt = vect_unknown_def_type;
   8773  1.1  mrg 
   8774  1.1  mrg   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
   8775  1.1  mrg     return false;
   8776  1.1  mrg 
   8777  1.1  mrg   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
   8778  1.1  mrg       && ! vec_stmt)
   8779  1.1  mrg     return false;
   8780  1.1  mrg 
   8781  1.1  mrg   if (!STMT_VINFO_DATA_REF (stmt_info))
   8782  1.1  mrg     return false;
   8783  1.1  mrg 
   8784  1.1  mrg   tree mask = NULL_TREE, mask_vectype = NULL_TREE;
   8785  1.1  mrg   int mask_index = -1;
   8786  1.1  mrg   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
   8787  1.1  mrg     {
   8788  1.1  mrg       scalar_dest = gimple_assign_lhs (assign);
   8789  1.1  mrg       if (TREE_CODE (scalar_dest) != SSA_NAME)
   8790  1.1  mrg 	return false;
   8791  1.1  mrg 
   8792  1.1  mrg       tree_code code = gimple_assign_rhs_code (assign);
   8793  1.1  mrg       if (code != ARRAY_REF
   8794  1.1  mrg 	  && code != BIT_FIELD_REF
   8795  1.1  mrg 	  && code != INDIRECT_REF
   8796  1.1  mrg 	  && code != COMPONENT_REF
   8797  1.1  mrg 	  && code != IMAGPART_EXPR
   8798  1.1  mrg 	  && code != REALPART_EXPR
   8799  1.1  mrg 	  && code != MEM_REF
   8800  1.1  mrg 	  && TREE_CODE_CLASS (code) != tcc_declaration)
   8801  1.1  mrg 	return false;
   8802  1.1  mrg     }
   8803  1.1  mrg   else
   8804  1.1  mrg     {
   8805  1.1  mrg       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
   8806  1.1  mrg       if (!call || !gimple_call_internal_p (call))
   8807  1.1  mrg 	return false;
   8808  1.1  mrg 
   8809  1.1  mrg       internal_fn ifn = gimple_call_internal_fn (call);
   8810  1.1  mrg       if (!internal_load_fn_p (ifn))
   8811  1.1  mrg 	return false;
   8812  1.1  mrg 
   8813  1.1  mrg       scalar_dest = gimple_call_lhs (call);
   8814  1.1  mrg       if (!scalar_dest)
   8815  1.1  mrg 	return false;
   8816  1.1  mrg 
   8817  1.1  mrg       mask_index = internal_fn_mask_index (ifn);
   8818  1.1  mrg       /* ??? For SLP the mask operand is always last.  */
   8819  1.1  mrg       if (mask_index >= 0 && slp_node)
   8820  1.1  mrg 	mask_index = SLP_TREE_CHILDREN (slp_node).length () - 1;
   8821  1.1  mrg       if (mask_index >= 0
   8822  1.1  mrg 	  && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
   8823  1.1  mrg 				      &mask, NULL, &mask_dt, &mask_vectype))
   8824  1.1  mrg 	return false;
   8825  1.1  mrg     }
   8826  1.1  mrg 
   8827  1.1  mrg   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   8828  1.1  mrg   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
   8829  1.1  mrg 
   8830  1.1  mrg   if (loop_vinfo)
   8831  1.1  mrg     {
   8832  1.1  mrg       loop = LOOP_VINFO_LOOP (loop_vinfo);
   8833  1.1  mrg       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
   8834  1.1  mrg       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
   8835  1.1  mrg     }
   8836  1.1  mrg   else
   8837  1.1  mrg     vf = 1;
   8838  1.1  mrg 
   8839  1.1  mrg   /* Multiple types in SLP are handled by creating the appropriate number of
   8840  1.1  mrg      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
   8841  1.1  mrg      case of SLP.  */
   8842  1.1  mrg   if (slp)
   8843  1.1  mrg     ncopies = 1;
   8844  1.1  mrg   else
   8845  1.1  mrg     ncopies = vect_get_num_copies (loop_vinfo, vectype);
   8846  1.1  mrg 
   8847  1.1  mrg   gcc_assert (ncopies >= 1);
   8848  1.1  mrg 
   8849  1.1  mrg   /* FORNOW. This restriction should be relaxed.  */
   8850  1.1  mrg   if (nested_in_vect_loop && ncopies > 1)
   8851  1.1  mrg     {
   8852  1.1  mrg       if (dump_enabled_p ())
   8853  1.1  mrg         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   8854  1.1  mrg                          "multiple types in nested loop.\n");
   8855  1.1  mrg       return false;
   8856  1.1  mrg     }
   8857  1.1  mrg 
   8858  1.1  mrg   /* Invalidate assumptions made by dependence analysis when vectorization
   8859  1.1  mrg      on the unrolled body effectively re-orders stmts.  */
   8860  1.1  mrg   if (ncopies > 1
   8861  1.1  mrg       && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
   8862  1.1  mrg       && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
   8863  1.1  mrg 		   STMT_VINFO_MIN_NEG_DIST (stmt_info)))
   8864  1.1  mrg     {
   8865  1.1  mrg       if (dump_enabled_p ())
   8866  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   8867  1.1  mrg 			 "cannot perform implicit CSE when unrolling "
   8868  1.1  mrg 			 "with negative dependence distance\n");
   8869  1.1  mrg       return false;
   8870  1.1  mrg     }
   8871  1.1  mrg 
   8872  1.1  mrg   elem_type = TREE_TYPE (vectype);
   8873  1.1  mrg   mode = TYPE_MODE (vectype);
   8874  1.1  mrg 
   8875  1.1  mrg   /* FORNOW. In some cases can vectorize even if data-type not supported
   8876  1.1  mrg     (e.g. - data copies).  */
   8877  1.1  mrg   if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
   8878  1.1  mrg     {
   8879  1.1  mrg       if (dump_enabled_p ())
   8880  1.1  mrg         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   8881  1.1  mrg                          "Aligned load, but unsupported type.\n");
   8882  1.1  mrg       return false;
   8883  1.1  mrg     }
   8884  1.1  mrg 
   8885  1.1  mrg   /* Check if the load is a part of an interleaving chain.  */
   8886  1.1  mrg   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
   8887  1.1  mrg     {
   8888  1.1  mrg       grouped_load = true;
   8889  1.1  mrg       /* FORNOW */
   8890  1.1  mrg       gcc_assert (!nested_in_vect_loop);
   8891  1.1  mrg       gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
   8892  1.1  mrg 
   8893  1.1  mrg       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
   8894  1.1  mrg       group_size = DR_GROUP_SIZE (first_stmt_info);
   8895  1.1  mrg 
   8896  1.1  mrg       /* Refuse non-SLP vectorization of SLP-only groups.  */
   8897  1.1  mrg       if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
   8898  1.1  mrg 	{
   8899  1.1  mrg 	  if (dump_enabled_p ())
   8900  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   8901  1.1  mrg 			     "cannot vectorize load in non-SLP mode.\n");
   8902  1.1  mrg 	  return false;
   8903  1.1  mrg 	}
   8904  1.1  mrg 
   8905  1.1  mrg       if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
   8906  1.1  mrg 	{
   8907  1.1  mrg 	  slp_perm = true;
   8908  1.1  mrg 
   8909  1.1  mrg 	  if (!loop_vinfo)
   8910  1.1  mrg 	    {
   8911  1.1  mrg 	      /* In BB vectorization we may not actually use a loaded vector
   8912  1.1  mrg 		 accessing elements in excess of DR_GROUP_SIZE.  */
   8913  1.1  mrg 	      stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
   8914  1.1  mrg 	      group_info = DR_GROUP_FIRST_ELEMENT (group_info);
   8915  1.1  mrg 	      unsigned HOST_WIDE_INT nunits;
   8916  1.1  mrg 	      unsigned j, k, maxk = 0;
   8917  1.1  mrg 	      FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
   8918  1.1  mrg 		if (k > maxk)
   8919  1.1  mrg 		  maxk = k;
   8920  1.1  mrg 	      tree vectype = SLP_TREE_VECTYPE (slp_node);
   8921  1.1  mrg 	      if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
   8922  1.1  mrg 		  || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
   8923  1.1  mrg 		{
   8924  1.1  mrg 		  if (dump_enabled_p ())
   8925  1.1  mrg 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   8926  1.1  mrg 				     "BB vectorization with gaps at the end of "
   8927  1.1  mrg 				     "a load is not supported\n");
   8928  1.1  mrg 		  return false;
   8929  1.1  mrg 		}
   8930  1.1  mrg 	    }
   8931  1.1  mrg 
   8932  1.1  mrg 	  auto_vec<tree> tem;
   8933  1.1  mrg 	  unsigned n_perms;
   8934  1.1  mrg 	  if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
   8935  1.1  mrg 					     true, &n_perms))
   8936  1.1  mrg 	    {
   8937  1.1  mrg 	      if (dump_enabled_p ())
   8938  1.1  mrg 		dump_printf_loc (MSG_MISSED_OPTIMIZATION,
   8939  1.1  mrg 				 vect_location,
   8940  1.1  mrg 				 "unsupported load permutation\n");
   8941  1.1  mrg 	      return false;
   8942  1.1  mrg 	    }
   8943  1.1  mrg 	}
   8944  1.1  mrg 
   8945  1.1  mrg       /* Invalidate assumptions made by dependence analysis when vectorization
   8946  1.1  mrg 	 on the unrolled body effectively re-orders stmts.  */
   8947  1.1  mrg       if (!PURE_SLP_STMT (stmt_info)
   8948  1.1  mrg 	  && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
   8949  1.1  mrg 	  && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
   8950  1.1  mrg 		       STMT_VINFO_MIN_NEG_DIST (stmt_info)))
   8951  1.1  mrg 	{
   8952  1.1  mrg 	  if (dump_enabled_p ())
   8953  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   8954  1.1  mrg 			     "cannot perform implicit CSE when performing "
   8955  1.1  mrg 			     "group loads with negative dependence distance\n");
   8956  1.1  mrg 	  return false;
   8957  1.1  mrg 	}
   8958  1.1  mrg     }
   8959  1.1  mrg   else
   8960  1.1  mrg     group_size = 1;
   8961  1.1  mrg 
   8962  1.1  mrg   vect_memory_access_type memory_access_type;
   8963  1.1  mrg   enum dr_alignment_support alignment_support_scheme;
   8964  1.1  mrg   int misalignment;
   8965  1.1  mrg   poly_int64 poffset;
   8966  1.1  mrg   if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
   8967  1.1  mrg 			    ncopies, &memory_access_type, &poffset,
   8968  1.1  mrg 			    &alignment_support_scheme, &misalignment, &gs_info))
   8969  1.1  mrg     return false;
   8970  1.1  mrg 
   8971  1.1  mrg   if (mask)
   8972  1.1  mrg     {
   8973  1.1  mrg       if (memory_access_type == VMAT_CONTIGUOUS)
   8974  1.1  mrg 	{
   8975  1.1  mrg 	  machine_mode vec_mode = TYPE_MODE (vectype);
   8976  1.1  mrg 	  if (!VECTOR_MODE_P (vec_mode)
   8977  1.1  mrg 	      || !can_vec_mask_load_store_p (vec_mode,
   8978  1.1  mrg 					     TYPE_MODE (mask_vectype), true))
   8979  1.1  mrg 	    return false;
   8980  1.1  mrg 	}
   8981  1.1  mrg       else if (memory_access_type != VMAT_LOAD_STORE_LANES
   8982  1.1  mrg 	       && memory_access_type != VMAT_GATHER_SCATTER)
   8983  1.1  mrg 	{
   8984  1.1  mrg 	  if (dump_enabled_p ())
   8985  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   8986  1.1  mrg 			     "unsupported access type for masked load.\n");
   8987  1.1  mrg 	  return false;
   8988  1.1  mrg 	}
   8989  1.1  mrg       else if (memory_access_type == VMAT_GATHER_SCATTER
   8990  1.1  mrg 	       && gs_info.ifn == IFN_LAST
   8991  1.1  mrg 	       && !gs_info.decl)
   8992  1.1  mrg 	{
   8993  1.1  mrg 	  if (dump_enabled_p ())
   8994  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   8995  1.1  mrg 			     "unsupported masked emulated gather.\n");
   8996  1.1  mrg 	  return false;
   8997  1.1  mrg 	}
   8998  1.1  mrg       else if (memory_access_type == VMAT_ELEMENTWISE
   8999  1.1  mrg 	       || memory_access_type == VMAT_STRIDED_SLP)
   9000  1.1  mrg 	{
   9001  1.1  mrg 	  if (dump_enabled_p ())
   9002  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   9003  1.1  mrg 			     "unsupported masked strided access.\n");
   9004  1.1  mrg 	  return false;
   9005  1.1  mrg 	}
   9006  1.1  mrg     }
   9007  1.1  mrg 
   9008  1.1  mrg   if (!vec_stmt) /* transformation not required.  */
   9009  1.1  mrg     {
   9010  1.1  mrg       if (slp_node
   9011  1.1  mrg 	  && mask
   9012  1.1  mrg 	  && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
   9013  1.1  mrg 						mask_vectype))
   9014  1.1  mrg 	{
   9015  1.1  mrg 	  if (dump_enabled_p ())
   9016  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   9017  1.1  mrg 			     "incompatible vector types for invariants\n");
   9018  1.1  mrg 	  return false;
   9019  1.1  mrg 	}
   9020  1.1  mrg 
   9021  1.1  mrg       if (!slp)
   9022  1.1  mrg 	STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
   9023  1.1  mrg 
   9024  1.1  mrg       if (loop_vinfo
   9025  1.1  mrg 	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
   9026  1.1  mrg 	check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
   9027  1.1  mrg 					      VLS_LOAD, group_size,
   9028  1.1  mrg 					      memory_access_type, &gs_info,
   9029  1.1  mrg 					      mask);
   9030  1.1  mrg 
   9031  1.1  mrg       if (dump_enabled_p ()
   9032  1.1  mrg 	  && memory_access_type != VMAT_ELEMENTWISE
   9033  1.1  mrg 	  && memory_access_type != VMAT_GATHER_SCATTER
   9034  1.1  mrg 	  && alignment_support_scheme != dr_aligned)
   9035  1.1  mrg 	dump_printf_loc (MSG_NOTE, vect_location,
   9036  1.1  mrg 			 "Vectorizing an unaligned access.\n");
   9037  1.1  mrg 
   9038  1.1  mrg       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
   9039  1.1  mrg       vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
   9040  1.1  mrg 			    alignment_support_scheme, misalignment,
   9041  1.1  mrg 			    &gs_info, slp_node, cost_vec);
   9042  1.1  mrg       return true;
   9043  1.1  mrg     }
   9044  1.1  mrg 
   9045  1.1  mrg   if (!slp)
   9046  1.1  mrg     gcc_assert (memory_access_type
   9047  1.1  mrg 		== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
   9048  1.1  mrg 
   9049  1.1  mrg   if (dump_enabled_p ())
   9050  1.1  mrg     dump_printf_loc (MSG_NOTE, vect_location,
   9051  1.1  mrg                      "transform load. ncopies = %d\n", ncopies);
   9052  1.1  mrg 
   9053  1.1  mrg   /* Transform.  */
   9054  1.1  mrg 
   9055  1.1  mrg   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
   9056  1.1  mrg   ensure_base_align (dr_info);
   9057  1.1  mrg 
   9058  1.1  mrg   if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
   9059  1.1  mrg     {
   9060  1.1  mrg       vect_build_gather_load_calls (vinfo,
   9061  1.1  mrg 				    stmt_info, gsi, vec_stmt, &gs_info, mask);
   9062  1.1  mrg       return true;
   9063  1.1  mrg     }
   9064  1.1  mrg 
   9065  1.1  mrg   if (memory_access_type == VMAT_INVARIANT)
   9066  1.1  mrg     {
   9067  1.1  mrg       gcc_assert (!grouped_load && !mask && !bb_vinfo);
   9068  1.1  mrg       /* If we have versioned for aliasing or the loop doesn't
   9069  1.1  mrg 	 have any data dependencies that would preclude this,
   9070  1.1  mrg 	 then we are sure this is a loop invariant load and
   9071  1.1  mrg 	 thus we can insert it on the preheader edge.  */
   9072  1.1  mrg       bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
   9073  1.1  mrg 		      && !nested_in_vect_loop
   9074  1.1  mrg 		      && hoist_defs_of_uses (stmt_info, loop));
   9075  1.1  mrg       if (hoist_p)
   9076  1.1  mrg 	{
   9077  1.1  mrg 	  gassign *stmt = as_a <gassign *> (stmt_info->stmt);
   9078  1.1  mrg 	  if (dump_enabled_p ())
   9079  1.1  mrg 	    dump_printf_loc (MSG_NOTE, vect_location,
   9080  1.1  mrg 			     "hoisting out of the vectorized loop: %G", stmt);
   9081  1.1  mrg 	  scalar_dest = copy_ssa_name (scalar_dest);
   9082  1.1  mrg 	  tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
   9083  1.1  mrg 	  gsi_insert_on_edge_immediate
   9084  1.1  mrg 	    (loop_preheader_edge (loop),
   9085  1.1  mrg 	     gimple_build_assign (scalar_dest, rhs));
   9086  1.1  mrg 	}
   9087  1.1  mrg       /* These copies are all equivalent, but currently the representation
   9088  1.1  mrg 	 requires a separate STMT_VINFO_VEC_STMT for each one.  */
   9089  1.1  mrg       gimple_stmt_iterator gsi2 = *gsi;
   9090  1.1  mrg       gsi_next (&gsi2);
   9091  1.1  mrg       for (j = 0; j < ncopies; j++)
   9092  1.1  mrg 	{
   9093  1.1  mrg 	  if (hoist_p)
   9094  1.1  mrg 	    new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
   9095  1.1  mrg 					 vectype, NULL);
   9096  1.1  mrg 	  else
   9097  1.1  mrg 	    new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
   9098  1.1  mrg 					 vectype, &gsi2);
   9099  1.1  mrg 	  gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
   9100  1.1  mrg 	  if (slp)
   9101  1.1  mrg 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
   9102  1.1  mrg 	  else
   9103  1.1  mrg 	    {
   9104  1.1  mrg 	      if (j == 0)
   9105  1.1  mrg 		*vec_stmt = new_stmt;
   9106  1.1  mrg 	      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
   9107  1.1  mrg 	    }
   9108  1.1  mrg 	}
   9109  1.1  mrg       return true;
   9110  1.1  mrg     }
   9111  1.1  mrg 
   9112  1.1  mrg   if (memory_access_type == VMAT_ELEMENTWISE
   9113  1.1  mrg       || memory_access_type == VMAT_STRIDED_SLP)
   9114  1.1  mrg     {
   9115  1.1  mrg       gimple_stmt_iterator incr_gsi;
   9116  1.1  mrg       bool insert_after;
   9117  1.1  mrg       tree offvar;
   9118  1.1  mrg       tree ivstep;
   9119  1.1  mrg       tree running_off;
   9120  1.1  mrg       vec<constructor_elt, va_gc> *v = NULL;
   9121  1.1  mrg       tree stride_base, stride_step, alias_off;
   9122  1.1  mrg       /* Checked by get_load_store_type.  */
   9123  1.1  mrg       unsigned int const_nunits = nunits.to_constant ();
   9124  1.1  mrg       unsigned HOST_WIDE_INT cst_offset = 0;
   9125  1.1  mrg       tree dr_offset;
   9126  1.1  mrg 
   9127  1.1  mrg       gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
   9128  1.1  mrg       gcc_assert (!nested_in_vect_loop);
   9129  1.1  mrg 
   9130  1.1  mrg       if (grouped_load)
   9131  1.1  mrg 	{
   9132  1.1  mrg 	  first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
   9133  1.1  mrg 	  first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
   9134  1.1  mrg 	}
   9135  1.1  mrg       else
   9136  1.1  mrg 	{
   9137  1.1  mrg 	  first_stmt_info = stmt_info;
   9138  1.1  mrg 	  first_dr_info = dr_info;
   9139  1.1  mrg 	}
   9140  1.1  mrg       if (slp && grouped_load)
   9141  1.1  mrg 	{
   9142  1.1  mrg 	  group_size = DR_GROUP_SIZE (first_stmt_info);
   9143  1.1  mrg 	  ref_type = get_group_alias_ptr_type (first_stmt_info);
   9144  1.1  mrg 	}
   9145  1.1  mrg       else
   9146  1.1  mrg 	{
   9147  1.1  mrg 	  if (grouped_load)
   9148  1.1  mrg 	    cst_offset
   9149  1.1  mrg 	      = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
   9150  1.1  mrg 		 * vect_get_place_in_interleaving_chain (stmt_info,
   9151  1.1  mrg 							 first_stmt_info));
   9152  1.1  mrg 	  group_size = 1;
   9153  1.1  mrg 	  ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
   9154  1.1  mrg 	}
   9155  1.1  mrg 
   9156  1.1  mrg       dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
   9157  1.1  mrg       stride_base
   9158  1.1  mrg 	= fold_build_pointer_plus
   9159  1.1  mrg 	    (DR_BASE_ADDRESS (first_dr_info->dr),
   9160  1.1  mrg 	     size_binop (PLUS_EXPR,
   9161  1.1  mrg 			 convert_to_ptrofftype (dr_offset),
   9162  1.1  mrg 			 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
   9163  1.1  mrg       stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
   9164  1.1  mrg 
   9165  1.1  mrg       /* For a load with loop-invariant (but other than power-of-2)
   9166  1.1  mrg          stride (i.e. not a grouped access) like so:
   9167  1.1  mrg 
   9168  1.1  mrg 	   for (i = 0; i < n; i += stride)
   9169  1.1  mrg 	     ... = array[i];
   9170  1.1  mrg 
   9171  1.1  mrg 	 we generate a new induction variable and new accesses to
   9172  1.1  mrg 	 form a new vector (or vectors, depending on ncopies):
   9173  1.1  mrg 
   9174  1.1  mrg 	   for (j = 0; ; j += VF*stride)
   9175  1.1  mrg 	     tmp1 = array[j];
   9176  1.1  mrg 	     tmp2 = array[j + stride];
   9177  1.1  mrg 	     ...
   9178  1.1  mrg 	     vectemp = {tmp1, tmp2, ...}
   9179  1.1  mrg          */
   9180  1.1  mrg 
   9181  1.1  mrg       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
   9182  1.1  mrg 			    build_int_cst (TREE_TYPE (stride_step), vf));
   9183  1.1  mrg 
   9184  1.1  mrg       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
   9185  1.1  mrg 
   9186  1.1  mrg       stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
   9187  1.1  mrg       ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
   9188  1.1  mrg       create_iv (stride_base, ivstep, NULL,
   9189  1.1  mrg 		 loop, &incr_gsi, insert_after,
   9190  1.1  mrg 		 &offvar, NULL);
   9191  1.1  mrg 
   9192  1.1  mrg       stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
   9193  1.1  mrg 
   9194  1.1  mrg       running_off = offvar;
   9195  1.1  mrg       alias_off = build_int_cst (ref_type, 0);
   9196  1.1  mrg       int nloads = const_nunits;
   9197  1.1  mrg       int lnel = 1;
   9198  1.1  mrg       tree ltype = TREE_TYPE (vectype);
   9199  1.1  mrg       tree lvectype = vectype;
   9200  1.1  mrg       auto_vec<tree> dr_chain;
   9201  1.1  mrg       if (memory_access_type == VMAT_STRIDED_SLP)
   9202  1.1  mrg 	{
   9203  1.1  mrg 	  if (group_size < const_nunits)
   9204  1.1  mrg 	    {
   9205  1.1  mrg 	      /* First check if vec_init optab supports construction from vector
   9206  1.1  mrg 		 elts directly.  Otherwise avoid emitting a constructor of
   9207  1.1  mrg 		 vector elements by performing the loads using an integer type
   9208  1.1  mrg 		 of the same size, constructing a vector of those and then
   9209  1.1  mrg 		 re-interpreting it as the original vector type.  This avoids a
   9210  1.1  mrg 		 huge runtime penalty due to the general inability to perform
   9211  1.1  mrg 		 store forwarding from smaller stores to a larger load.  */
   9212  1.1  mrg 	      tree ptype;
   9213  1.1  mrg 	      tree vtype
   9214  1.1  mrg 		= vector_vector_composition_type (vectype,
   9215  1.1  mrg 						  const_nunits / group_size,
   9216  1.1  mrg 						  &ptype);
   9217  1.1  mrg 	      if (vtype != NULL_TREE)
   9218  1.1  mrg 		{
   9219  1.1  mrg 		  nloads = const_nunits / group_size;
   9220  1.1  mrg 		  lnel = group_size;
   9221  1.1  mrg 		  lvectype = vtype;
   9222  1.1  mrg 		  ltype = ptype;
   9223  1.1  mrg 		}
   9224  1.1  mrg 	    }
   9225  1.1  mrg 	  else
   9226  1.1  mrg 	    {
   9227  1.1  mrg 	      nloads = 1;
   9228  1.1  mrg 	      lnel = const_nunits;
   9229  1.1  mrg 	      ltype = vectype;
   9230  1.1  mrg 	    }
   9231  1.1  mrg 	  ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
   9232  1.1  mrg 	}
   9233  1.1  mrg       /* Load vector(1) scalar_type if it's 1 element-wise vectype.  */
   9234  1.1  mrg       else if (nloads == 1)
   9235  1.1  mrg 	ltype = vectype;
   9236  1.1  mrg 
   9237  1.1  mrg       if (slp)
   9238  1.1  mrg 	{
   9239  1.1  mrg 	  /* For SLP permutation support we need to load the whole group,
   9240  1.1  mrg 	     not only the number of vector stmts the permutation result
   9241  1.1  mrg 	     fits in.  */
   9242  1.1  mrg 	  if (slp_perm)
   9243  1.1  mrg 	    {
   9244  1.1  mrg 	      /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
   9245  1.1  mrg 		 variable VF.  */
   9246  1.1  mrg 	      unsigned int const_vf = vf.to_constant ();
   9247  1.1  mrg 	      ncopies = CEIL (group_size * const_vf, const_nunits);
   9248  1.1  mrg 	      dr_chain.create (ncopies);
   9249  1.1  mrg 	    }
   9250  1.1  mrg 	  else
   9251  1.1  mrg 	    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
   9252  1.1  mrg 	}
   9253  1.1  mrg       unsigned int group_el = 0;
   9254  1.1  mrg       unsigned HOST_WIDE_INT
   9255  1.1  mrg 	elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
   9256  1.1  mrg       unsigned int n_groups = 0;
   9257  1.1  mrg       for (j = 0; j < ncopies; j++)
   9258  1.1  mrg 	{
   9259  1.1  mrg 	  if (nloads > 1)
   9260  1.1  mrg 	    vec_alloc (v, nloads);
   9261  1.1  mrg 	  gimple *new_stmt = NULL;
   9262  1.1  mrg 	  for (i = 0; i < nloads; i++)
   9263  1.1  mrg 	    {
   9264  1.1  mrg 	      tree this_off = build_int_cst (TREE_TYPE (alias_off),
   9265  1.1  mrg 					     group_el * elsz + cst_offset);
   9266  1.1  mrg 	      tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
   9267  1.1  mrg 	      vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
   9268  1.1  mrg 	      new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
   9269  1.1  mrg 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   9270  1.1  mrg 	      if (nloads > 1)
   9271  1.1  mrg 		CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
   9272  1.1  mrg 					gimple_assign_lhs (new_stmt));
   9273  1.1  mrg 
   9274  1.1  mrg 	      group_el += lnel;
   9275  1.1  mrg 	      if (! slp
   9276  1.1  mrg 		  || group_el == group_size)
   9277  1.1  mrg 		{
   9278  1.1  mrg 		  n_groups++;
   9279  1.1  mrg 		  /* When doing SLP make sure to not load elements from
   9280  1.1  mrg 		     the next vector iteration, those will not be accessed
   9281  1.1  mrg 		     so just use the last element again.  See PR107451.  */
   9282  1.1  mrg 		  if (!slp || known_lt (n_groups, vf))
   9283  1.1  mrg 		    {
   9284  1.1  mrg 		      tree newoff = copy_ssa_name (running_off);
   9285  1.1  mrg 		      gimple *incr
   9286  1.1  mrg 			= gimple_build_assign (newoff, POINTER_PLUS_EXPR,
   9287  1.1  mrg 					       running_off, stride_step);
   9288  1.1  mrg 		      vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
   9289  1.1  mrg 		      running_off = newoff;
   9290  1.1  mrg 		    }
   9291  1.1  mrg 		  group_el = 0;
   9292  1.1  mrg 		}
   9293  1.1  mrg 	    }
   9294  1.1  mrg 	  if (nloads > 1)
   9295  1.1  mrg 	    {
   9296  1.1  mrg 	      tree vec_inv = build_constructor (lvectype, v);
   9297  1.1  mrg 	      new_temp = vect_init_vector (vinfo, stmt_info,
   9298  1.1  mrg 					   vec_inv, lvectype, gsi);
   9299  1.1  mrg 	      new_stmt = SSA_NAME_DEF_STMT (new_temp);
   9300  1.1  mrg 	      if (lvectype != vectype)
   9301  1.1  mrg 		{
   9302  1.1  mrg 		  new_stmt = gimple_build_assign (make_ssa_name (vectype),
   9303  1.1  mrg 						  VIEW_CONVERT_EXPR,
   9304  1.1  mrg 						  build1 (VIEW_CONVERT_EXPR,
   9305  1.1  mrg 							  vectype, new_temp));
   9306  1.1  mrg 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   9307  1.1  mrg 		}
   9308  1.1  mrg 	    }
   9309  1.1  mrg 
   9310  1.1  mrg 	  if (slp)
   9311  1.1  mrg 	    {
   9312  1.1  mrg 	      if (slp_perm)
   9313  1.1  mrg 		dr_chain.quick_push (gimple_assign_lhs (new_stmt));
   9314  1.1  mrg 	      else
   9315  1.1  mrg 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
   9316  1.1  mrg 	    }
   9317  1.1  mrg 	  else
   9318  1.1  mrg 	    {
   9319  1.1  mrg 	      if (j == 0)
   9320  1.1  mrg 		*vec_stmt = new_stmt;
   9321  1.1  mrg 	      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
   9322  1.1  mrg 	    }
   9323  1.1  mrg 	}
   9324  1.1  mrg       if (slp_perm)
   9325  1.1  mrg 	{
   9326  1.1  mrg 	  unsigned n_perms;
   9327  1.1  mrg 	  vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
   9328  1.1  mrg 					false, &n_perms);
   9329  1.1  mrg 	}
   9330  1.1  mrg       return true;
   9331  1.1  mrg     }
   9332  1.1  mrg 
   9333  1.1  mrg   if (memory_access_type == VMAT_GATHER_SCATTER
   9334  1.1  mrg       || (!slp && memory_access_type == VMAT_CONTIGUOUS))
   9335  1.1  mrg     grouped_load = false;
   9336  1.1  mrg 
   9337  1.1  mrg   if (grouped_load)
   9338  1.1  mrg     {
   9339  1.1  mrg       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
   9340  1.1  mrg       group_size = DR_GROUP_SIZE (first_stmt_info);
   9341  1.1  mrg       /* For SLP vectorization we directly vectorize a subchain
   9342  1.1  mrg          without permutation.  */
   9343  1.1  mrg       if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
   9344  1.1  mrg 	first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
   9345  1.1  mrg       /* For BB vectorization always use the first stmt to base
   9346  1.1  mrg 	 the data ref pointer on.  */
   9347  1.1  mrg       if (bb_vinfo)
   9348  1.1  mrg 	first_stmt_info_for_drptr
   9349  1.1  mrg 	  = vect_find_first_scalar_stmt_in_slp (slp_node);
   9350  1.1  mrg 
   9351  1.1  mrg       /* Check if the chain of loads is already vectorized.  */
   9352  1.1  mrg       if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
   9353  1.1  mrg 	  /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
   9354  1.1  mrg 	     ???  But we can only do so if there is exactly one
   9355  1.1  mrg 	     as we have no way to get at the rest.  Leave the CSE
   9356  1.1  mrg 	     opportunity alone.
   9357  1.1  mrg 	     ???  With the group load eventually participating
   9358  1.1  mrg 	     in multiple different permutations (having multiple
   9359  1.1  mrg 	     slp nodes which refer to the same group) the CSE
   9360  1.1  mrg 	     is even wrong code.  See PR56270.  */
   9361  1.1  mrg 	  && !slp)
   9362  1.1  mrg 	{
   9363  1.1  mrg 	  *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
   9364  1.1  mrg 	  return true;
   9365  1.1  mrg 	}
   9366  1.1  mrg       first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
   9367  1.1  mrg       group_gap_adj = 0;
   9368  1.1  mrg 
   9369  1.1  mrg       /* VEC_NUM is the number of vect stmts to be created for this group.  */
   9370  1.1  mrg       if (slp)
   9371  1.1  mrg 	{
   9372  1.1  mrg 	  grouped_load = false;
   9373  1.1  mrg 	  /* If an SLP permutation is from N elements to N elements,
   9374  1.1  mrg 	     and if one vector holds a whole number of N, we can load
   9375  1.1  mrg 	     the inputs to the permutation in the same way as an
   9376  1.1  mrg 	     unpermuted sequence.  In other cases we need to load the
   9377  1.1  mrg 	     whole group, not only the number of vector stmts the
   9378  1.1  mrg 	     permutation result fits in.  */
   9379  1.1  mrg 	  unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
   9380  1.1  mrg 	  if (slp_perm
   9381  1.1  mrg 	      && (group_size != scalar_lanes
   9382  1.1  mrg 		  || !multiple_p (nunits, group_size)))
   9383  1.1  mrg 	    {
   9384  1.1  mrg 	      /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
   9385  1.1  mrg 		 variable VF; see vect_transform_slp_perm_load.  */
   9386  1.1  mrg 	      unsigned int const_vf = vf.to_constant ();
   9387  1.1  mrg 	      unsigned int const_nunits = nunits.to_constant ();
   9388  1.1  mrg 	      vec_num = CEIL (group_size * const_vf, const_nunits);
   9389  1.1  mrg 	      group_gap_adj = vf * group_size - nunits * vec_num;
   9390  1.1  mrg 	    }
   9391  1.1  mrg 	  else
   9392  1.1  mrg 	    {
   9393  1.1  mrg 	      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
   9394  1.1  mrg 	      group_gap_adj
   9395  1.1  mrg 		= group_size - scalar_lanes;
   9396  1.1  mrg 	    }
   9397  1.1  mrg     	}
   9398  1.1  mrg       else
   9399  1.1  mrg 	vec_num = group_size;
   9400  1.1  mrg 
   9401  1.1  mrg       ref_type = get_group_alias_ptr_type (first_stmt_info);
   9402  1.1  mrg     }
   9403  1.1  mrg   else
   9404  1.1  mrg     {
   9405  1.1  mrg       first_stmt_info = stmt_info;
   9406  1.1  mrg       first_dr_info = dr_info;
   9407  1.1  mrg       group_size = vec_num = 1;
   9408  1.1  mrg       group_gap_adj = 0;
   9409  1.1  mrg       ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
   9410  1.1  mrg       if (slp)
   9411  1.1  mrg 	vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
   9412  1.1  mrg     }
   9413  1.1  mrg 
   9414  1.1  mrg   gcc_assert (alignment_support_scheme);
   9415  1.1  mrg   vec_loop_masks *loop_masks
   9416  1.1  mrg     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
   9417  1.1  mrg        ? &LOOP_VINFO_MASKS (loop_vinfo)
   9418  1.1  mrg        : NULL);
   9419  1.1  mrg   vec_loop_lens *loop_lens
   9420  1.1  mrg     = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
   9421  1.1  mrg        ? &LOOP_VINFO_LENS (loop_vinfo)
   9422  1.1  mrg        : NULL);
   9423  1.1  mrg 
   9424  1.1  mrg   /* Shouldn't go with length-based approach if fully masked.  */
   9425  1.1  mrg   gcc_assert (!loop_lens || !loop_masks);
   9426  1.1  mrg 
   9427  1.1  mrg   /* Targets with store-lane instructions must not require explicit
   9428  1.1  mrg      realignment.  vect_supportable_dr_alignment always returns either
   9429  1.1  mrg      dr_aligned or dr_unaligned_supported for masked operations.  */
   9430  1.1  mrg   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
   9431  1.1  mrg 	       && !mask
   9432  1.1  mrg 	       && !loop_masks)
   9433  1.1  mrg 	      || alignment_support_scheme == dr_aligned
   9434  1.1  mrg 	      || alignment_support_scheme == dr_unaligned_supported);
   9435  1.1  mrg 
   9436  1.1  mrg   /* In case the vectorization factor (VF) is bigger than the number
   9437  1.1  mrg      of elements that we can fit in a vectype (nunits), we have to generate
   9438  1.1  mrg      more than one vector stmt - i.e - we need to "unroll" the
   9439  1.1  mrg      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
   9440  1.1  mrg      from one copy of the vector stmt to the next, in the field
   9441  1.1  mrg      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
   9442  1.1  mrg      stages to find the correct vector defs to be used when vectorizing
   9443  1.1  mrg      stmts that use the defs of the current stmt.  The example below
   9444  1.1  mrg      illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
   9445  1.1  mrg      need to create 4 vectorized stmts):
   9446  1.1  mrg 
   9447  1.1  mrg      before vectorization:
   9448  1.1  mrg                                 RELATED_STMT    VEC_STMT
   9449  1.1  mrg         S1:     x = memref      -               -
   9450  1.1  mrg         S2:     z = x + 1       -               -
   9451  1.1  mrg 
   9452  1.1  mrg      step 1: vectorize stmt S1:
   9453  1.1  mrg         We first create the vector stmt VS1_0, and, as usual, record a
   9454  1.1  mrg         pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
   9455  1.1  mrg         Next, we create the vector stmt VS1_1, and record a pointer to
   9456  1.1  mrg         it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
   9457  1.1  mrg         Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
   9458  1.1  mrg         stmts and pointers:
   9459  1.1  mrg                                 RELATED_STMT    VEC_STMT
   9460  1.1  mrg         VS1_0:  vx0 = memref0   VS1_1           -
   9461  1.1  mrg         VS1_1:  vx1 = memref1   VS1_2           -
   9462  1.1  mrg         VS1_2:  vx2 = memref2   VS1_3           -
   9463  1.1  mrg         VS1_3:  vx3 = memref3   -               -
   9464  1.1  mrg         S1:     x = load        -               VS1_0
   9465  1.1  mrg         S2:     z = x + 1       -               -
   9466  1.1  mrg   */
   9467  1.1  mrg 
   9468  1.1  mrg   /* In case of interleaving (non-unit grouped access):
   9469  1.1  mrg 
   9470  1.1  mrg      S1:  x2 = &base + 2
   9471  1.1  mrg      S2:  x0 = &base
   9472  1.1  mrg      S3:  x1 = &base + 1
   9473  1.1  mrg      S4:  x3 = &base + 3
   9474  1.1  mrg 
   9475  1.1  mrg      Vectorized loads are created in the order of memory accesses
   9476  1.1  mrg      starting from the access of the first stmt of the chain:
   9477  1.1  mrg 
   9478  1.1  mrg      VS1: vx0 = &base
   9479  1.1  mrg      VS2: vx1 = &base + vec_size*1
   9480  1.1  mrg      VS3: vx3 = &base + vec_size*2
   9481  1.1  mrg      VS4: vx4 = &base + vec_size*3
   9482  1.1  mrg 
   9483  1.1  mrg      Then permutation statements are generated:
   9484  1.1  mrg 
   9485  1.1  mrg      VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
   9486  1.1  mrg      VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
   9487  1.1  mrg        ...
   9488  1.1  mrg 
   9489  1.1  mrg      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
   9490  1.1  mrg      (the order of the data-refs in the output of vect_permute_load_chain
   9491  1.1  mrg      corresponds to the order of scalar stmts in the interleaving chain - see
   9492  1.1  mrg      the documentation of vect_permute_load_chain()).
   9493  1.1  mrg      The generation of permutation stmts and recording them in
   9494  1.1  mrg      STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
   9495  1.1  mrg 
   9496  1.1  mrg      In case of both multiple types and interleaving, the vector loads and
   9497  1.1  mrg      permutation stmts above are created for every copy.  The result vector
   9498  1.1  mrg      stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
   9499  1.1  mrg      corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
   9500  1.1  mrg 
   9501  1.1  mrg   /* If the data reference is aligned (dr_aligned) or potentially unaligned
   9502  1.1  mrg      on a target that supports unaligned accesses (dr_unaligned_supported)
   9503  1.1  mrg      we generate the following code:
   9504  1.1  mrg          p = initial_addr;
   9505  1.1  mrg          indx = 0;
   9506  1.1  mrg          loop {
   9507  1.1  mrg 	   p = p + indx * vectype_size;
   9508  1.1  mrg            vec_dest = *(p);
   9509  1.1  mrg            indx = indx + 1;
   9510  1.1  mrg          }
   9511  1.1  mrg 
   9512  1.1  mrg      Otherwise, the data reference is potentially unaligned on a target that
   9513  1.1  mrg      does not support unaligned accesses (dr_explicit_realign_optimized) -
   9514  1.1  mrg      then generate the following code, in which the data in each iteration is
   9515  1.1  mrg      obtained by two vector loads, one from the previous iteration, and one
   9516  1.1  mrg      from the current iteration:
   9517  1.1  mrg          p1 = initial_addr;
   9518  1.1  mrg          msq_init = *(floor(p1))
   9519  1.1  mrg          p2 = initial_addr + VS - 1;
   9520  1.1  mrg          realignment_token = call target_builtin;
   9521  1.1  mrg          indx = 0;
   9522  1.1  mrg          loop {
   9523  1.1  mrg            p2 = p2 + indx * vectype_size
   9524  1.1  mrg            lsq = *(floor(p2))
   9525  1.1  mrg            vec_dest = realign_load (msq, lsq, realignment_token)
   9526  1.1  mrg            indx = indx + 1;
   9527  1.1  mrg            msq = lsq;
   9528  1.1  mrg          }   */
   9529  1.1  mrg 
   9530  1.1  mrg   /* If the misalignment remains the same throughout the execution of the
   9531  1.1  mrg      loop, we can create the init_addr and permutation mask at the loop
   9532  1.1  mrg      preheader.  Otherwise, it needs to be created inside the loop.
   9533  1.1  mrg      This can only occur when vectorizing memory accesses in the inner-loop
   9534  1.1  mrg      nested within an outer-loop that is being vectorized.  */
   9535  1.1  mrg 
   9536  1.1  mrg   if (nested_in_vect_loop
   9537  1.1  mrg       && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
   9538  1.1  mrg 		      GET_MODE_SIZE (TYPE_MODE (vectype))))
   9539  1.1  mrg     {
   9540  1.1  mrg       gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
   9541  1.1  mrg       compute_in_loop = true;
   9542  1.1  mrg     }
   9543  1.1  mrg 
   9544  1.1  mrg   bool diff_first_stmt_info
   9545  1.1  mrg     = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
   9546  1.1  mrg 
   9547  1.1  mrg   tree offset = NULL_TREE;
   9548  1.1  mrg   if ((alignment_support_scheme == dr_explicit_realign_optimized
   9549  1.1  mrg        || alignment_support_scheme == dr_explicit_realign)
   9550  1.1  mrg       && !compute_in_loop)
   9551  1.1  mrg     {
   9552  1.1  mrg       /* If we have different first_stmt_info, we can't set up realignment
   9553  1.1  mrg 	 here, since we can't guarantee first_stmt_info DR has been
   9554  1.1  mrg 	 initialized yet, use first_stmt_info_for_drptr DR by bumping the
   9555  1.1  mrg 	 distance from first_stmt_info DR instead as below.  */
   9556  1.1  mrg       if (!diff_first_stmt_info)
   9557  1.1  mrg 	msq = vect_setup_realignment (vinfo,
   9558  1.1  mrg 				      first_stmt_info, gsi, &realignment_token,
   9559  1.1  mrg 				      alignment_support_scheme, NULL_TREE,
   9560  1.1  mrg 				      &at_loop);
   9561  1.1  mrg       if (alignment_support_scheme == dr_explicit_realign_optimized)
   9562  1.1  mrg 	{
   9563  1.1  mrg 	  phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
   9564  1.1  mrg 	  offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
   9565  1.1  mrg 			       size_one_node);
   9566  1.1  mrg 	  gcc_assert (!first_stmt_info_for_drptr);
   9567  1.1  mrg 	}
   9568  1.1  mrg     }
   9569  1.1  mrg   else
   9570  1.1  mrg     at_loop = loop;
   9571  1.1  mrg 
   9572  1.1  mrg   if (!known_eq (poffset, 0))
   9573  1.1  mrg     offset = (offset
   9574  1.1  mrg 	      ? size_binop (PLUS_EXPR, offset, size_int (poffset))
   9575  1.1  mrg 	      : size_int (poffset));
   9576  1.1  mrg 
   9577  1.1  mrg   tree bump;
   9578  1.1  mrg   tree vec_offset = NULL_TREE;
   9579  1.1  mrg   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
   9580  1.1  mrg     {
   9581  1.1  mrg       aggr_type = NULL_TREE;
   9582  1.1  mrg       bump = NULL_TREE;
   9583  1.1  mrg     }
   9584  1.1  mrg   else if (memory_access_type == VMAT_GATHER_SCATTER)
   9585  1.1  mrg     {
   9586  1.1  mrg       aggr_type = elem_type;
   9587  1.1  mrg       vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
   9588  1.1  mrg 				       &bump, &vec_offset);
   9589  1.1  mrg     }
   9590  1.1  mrg   else
   9591  1.1  mrg     {
   9592  1.1  mrg       if (memory_access_type == VMAT_LOAD_STORE_LANES)
   9593  1.1  mrg 	aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
   9594  1.1  mrg       else
   9595  1.1  mrg 	aggr_type = vectype;
   9596  1.1  mrg       bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
   9597  1.1  mrg 					  memory_access_type);
   9598  1.1  mrg     }
   9599  1.1  mrg 
   9600  1.1  mrg   auto_vec<tree> vec_offsets;
   9601  1.1  mrg   auto_vec<tree> vec_masks;
   9602  1.1  mrg   if (mask)
   9603  1.1  mrg     {
   9604  1.1  mrg       if (slp_node)
   9605  1.1  mrg 	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
   9606  1.1  mrg 			   &vec_masks);
   9607  1.1  mrg       else
   9608  1.1  mrg 	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
   9609  1.1  mrg 				       &vec_masks, mask_vectype);
   9610  1.1  mrg     }
   9611  1.1  mrg   tree vec_mask = NULL_TREE;
   9612  1.1  mrg   poly_uint64 group_elt = 0;
   9613  1.1  mrg   for (j = 0; j < ncopies; j++)
   9614  1.1  mrg     {
   9615  1.1  mrg       /* 1. Create the vector or array pointer update chain.  */
   9616  1.1  mrg       if (j == 0)
   9617  1.1  mrg 	{
   9618  1.1  mrg 	  bool simd_lane_access_p
   9619  1.1  mrg 	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
   9620  1.1  mrg 	  if (simd_lane_access_p
   9621  1.1  mrg 	      && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
   9622  1.1  mrg 	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
   9623  1.1  mrg 	      && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
   9624  1.1  mrg 	      && integer_zerop (DR_INIT (first_dr_info->dr))
   9625  1.1  mrg 	      && alias_sets_conflict_p (get_alias_set (aggr_type),
   9626  1.1  mrg 					get_alias_set (TREE_TYPE (ref_type)))
   9627  1.1  mrg 	      && (alignment_support_scheme == dr_aligned
   9628  1.1  mrg 		  || alignment_support_scheme == dr_unaligned_supported))
   9629  1.1  mrg 	    {
   9630  1.1  mrg 	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
   9631  1.1  mrg 	      dataref_offset = build_int_cst (ref_type, 0);
   9632  1.1  mrg 	    }
   9633  1.1  mrg 	  else if (diff_first_stmt_info)
   9634  1.1  mrg 	    {
   9635  1.1  mrg 	      dataref_ptr
   9636  1.1  mrg 		= vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
   9637  1.1  mrg 					    aggr_type, at_loop, offset, &dummy,
   9638  1.1  mrg 					    gsi, &ptr_incr, simd_lane_access_p,
   9639  1.1  mrg 					    bump);
   9640  1.1  mrg 	      /* Adjust the pointer by the difference to first_stmt.  */
   9641  1.1  mrg 	      data_reference_p ptrdr
   9642  1.1  mrg 		= STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
   9643  1.1  mrg 	      tree diff
   9644  1.1  mrg 		= fold_convert (sizetype,
   9645  1.1  mrg 				size_binop (MINUS_EXPR,
   9646  1.1  mrg 					    DR_INIT (first_dr_info->dr),
   9647  1.1  mrg 					    DR_INIT (ptrdr)));
   9648  1.1  mrg 	      dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
   9649  1.1  mrg 					     stmt_info, diff);
   9650  1.1  mrg 	      if (alignment_support_scheme == dr_explicit_realign)
   9651  1.1  mrg 		{
   9652  1.1  mrg 		  msq = vect_setup_realignment (vinfo,
   9653  1.1  mrg 						first_stmt_info_for_drptr, gsi,
   9654  1.1  mrg 						&realignment_token,
   9655  1.1  mrg 						alignment_support_scheme,
   9656  1.1  mrg 						dataref_ptr, &at_loop);
   9657  1.1  mrg 		  gcc_assert (!compute_in_loop);
   9658  1.1  mrg 		}
   9659  1.1  mrg 	    }
   9660  1.1  mrg 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
   9661  1.1  mrg 	    {
   9662  1.1  mrg 	      vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
   9663  1.1  mrg 					   slp_node, &gs_info, &dataref_ptr,
   9664  1.1  mrg 					   &vec_offsets);
   9665  1.1  mrg 	    }
   9666  1.1  mrg 	  else
   9667  1.1  mrg 	    dataref_ptr
   9668  1.1  mrg 	      = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
   9669  1.1  mrg 					  at_loop,
   9670  1.1  mrg 					  offset, &dummy, gsi, &ptr_incr,
   9671  1.1  mrg 					  simd_lane_access_p, bump);
   9672  1.1  mrg 	  if (mask)
   9673  1.1  mrg 	    vec_mask = vec_masks[0];
   9674  1.1  mrg 	}
   9675  1.1  mrg       else
   9676  1.1  mrg 	{
   9677  1.1  mrg 	  if (dataref_offset)
   9678  1.1  mrg 	    dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
   9679  1.1  mrg 					      bump);
   9680  1.1  mrg 	  else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
   9681  1.1  mrg 	    dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
   9682  1.1  mrg 					   stmt_info, bump);
   9683  1.1  mrg 	  if (mask)
   9684  1.1  mrg 	    vec_mask = vec_masks[j];
   9685  1.1  mrg 	}
   9686  1.1  mrg 
   9687  1.1  mrg       if (grouped_load || slp_perm)
   9688  1.1  mrg 	dr_chain.create (vec_num);
   9689  1.1  mrg 
   9690  1.1  mrg       gimple *new_stmt = NULL;
   9691  1.1  mrg       if (memory_access_type == VMAT_LOAD_STORE_LANES)
   9692  1.1  mrg 	{
   9693  1.1  mrg 	  tree vec_array;
   9694  1.1  mrg 
   9695  1.1  mrg 	  vec_array = create_vector_array (vectype, vec_num);
   9696  1.1  mrg 
   9697  1.1  mrg 	  tree final_mask = NULL_TREE;
   9698  1.1  mrg 	  if (loop_masks)
   9699  1.1  mrg 	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
   9700  1.1  mrg 					     vectype, j);
   9701  1.1  mrg 	  if (vec_mask)
   9702  1.1  mrg 	    final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
   9703  1.1  mrg 					   final_mask, vec_mask, gsi);
   9704  1.1  mrg 
   9705  1.1  mrg 	  gcall *call;
   9706  1.1  mrg 	  if (final_mask)
   9707  1.1  mrg 	    {
   9708  1.1  mrg 	      /* Emit:
   9709  1.1  mrg 		   VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
   9710  1.1  mrg 		                                VEC_MASK).  */
   9711  1.1  mrg 	      unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
   9712  1.1  mrg 	      tree alias_ptr = build_int_cst (ref_type, align);
   9713  1.1  mrg 	      call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
   9714  1.1  mrg 						 dataref_ptr, alias_ptr,
   9715  1.1  mrg 						 final_mask);
   9716  1.1  mrg 	    }
   9717  1.1  mrg 	  else
   9718  1.1  mrg 	    {
   9719  1.1  mrg 	      /* Emit:
   9720  1.1  mrg 		   VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
   9721  1.1  mrg 	      data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
   9722  1.1  mrg 	      call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
   9723  1.1  mrg 	    }
   9724  1.1  mrg 	  gimple_call_set_lhs (call, vec_array);
   9725  1.1  mrg 	  gimple_call_set_nothrow (call, true);
   9726  1.1  mrg 	  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
   9727  1.1  mrg 	  new_stmt = call;
   9728  1.1  mrg 
   9729  1.1  mrg 	  /* Extract each vector into an SSA_NAME.  */
   9730  1.1  mrg 	  for (i = 0; i < vec_num; i++)
   9731  1.1  mrg 	    {
   9732  1.1  mrg 	      new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
   9733  1.1  mrg 					    vec_array, i);
   9734  1.1  mrg 	      dr_chain.quick_push (new_temp);
   9735  1.1  mrg 	    }
   9736  1.1  mrg 
   9737  1.1  mrg 	  /* Record the mapping between SSA_NAMEs and statements.  */
   9738  1.1  mrg 	  vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
   9739  1.1  mrg 
   9740  1.1  mrg 	  /* Record that VEC_ARRAY is now dead.  */
   9741  1.1  mrg 	  vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
   9742  1.1  mrg 	}
   9743  1.1  mrg       else
   9744  1.1  mrg 	{
   9745  1.1  mrg 	  for (i = 0; i < vec_num; i++)
   9746  1.1  mrg 	    {
   9747  1.1  mrg 	      tree final_mask = NULL_TREE;
   9748  1.1  mrg 	      if (loop_masks
   9749  1.1  mrg 		  && memory_access_type != VMAT_INVARIANT)
   9750  1.1  mrg 		final_mask = vect_get_loop_mask (gsi, loop_masks,
   9751  1.1  mrg 						 vec_num * ncopies,
   9752  1.1  mrg 						 vectype, vec_num * j + i);
   9753  1.1  mrg 	      if (vec_mask)
   9754  1.1  mrg 		final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
   9755  1.1  mrg 					       final_mask, vec_mask, gsi);
   9756  1.1  mrg 
   9757  1.1  mrg 	      if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
   9758  1.1  mrg 		dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
   9759  1.1  mrg 					       gsi, stmt_info, bump);
   9760  1.1  mrg 
   9761  1.1  mrg 	      /* 2. Create the vector-load in the loop.  */
   9762  1.1  mrg 	      switch (alignment_support_scheme)
   9763  1.1  mrg 		{
   9764  1.1  mrg 		case dr_aligned:
   9765  1.1  mrg 		case dr_unaligned_supported:
   9766  1.1  mrg 		  {
   9767  1.1  mrg 		    unsigned int misalign;
   9768  1.1  mrg 		    unsigned HOST_WIDE_INT align;
   9769  1.1  mrg 
   9770  1.1  mrg 		    if (memory_access_type == VMAT_GATHER_SCATTER
   9771  1.1  mrg 			&& gs_info.ifn != IFN_LAST)
   9772  1.1  mrg 		      {
   9773  1.1  mrg 			if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
   9774  1.1  mrg 			  vec_offset = vec_offsets[vec_num * j + i];
   9775  1.1  mrg 			tree zero = build_zero_cst (vectype);
   9776  1.1  mrg 			tree scale = size_int (gs_info.scale);
   9777  1.1  mrg 			gcall *call;
   9778  1.1  mrg 			if (final_mask)
   9779  1.1  mrg 			  call = gimple_build_call_internal
   9780  1.1  mrg 			    (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
   9781  1.1  mrg 			     vec_offset, scale, zero, final_mask);
   9782  1.1  mrg 			else
   9783  1.1  mrg 			  call = gimple_build_call_internal
   9784  1.1  mrg 			    (IFN_GATHER_LOAD, 4, dataref_ptr,
   9785  1.1  mrg 			     vec_offset, scale, zero);
   9786  1.1  mrg 			gimple_call_set_nothrow (call, true);
   9787  1.1  mrg 			new_stmt = call;
   9788  1.1  mrg 			data_ref = NULL_TREE;
   9789  1.1  mrg 			break;
   9790  1.1  mrg 		      }
   9791  1.1  mrg 		    else if (memory_access_type == VMAT_GATHER_SCATTER)
   9792  1.1  mrg 		      {
   9793  1.1  mrg 			/* Emulated gather-scatter.  */
   9794  1.1  mrg 			gcc_assert (!final_mask);
   9795  1.1  mrg 			unsigned HOST_WIDE_INT const_nunits
   9796  1.1  mrg 			  = nunits.to_constant ();
   9797  1.1  mrg 			unsigned HOST_WIDE_INT const_offset_nunits
   9798  1.1  mrg 			  = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
   9799  1.1  mrg 			      .to_constant ();
   9800  1.1  mrg 			vec<constructor_elt, va_gc> *ctor_elts;
   9801  1.1  mrg 			vec_alloc (ctor_elts, const_nunits);
   9802  1.1  mrg 			gimple_seq stmts = NULL;
   9803  1.1  mrg 			/* We support offset vectors with more elements
   9804  1.1  mrg 			   than the data vector for now.  */
   9805  1.1  mrg 			unsigned HOST_WIDE_INT factor
   9806  1.1  mrg 			  = const_offset_nunits / const_nunits;
   9807  1.1  mrg 			vec_offset = vec_offsets[j / factor];
   9808  1.1  mrg 			unsigned elt_offset = (j % factor) * const_nunits;
   9809  1.1  mrg 			tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
   9810  1.1  mrg 			tree scale = size_int (gs_info.scale);
   9811  1.1  mrg 			align
   9812  1.1  mrg 			  = get_object_alignment (DR_REF (first_dr_info->dr));
   9813  1.1  mrg 			tree ltype = build_aligned_type (TREE_TYPE (vectype),
   9814  1.1  mrg 							 align);
   9815  1.1  mrg 			for (unsigned k = 0; k < const_nunits; ++k)
   9816  1.1  mrg 			  {
   9817  1.1  mrg 			    tree boff = size_binop (MULT_EXPR,
   9818  1.1  mrg 						    TYPE_SIZE (idx_type),
   9819  1.1  mrg 						    bitsize_int
   9820  1.1  mrg 						      (k + elt_offset));
   9821  1.1  mrg 			    tree idx = gimple_build (&stmts, BIT_FIELD_REF,
   9822  1.1  mrg 						     idx_type, vec_offset,
   9823  1.1  mrg 						     TYPE_SIZE (idx_type),
   9824  1.1  mrg 						     boff);
   9825  1.1  mrg 			    idx = gimple_convert (&stmts, sizetype, idx);
   9826  1.1  mrg 			    idx = gimple_build (&stmts, MULT_EXPR,
   9827  1.1  mrg 						sizetype, idx, scale);
   9828  1.1  mrg 			    tree ptr = gimple_build (&stmts, PLUS_EXPR,
   9829  1.1  mrg 						     TREE_TYPE (dataref_ptr),
   9830  1.1  mrg 						     dataref_ptr, idx);
   9831  1.1  mrg 			    ptr = gimple_convert (&stmts, ptr_type_node, ptr);
   9832  1.1  mrg 			    tree elt = make_ssa_name (TREE_TYPE (vectype));
   9833  1.1  mrg 			    tree ref = build2 (MEM_REF, ltype, ptr,
   9834  1.1  mrg 					       build_int_cst (ref_type, 0));
   9835  1.1  mrg 			    new_stmt = gimple_build_assign (elt, ref);
   9836  1.1  mrg 			    gimple_seq_add_stmt (&stmts, new_stmt);
   9837  1.1  mrg 			    CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
   9838  1.1  mrg 			  }
   9839  1.1  mrg 			gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
   9840  1.1  mrg 			new_stmt = gimple_build_assign (NULL_TREE,
   9841  1.1  mrg 							build_constructor
   9842  1.1  mrg 							  (vectype, ctor_elts));
   9843  1.1  mrg 			data_ref = NULL_TREE;
   9844  1.1  mrg 			break;
   9845  1.1  mrg 		      }
   9846  1.1  mrg 
   9847  1.1  mrg 		    align =
   9848  1.1  mrg 		      known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
   9849  1.1  mrg 		    if (alignment_support_scheme == dr_aligned)
   9850  1.1  mrg 		      misalign = 0;
   9851  1.1  mrg 		    else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
   9852  1.1  mrg 		      {
   9853  1.1  mrg 			align = dr_alignment
   9854  1.1  mrg 			  (vect_dr_behavior (vinfo, first_dr_info));
   9855  1.1  mrg 			misalign = 0;
   9856  1.1  mrg 		      }
   9857  1.1  mrg 		    else
   9858  1.1  mrg 		      misalign = misalignment;
   9859  1.1  mrg 		    if (dataref_offset == NULL_TREE
   9860  1.1  mrg 			&& TREE_CODE (dataref_ptr) == SSA_NAME)
   9861  1.1  mrg 		      set_ptr_info_alignment (get_ptr_info (dataref_ptr),
   9862  1.1  mrg 					      align, misalign);
   9863  1.1  mrg 		    align = least_bit_hwi (misalign | align);
   9864  1.1  mrg 
   9865  1.1  mrg 		    if (final_mask)
   9866  1.1  mrg 		      {
   9867  1.1  mrg 			tree ptr = build_int_cst (ref_type,
   9868  1.1  mrg 						  align * BITS_PER_UNIT);
   9869  1.1  mrg 			gcall *call
   9870  1.1  mrg 			  = gimple_build_call_internal (IFN_MASK_LOAD, 3,
   9871  1.1  mrg 							dataref_ptr, ptr,
   9872  1.1  mrg 							final_mask);
   9873  1.1  mrg 			gimple_call_set_nothrow (call, true);
   9874  1.1  mrg 			new_stmt = call;
   9875  1.1  mrg 			data_ref = NULL_TREE;
   9876  1.1  mrg 		      }
   9877  1.1  mrg 		    else if (loop_lens && memory_access_type != VMAT_INVARIANT)
   9878  1.1  mrg 		      {
   9879  1.1  mrg 			tree final_len
   9880  1.1  mrg 			  = vect_get_loop_len (loop_vinfo, loop_lens,
   9881  1.1  mrg 					       vec_num * ncopies,
   9882  1.1  mrg 					       vec_num * j + i);
   9883  1.1  mrg 			tree ptr = build_int_cst (ref_type,
   9884  1.1  mrg 						  align * BITS_PER_UNIT);
   9885  1.1  mrg 
   9886  1.1  mrg 			machine_mode vmode = TYPE_MODE (vectype);
   9887  1.1  mrg 			opt_machine_mode new_ovmode
   9888  1.1  mrg 			  = get_len_load_store_mode (vmode, true);
   9889  1.1  mrg 			machine_mode new_vmode = new_ovmode.require ();
   9890  1.1  mrg 			tree qi_type = unsigned_intQI_type_node;
   9891  1.1  mrg 
   9892  1.1  mrg 			signed char biasval =
   9893  1.1  mrg 			  LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
   9894  1.1  mrg 
   9895  1.1  mrg 			tree bias = build_int_cst (intQI_type_node, biasval);
   9896  1.1  mrg 
   9897  1.1  mrg 			gcall *call
   9898  1.1  mrg 			  = gimple_build_call_internal (IFN_LEN_LOAD, 4,
   9899  1.1  mrg 							dataref_ptr, ptr,
   9900  1.1  mrg 							final_len, bias);
   9901  1.1  mrg 			gimple_call_set_nothrow (call, true);
   9902  1.1  mrg 			new_stmt = call;
   9903  1.1  mrg 			data_ref = NULL_TREE;
   9904  1.1  mrg 
   9905  1.1  mrg 			/* Need conversion if it's wrapped with VnQI.  */
   9906  1.1  mrg 			if (vmode != new_vmode)
   9907  1.1  mrg 			  {
   9908  1.1  mrg 			    tree new_vtype
   9909  1.1  mrg 			      = build_vector_type_for_mode (qi_type, new_vmode);
   9910  1.1  mrg 			    tree var = vect_get_new_ssa_name (new_vtype,
   9911  1.1  mrg 							      vect_simple_var);
   9912  1.1  mrg 			    gimple_set_lhs (call, var);
   9913  1.1  mrg 			    vect_finish_stmt_generation (vinfo, stmt_info, call,
   9914  1.1  mrg 							 gsi);
   9915  1.1  mrg 			    tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
   9916  1.1  mrg 			    new_stmt
   9917  1.1  mrg 			      = gimple_build_assign (vec_dest,
   9918  1.1  mrg 						     VIEW_CONVERT_EXPR, op);
   9919  1.1  mrg 			  }
   9920  1.1  mrg 		      }
   9921  1.1  mrg 		    else
   9922  1.1  mrg 		      {
   9923  1.1  mrg 			tree ltype = vectype;
   9924  1.1  mrg 			tree new_vtype = NULL_TREE;
   9925  1.1  mrg 			unsigned HOST_WIDE_INT gap
   9926  1.1  mrg 			  = DR_GROUP_GAP (first_stmt_info);
   9927  1.1  mrg 			unsigned int vect_align
   9928  1.1  mrg 			  = vect_known_alignment_in_bytes (first_dr_info,
   9929  1.1  mrg 							   vectype);
   9930  1.1  mrg 			unsigned int scalar_dr_size
   9931  1.1  mrg 			  = vect_get_scalar_dr_size (first_dr_info);
   9932  1.1  mrg 			/* If there's no peeling for gaps but we have a gap
   9933  1.1  mrg 			   with slp loads then load the lower half of the
   9934  1.1  mrg 			   vector only.  See get_group_load_store_type for
   9935  1.1  mrg 			   when we apply this optimization.  */
   9936  1.1  mrg 			if (slp
   9937  1.1  mrg 			    && loop_vinfo
   9938  1.1  mrg 			    && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
   9939  1.1  mrg 			    && gap != 0
   9940  1.1  mrg 			    && known_eq (nunits, (group_size - gap) * 2)
   9941  1.1  mrg 			    && known_eq (nunits, group_size)
   9942  1.1  mrg 			    && gap >= (vect_align / scalar_dr_size))
   9943  1.1  mrg 			  {
   9944  1.1  mrg 			    tree half_vtype;
   9945  1.1  mrg 			    new_vtype
   9946  1.1  mrg 			      = vector_vector_composition_type (vectype, 2,
   9947  1.1  mrg 								&half_vtype);
   9948  1.1  mrg 			    if (new_vtype != NULL_TREE)
   9949  1.1  mrg 			      ltype = half_vtype;
   9950  1.1  mrg 			  }
   9951  1.1  mrg 			tree offset
   9952  1.1  mrg 			  = (dataref_offset ? dataref_offset
   9953  1.1  mrg 					    : build_int_cst (ref_type, 0));
   9954  1.1  mrg 			if (ltype != vectype
   9955  1.1  mrg 			    && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
   9956  1.1  mrg 			  {
   9957  1.1  mrg 			    unsigned HOST_WIDE_INT gap_offset
   9958  1.1  mrg 			      = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
   9959  1.1  mrg 			    tree gapcst = build_int_cst (ref_type, gap_offset);
   9960  1.1  mrg 			    offset = size_binop (PLUS_EXPR, offset, gapcst);
   9961  1.1  mrg 			  }
   9962  1.1  mrg 			data_ref
   9963  1.1  mrg 			  = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
   9964  1.1  mrg 			if (alignment_support_scheme == dr_aligned)
   9965  1.1  mrg 			  ;
   9966  1.1  mrg 			else
   9967  1.1  mrg 			  TREE_TYPE (data_ref)
   9968  1.1  mrg 			    = build_aligned_type (TREE_TYPE (data_ref),
   9969  1.1  mrg 						  align * BITS_PER_UNIT);
   9970  1.1  mrg 			if (ltype != vectype)
   9971  1.1  mrg 			  {
   9972  1.1  mrg 			    vect_copy_ref_info (data_ref,
   9973  1.1  mrg 						DR_REF (first_dr_info->dr));
   9974  1.1  mrg 			    tree tem = make_ssa_name (ltype);
   9975  1.1  mrg 			    new_stmt = gimple_build_assign (tem, data_ref);
   9976  1.1  mrg 			    vect_finish_stmt_generation (vinfo, stmt_info,
   9977  1.1  mrg 							 new_stmt, gsi);
   9978  1.1  mrg 			    data_ref = NULL;
   9979  1.1  mrg 			    vec<constructor_elt, va_gc> *v;
   9980  1.1  mrg 			    vec_alloc (v, 2);
   9981  1.1  mrg 			    if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
   9982  1.1  mrg 			      {
   9983  1.1  mrg 				CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
   9984  1.1  mrg 							build_zero_cst (ltype));
   9985  1.1  mrg 				CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
   9986  1.1  mrg 			      }
   9987  1.1  mrg 			    else
   9988  1.1  mrg 			      {
   9989  1.1  mrg 				CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
   9990  1.1  mrg 				CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
   9991  1.1  mrg 							build_zero_cst (ltype));
   9992  1.1  mrg 			      }
   9993  1.1  mrg 			    gcc_assert (new_vtype != NULL_TREE);
   9994  1.1  mrg 			    if (new_vtype == vectype)
   9995  1.1  mrg 			      new_stmt = gimple_build_assign (
   9996  1.1  mrg 				vec_dest, build_constructor (vectype, v));
   9997  1.1  mrg 			    else
   9998  1.1  mrg 			      {
   9999  1.1  mrg 				tree new_vname = make_ssa_name (new_vtype);
   10000  1.1  mrg 				new_stmt = gimple_build_assign (
   10001  1.1  mrg 				  new_vname, build_constructor (new_vtype, v));
   10002  1.1  mrg 				vect_finish_stmt_generation (vinfo, stmt_info,
   10003  1.1  mrg 							     new_stmt, gsi);
   10004  1.1  mrg 				new_stmt = gimple_build_assign (
   10005  1.1  mrg 				  vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
   10006  1.1  mrg 						    new_vname));
   10007  1.1  mrg 			      }
   10008  1.1  mrg 			  }
   10009  1.1  mrg 		      }
   10010  1.1  mrg 		    break;
   10011  1.1  mrg 		  }
   10012  1.1  mrg 		case dr_explicit_realign:
   10013  1.1  mrg 		  {
   10014  1.1  mrg 		    tree ptr, bump;
   10015  1.1  mrg 
   10016  1.1  mrg 		    tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
   10017  1.1  mrg 
   10018  1.1  mrg 		    if (compute_in_loop)
   10019  1.1  mrg 		      msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
   10020  1.1  mrg 						    &realignment_token,
   10021  1.1  mrg 						    dr_explicit_realign,
   10022  1.1  mrg 						    dataref_ptr, NULL);
   10023  1.1  mrg 
   10024  1.1  mrg 		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
   10025  1.1  mrg 		      ptr = copy_ssa_name (dataref_ptr);
   10026  1.1  mrg 		    else
   10027  1.1  mrg 		      ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
   10028  1.1  mrg 		    // For explicit realign the target alignment should be
   10029  1.1  mrg 		    // known at compile time.
   10030  1.1  mrg 		    unsigned HOST_WIDE_INT align =
   10031  1.1  mrg 		      DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
   10032  1.1  mrg 		    new_stmt = gimple_build_assign
   10033  1.1  mrg 				 (ptr, BIT_AND_EXPR, dataref_ptr,
   10034  1.1  mrg 				  build_int_cst
   10035  1.1  mrg 				  (TREE_TYPE (dataref_ptr),
   10036  1.1  mrg 				   -(HOST_WIDE_INT) align));
   10037  1.1  mrg 		    vect_finish_stmt_generation (vinfo, stmt_info,
   10038  1.1  mrg 						 new_stmt, gsi);
   10039  1.1  mrg 		    data_ref
   10040  1.1  mrg 		      = build2 (MEM_REF, vectype, ptr,
   10041  1.1  mrg 				build_int_cst (ref_type, 0));
   10042  1.1  mrg 		    vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
   10043  1.1  mrg 		    vec_dest = vect_create_destination_var (scalar_dest,
   10044  1.1  mrg 							    vectype);
   10045  1.1  mrg 		    new_stmt = gimple_build_assign (vec_dest, data_ref);
   10046  1.1  mrg 		    new_temp = make_ssa_name (vec_dest, new_stmt);
   10047  1.1  mrg 		    gimple_assign_set_lhs (new_stmt, new_temp);
   10048  1.1  mrg 		    gimple_move_vops (new_stmt, stmt_info->stmt);
   10049  1.1  mrg 		    vect_finish_stmt_generation (vinfo, stmt_info,
   10050  1.1  mrg 						 new_stmt, gsi);
   10051  1.1  mrg 		    msq = new_temp;
   10052  1.1  mrg 
   10053  1.1  mrg 		    bump = size_binop (MULT_EXPR, vs,
   10054  1.1  mrg 				       TYPE_SIZE_UNIT (elem_type));
   10055  1.1  mrg 		    bump = size_binop (MINUS_EXPR, bump, size_one_node);
   10056  1.1  mrg 		    ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
   10057  1.1  mrg 					   stmt_info, bump);
   10058  1.1  mrg 		    new_stmt = gimple_build_assign
   10059  1.1  mrg 				 (NULL_TREE, BIT_AND_EXPR, ptr,
   10060  1.1  mrg 				  build_int_cst
   10061  1.1  mrg 				  (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
   10062  1.1  mrg 		    ptr = copy_ssa_name (ptr, new_stmt);
   10063  1.1  mrg 		    gimple_assign_set_lhs (new_stmt, ptr);
   10064  1.1  mrg 		    vect_finish_stmt_generation (vinfo, stmt_info,
   10065  1.1  mrg 						 new_stmt, gsi);
   10066  1.1  mrg 		    data_ref
   10067  1.1  mrg 		      = build2 (MEM_REF, vectype, ptr,
   10068  1.1  mrg 				build_int_cst (ref_type, 0));
   10069  1.1  mrg 		    break;
   10070  1.1  mrg 		  }
   10071  1.1  mrg 		case dr_explicit_realign_optimized:
   10072  1.1  mrg 		  {
   10073  1.1  mrg 		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
   10074  1.1  mrg 		      new_temp = copy_ssa_name (dataref_ptr);
   10075  1.1  mrg 		    else
   10076  1.1  mrg 		      new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
   10077  1.1  mrg 		    // We should only be doing this if we know the target
   10078  1.1  mrg 		    // alignment at compile time.
   10079  1.1  mrg 		    unsigned HOST_WIDE_INT align =
   10080  1.1  mrg 		      DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
   10081  1.1  mrg 		    new_stmt = gimple_build_assign
   10082  1.1  mrg 		      (new_temp, BIT_AND_EXPR, dataref_ptr,
   10083  1.1  mrg 		       build_int_cst (TREE_TYPE (dataref_ptr),
   10084  1.1  mrg 				     -(HOST_WIDE_INT) align));
   10085  1.1  mrg 		    vect_finish_stmt_generation (vinfo, stmt_info,
   10086  1.1  mrg 						 new_stmt, gsi);
   10087  1.1  mrg 		    data_ref
   10088  1.1  mrg 		      = build2 (MEM_REF, vectype, new_temp,
   10089  1.1  mrg 				build_int_cst (ref_type, 0));
   10090  1.1  mrg 		    break;
   10091  1.1  mrg 		  }
   10092  1.1  mrg 		default:
   10093  1.1  mrg 		  gcc_unreachable ();
   10094  1.1  mrg 		}
   10095  1.1  mrg 	      vec_dest = vect_create_destination_var (scalar_dest, vectype);
   10096  1.1  mrg 	      /* DATA_REF is null if we've already built the statement.  */
   10097  1.1  mrg 	      if (data_ref)
   10098  1.1  mrg 		{
   10099  1.1  mrg 		  vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
   10100  1.1  mrg 		  new_stmt = gimple_build_assign (vec_dest, data_ref);
   10101  1.1  mrg 		}
   10102  1.1  mrg 	      new_temp = make_ssa_name (vec_dest, new_stmt);
   10103  1.1  mrg 	      gimple_set_lhs (new_stmt, new_temp);
   10104  1.1  mrg 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   10105  1.1  mrg 
   10106  1.1  mrg 	      /* 3. Handle explicit realignment if necessary/supported.
   10107  1.1  mrg 		 Create in loop:
   10108  1.1  mrg 		   vec_dest = realign_load (msq, lsq, realignment_token)  */
   10109  1.1  mrg 	      if (alignment_support_scheme == dr_explicit_realign_optimized
   10110  1.1  mrg 		  || alignment_support_scheme == dr_explicit_realign)
   10111  1.1  mrg 		{
   10112  1.1  mrg 		  lsq = gimple_assign_lhs (new_stmt);
   10113  1.1  mrg 		  if (!realignment_token)
   10114  1.1  mrg 		    realignment_token = dataref_ptr;
   10115  1.1  mrg 		  vec_dest = vect_create_destination_var (scalar_dest, vectype);
   10116  1.1  mrg 		  new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
   10117  1.1  mrg 						  msq, lsq, realignment_token);
   10118  1.1  mrg 		  new_temp = make_ssa_name (vec_dest, new_stmt);
   10119  1.1  mrg 		  gimple_assign_set_lhs (new_stmt, new_temp);
   10120  1.1  mrg 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   10121  1.1  mrg 
   10122  1.1  mrg 		  if (alignment_support_scheme == dr_explicit_realign_optimized)
   10123  1.1  mrg 		    {
   10124  1.1  mrg 		      gcc_assert (phi);
   10125  1.1  mrg 		      if (i == vec_num - 1 && j == ncopies - 1)
   10126  1.1  mrg 			add_phi_arg (phi, lsq,
   10127  1.1  mrg 				     loop_latch_edge (containing_loop),
   10128  1.1  mrg 				     UNKNOWN_LOCATION);
   10129  1.1  mrg 		      msq = lsq;
   10130  1.1  mrg 		    }
   10131  1.1  mrg 		}
   10132  1.1  mrg 
   10133  1.1  mrg 	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
   10134  1.1  mrg 		{
   10135  1.1  mrg 		  tree perm_mask = perm_mask_for_reverse (vectype);
   10136  1.1  mrg 		  new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
   10137  1.1  mrg 						   perm_mask, stmt_info, gsi);
   10138  1.1  mrg 		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
   10139  1.1  mrg 		}
   10140  1.1  mrg 
   10141  1.1  mrg 	      /* Collect vector loads and later create their permutation in
   10142  1.1  mrg 		 vect_transform_grouped_load ().  */
   10143  1.1  mrg 	      if (grouped_load || slp_perm)
   10144  1.1  mrg 		dr_chain.quick_push (new_temp);
   10145  1.1  mrg 
   10146  1.1  mrg 	      /* Store vector loads in the corresponding SLP_NODE.  */
   10147  1.1  mrg 	      if (slp && !slp_perm)
   10148  1.1  mrg 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
   10149  1.1  mrg 
   10150  1.1  mrg 	      /* With SLP permutation we load the gaps as well, without
   10151  1.1  mrg 	         we need to skip the gaps after we manage to fully load
   10152  1.1  mrg 		 all elements.  group_gap_adj is DR_GROUP_SIZE here.  */
   10153  1.1  mrg 	      group_elt += nunits;
   10154  1.1  mrg 	      if (maybe_ne (group_gap_adj, 0U)
   10155  1.1  mrg 		  && !slp_perm
   10156  1.1  mrg 		  && known_eq (group_elt, group_size - group_gap_adj))
   10157  1.1  mrg 		{
   10158  1.1  mrg 		  poly_wide_int bump_val
   10159  1.1  mrg 		    = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
   10160  1.1  mrg 		       * group_gap_adj);
   10161  1.1  mrg 		  if (tree_int_cst_sgn
   10162  1.1  mrg 			(vect_dr_behavior (vinfo, dr_info)->step) == -1)
   10163  1.1  mrg 		    bump_val = -bump_val;
   10164  1.1  mrg 		  tree bump = wide_int_to_tree (sizetype, bump_val);
   10165  1.1  mrg 		  dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
   10166  1.1  mrg 						 gsi, stmt_info, bump);
   10167  1.1  mrg 		  group_elt = 0;
   10168  1.1  mrg 		}
   10169  1.1  mrg 	    }
   10170  1.1  mrg 	  /* Bump the vector pointer to account for a gap or for excess
   10171  1.1  mrg 	     elements loaded for a permuted SLP load.  */
   10172  1.1  mrg 	  if (maybe_ne (group_gap_adj, 0U) && slp_perm)
   10173  1.1  mrg 	    {
   10174  1.1  mrg 	      poly_wide_int bump_val
   10175  1.1  mrg 		= (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
   10176  1.1  mrg 		   * group_gap_adj);
   10177  1.1  mrg 	      if (tree_int_cst_sgn
   10178  1.1  mrg 		    (vect_dr_behavior (vinfo, dr_info)->step) == -1)
   10179  1.1  mrg 		bump_val = -bump_val;
   10180  1.1  mrg 	      tree bump = wide_int_to_tree (sizetype, bump_val);
   10181  1.1  mrg 	      dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
   10182  1.1  mrg 					     stmt_info, bump);
   10183  1.1  mrg 	    }
   10184  1.1  mrg 	}
   10185  1.1  mrg 
   10186  1.1  mrg       if (slp && !slp_perm)
   10187  1.1  mrg 	continue;
   10188  1.1  mrg 
   10189  1.1  mrg       if (slp_perm)
   10190  1.1  mrg         {
   10191  1.1  mrg 	  unsigned n_perms;
   10192  1.1  mrg 	  /* For SLP we know we've seen all possible uses of dr_chain so
   10193  1.1  mrg 	     direct vect_transform_slp_perm_load to DCE the unused parts.
   10194  1.1  mrg 	     ???  This is a hack to prevent compile-time issues as seen
   10195  1.1  mrg 	     in PR101120 and friends.  */
   10196  1.1  mrg 	  bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
   10197  1.1  mrg 						  gsi, vf, false, &n_perms,
   10198  1.1  mrg 						  nullptr, true);
   10199  1.1  mrg 	  gcc_assert (ok);
   10200  1.1  mrg         }
   10201  1.1  mrg       else
   10202  1.1  mrg         {
   10203  1.1  mrg           if (grouped_load)
   10204  1.1  mrg   	    {
   10205  1.1  mrg 	      if (memory_access_type != VMAT_LOAD_STORE_LANES)
   10206  1.1  mrg 		vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
   10207  1.1  mrg 					     group_size, gsi);
   10208  1.1  mrg 	      *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
   10209  1.1  mrg 	    }
   10210  1.1  mrg           else
   10211  1.1  mrg 	    {
   10212  1.1  mrg 	      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
   10213  1.1  mrg 	    }
   10214  1.1  mrg         }
   10215  1.1  mrg       dr_chain.release ();
   10216  1.1  mrg     }
   10217  1.1  mrg   if (!slp)
   10218  1.1  mrg     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
   10219  1.1  mrg 
   10220  1.1  mrg   return true;
   10221  1.1  mrg }
   10222  1.1  mrg 
   10223  1.1  mrg /* Function vect_is_simple_cond.
   10224  1.1  mrg 
   10225  1.1  mrg    Input:
   10226  1.1  mrg    LOOP - the loop that is being vectorized.
   10227  1.1  mrg    COND - Condition that is checked for simple use.
   10228  1.1  mrg 
   10229  1.1  mrg    Output:
   10230  1.1  mrg    *COMP_VECTYPE - the vector type for the comparison.
   10231  1.1  mrg    *DTS - The def types for the arguments of the comparison
   10232  1.1  mrg 
   10233  1.1  mrg    Returns whether a COND can be vectorized.  Checks whether
   10234  1.1  mrg    condition operands are supportable using vec_is_simple_use.  */
   10235  1.1  mrg 
   10236  1.1  mrg static bool
   10237  1.1  mrg vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
   10238  1.1  mrg 		     slp_tree slp_node, tree *comp_vectype,
   10239  1.1  mrg 		     enum vect_def_type *dts, tree vectype)
   10240  1.1  mrg {
   10241  1.1  mrg   tree lhs, rhs;
   10242  1.1  mrg   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
   10243  1.1  mrg   slp_tree slp_op;
   10244  1.1  mrg 
   10245  1.1  mrg   /* Mask case.  */
   10246  1.1  mrg   if (TREE_CODE (cond) == SSA_NAME
   10247  1.1  mrg       && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
   10248  1.1  mrg     {
   10249  1.1  mrg       if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
   10250  1.1  mrg 			       &slp_op, &dts[0], comp_vectype)
   10251  1.1  mrg 	  || !*comp_vectype
   10252  1.1  mrg 	  || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
   10253  1.1  mrg 	return false;
   10254  1.1  mrg       return true;
   10255  1.1  mrg     }
   10256  1.1  mrg 
   10257  1.1  mrg   if (!COMPARISON_CLASS_P (cond))
   10258  1.1  mrg     return false;
   10259  1.1  mrg 
   10260  1.1  mrg   lhs = TREE_OPERAND (cond, 0);
   10261  1.1  mrg   rhs = TREE_OPERAND (cond, 1);
   10262  1.1  mrg 
   10263  1.1  mrg   if (TREE_CODE (lhs) == SSA_NAME)
   10264  1.1  mrg     {
   10265  1.1  mrg       if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
   10266  1.1  mrg 			       &lhs, &slp_op, &dts[0], &vectype1))
   10267  1.1  mrg 	return false;
   10268  1.1  mrg     }
   10269  1.1  mrg   else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
   10270  1.1  mrg 	   || TREE_CODE (lhs) == FIXED_CST)
   10271  1.1  mrg     dts[0] = vect_constant_def;
   10272  1.1  mrg   else
   10273  1.1  mrg     return false;
   10274  1.1  mrg 
   10275  1.1  mrg   if (TREE_CODE (rhs) == SSA_NAME)
   10276  1.1  mrg     {
   10277  1.1  mrg       if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
   10278  1.1  mrg 			       &rhs, &slp_op, &dts[1], &vectype2))
   10279  1.1  mrg 	return false;
   10280  1.1  mrg     }
   10281  1.1  mrg   else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
   10282  1.1  mrg 	   || TREE_CODE (rhs) == FIXED_CST)
   10283  1.1  mrg     dts[1] = vect_constant_def;
   10284  1.1  mrg   else
   10285  1.1  mrg     return false;
   10286  1.1  mrg 
   10287  1.1  mrg   if (vectype1 && vectype2
   10288  1.1  mrg       && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
   10289  1.1  mrg 		   TYPE_VECTOR_SUBPARTS (vectype2)))
   10290  1.1  mrg     return false;
   10291  1.1  mrg 
   10292  1.1  mrg   *comp_vectype = vectype1 ? vectype1 : vectype2;
   10293  1.1  mrg   /* Invariant comparison.  */
   10294  1.1  mrg   if (! *comp_vectype)
   10295  1.1  mrg     {
   10296  1.1  mrg       tree scalar_type = TREE_TYPE (lhs);
   10297  1.1  mrg       if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
   10298  1.1  mrg 	*comp_vectype = truth_type_for (vectype);
   10299  1.1  mrg       else
   10300  1.1  mrg 	{
   10301  1.1  mrg 	  /* If we can widen the comparison to match vectype do so.  */
   10302  1.1  mrg 	  if (INTEGRAL_TYPE_P (scalar_type)
   10303  1.1  mrg 	      && !slp_node
   10304  1.1  mrg 	      && tree_int_cst_lt (TYPE_SIZE (scalar_type),
   10305  1.1  mrg 				  TYPE_SIZE (TREE_TYPE (vectype))))
   10306  1.1  mrg 	    scalar_type = build_nonstandard_integer_type
   10307  1.1  mrg 	      (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
   10308  1.1  mrg 	  *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
   10309  1.1  mrg 						       slp_node);
   10310  1.1  mrg 	}
   10311  1.1  mrg     }
   10312  1.1  mrg 
   10313  1.1  mrg   return true;
   10314  1.1  mrg }
   10315  1.1  mrg 
   10316  1.1  mrg /* vectorizable_condition.
   10317  1.1  mrg 
   10318  1.1  mrg    Check if STMT_INFO is conditional modify expression that can be vectorized.
   10319  1.1  mrg    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   10320  1.1  mrg    stmt using VEC_COND_EXPR  to replace it, put it in VEC_STMT, and insert it
   10321  1.1  mrg    at GSI.
   10322  1.1  mrg 
   10323  1.1  mrg    When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
   10324  1.1  mrg 
   10325  1.1  mrg    Return true if STMT_INFO is vectorizable in this way.  */
   10326  1.1  mrg 
   10327  1.1  mrg static bool
   10328  1.1  mrg vectorizable_condition (vec_info *vinfo,
   10329  1.1  mrg 			stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   10330  1.1  mrg 			gimple **vec_stmt,
   10331  1.1  mrg 			slp_tree slp_node, stmt_vector_for_cost *cost_vec)
   10332  1.1  mrg {
   10333  1.1  mrg   tree scalar_dest = NULL_TREE;
   10334  1.1  mrg   tree vec_dest = NULL_TREE;
   10335  1.1  mrg   tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
   10336  1.1  mrg   tree then_clause, else_clause;
   10337  1.1  mrg   tree comp_vectype = NULL_TREE;
   10338  1.1  mrg   tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
   10339  1.1  mrg   tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
   10340  1.1  mrg   tree vec_compare;
   10341  1.1  mrg   tree new_temp;
   10342  1.1  mrg   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   10343  1.1  mrg   enum vect_def_type dts[4]
   10344  1.1  mrg     = {vect_unknown_def_type, vect_unknown_def_type,
   10345  1.1  mrg        vect_unknown_def_type, vect_unknown_def_type};
   10346  1.1  mrg   int ndts = 4;
   10347  1.1  mrg   int ncopies;
   10348  1.1  mrg   int vec_num;
   10349  1.1  mrg   enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
   10350  1.1  mrg   int i;
   10351  1.1  mrg   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
   10352  1.1  mrg   vec<tree> vec_oprnds0 = vNULL;
   10353  1.1  mrg   vec<tree> vec_oprnds1 = vNULL;
   10354  1.1  mrg   vec<tree> vec_oprnds2 = vNULL;
   10355  1.1  mrg   vec<tree> vec_oprnds3 = vNULL;
   10356  1.1  mrg   tree vec_cmp_type;
   10357  1.1  mrg   bool masked = false;
   10358  1.1  mrg 
   10359  1.1  mrg   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
   10360  1.1  mrg     return false;
   10361  1.1  mrg 
   10362  1.1  mrg   /* Is vectorizable conditional operation?  */
   10363  1.1  mrg   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
   10364  1.1  mrg   if (!stmt)
   10365  1.1  mrg     return false;
   10366  1.1  mrg 
   10367  1.1  mrg   code = gimple_assign_rhs_code (stmt);
   10368  1.1  mrg   if (code != COND_EXPR)
   10369  1.1  mrg     return false;
   10370  1.1  mrg 
   10371  1.1  mrg   stmt_vec_info reduc_info = NULL;
   10372  1.1  mrg   int reduc_index = -1;
   10373  1.1  mrg   vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
   10374  1.1  mrg   bool for_reduction
   10375  1.1  mrg     = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
   10376  1.1  mrg   if (for_reduction)
   10377  1.1  mrg     {
   10378  1.1  mrg       if (slp_node)
   10379  1.1  mrg 	return false;
   10380  1.1  mrg       reduc_info = info_for_reduction (vinfo, stmt_info);
   10381  1.1  mrg       reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
   10382  1.1  mrg       reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
   10383  1.1  mrg       gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
   10384  1.1  mrg 		  || reduc_index != -1);
   10385  1.1  mrg     }
   10386  1.1  mrg   else
   10387  1.1  mrg     {
   10388  1.1  mrg       if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
   10389  1.1  mrg 	return false;
   10390  1.1  mrg     }
   10391  1.1  mrg 
   10392  1.1  mrg   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   10393  1.1  mrg   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
   10394  1.1  mrg 
   10395  1.1  mrg   if (slp_node)
   10396  1.1  mrg     {
   10397  1.1  mrg       ncopies = 1;
   10398  1.1  mrg       vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
   10399  1.1  mrg     }
   10400  1.1  mrg   else
   10401  1.1  mrg     {
   10402  1.1  mrg       ncopies = vect_get_num_copies (loop_vinfo, vectype);
   10403  1.1  mrg       vec_num = 1;
   10404  1.1  mrg     }
   10405  1.1  mrg 
   10406  1.1  mrg   gcc_assert (ncopies >= 1);
   10407  1.1  mrg   if (for_reduction && ncopies > 1)
   10408  1.1  mrg     return false; /* FORNOW */
   10409  1.1  mrg 
   10410  1.1  mrg   cond_expr = gimple_assign_rhs1 (stmt);
   10411  1.1  mrg 
   10412  1.1  mrg   if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
   10413  1.1  mrg 			    &comp_vectype, &dts[0], vectype)
   10414  1.1  mrg       || !comp_vectype)
   10415  1.1  mrg     return false;
   10416  1.1  mrg 
   10417  1.1  mrg   unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
   10418  1.1  mrg   slp_tree then_slp_node, else_slp_node;
   10419  1.1  mrg   if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
   10420  1.1  mrg 			   &then_clause, &then_slp_node, &dts[2], &vectype1))
   10421  1.1  mrg     return false;
   10422  1.1  mrg   if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
   10423  1.1  mrg 			   &else_clause, &else_slp_node, &dts[3], &vectype2))
   10424  1.1  mrg     return false;
   10425  1.1  mrg 
   10426  1.1  mrg   if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
   10427  1.1  mrg     return false;
   10428  1.1  mrg 
   10429  1.1  mrg   if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
   10430  1.1  mrg     return false;
   10431  1.1  mrg 
   10432  1.1  mrg   masked = !COMPARISON_CLASS_P (cond_expr);
   10433  1.1  mrg   vec_cmp_type = truth_type_for (comp_vectype);
   10434  1.1  mrg 
   10435  1.1  mrg   if (vec_cmp_type == NULL_TREE)
   10436  1.1  mrg     return false;
   10437  1.1  mrg 
   10438  1.1  mrg   cond_code = TREE_CODE (cond_expr);
   10439  1.1  mrg   if (!masked)
   10440  1.1  mrg     {
   10441  1.1  mrg       cond_expr0 = TREE_OPERAND (cond_expr, 0);
   10442  1.1  mrg       cond_expr1 = TREE_OPERAND (cond_expr, 1);
   10443  1.1  mrg     }
   10444  1.1  mrg 
   10445  1.1  mrg   /* For conditional reductions, the "then" value needs to be the candidate
   10446  1.1  mrg      value calculated by this iteration while the "else" value needs to be
   10447  1.1  mrg      the result carried over from previous iterations.  If the COND_EXPR
   10448  1.1  mrg      is the other way around, we need to swap it.  */
   10449  1.1  mrg   bool must_invert_cmp_result = false;
   10450  1.1  mrg   if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
   10451  1.1  mrg     {
   10452  1.1  mrg       if (masked)
   10453  1.1  mrg 	must_invert_cmp_result = true;
   10454  1.1  mrg       else
   10455  1.1  mrg 	{
   10456  1.1  mrg 	  bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
   10457  1.1  mrg 	  tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
   10458  1.1  mrg 	  if (new_code == ERROR_MARK)
   10459  1.1  mrg 	    must_invert_cmp_result = true;
   10460  1.1  mrg 	  else
   10461  1.1  mrg 	    {
   10462  1.1  mrg 	      cond_code = new_code;
   10463  1.1  mrg 	      /* Make sure we don't accidentally use the old condition.  */
   10464  1.1  mrg 	      cond_expr = NULL_TREE;
   10465  1.1  mrg 	    }
   10466  1.1  mrg 	}
   10467  1.1  mrg       std::swap (then_clause, else_clause);
   10468  1.1  mrg     }
   10469  1.1  mrg 
   10470  1.1  mrg   if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
   10471  1.1  mrg     {
   10472  1.1  mrg       /* Boolean values may have another representation in vectors
   10473  1.1  mrg 	 and therefore we prefer bit operations over comparison for
   10474  1.1  mrg 	 them (which also works for scalar masks).  We store opcodes
   10475  1.1  mrg 	 to use in bitop1 and bitop2.  Statement is vectorized as
   10476  1.1  mrg 	 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
   10477  1.1  mrg 	 depending on bitop1 and bitop2 arity.  */
   10478  1.1  mrg       switch (cond_code)
   10479  1.1  mrg 	{
   10480  1.1  mrg 	case GT_EXPR:
   10481  1.1  mrg 	  bitop1 = BIT_NOT_EXPR;
   10482  1.1  mrg 	  bitop2 = BIT_AND_EXPR;
   10483  1.1  mrg 	  break;
   10484  1.1  mrg 	case GE_EXPR:
   10485  1.1  mrg 	  bitop1 = BIT_NOT_EXPR;
   10486  1.1  mrg 	  bitop2 = BIT_IOR_EXPR;
   10487  1.1  mrg 	  break;
   10488  1.1  mrg 	case LT_EXPR:
   10489  1.1  mrg 	  bitop1 = BIT_NOT_EXPR;
   10490  1.1  mrg 	  bitop2 = BIT_AND_EXPR;
   10491  1.1  mrg 	  std::swap (cond_expr0, cond_expr1);
   10492  1.1  mrg 	  break;
   10493  1.1  mrg 	case LE_EXPR:
   10494  1.1  mrg 	  bitop1 = BIT_NOT_EXPR;
   10495  1.1  mrg 	  bitop2 = BIT_IOR_EXPR;
   10496  1.1  mrg 	  std::swap (cond_expr0, cond_expr1);
   10497  1.1  mrg 	  break;
   10498  1.1  mrg 	case NE_EXPR:
   10499  1.1  mrg 	  bitop1 = BIT_XOR_EXPR;
   10500  1.1  mrg 	  break;
   10501  1.1  mrg 	case EQ_EXPR:
   10502  1.1  mrg 	  bitop1 = BIT_XOR_EXPR;
   10503  1.1  mrg 	  bitop2 = BIT_NOT_EXPR;
   10504  1.1  mrg 	  break;
   10505  1.1  mrg 	default:
   10506  1.1  mrg 	  return false;
   10507  1.1  mrg 	}
   10508  1.1  mrg       cond_code = SSA_NAME;
   10509  1.1  mrg     }
   10510  1.1  mrg 
   10511  1.1  mrg   if (TREE_CODE_CLASS (cond_code) == tcc_comparison
   10512  1.1  mrg       && reduction_type == EXTRACT_LAST_REDUCTION
   10513  1.1  mrg       && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
   10514  1.1  mrg     {
   10515  1.1  mrg       if (dump_enabled_p ())
   10516  1.1  mrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   10517  1.1  mrg 			 "reduction comparison operation not supported.\n");
   10518  1.1  mrg       return false;
   10519  1.1  mrg     }
   10520  1.1  mrg 
   10521  1.1  mrg   if (!vec_stmt)
   10522  1.1  mrg     {
   10523  1.1  mrg       if (bitop1 != NOP_EXPR)
   10524  1.1  mrg 	{
   10525  1.1  mrg 	  machine_mode mode = TYPE_MODE (comp_vectype);
   10526  1.1  mrg 	  optab optab;
   10527  1.1  mrg 
   10528  1.1  mrg 	  optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
   10529  1.1  mrg 	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
   10530  1.1  mrg 	    return false;
   10531  1.1  mrg 
   10532  1.1  mrg 	  if (bitop2 != NOP_EXPR)
   10533  1.1  mrg 	    {
   10534  1.1  mrg 	      optab = optab_for_tree_code (bitop2, comp_vectype,
   10535  1.1  mrg 					   optab_default);
   10536  1.1  mrg 	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
   10537  1.1  mrg 		return false;
   10538  1.1  mrg 	    }
   10539  1.1  mrg 	}
   10540  1.1  mrg 
   10541  1.1  mrg       vect_cost_for_stmt kind = vector_stmt;
   10542  1.1  mrg       if (reduction_type == EXTRACT_LAST_REDUCTION)
   10543  1.1  mrg 	/* Count one reduction-like operation per vector.  */
   10544  1.1  mrg 	kind = vec_to_scalar;
   10545  1.1  mrg       else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
   10546  1.1  mrg 	return false;
   10547  1.1  mrg 
   10548  1.1  mrg       if (slp_node
   10549  1.1  mrg 	  && (!vect_maybe_update_slp_op_vectype
   10550  1.1  mrg 		 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
   10551  1.1  mrg 	      || (op_adjust == 1
   10552  1.1  mrg 		  && !vect_maybe_update_slp_op_vectype
   10553  1.1  mrg 			(SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
   10554  1.1  mrg 	      || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
   10555  1.1  mrg 	      || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
   10556  1.1  mrg 	{
   10557  1.1  mrg 	  if (dump_enabled_p ())
   10558  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   10559  1.1  mrg 			     "incompatible vector types for invariants\n");
   10560  1.1  mrg 	  return false;
   10561  1.1  mrg 	}
   10562  1.1  mrg 
   10563  1.1  mrg       if (loop_vinfo && for_reduction
   10564  1.1  mrg 	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
   10565  1.1  mrg 	{
   10566  1.1  mrg 	  if (reduction_type == EXTRACT_LAST_REDUCTION)
   10567  1.1  mrg 	    vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
   10568  1.1  mrg 				   ncopies * vec_num, vectype, NULL);
   10569  1.1  mrg 	  /* Extra inactive lanes should be safe for vect_nested_cycle.  */
   10570  1.1  mrg 	  else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
   10571  1.1  mrg 	    {
   10572  1.1  mrg 	      if (dump_enabled_p ())
   10573  1.1  mrg 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   10574  1.1  mrg 				 "conditional reduction prevents the use"
   10575  1.1  mrg 				 " of partial vectors.\n");
   10576  1.1  mrg 	      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
   10577  1.1  mrg 	    }
   10578  1.1  mrg 	}
   10579  1.1  mrg 
   10580  1.1  mrg       STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
   10581  1.1  mrg       vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
   10582  1.1  mrg 			      cost_vec, kind);
   10583  1.1  mrg       return true;
   10584  1.1  mrg     }
   10585  1.1  mrg 
   10586  1.1  mrg   /* Transform.  */
   10587  1.1  mrg 
   10588  1.1  mrg   /* Handle def.  */
   10589  1.1  mrg   scalar_dest = gimple_assign_lhs (stmt);
   10590  1.1  mrg   if (reduction_type != EXTRACT_LAST_REDUCTION)
   10591  1.1  mrg     vec_dest = vect_create_destination_var (scalar_dest, vectype);
   10592  1.1  mrg 
   10593  1.1  mrg   bool swap_cond_operands = false;
   10594  1.1  mrg 
   10595  1.1  mrg   /* See whether another part of the vectorized code applies a loop
   10596  1.1  mrg      mask to the condition, or to its inverse.  */
   10597  1.1  mrg 
   10598  1.1  mrg   vec_loop_masks *masks = NULL;
   10599  1.1  mrg   if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
   10600  1.1  mrg     {
   10601  1.1  mrg       if (reduction_type == EXTRACT_LAST_REDUCTION)
   10602  1.1  mrg 	masks = &LOOP_VINFO_MASKS (loop_vinfo);
   10603  1.1  mrg       else
   10604  1.1  mrg 	{
   10605  1.1  mrg 	  scalar_cond_masked_key cond (cond_expr, ncopies);
   10606  1.1  mrg 	  if (loop_vinfo->scalar_cond_masked_set.contains (cond))
   10607  1.1  mrg 	    masks = &LOOP_VINFO_MASKS (loop_vinfo);
   10608  1.1  mrg 	  else
   10609  1.1  mrg 	    {
   10610  1.1  mrg 	      bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
   10611  1.1  mrg 	      tree_code orig_code = cond.code;
   10612  1.1  mrg 	      cond.code = invert_tree_comparison (cond.code, honor_nans);
   10613  1.1  mrg 	      if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
   10614  1.1  mrg 		{
   10615  1.1  mrg 		  masks = &LOOP_VINFO_MASKS (loop_vinfo);
   10616  1.1  mrg 		  cond_code = cond.code;
   10617  1.1  mrg 		  swap_cond_operands = true;
   10618  1.1  mrg 		}
   10619  1.1  mrg 	      else
   10620  1.1  mrg 		{
   10621  1.1  mrg 		  /* Try the inverse of the current mask.  We check if the
   10622  1.1  mrg 		     inverse mask is live and if so we generate a negate of
   10623  1.1  mrg 		     the current mask such that we still honor NaNs.  */
   10624  1.1  mrg 		  cond.inverted_p = true;
   10625  1.1  mrg 		  cond.code = orig_code;
   10626  1.1  mrg 		  if (loop_vinfo->scalar_cond_masked_set.contains (cond))
   10627  1.1  mrg 		    {
   10628  1.1  mrg 		      masks = &LOOP_VINFO_MASKS (loop_vinfo);
   10629  1.1  mrg 		      cond_code = cond.code;
   10630  1.1  mrg 		      swap_cond_operands = true;
   10631  1.1  mrg 		      must_invert_cmp_result = true;
   10632  1.1  mrg 		    }
   10633  1.1  mrg 		}
   10634  1.1  mrg 	    }
   10635  1.1  mrg 	}
   10636  1.1  mrg     }
   10637  1.1  mrg 
   10638  1.1  mrg   /* Handle cond expr.  */
   10639  1.1  mrg   if (masked)
   10640  1.1  mrg     vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
   10641  1.1  mrg 		       cond_expr, &vec_oprnds0, comp_vectype,
   10642  1.1  mrg 		       then_clause, &vec_oprnds2, vectype,
   10643  1.1  mrg 		       reduction_type != EXTRACT_LAST_REDUCTION
   10644  1.1  mrg 		       ? else_clause : NULL, &vec_oprnds3, vectype);
   10645  1.1  mrg   else
   10646  1.1  mrg     vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
   10647  1.1  mrg 		       cond_expr0, &vec_oprnds0, comp_vectype,
   10648  1.1  mrg 		       cond_expr1, &vec_oprnds1, comp_vectype,
   10649  1.1  mrg 		       then_clause, &vec_oprnds2, vectype,
   10650  1.1  mrg 		       reduction_type != EXTRACT_LAST_REDUCTION
   10651  1.1  mrg 		       ? else_clause : NULL, &vec_oprnds3, vectype);
   10652  1.1  mrg 
   10653  1.1  mrg   /* Arguments are ready.  Create the new vector stmt.  */
   10654  1.1  mrg   FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
   10655  1.1  mrg     {
   10656  1.1  mrg       vec_then_clause = vec_oprnds2[i];
   10657  1.1  mrg       if (reduction_type != EXTRACT_LAST_REDUCTION)
   10658  1.1  mrg 	vec_else_clause = vec_oprnds3[i];
   10659  1.1  mrg 
   10660  1.1  mrg       if (swap_cond_operands)
   10661  1.1  mrg 	std::swap (vec_then_clause, vec_else_clause);
   10662  1.1  mrg 
   10663  1.1  mrg       if (masked)
   10664  1.1  mrg 	vec_compare = vec_cond_lhs;
   10665  1.1  mrg       else
   10666  1.1  mrg 	{
   10667  1.1  mrg 	  vec_cond_rhs = vec_oprnds1[i];
   10668  1.1  mrg 	  if (bitop1 == NOP_EXPR)
   10669  1.1  mrg 	    {
   10670  1.1  mrg 	      gimple_seq stmts = NULL;
   10671  1.1  mrg 	      vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
   10672  1.1  mrg 					   vec_cond_lhs, vec_cond_rhs);
   10673  1.1  mrg 	      gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
   10674  1.1  mrg 	    }
   10675  1.1  mrg 	  else
   10676  1.1  mrg 	    {
   10677  1.1  mrg 	      new_temp = make_ssa_name (vec_cmp_type);
   10678  1.1  mrg 	      gassign *new_stmt;
   10679  1.1  mrg 	      if (bitop1 == BIT_NOT_EXPR)
   10680  1.1  mrg 		new_stmt = gimple_build_assign (new_temp, bitop1,
   10681  1.1  mrg 						vec_cond_rhs);
   10682  1.1  mrg 	      else
   10683  1.1  mrg 		new_stmt
   10684  1.1  mrg 		  = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
   10685  1.1  mrg 					 vec_cond_rhs);
   10686  1.1  mrg 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   10687  1.1  mrg 	      if (bitop2 == NOP_EXPR)
   10688  1.1  mrg 		vec_compare = new_temp;
   10689  1.1  mrg 	      else if (bitop2 == BIT_NOT_EXPR)
   10690  1.1  mrg 		{
   10691  1.1  mrg 		  /* Instead of doing ~x ? y : z do x ? z : y.  */
   10692  1.1  mrg 		  vec_compare = new_temp;
   10693  1.1  mrg 		  std::swap (vec_then_clause, vec_else_clause);
   10694  1.1  mrg 		}
   10695  1.1  mrg 	      else
   10696  1.1  mrg 		{
   10697  1.1  mrg 		  vec_compare = make_ssa_name (vec_cmp_type);
   10698  1.1  mrg 		  new_stmt
   10699  1.1  mrg 		    = gimple_build_assign (vec_compare, bitop2,
   10700  1.1  mrg 					   vec_cond_lhs, new_temp);
   10701  1.1  mrg 		  vect_finish_stmt_generation (vinfo, stmt_info,
   10702  1.1  mrg 					       new_stmt, gsi);
   10703  1.1  mrg 		}
   10704  1.1  mrg 	    }
   10705  1.1  mrg 	}
   10706  1.1  mrg 
   10707  1.1  mrg       /* If we decided to apply a loop mask to the result of the vector
   10708  1.1  mrg 	 comparison, AND the comparison with the mask now.  Later passes
   10709  1.1  mrg 	 should then be able to reuse the AND results between mulitple
   10710  1.1  mrg 	 vector statements.
   10711  1.1  mrg 
   10712  1.1  mrg 	 For example:
   10713  1.1  mrg 	 for (int i = 0; i < 100; ++i)
   10714  1.1  mrg 	 x[i] = y[i] ? z[i] : 10;
   10715  1.1  mrg 
   10716  1.1  mrg 	 results in following optimized GIMPLE:
   10717  1.1  mrg 
   10718  1.1  mrg 	 mask__35.8_43 = vect__4.7_41 != { 0, ... };
   10719  1.1  mrg 	 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
   10720  1.1  mrg 	 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
   10721  1.1  mrg 	 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
   10722  1.1  mrg 	 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
   10723  1.1  mrg 	 vect_iftmp.11_47, { 10, ... }>;
   10724  1.1  mrg 
   10725  1.1  mrg 	 instead of using a masked and unmasked forms of
   10726  1.1  mrg 	 vec != { 0, ... } (masked in the MASK_LOAD,
   10727  1.1  mrg 	 unmasked in the VEC_COND_EXPR).  */
   10728  1.1  mrg 
   10729  1.1  mrg       /* Force vec_compare to be an SSA_NAME rather than a comparison,
   10730  1.1  mrg 	 in cases where that's necessary.  */
   10731  1.1  mrg 
   10732  1.1  mrg       if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
   10733  1.1  mrg 	{
   10734  1.1  mrg 	  if (!is_gimple_val (vec_compare))
   10735  1.1  mrg 	    {
   10736  1.1  mrg 	      tree vec_compare_name = make_ssa_name (vec_cmp_type);
   10737  1.1  mrg 	      gassign *new_stmt = gimple_build_assign (vec_compare_name,
   10738  1.1  mrg 						       vec_compare);
   10739  1.1  mrg 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   10740  1.1  mrg 	      vec_compare = vec_compare_name;
   10741  1.1  mrg 	    }
   10742  1.1  mrg 
   10743  1.1  mrg 	  if (must_invert_cmp_result)
   10744  1.1  mrg 	    {
   10745  1.1  mrg 	      tree vec_compare_name = make_ssa_name (vec_cmp_type);
   10746  1.1  mrg 	      gassign *new_stmt = gimple_build_assign (vec_compare_name,
   10747  1.1  mrg 						       BIT_NOT_EXPR,
   10748  1.1  mrg 						       vec_compare);
   10749  1.1  mrg 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   10750  1.1  mrg 	      vec_compare = vec_compare_name;
   10751  1.1  mrg 	    }
   10752  1.1  mrg 
   10753  1.1  mrg 	  if (masks)
   10754  1.1  mrg 	    {
   10755  1.1  mrg 	      tree loop_mask
   10756  1.1  mrg 		= vect_get_loop_mask (gsi, masks, vec_num * ncopies,
   10757  1.1  mrg 				      vectype, i);
   10758  1.1  mrg 	      tree tmp2 = make_ssa_name (vec_cmp_type);
   10759  1.1  mrg 	      gassign *g
   10760  1.1  mrg 		= gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
   10761  1.1  mrg 				       loop_mask);
   10762  1.1  mrg 	      vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
   10763  1.1  mrg 	      vec_compare = tmp2;
   10764  1.1  mrg 	    }
   10765  1.1  mrg 	}
   10766  1.1  mrg 
   10767  1.1  mrg       gimple *new_stmt;
   10768  1.1  mrg       if (reduction_type == EXTRACT_LAST_REDUCTION)
   10769  1.1  mrg 	{
   10770  1.1  mrg 	  gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
   10771  1.1  mrg 	  tree lhs = gimple_get_lhs (old_stmt);
   10772  1.1  mrg 	  new_stmt = gimple_build_call_internal
   10773  1.1  mrg 	      (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
   10774  1.1  mrg 	       vec_then_clause);
   10775  1.1  mrg 	  gimple_call_set_lhs (new_stmt, lhs);
   10776  1.1  mrg 	  SSA_NAME_DEF_STMT (lhs) = new_stmt;
   10777  1.1  mrg 	  if (old_stmt == gsi_stmt (*gsi))
   10778  1.1  mrg 	    vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
   10779  1.1  mrg 	  else
   10780  1.1  mrg 	    {
   10781  1.1  mrg 	      /* In this case we're moving the definition to later in the
   10782  1.1  mrg 		 block.  That doesn't matter because the only uses of the
   10783  1.1  mrg 		 lhs are in phi statements.  */
   10784  1.1  mrg 	      gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
   10785  1.1  mrg 	      gsi_remove (&old_gsi, true);
   10786  1.1  mrg 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   10787  1.1  mrg 	    }
   10788  1.1  mrg 	}
   10789  1.1  mrg       else
   10790  1.1  mrg 	{
   10791  1.1  mrg 	  new_temp = make_ssa_name (vec_dest);
   10792  1.1  mrg 	  new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
   10793  1.1  mrg 					  vec_then_clause, vec_else_clause);
   10794  1.1  mrg 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   10795  1.1  mrg 	}
   10796  1.1  mrg       if (slp_node)
   10797  1.1  mrg 	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
   10798  1.1  mrg       else
   10799  1.1  mrg 	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
   10800  1.1  mrg     }
   10801  1.1  mrg 
   10802  1.1  mrg   if (!slp_node)
   10803  1.1  mrg     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
   10804  1.1  mrg 
   10805  1.1  mrg   vec_oprnds0.release ();
   10806  1.1  mrg   vec_oprnds1.release ();
   10807  1.1  mrg   vec_oprnds2.release ();
   10808  1.1  mrg   vec_oprnds3.release ();
   10809  1.1  mrg 
   10810  1.1  mrg   return true;
   10811  1.1  mrg }
   10812  1.1  mrg 
/* vectorizable_comparison.

   Check if STMT_INFO is comparison expression that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   comparison, put it in VEC_STMT, and insert it at GSI.

   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_comparison (vec_info *vinfo,
			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			 gimple **vec_stmt,
			 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
{
  tree lhs, rhs1, rhs2;
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
  /* The result of a vector comparison is a vector boolean (mask);
     STMT_VINFO_VECTYPE holds that mask type here.  */
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
  tree new_temp;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
  int ndts = 2;
  poly_uint64 nunits;
  int ncopies;
  enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
  int i;
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree mask_type;
  tree mask;

  /* In loop vectorization only relevant stmts are analyzed; in BB
     vectorization every stmt of an SLP instance is considered.  */
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  /* The destination type must be a vector mask type.  */
  if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
    return false;

  mask_type = vectype;
  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* For SLP the whole group is emitted in one pass, so a single copy.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (TREE_CODE_CLASS (code) != tcc_comparison)
    return false;

  /* Fetch both comparison operands together with their vector types
     (NULL_TREE vectype for constants/invariants).  */
  slp_tree slp_rhs1, slp_rhs2;
  if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
			   0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
    return false;

  if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
			   1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
    return false;

  /* Both operand vectors must have the same number of elements.  */
  if (vectype1 && vectype2
      && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
		   TYPE_VECTOR_SUBPARTS (vectype2)))
    return false;

  vectype = vectype1 ? vectype1 : vectype2;

  /* Invariant comparison.  */
  if (!vectype)
    {
      if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
	vectype = mask_type;
      else
	vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
					       slp_node);
      if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
	return false;
    }
  else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
    return false;

  /* Can't compare mask and non-mask types.  */
  if (vectype1 && vectype2
      && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
    return false;

  /* Boolean values may have another representation in vectors
     and therefore we prefer bit operations over comparison for
     them (which also works for scalar masks).  We store opcodes
     to use in bitop1 and bitop2.  Statement is vectorized as
       BITOP2 (rhs1 BITOP1 rhs2) or
       rhs1 BITOP2 (BITOP1 rhs2)
     depending on bitop1 and bitop2 arity.  */
  bool swap_p = false;
  if (VECTOR_BOOLEAN_TYPE_P (vectype))
    {
      /* For boolean operands:  a > b  becomes  a & ~b,
	 a >= b  becomes  a | ~b;  LT/LE are GT/GE with the operands
	 swapped;  NE is XOR and EQ is ~(a ^ b).  */
      if (code == GT_EXPR)
	{
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_AND_EXPR;
	}
      else if (code == GE_EXPR)
	{
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_IOR_EXPR;
	}
      else if (code == LT_EXPR)
	{
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_AND_EXPR;
	  swap_p = true;
	}
      else if (code == LE_EXPR)
	{
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_IOR_EXPR;
	  swap_p = true;
	}
      else
	{
	  bitop1 = BIT_XOR_EXPR;
	  if (code == EQ_EXPR)
	    bitop2 = BIT_NOT_EXPR;
	}
    }

  if (!vec_stmt)
    {
      /* Analysis only: verify the target supports either the direct
	 vector comparison or the bit operations we lowered it to.  */
      if (bitop1 == NOP_EXPR)
	{
	  if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
	    return false;
	}
      else
	{
	  machine_mode mode = TYPE_MODE (vectype);
	  optab optab;

	  optab = optab_for_tree_code (bitop1, vectype, optab_default);
	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
	    return false;

	  if (bitop2 != NOP_EXPR)
	    {
	      optab = optab_for_tree_code (bitop2, vectype, optab_default);
	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
		return false;
	    }
	}

      /* Put types on constant and invariant SLP children.  */
      if (slp_node
	  && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
	      || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "incompatible vector types for invariants\n");
	  return false;
	}

      STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
      /* Each copy costs one stmt, plus a second one when bitop2 is used.  */
      vect_model_simple_cost (vinfo, stmt_info,
			      ncopies * (1 + (bitop2 != NOP_EXPR)),
			      dts, ndts, slp_node, cost_vec);
      return true;
    }

  /* Transform.  */

  /* Handle def.  */
  lhs = gimple_assign_lhs (stmt);
  mask = vect_create_destination_var (lhs, mask_type);

  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
		     rhs1, &vec_oprnds0, vectype,
		     rhs2, &vec_oprnds1, vectype);
  /* LT/LE were mapped onto the GT/GE bit patterns; realize the swap
     once on the whole operand vectors.  */
  if (swap_p)
    std::swap (vec_oprnds0, vec_oprnds1);

  /* Arguments are ready.  Create the new vector stmt.  */
  FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
    {
      gimple *new_stmt;
      vec_rhs2 = vec_oprnds1[i];

      new_temp = make_ssa_name (mask);
      if (bitop1 == NOP_EXPR)
	{
	  /* Emit the comparison directly.  */
	  new_stmt = gimple_build_assign (new_temp, code,
					  vec_rhs1, vec_rhs2);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	}
      else
	{
	  /* Lowered form: a unary BIT_NOT_EXPR applies to vec_rhs2.  */
	  if (bitop1 == BIT_NOT_EXPR)
	    new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
	  else
	    new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
					    vec_rhs2);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	  if (bitop2 != NOP_EXPR)
	    {
	      /* Combine the first result with vec_rhs1 (binary bitop2)
		 or invert it (BIT_NOT_EXPR, the EQ_EXPR case).  */
	      tree res = make_ssa_name (mask);
	      if (bitop2 == BIT_NOT_EXPR)
		new_stmt = gimple_build_assign (res, bitop2, new_temp);
	      else
		new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
						new_temp);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	    }
	}
      if (slp_node)
	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
      else
	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
    }

  if (!slp_node)
    *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}
   11047  1.1  mrg 
   11048  1.1  mrg /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
   11049  1.1  mrg    can handle all live statements in the node.  Otherwise return true
   11050  1.1  mrg    if STMT_INFO is not live or if vectorizable_live_operation can handle it.
   11051  1.1  mrg    GSI and VEC_STMT_P are as for vectorizable_live_operation.  */
   11052  1.1  mrg 
   11053  1.1  mrg static bool
   11054  1.1  mrg can_vectorize_live_stmts (vec_info *vinfo,
   11055  1.1  mrg 			  stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   11056  1.1  mrg 			  slp_tree slp_node, slp_instance slp_node_instance,
   11057  1.1  mrg 			  bool vec_stmt_p,
   11058  1.1  mrg 			  stmt_vector_for_cost *cost_vec)
   11059  1.1  mrg {
   11060  1.1  mrg   if (slp_node)
   11061  1.1  mrg     {
   11062  1.1  mrg       stmt_vec_info slp_stmt_info;
   11063  1.1  mrg       unsigned int i;
   11064  1.1  mrg       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
   11065  1.1  mrg 	{
   11066  1.1  mrg 	  if (STMT_VINFO_LIVE_P (slp_stmt_info)
   11067  1.1  mrg 	      && !vectorizable_live_operation (vinfo,
   11068  1.1  mrg 					       slp_stmt_info, gsi, slp_node,
   11069  1.1  mrg 					       slp_node_instance, i,
   11070  1.1  mrg 					       vec_stmt_p, cost_vec))
   11071  1.1  mrg 	    return false;
   11072  1.1  mrg 	}
   11073  1.1  mrg     }
   11074  1.1  mrg   else if (STMT_VINFO_LIVE_P (stmt_info)
   11075  1.1  mrg 	   && !vectorizable_live_operation (vinfo, stmt_info, gsi,
   11076  1.1  mrg 					    slp_node, slp_node_instance, -1,
   11077  1.1  mrg 					    vec_stmt_p, cost_vec))
   11078  1.1  mrg     return false;
   11079  1.1  mrg 
   11080  1.1  mrg   return true;
   11081  1.1  mrg }
   11082  1.1  mrg 
/* Make sure the statement is vectorizable.

   STMT_INFO is the statement to analyze; NODE and NODE_INSTANCE are the
   SLP node and instance it is analyzed as part of (NODE may be NULL for
   non-SLP analysis).  *NEED_TO_VECTORIZE is set when a relevant stmt is
   encountered; costs are recorded in COST_VEC.  Returns an opt_result
   carrying a failure description on the offending stmt.  */

opt_result
vect_analyze_stmt (vec_info *vinfo,
		   stmt_vec_info stmt_info, bool *need_to_vectorize,
		   slp_tree node, slp_instance node_instance,
		   stmt_vector_for_cost *cost_vec)
{
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  bool ok;
  gimple_seq pattern_def_seq;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
		     stmt_info->stmt);

  /* Volatile operands can never be vectorized.  */
  if (gimple_has_volatile_ops (stmt_info->stmt))
    return opt_result::failure_at (stmt_info->stmt,
				   "not vectorized:"
				   " stmt has volatile operands: %G\n",
				   stmt_info->stmt);

  /* For a stmt replaced by a pattern, first recurse into any relevant
     or live stmts of the pattern definition sequence (non-SLP only;
     SLP nodes already contain the pattern stmts themselves).  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info)
      && node == NULL
      && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
    {
      gimple_stmt_iterator si;

      for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt_vec_info pattern_def_stmt_info
	    = vinfo->lookup_stmt (gsi_stmt (si));
	  if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
	      || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
	    {
	      /* Analyze def stmt of STMT if it's a pattern stmt.  */
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "==> examining pattern def statement: %G",
				 pattern_def_stmt_info->stmt);

	      opt_result res
		= vect_analyze_stmt (vinfo, pattern_def_stmt_info,
				     need_to_vectorize, node, node_instance,
				     cost_vec);
	      if (!res)
		return res;
	    }
	}
    }

  /* Skip stmts that do not need to be vectorized. In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     Pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal, don't analyze pattern stmts instead, the pattern stmts
     already will be part of SLP instance.  */

  stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
    {
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && pattern_stmt_info
	  && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
	      || STMT_VINFO_LIVE_P (pattern_stmt_info)))
        {
          /* Analyze PATTERN_STMT instead of the original stmt.  */
	  stmt_info = pattern_stmt_info;
          if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "==> examining pattern statement: %G",
			     stmt_info->stmt);
        }
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");

          return opt_result::success ();
        }
    }
  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	   && node == NULL
	   && pattern_stmt_info
	   && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
	       || STMT_VINFO_LIVE_P (pattern_stmt_info)))
    {
      /* Analyze PATTERN_STMT too.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "==> examining pattern statement: %G",
			 pattern_stmt_info->stmt);

      opt_result res
	= vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
			     node_instance, cost_vec);
      if (!res)
	return res;
   }

  /* Sanity-check the def kind against the vectorization context.  */
  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    {
      case vect_internal_def:
        break;

      case vect_reduction_def:
      case vect_nested_cycle:
         gcc_assert (!bb_vinfo
		     && (relevance == vect_used_in_outer
			 || relevance == vect_used_in_outer_by_reduction
			 || relevance == vect_used_by_reduction
			 || relevance == vect_unused_in_scope
			 || relevance == vect_used_only_live));
         break;

      case vect_induction_def:
	gcc_assert (!bb_vinfo);
	break;

      case vect_constant_def:
      case vect_external_def:
      case vect_unknown_def_type:
      default:
        gcc_unreachable ();
    }

  /* Temporarily install the SLP node's vectype for the analysis below;
     it is restored before returning.  */
  tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (node)
    STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);

  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
		  || (call && gimple_call_lhs (call) == NULL_TREE));
      *need_to_vectorize = true;
    }

  if (PURE_SLP_STMT (stmt_info) && !node)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "handled only by SLP analysis\n");
      return opt_result::success ();
    }

  /* Try each vectorizable_* analysis in turn until one accepts the
     stmt; the set and order differ between loop and BB vectorization.  */
  ok = true;
  if (!bb_vinfo
      && (STMT_VINFO_RELEVANT_P (stmt_info)
	  || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    /* Prefer vectorizable_call over vectorizable_simd_clone_call so
       -mveclibabi= takes preference over library functions with
       the simd attribute.  */
    ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
					   cost_vec)
	  || vectorizable_conversion (vinfo, stmt_info,
				      NULL, NULL, node, cost_vec)
	  || vectorizable_operation (vinfo, stmt_info,
				     NULL, NULL, node, cost_vec)
	  || vectorizable_assignment (vinfo, stmt_info,
				      NULL, NULL, node, cost_vec)
	  || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
				     node, node_instance, cost_vec)
	  || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
				     NULL, node, cost_vec)
	  || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_condition (vinfo, stmt_info,
				     NULL, NULL, node, cost_vec)
	  || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
				      cost_vec)
	  || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
				  stmt_info, NULL, node));
  else
    {
      if (bb_vinfo)
	ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
	      || vectorizable_simd_clone_call (vinfo, stmt_info,
					       NULL, NULL, node, cost_vec)
	      || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
					  cost_vec)
	      || vectorizable_shift (vinfo, stmt_info,
				     NULL, NULL, node, cost_vec)
	      || vectorizable_operation (vinfo, stmt_info,
					 NULL, NULL, node, cost_vec)
	      || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
					  cost_vec)
	      || vectorizable_load (vinfo, stmt_info,
				    NULL, NULL, node, cost_vec)
	      || vectorizable_store (vinfo, stmt_info,
				     NULL, NULL, node, cost_vec)
	      || vectorizable_condition (vinfo, stmt_info,
					 NULL, NULL, node, cost_vec)
	      || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
					  cost_vec)
	      || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
    }

  if (node)
    STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;

  if (!ok)
    return opt_result::failure_at (stmt_info->stmt,
				   "not vectorized:"
				   " relevant stmt not supported: %G",
				   stmt_info->stmt);

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
      need extra handling, except for vectorizable reductions.  */
  if (!bb_vinfo
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
      && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
				    stmt_info, NULL, node, node_instance,
				    false, cost_vec))
    return opt_result::failure_at (stmt_info->stmt,
				   "not vectorized:"
				   " live stmt not supported: %G",
				   stmt_info->stmt);

  return opt_result::success ();
}
   11316  1.1  mrg 
   11317  1.1  mrg 
   11318  1.1  mrg /* Function vect_transform_stmt.
   11319  1.1  mrg 
   11320  1.1  mrg    Create a vectorized stmt to replace STMT_INFO, and insert it at GSI.  */
   11321  1.1  mrg 
   11322  1.1  mrg bool
   11323  1.1  mrg vect_transform_stmt (vec_info *vinfo,
   11324  1.1  mrg 		     stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   11325  1.1  mrg 		     slp_tree slp_node, slp_instance slp_node_instance)
   11326  1.1  mrg {
   11327  1.1  mrg   bool is_store = false;
   11328  1.1  mrg   gimple *vec_stmt = NULL;
   11329  1.1  mrg   bool done;
   11330  1.1  mrg 
   11331  1.1  mrg   gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
   11332  1.1  mrg 
   11333  1.1  mrg   tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
   11334  1.1  mrg   if (slp_node)
   11335  1.1  mrg     STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
   11336  1.1  mrg 
   11337  1.1  mrg   switch (STMT_VINFO_TYPE (stmt_info))
   11338  1.1  mrg     {
   11339  1.1  mrg     case type_demotion_vec_info_type:
   11340  1.1  mrg     case type_promotion_vec_info_type:
   11341  1.1  mrg     case type_conversion_vec_info_type:
   11342  1.1  mrg       done = vectorizable_conversion (vinfo, stmt_info,
   11343  1.1  mrg 				      gsi, &vec_stmt, slp_node, NULL);
   11344  1.1  mrg       gcc_assert (done);
   11345  1.1  mrg       break;
   11346  1.1  mrg 
   11347  1.1  mrg     case induc_vec_info_type:
   11348  1.1  mrg       done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
   11349  1.1  mrg 				     stmt_info, &vec_stmt, slp_node,
   11350  1.1  mrg 				     NULL);
   11351  1.1  mrg       gcc_assert (done);
   11352  1.1  mrg       break;
   11353  1.1  mrg 
   11354  1.1  mrg     case shift_vec_info_type:
   11355  1.1  mrg       done = vectorizable_shift (vinfo, stmt_info,
   11356  1.1  mrg 				 gsi, &vec_stmt, slp_node, NULL);
   11357  1.1  mrg       gcc_assert (done);
   11358  1.1  mrg       break;
   11359  1.1  mrg 
   11360  1.1  mrg     case op_vec_info_type:
   11361  1.1  mrg       done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
   11362  1.1  mrg 				     NULL);
   11363  1.1  mrg       gcc_assert (done);
   11364  1.1  mrg       break;
   11365  1.1  mrg 
   11366  1.1  mrg     case assignment_vec_info_type:
   11367  1.1  mrg       done = vectorizable_assignment (vinfo, stmt_info,
   11368  1.1  mrg 				      gsi, &vec_stmt, slp_node, NULL);
   11369  1.1  mrg       gcc_assert (done);
   11370  1.1  mrg       break;
   11371  1.1  mrg 
   11372  1.1  mrg     case load_vec_info_type:
   11373  1.1  mrg       done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
   11374  1.1  mrg 				NULL);
   11375  1.1  mrg       gcc_assert (done);
   11376  1.1  mrg       break;
   11377  1.1  mrg 
   11378  1.1  mrg     case store_vec_info_type:
   11379  1.1  mrg       done = vectorizable_store (vinfo, stmt_info,
   11380  1.1  mrg 				 gsi, &vec_stmt, slp_node, NULL);
   11381  1.1  mrg       gcc_assert (done);
   11382  1.1  mrg       if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
   11383  1.1  mrg 	{
   11384  1.1  mrg 	  /* In case of interleaving, the whole chain is vectorized when the
   11385  1.1  mrg 	     last store in the chain is reached.  Store stmts before the last
   11386  1.1  mrg 	     one are skipped, and there vec_stmt_info shouldn't be freed
   11387  1.1  mrg 	     meanwhile.  */
   11388  1.1  mrg 	  stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
   11389  1.1  mrg 	  if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
   11390  1.1  mrg 	    is_store = true;
   11391  1.1  mrg 	}
   11392  1.1  mrg       else
   11393  1.1  mrg 	is_store = true;
   11394  1.1  mrg       break;
   11395  1.1  mrg 
   11396  1.1  mrg     case condition_vec_info_type:
   11397  1.1  mrg       done = vectorizable_condition (vinfo, stmt_info,
   11398  1.1  mrg 				     gsi, &vec_stmt, slp_node, NULL);
   11399  1.1  mrg       gcc_assert (done);
   11400  1.1  mrg       break;
   11401  1.1  mrg 
   11402  1.1  mrg     case comparison_vec_info_type:
   11403  1.1  mrg       done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
   11404  1.1  mrg 				      slp_node, NULL);
   11405  1.1  mrg       gcc_assert (done);
   11406  1.1  mrg       break;
   11407  1.1  mrg 
   11408  1.1  mrg     case call_vec_info_type:
   11409  1.1  mrg       done = vectorizable_call (vinfo, stmt_info,
   11410  1.1  mrg 				gsi, &vec_stmt, slp_node, NULL);
   11411  1.1  mrg       break;
   11412  1.1  mrg 
   11413  1.1  mrg     case call_simd_clone_vec_info_type:
   11414  1.1  mrg       done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
   11415  1.1  mrg 					   slp_node, NULL);
   11416  1.1  mrg       break;
   11417  1.1  mrg 
   11418  1.1  mrg     case reduc_vec_info_type:
   11419  1.1  mrg       done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
   11420  1.1  mrg 				       gsi, &vec_stmt, slp_node);
   11421  1.1  mrg       gcc_assert (done);
   11422  1.1  mrg       break;
   11423  1.1  mrg 
   11424  1.1  mrg     case cycle_phi_info_type:
   11425  1.1  mrg       done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
   11426  1.1  mrg 				       &vec_stmt, slp_node, slp_node_instance);
   11427  1.1  mrg       gcc_assert (done);
   11428  1.1  mrg       break;
   11429  1.1  mrg 
   11430  1.1  mrg     case lc_phi_info_type:
   11431  1.1  mrg       done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
   11432  1.1  mrg 				  stmt_info, &vec_stmt, slp_node);
   11433  1.1  mrg       gcc_assert (done);
   11434  1.1  mrg       break;
   11435  1.1  mrg 
   11436  1.1  mrg     case phi_info_type:
   11437  1.1  mrg       done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
   11438  1.1  mrg       gcc_assert (done);
   11439  1.1  mrg       break;
   11440  1.1  mrg 
   11441  1.1  mrg     default:
   11442  1.1  mrg       if (!STMT_VINFO_LIVE_P (stmt_info))
   11443  1.1  mrg 	{
   11444  1.1  mrg 	  if (dump_enabled_p ())
   11445  1.1  mrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   11446  1.1  mrg                              "stmt not supported.\n");
   11447  1.1  mrg 	  gcc_unreachable ();
   11448  1.1  mrg 	}
   11449  1.1  mrg       done = true;
   11450  1.1  mrg     }
   11451  1.1  mrg 
   11452  1.1  mrg   if (!slp_node && vec_stmt)
   11453  1.1  mrg     gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
   11454  1.1  mrg 
   11455  1.1  mrg   if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
   11456  1.1  mrg     {
   11457  1.1  mrg       /* Handle stmts whose DEF is used outside the loop-nest that is
   11458  1.1  mrg 	 being vectorized.  */
   11459  1.1  mrg       done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
   11460  1.1  mrg 				       slp_node_instance, true, NULL);
   11461  1.1  mrg       gcc_assert (done);
   11462  1.1  mrg     }
   11463  1.1  mrg 
   11464  1.1  mrg   if (slp_node)
   11465  1.1  mrg     STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
   11466  1.1  mrg 
   11467  1.1  mrg   return is_store;
   11468  1.1  mrg }
   11469  1.1  mrg 
   11470  1.1  mrg 
   11471  1.1  mrg /* Remove a group of stores (for SLP or interleaving), free their
   11472  1.1  mrg    stmt_vec_info.  */
   11473  1.1  mrg 
   11474  1.1  mrg void
   11475  1.1  mrg vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
   11476  1.1  mrg {
   11477  1.1  mrg   stmt_vec_info next_stmt_info = first_stmt_info;
   11478  1.1  mrg 
   11479  1.1  mrg   while (next_stmt_info)
   11480  1.1  mrg     {
   11481  1.1  mrg       stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
   11482  1.1  mrg       next_stmt_info = vect_orig_stmt (next_stmt_info);
   11483  1.1  mrg       /* Free the attached stmt_vec_info and remove the stmt.  */
   11484  1.1  mrg       vinfo->remove_stmt (next_stmt_info);
   11485  1.1  mrg       next_stmt_info = tmp;
   11486  1.1  mrg     }
   11487  1.1  mrg }
   11488  1.1  mrg 
   11489  1.1  mrg /* If NUNITS is nonzero, return a vector type that contains NUNITS
   11490  1.1  mrg    elements of type SCALAR_TYPE, or null if the target doesn't support
   11491  1.1  mrg    such a type.
   11492  1.1  mrg 
   11493  1.1  mrg    If NUNITS is zero, return a vector type that contains elements of
   11494  1.1  mrg    type SCALAR_TYPE, choosing whichever vector size the target prefers.
   11495  1.1  mrg 
   11496  1.1  mrg    If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
   11497  1.1  mrg    for this vectorization region and want to "autodetect" the best choice.
   11498  1.1  mrg    Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
   11499  1.1  mrg    and we want the new type to be interoperable with it.   PREVAILING_MODE
   11500  1.1  mrg    in this case can be a scalar integer mode or a vector mode; when it
   11501  1.1  mrg    is a vector mode, the function acts like a tree-level version of
   11502  1.1  mrg    related_vector_mode.  */
   11503  1.1  mrg 
   11504  1.1  mrg tree
   11505  1.1  mrg get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
   11506  1.1  mrg 				     tree scalar_type, poly_uint64 nunits)
   11507  1.1  mrg {
   11508  1.1  mrg   tree orig_scalar_type = scalar_type;
   11509  1.1  mrg   scalar_mode inner_mode;
   11510  1.1  mrg   machine_mode simd_mode;
   11511  1.1  mrg   tree vectype;
   11512  1.1  mrg 
   11513  1.1  mrg   if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
   11514  1.1  mrg       && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
   11515  1.1  mrg     return NULL_TREE;
   11516  1.1  mrg 
   11517  1.1  mrg   unsigned int nbytes = GET_MODE_SIZE (inner_mode);
   11518  1.1  mrg 
   11519  1.1  mrg   /* For vector types of elements whose mode precision doesn't
   11520  1.1  mrg      match their types precision we use a element type of mode
   11521  1.1  mrg      precision.  The vectorization routines will have to make sure
   11522  1.1  mrg      they support the proper result truncation/extension.
   11523  1.1  mrg      We also make sure to build vector types with INTEGER_TYPE
   11524  1.1  mrg      component type only.  */
   11525  1.1  mrg   if (INTEGRAL_TYPE_P (scalar_type)
   11526  1.1  mrg       && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
   11527  1.1  mrg 	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
   11528  1.1  mrg     scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
   11529  1.1  mrg 						  TYPE_UNSIGNED (scalar_type));
   11530  1.1  mrg 
   11531  1.1  mrg   /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
   11532  1.1  mrg      When the component mode passes the above test simply use a type
   11533  1.1  mrg      corresponding to that mode.  The theory is that any use that
   11534  1.1  mrg      would cause problems with this will disable vectorization anyway.  */
   11535  1.1  mrg   else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
   11536  1.1  mrg 	   && !INTEGRAL_TYPE_P (scalar_type))
   11537  1.1  mrg     scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
   11538  1.1  mrg 
   11539  1.1  mrg   /* We can't build a vector type of elements with alignment bigger than
   11540  1.1  mrg      their size.  */
   11541  1.1  mrg   else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
   11542  1.1  mrg     scalar_type = lang_hooks.types.type_for_mode (inner_mode,
   11543  1.1  mrg 						  TYPE_UNSIGNED (scalar_type));
   11544  1.1  mrg 
   11545  1.1  mrg   /* If we felt back to using the mode fail if there was
   11546  1.1  mrg      no scalar type for it.  */
   11547  1.1  mrg   if (scalar_type == NULL_TREE)
   11548  1.1  mrg     return NULL_TREE;
   11549  1.1  mrg 
   11550  1.1  mrg   /* If no prevailing mode was supplied, use the mode the target prefers.
   11551  1.1  mrg      Otherwise lookup a vector mode based on the prevailing mode.  */
   11552  1.1  mrg   if (prevailing_mode == VOIDmode)
   11553  1.1  mrg     {
   11554  1.1  mrg       gcc_assert (known_eq (nunits, 0U));
   11555  1.1  mrg       simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
   11556  1.1  mrg       if (SCALAR_INT_MODE_P (simd_mode))
   11557  1.1  mrg 	{
   11558  1.1  mrg 	  /* Traditional behavior is not to take the integer mode
   11559  1.1  mrg 	     literally, but simply to use it as a way of determining
   11560  1.1  mrg 	     the vector size.  It is up to mode_for_vector to decide
   11561  1.1  mrg 	     what the TYPE_MODE should be.
   11562  1.1  mrg 
   11563  1.1  mrg 	     Note that nunits == 1 is allowed in order to support single
   11564  1.1  mrg 	     element vector types.  */
   11565  1.1  mrg 	  if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
   11566  1.1  mrg 	      || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
   11567  1.1  mrg 	    return NULL_TREE;
   11568  1.1  mrg 	}
   11569  1.1  mrg     }
   11570  1.1  mrg   else if (SCALAR_INT_MODE_P (prevailing_mode)
   11571  1.1  mrg 	   || !related_vector_mode (prevailing_mode,
   11572  1.1  mrg 				    inner_mode, nunits).exists (&simd_mode))
   11573  1.1  mrg     {
   11574  1.1  mrg       /* Fall back to using mode_for_vector, mostly in the hope of being
   11575  1.1  mrg 	 able to use an integer mode.  */
   11576  1.1  mrg       if (known_eq (nunits, 0U)
   11577  1.1  mrg 	  && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
   11578  1.1  mrg 	return NULL_TREE;
   11579  1.1  mrg 
   11580  1.1  mrg       if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
   11581  1.1  mrg 	return NULL_TREE;
   11582  1.1  mrg     }
   11583  1.1  mrg 
   11584  1.1  mrg   vectype = build_vector_type_for_mode (scalar_type, simd_mode);
   11585  1.1  mrg 
   11586  1.1  mrg   /* In cases where the mode was chosen by mode_for_vector, check that
   11587  1.1  mrg      the target actually supports the chosen mode, or that it at least
   11588  1.1  mrg      allows the vector mode to be replaced by a like-sized integer.  */
   11589  1.1  mrg   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
   11590  1.1  mrg       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
   11591  1.1  mrg     return NULL_TREE;
   11592  1.1  mrg 
   11593  1.1  mrg   /* Re-attach the address-space qualifier if we canonicalized the scalar
   11594  1.1  mrg      type.  */
   11595  1.1  mrg   if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
   11596  1.1  mrg     return build_qualified_type
   11597  1.1  mrg 	     (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
   11598  1.1  mrg 
   11599  1.1  mrg   return vectype;
   11600  1.1  mrg }
   11601  1.1  mrg 
   11602  1.1  mrg /* Function get_vectype_for_scalar_type.
   11603  1.1  mrg 
   11604  1.1  mrg    Returns the vector type corresponding to SCALAR_TYPE as supported
   11605  1.1  mrg    by the target.  If GROUP_SIZE is nonzero and we're performing BB
   11606  1.1  mrg    vectorization, make sure that the number of elements in the vector
   11607  1.1  mrg    is no bigger than GROUP_SIZE.  */
   11608  1.1  mrg 
   11609  1.1  mrg tree
   11610  1.1  mrg get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
   11611  1.1  mrg 			     unsigned int group_size)
   11612  1.1  mrg {
   11613  1.1  mrg   /* For BB vectorization, we should always have a group size once we've
   11614  1.1  mrg      constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
   11615  1.1  mrg      are tentative requests during things like early data reference
   11616  1.1  mrg      analysis and pattern recognition.  */
   11617  1.1  mrg   if (is_a <bb_vec_info> (vinfo))
   11618  1.1  mrg     gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
   11619  1.1  mrg   else
   11620  1.1  mrg     group_size = 0;
   11621  1.1  mrg 
   11622  1.1  mrg   tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
   11623  1.1  mrg 						      scalar_type);
   11624  1.1  mrg   if (vectype && vinfo->vector_mode == VOIDmode)
   11625  1.1  mrg     vinfo->vector_mode = TYPE_MODE (vectype);
   11626  1.1  mrg 
   11627  1.1  mrg   /* Register the natural choice of vector type, before the group size
   11628  1.1  mrg      has been applied.  */
   11629  1.1  mrg   if (vectype)
   11630  1.1  mrg     vinfo->used_vector_modes.add (TYPE_MODE (vectype));
   11631  1.1  mrg 
   11632  1.1  mrg   /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
   11633  1.1  mrg      try again with an explicit number of elements.  */
   11634  1.1  mrg   if (vectype
   11635  1.1  mrg       && group_size
   11636  1.1  mrg       && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
   11637  1.1  mrg     {
   11638  1.1  mrg       /* Start with the biggest number of units that fits within
   11639  1.1  mrg 	 GROUP_SIZE and halve it until we find a valid vector type.
   11640  1.1  mrg 	 Usually either the first attempt will succeed or all will
   11641  1.1  mrg 	 fail (in the latter case because GROUP_SIZE is too small
   11642  1.1  mrg 	 for the target), but it's possible that a target could have
   11643  1.1  mrg 	 a hole between supported vector types.
   11644  1.1  mrg 
   11645  1.1  mrg 	 If GROUP_SIZE is not a power of 2, this has the effect of
   11646  1.1  mrg 	 trying the largest power of 2 that fits within the group,
   11647  1.1  mrg 	 even though the group is not a multiple of that vector size.
   11648  1.1  mrg 	 The BB vectorizer will then try to carve up the group into
   11649  1.1  mrg 	 smaller pieces.  */
   11650  1.1  mrg       unsigned int nunits = 1 << floor_log2 (group_size);
   11651  1.1  mrg       do
   11652  1.1  mrg 	{
   11653  1.1  mrg 	  vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
   11654  1.1  mrg 							 scalar_type, nunits);
   11655  1.1  mrg 	  nunits /= 2;
   11656  1.1  mrg 	}
   11657  1.1  mrg       while (nunits > 1 && !vectype);
   11658  1.1  mrg     }
   11659  1.1  mrg 
   11660  1.1  mrg   return vectype;
   11661  1.1  mrg }
   11662  1.1  mrg 
   11663  1.1  mrg /* Return the vector type corresponding to SCALAR_TYPE as supported
   11664  1.1  mrg    by the target.  NODE, if nonnull, is the SLP tree node that will
   11665  1.1  mrg    use the returned vector type.  */
   11666  1.1  mrg 
   11667  1.1  mrg tree
   11668  1.1  mrg get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
   11669  1.1  mrg {
   11670  1.1  mrg   unsigned int group_size = 0;
   11671  1.1  mrg   if (node)
   11672  1.1  mrg     group_size = SLP_TREE_LANES (node);
   11673  1.1  mrg   return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
   11674  1.1  mrg }
   11675  1.1  mrg 
   11676  1.1  mrg /* Function get_mask_type_for_scalar_type.
   11677  1.1  mrg 
   11678  1.1  mrg    Returns the mask type corresponding to a result of comparison
   11679  1.1  mrg    of vectors of specified SCALAR_TYPE as supported by target.
   11680  1.1  mrg    If GROUP_SIZE is nonzero and we're performing BB vectorization,
   11681  1.1  mrg    make sure that the number of elements in the vector is no bigger
   11682  1.1  mrg    than GROUP_SIZE.  */
   11683  1.1  mrg 
   11684  1.1  mrg tree
   11685  1.1  mrg get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
   11686  1.1  mrg 			       unsigned int group_size)
   11687  1.1  mrg {
   11688  1.1  mrg   tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
   11689  1.1  mrg 
   11690  1.1  mrg   if (!vectype)
   11691  1.1  mrg     return NULL;
   11692  1.1  mrg 
   11693  1.1  mrg   return truth_type_for (vectype);
   11694  1.1  mrg }
   11695  1.1  mrg 
   11696  1.1  mrg /* Function get_same_sized_vectype
   11697  1.1  mrg 
   11698  1.1  mrg    Returns a vector type corresponding to SCALAR_TYPE of size
   11699  1.1  mrg    VECTOR_TYPE if supported by the target.  */
   11700  1.1  mrg 
   11701  1.1  mrg tree
   11702  1.1  mrg get_same_sized_vectype (tree scalar_type, tree vector_type)
   11703  1.1  mrg {
   11704  1.1  mrg   if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
   11705  1.1  mrg     return truth_type_for (vector_type);
   11706  1.1  mrg 
   11707  1.1  mrg   poly_uint64 nunits;
   11708  1.1  mrg   if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
   11709  1.1  mrg 		   GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
   11710  1.1  mrg     return NULL_TREE;
   11711  1.1  mrg 
   11712  1.1  mrg   return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
   11713  1.1  mrg 					      scalar_type, nunits);
   11714  1.1  mrg }
   11715  1.1  mrg 
   11716  1.1  mrg /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
   11717  1.1  mrg    would not change the chosen vector modes.  */
   11718  1.1  mrg 
   11719  1.1  mrg bool
   11720  1.1  mrg vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
   11721  1.1  mrg {
   11722  1.1  mrg   for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
   11723  1.1  mrg        i != vinfo->used_vector_modes.end (); ++i)
   11724  1.1  mrg     if (!VECTOR_MODE_P (*i)
   11725  1.1  mrg 	|| related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
   11726  1.1  mrg       return false;
   11727  1.1  mrg   return true;
   11728  1.1  mrg }
   11729  1.1  mrg 
   11730  1.1  mrg /* Function vect_is_simple_use.
   11731  1.1  mrg 
   11732  1.1  mrg    Input:
   11733  1.1  mrg    VINFO - the vect info of the loop or basic block that is being vectorized.
   11734  1.1  mrg    OPERAND - operand in the loop or bb.
   11735  1.1  mrg    Output:
   11736  1.1  mrg    DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
   11737  1.1  mrg      case OPERAND is an SSA_NAME that is defined in the vectorizable region
   11738  1.1  mrg    DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
   11739  1.1  mrg      the definition could be anywhere in the function
   11740  1.1  mrg    DT - the type of definition
   11741  1.1  mrg 
   11742  1.1  mrg    Returns whether a stmt with OPERAND can be vectorized.
   11743  1.1  mrg    For loops, supportable operands are constants, loop invariants, and operands
   11744  1.1  mrg    that are defined by the current iteration of the loop.  Unsupportable
   11745  1.1  mrg    operands are those that are defined by a previous iteration of the loop (as
   11746  1.1  mrg    is the case in reduction/induction computations).
   11747  1.1  mrg    For basic blocks, supportable operands are constants and bb invariants.
   11748  1.1  mrg    For now, operands defined outside the basic block are not supported.  */
   11749  1.1  mrg 
   11750  1.1  mrg bool
   11751  1.1  mrg vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
   11752  1.1  mrg 		    stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
   11753  1.1  mrg {
   11754  1.1  mrg   if (def_stmt_info_out)
   11755  1.1  mrg     *def_stmt_info_out = NULL;
   11756  1.1  mrg   if (def_stmt_out)
   11757  1.1  mrg     *def_stmt_out = NULL;
   11758  1.1  mrg   *dt = vect_unknown_def_type;
   11759  1.1  mrg 
   11760  1.1  mrg   if (dump_enabled_p ())
   11761  1.1  mrg     {
   11762  1.1  mrg       dump_printf_loc (MSG_NOTE, vect_location,
   11763  1.1  mrg                        "vect_is_simple_use: operand ");
   11764  1.1  mrg       if (TREE_CODE (operand) == SSA_NAME
   11765  1.1  mrg 	  && !SSA_NAME_IS_DEFAULT_DEF (operand))
   11766  1.1  mrg 	dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
   11767  1.1  mrg       else
   11768  1.1  mrg 	dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
   11769  1.1  mrg     }
   11770  1.1  mrg 
   11771  1.1  mrg   if (CONSTANT_CLASS_P (operand))
   11772  1.1  mrg     *dt = vect_constant_def;
   11773  1.1  mrg   else if (is_gimple_min_invariant (operand))
   11774  1.1  mrg     *dt = vect_external_def;
   11775  1.1  mrg   else if (TREE_CODE (operand) != SSA_NAME)
   11776  1.1  mrg     *dt = vect_unknown_def_type;
   11777  1.1  mrg   else if (SSA_NAME_IS_DEFAULT_DEF (operand))
   11778  1.1  mrg     *dt = vect_external_def;
   11779  1.1  mrg   else
   11780  1.1  mrg     {
   11781  1.1  mrg       gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
   11782  1.1  mrg       stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
   11783  1.1  mrg       if (!stmt_vinfo)
   11784  1.1  mrg 	*dt = vect_external_def;
   11785  1.1  mrg       else
   11786  1.1  mrg 	{
   11787  1.1  mrg 	  stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
   11788  1.1  mrg 	  def_stmt = stmt_vinfo->stmt;
   11789  1.1  mrg 	  *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
   11790  1.1  mrg 	  if (def_stmt_info_out)
   11791  1.1  mrg 	    *def_stmt_info_out = stmt_vinfo;
   11792  1.1  mrg 	}
   11793  1.1  mrg       if (def_stmt_out)
   11794  1.1  mrg 	*def_stmt_out = def_stmt;
   11795  1.1  mrg     }
   11796  1.1  mrg 
   11797  1.1  mrg   if (dump_enabled_p ())
   11798  1.1  mrg     {
   11799  1.1  mrg       dump_printf (MSG_NOTE, ", type of def: ");
   11800  1.1  mrg       switch (*dt)
   11801  1.1  mrg 	{
   11802  1.1  mrg 	case vect_uninitialized_def:
   11803  1.1  mrg 	  dump_printf (MSG_NOTE, "uninitialized\n");
   11804  1.1  mrg 	  break;
   11805  1.1  mrg 	case vect_constant_def:
   11806  1.1  mrg 	  dump_printf (MSG_NOTE, "constant\n");
   11807  1.1  mrg 	  break;
   11808  1.1  mrg 	case vect_external_def:
   11809  1.1  mrg 	  dump_printf (MSG_NOTE, "external\n");
   11810  1.1  mrg 	  break;
   11811  1.1  mrg 	case vect_internal_def:
   11812  1.1  mrg 	  dump_printf (MSG_NOTE, "internal\n");
   11813  1.1  mrg 	  break;
   11814  1.1  mrg 	case vect_induction_def:
   11815  1.1  mrg 	  dump_printf (MSG_NOTE, "induction\n");
   11816  1.1  mrg 	  break;
   11817  1.1  mrg 	case vect_reduction_def:
   11818  1.1  mrg 	  dump_printf (MSG_NOTE, "reduction\n");
   11819  1.1  mrg 	  break;
   11820  1.1  mrg 	case vect_double_reduction_def:
   11821  1.1  mrg 	  dump_printf (MSG_NOTE, "double reduction\n");
   11822  1.1  mrg 	  break;
   11823  1.1  mrg 	case vect_nested_cycle:
   11824  1.1  mrg 	  dump_printf (MSG_NOTE, "nested cycle\n");
   11825  1.1  mrg 	  break;
   11826  1.1  mrg 	case vect_unknown_def_type:
   11827  1.1  mrg 	  dump_printf (MSG_NOTE, "unknown\n");
   11828  1.1  mrg 	  break;
   11829  1.1  mrg 	}
   11830  1.1  mrg     }
   11831  1.1  mrg 
   11832  1.1  mrg   if (*dt == vect_unknown_def_type)
   11833  1.1  mrg     {
   11834  1.1  mrg       if (dump_enabled_p ())
   11835  1.1  mrg         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   11836  1.1  mrg                          "Unsupported pattern.\n");
   11837  1.1  mrg       return false;
   11838  1.1  mrg     }
   11839  1.1  mrg 
   11840  1.1  mrg   return true;
   11841  1.1  mrg }
   11842  1.1  mrg 
   11843  1.1  mrg /* Function vect_is_simple_use.
   11844  1.1  mrg 
   11845  1.1  mrg    Same as vect_is_simple_use but also determines the vector operand
   11846  1.1  mrg    type of OPERAND and stores it to *VECTYPE.  If the definition of
   11847  1.1  mrg    OPERAND is vect_uninitialized_def, vect_constant_def or
   11848  1.1  mrg    vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   11849  1.1  mrg    is responsible to compute the best suited vector type for the
   11850  1.1  mrg    scalar operand.  */
   11851  1.1  mrg 
   11852  1.1  mrg bool
   11853  1.1  mrg vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
   11854  1.1  mrg 		    tree *vectype, stmt_vec_info *def_stmt_info_out,
   11855  1.1  mrg 		    gimple **def_stmt_out)
   11856  1.1  mrg {
   11857  1.1  mrg   stmt_vec_info def_stmt_info;
   11858  1.1  mrg   gimple *def_stmt;
   11859  1.1  mrg   if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
   11860  1.1  mrg     return false;
   11861  1.1  mrg 
   11862  1.1  mrg   if (def_stmt_out)
   11863  1.1  mrg     *def_stmt_out = def_stmt;
   11864  1.1  mrg   if (def_stmt_info_out)
   11865  1.1  mrg     *def_stmt_info_out = def_stmt_info;
   11866  1.1  mrg 
   11867  1.1  mrg   /* Now get a vector type if the def is internal, otherwise supply
   11868  1.1  mrg      NULL_TREE and leave it up to the caller to figure out a proper
   11869  1.1  mrg      type for the use stmt.  */
   11870  1.1  mrg   if (*dt == vect_internal_def
   11871  1.1  mrg       || *dt == vect_induction_def
   11872  1.1  mrg       || *dt == vect_reduction_def
   11873  1.1  mrg       || *dt == vect_double_reduction_def
   11874  1.1  mrg       || *dt == vect_nested_cycle)
   11875  1.1  mrg     {
   11876  1.1  mrg       *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
   11877  1.1  mrg       gcc_assert (*vectype != NULL_TREE);
   11878  1.1  mrg       if (dump_enabled_p ())
   11879  1.1  mrg 	dump_printf_loc (MSG_NOTE, vect_location,
   11880  1.1  mrg 			 "vect_is_simple_use: vectype %T\n", *vectype);
   11881  1.1  mrg     }
   11882  1.1  mrg   else if (*dt == vect_uninitialized_def
   11883  1.1  mrg 	   || *dt == vect_constant_def
   11884  1.1  mrg 	   || *dt == vect_external_def)
   11885  1.1  mrg     *vectype = NULL_TREE;
   11886  1.1  mrg   else
   11887  1.1  mrg     gcc_unreachable ();
   11888  1.1  mrg 
   11889  1.1  mrg   return true;
   11890  1.1  mrg }
   11891  1.1  mrg 
   11892  1.1  mrg /* Function vect_is_simple_use.
   11893  1.1  mrg 
   11894  1.1  mrg    Same as vect_is_simple_use but determines the operand by operand
   11895  1.1  mrg    position OPERAND from either STMT or SLP_NODE, filling in *OP
   11896  1.1  mrg    and *SLP_DEF (when SLP_NODE is not NULL).  */
   11897  1.1  mrg 
   11898  1.1  mrg bool
   11899  1.1  mrg vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
   11900  1.1  mrg 		    unsigned operand, tree *op, slp_tree *slp_def,
   11901  1.1  mrg 		    enum vect_def_type *dt,
   11902  1.1  mrg 		    tree *vectype, stmt_vec_info *def_stmt_info_out)
   11903  1.1  mrg {
   11904  1.1  mrg   if (slp_node)
   11905  1.1  mrg     {
   11906  1.1  mrg       slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
   11907  1.1  mrg       *slp_def = child;
   11908  1.1  mrg       *vectype = SLP_TREE_VECTYPE (child);
   11909  1.1  mrg       if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
   11910  1.1  mrg 	{
   11911  1.1  mrg 	  *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
   11912  1.1  mrg 	  return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
   11913  1.1  mrg 	}
   11914  1.1  mrg       else
   11915  1.1  mrg 	{
   11916  1.1  mrg 	  if (def_stmt_info_out)
   11917  1.1  mrg 	    *def_stmt_info_out = NULL;
   11918  1.1  mrg 	  *op = SLP_TREE_SCALAR_OPS (child)[0];
   11919  1.1  mrg 	  *dt = SLP_TREE_DEF_TYPE (child);
   11920  1.1  mrg 	  return true;
   11921  1.1  mrg 	}
   11922  1.1  mrg     }
   11923  1.1  mrg   else
   11924  1.1  mrg     {
   11925  1.1  mrg       *slp_def = NULL;
   11926  1.1  mrg       if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
   11927  1.1  mrg 	{
   11928  1.1  mrg 	  if (gimple_assign_rhs_code (ass) == COND_EXPR
   11929  1.1  mrg 	      && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
   11930  1.1  mrg 	    {
   11931  1.1  mrg 	      if (operand < 2)
   11932  1.1  mrg 		*op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
   11933  1.1  mrg 	      else
   11934  1.1  mrg 		*op = gimple_op (ass, operand);
   11935  1.1  mrg 	    }
   11936  1.1  mrg 	  else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
   11937  1.1  mrg 	    *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
   11938  1.1  mrg 	  else
   11939  1.1  mrg 	    *op = gimple_op (ass, operand + 1);
   11940  1.1  mrg 	}
   11941  1.1  mrg       else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
   11942  1.1  mrg 	*op = gimple_call_arg (call, operand);
   11943  1.1  mrg       else
   11944  1.1  mrg 	gcc_unreachable ();
   11945  1.1  mrg       return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
   11946  1.1  mrg     }
   11947  1.1  mrg }
   11948  1.1  mrg 
   11949  1.1  mrg /* If OP is not NULL and is external or constant update its vector
   11950  1.1  mrg    type with VECTYPE.  Returns true if successful or false if not,
   11951  1.1  mrg    for example when conflicting vector types are present.  */
   11952  1.1  mrg 
   11953  1.1  mrg bool
   11954  1.1  mrg vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
   11955  1.1  mrg {
   11956  1.1  mrg   if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
   11957  1.1  mrg     return true;
   11958  1.1  mrg   if (SLP_TREE_VECTYPE (op))
   11959  1.1  mrg     return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
   11960  1.1  mrg   SLP_TREE_VECTYPE (op) = vectype;
   11961  1.1  mrg   return true;
   11962  1.1  mrg }
   11963  1.1  mrg 
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT,
   FIX_TRUNC and WIDEN_MULT.  This function checks if these operations
   are supported by the target platform either directly (via vector
   tree-codes), or via target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).

   Returns true if the widening can be done in one or more steps, false
   otherwise.  On failure *MULTI_STEP_CVT is 0 and *INTERM_TYPES has been
   released.  */

bool
supportable_widening_operation (vec_info *vinfo,
				enum tree_code code, stmt_vec_info stmt_info,
				tree vectype_out, tree vectype_in,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                vec<tree> *interm_types)
{
  loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
  class loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  /* Map the scalar tree code CODE to the LO/HI (or even/odd) pair of
     vector tree codes C1/C2 that implement it.  */
  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
	 two vectors (because the widened results do not fit into one vector).
	 The generated vector results would normally be expected to be
	 generated in the same order as in the original scalar computation,
	 i.e. if 8 results are generated in each vector iteration, they are
	 to be organized as follows:
		vect1: [res1,res2,res3,res4],
		vect2: [res5,res6,res7,res8].

	 However, in the special case that the result of the widening
	 operation is used in a reduction computation only, the order doesn't
	 matter (because when vectorizing a reduction we change the order of
	 the computation).  Some targets can take advantage of this and
	 generate more efficient code.  For example, targets like Altivec,
	 that support widen_mult using a sequence of {mult_even,mult_odd}
	 generate the following vectors:
		vect1: [res1,res3,res5,res7],
		vect2: [res2,res4,res6,res8].

	 When vectorizing outer-loops, we execute the inner-loop sequentially
	 (each vectorized inner-loop iteration contributes to VF outer-loop
	 iterations in parallel).  We therefore don't allow to change the
	 order of the computation in the inner-loop during outer-loop
	 vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
	 widen and then contract again, e.g. (short)((int)x * y >> 8).
	 Normally, pack_trunc performs an even/odd permute, whereas the
	 repack from an even/odd expansion would be an interleave, which
	 would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
	 are properly set up for the caller.  If we fail, we'll continue with
	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
	  && !nested_in_vect_loop_p (vect_loop, stmt_info)
	  && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
					     stmt_info, vectype_out,
					     vectype_in, code1, code2,
					     multi_step_cvt, interm_types))
        {
          /* Elements in a vector with vect_used_by_reduction property cannot
             be reordered if the use chain with this property does not have the
             same operation.  One such an example is s += a * b, where elements
             in a and b cannot be reordered.  Here we check if the vector defined
             by STMT is only directly used in the reduction statement.  */
	  tree lhs = gimple_assign_lhs (stmt_info->stmt);
	  stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
	  if (use_stmt_info
	      && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
	    return true;
        }
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case DOT_PROD_EXPR:
      c1 = DOT_PROD_EXPR;
      c2 = DOT_PROD_EXPR;
      break;

    case SAD_EXPR:
      c1 = SAD_EXPR;
      c2 = SAD_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    case WIDEN_PLUS_EXPR:
      c1 = VEC_WIDEN_PLUS_LO_EXPR;
      c2 = VEC_WIDEN_PLUS_HI_EXPR;
      break;

    case WIDEN_MINUS_EXPR:
      c1 = VEC_WIDEN_MINUS_LO_EXPR;
      c2 = VEC_WIDEN_MINUS_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
      c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
      break;

    default:
      gcc_unreachable ();
    }

  /* The LO/HI pairs are endian-dependent; the even/odd pair is not, so it
     is exempt from the swap.  */
  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    std::swap (c1, c2);

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else if (CONVERT_EXPR_CODE_P (code)
	   && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
	   && VECTOR_BOOLEAN_TYPE_P (vectype)
	   && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
	   && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
    {
      /* If the input and result modes are the same, a different optab
	 is needed where we pass in the number of units in vectype.  */
      optab1 = vec_unpacks_sbool_lo_optab;
      optab2 = vec_unpacks_sbool_hi_optab;
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  /* Both halves must have an instruction for the input vector mode.  */
  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
       || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  /* Single-step case: the instructions already produce the wide mode.  */
  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    {
      if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	return true;
      /* For scalar masks we may have different boolean
	 vector types having the same QImode.  Thus we
	 add additional check for elements number.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
		    TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
	return true;
    }

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  /* Only plain conversions support chaining through intermediate types.  */
  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      /* The next step starts from the result mode of the previous one.  */
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	intermediate_type
	  = vect_halve_mask_nunits (prev_type, intermediate_mode);
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode,
					    TYPE_UNSIGNED (prev_type));

      if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
	  && intermediate_mode == prev_mode
	  && SCALAR_INT_MODE_P (prev_mode))
	{
	  /* If the input and result modes are the same, a different optab
	     is needed where we pass in the number of units in vectype.  */
	  optab3 = vec_unpacks_sbool_lo_optab;
	  optab4 = vec_unpacks_sbool_hi_optab;
	}
      else
	{
	  optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
	  optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
	}

      /* Both the step into the intermediate type (optab1/optab2 on
	 PREV_MODE) and the step out of it (optab3/optab4 on
	 INTERMEDIATE_MODE) must be supported.  */
      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode2].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (optab3, intermediate_mode))
	      == CODE_FOR_nothing)
	  || ((icode2 = optab_handler (optab4, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	{
	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	    return true;
	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
			TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
	    return true;
	}

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  /* No chain of at most MAX_INTERM_CVT_STEPS steps reached the wide type;
     discard any partially collected intermediate types.  */
  interm_types->release ();
  return false;
}
   12241  1.1  mrg 
   12242  1.1  mrg 
   12243  1.1  mrg /* Function supportable_narrowing_operation
   12244  1.1  mrg 
   12245  1.1  mrg    Check whether an operation represented by the code CODE is a
   12246  1.1  mrg    narrowing operation that is supported by the target platform in
   12247  1.1  mrg    vector form (i.e., when operating on arguments of type VECTYPE_IN
   12248  1.1  mrg    and producing a result of type VECTYPE_OUT).
   12249  1.1  mrg 
   12250  1.1  mrg    Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
   12251  1.1  mrg    and FLOAT.  This function checks if these operations are supported by
   12252  1.1  mrg    the target platform directly via vector tree-codes.
   12253  1.1  mrg 
   12254  1.1  mrg    Output:
   12255  1.1  mrg    - CODE1 is the code of a vector operation to be used when
   12256  1.1  mrg    vectorizing the operation, if available.
   12257  1.1  mrg    - MULTI_STEP_CVT determines the number of required intermediate steps in
   12258  1.1  mrg    case of multi-step conversion (like int->short->char - in that case
   12259  1.1  mrg    MULTI_STEP_CVT will be 1).
   12260  1.1  mrg    - INTERM_TYPES contains the intermediate type required to perform the
   12261  1.1  mrg    narrowing operation (short in the above example).   */
   12262  1.1  mrg 
   12263  1.1  mrg bool
   12264  1.1  mrg supportable_narrowing_operation (enum tree_code code,
   12265  1.1  mrg 				 tree vectype_out, tree vectype_in,
   12266  1.1  mrg 				 enum tree_code *code1, int *multi_step_cvt,
   12267  1.1  mrg                                  vec<tree> *interm_types)
   12268  1.1  mrg {
   12269  1.1  mrg   machine_mode vec_mode;
   12270  1.1  mrg   enum insn_code icode1;
   12271  1.1  mrg   optab optab1, interm_optab;
   12272  1.1  mrg   tree vectype = vectype_in;
   12273  1.1  mrg   tree narrow_vectype = vectype_out;
   12274  1.1  mrg   enum tree_code c1;
   12275  1.1  mrg   tree intermediate_type, prev_type;
   12276  1.1  mrg   machine_mode intermediate_mode, prev_mode;
   12277  1.1  mrg   int i;
   12278  1.1  mrg   unsigned HOST_WIDE_INT n_elts;
   12279  1.1  mrg   bool uns;
   12280  1.1  mrg 
   12281  1.1  mrg   *multi_step_cvt = 0;
   12282  1.1  mrg   switch (code)
   12283  1.1  mrg     {
   12284  1.1  mrg     CASE_CONVERT:
   12285  1.1  mrg       c1 = VEC_PACK_TRUNC_EXPR;
   12286  1.1  mrg       if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
   12287  1.1  mrg 	  && VECTOR_BOOLEAN_TYPE_P (vectype)
   12288  1.1  mrg 	  && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
   12289  1.1  mrg 	  && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
   12290  1.1  mrg 	  && n_elts < BITS_PER_UNIT)
   12291  1.1  mrg 	optab1 = vec_pack_sbool_trunc_optab;
   12292  1.1  mrg       else
   12293  1.1  mrg 	optab1 = optab_for_tree_code (c1, vectype, optab_default);
   12294  1.1  mrg       break;
   12295  1.1  mrg 
   12296  1.1  mrg     case FIX_TRUNC_EXPR:
   12297  1.1  mrg       c1 = VEC_PACK_FIX_TRUNC_EXPR;
   12298  1.1  mrg       /* The signedness is determined from output operand.  */
   12299  1.1  mrg       optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
   12300  1.1  mrg       break;
   12301  1.1  mrg 
   12302  1.1  mrg     case FLOAT_EXPR:
   12303  1.1  mrg       c1 = VEC_PACK_FLOAT_EXPR;
   12304  1.1  mrg       optab1 = optab_for_tree_code (c1, vectype, optab_default);
   12305  1.1  mrg       break;
   12306  1.1  mrg 
   12307  1.1  mrg     default:
   12308  1.1  mrg       gcc_unreachable ();
   12309  1.1  mrg     }
   12310  1.1  mrg 
   12311  1.1  mrg   if (!optab1)
   12312  1.1  mrg     return false;
   12313  1.1  mrg 
   12314  1.1  mrg   vec_mode = TYPE_MODE (vectype);
   12315  1.1  mrg   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
   12316  1.1  mrg     return false;
   12317  1.1  mrg 
   12318  1.1  mrg   *code1 = c1;
   12319  1.1  mrg 
   12320  1.1  mrg   if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
   12321  1.1  mrg     {
   12322  1.1  mrg       if (!VECTOR_BOOLEAN_TYPE_P (vectype))
   12323  1.1  mrg 	return true;
   12324  1.1  mrg       /* For scalar masks we may have different boolean
   12325  1.1  mrg 	 vector types having the same QImode.  Thus we
   12326  1.1  mrg 	 add additional check for elements number.  */
   12327  1.1  mrg       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
   12328  1.1  mrg 		    TYPE_VECTOR_SUBPARTS (narrow_vectype)))
   12329  1.1  mrg 	return true;
   12330  1.1  mrg     }
   12331  1.1  mrg 
   12332  1.1  mrg   if (code == FLOAT_EXPR)
   12333  1.1  mrg     return false;
   12334  1.1  mrg 
   12335  1.1  mrg   /* Check if it's a multi-step conversion that can be done using intermediate
   12336  1.1  mrg      types.  */
   12337  1.1  mrg   prev_mode = vec_mode;
   12338  1.1  mrg   prev_type = vectype;
   12339  1.1  mrg   if (code == FIX_TRUNC_EXPR)
   12340  1.1  mrg     uns = TYPE_UNSIGNED (vectype_out);
   12341  1.1  mrg   else
   12342  1.1  mrg     uns = TYPE_UNSIGNED (vectype);
   12343  1.1  mrg 
   12344  1.1  mrg   /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
   12345  1.1  mrg      conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
   12346  1.1  mrg      costly than signed.  */
   12347  1.1  mrg   if (code == FIX_TRUNC_EXPR && uns)
   12348  1.1  mrg     {
   12349  1.1  mrg       enum insn_code icode2;
   12350  1.1  mrg 
   12351  1.1  mrg       intermediate_type
   12352  1.1  mrg 	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
   12353  1.1  mrg       interm_optab
   12354  1.1  mrg 	= optab_for_tree_code (c1, intermediate_type, optab_default);
   12355  1.1  mrg       if (interm_optab != unknown_optab
   12356  1.1  mrg 	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
   12357  1.1  mrg 	  && insn_data[icode1].operand[0].mode
   12358  1.1  mrg 	     == insn_data[icode2].operand[0].mode)
   12359  1.1  mrg 	{
   12360  1.1  mrg 	  uns = false;
   12361  1.1  mrg 	  optab1 = interm_optab;
   12362  1.1  mrg 	  icode1 = icode2;
   12363  1.1  mrg 	}
   12364  1.1  mrg     }
   12365  1.1  mrg 
   12366  1.1  mrg   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
   12367  1.1  mrg      intermediate steps in promotion sequence.  We try
   12368  1.1  mrg      MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
   12369  1.1  mrg   interm_types->create (MAX_INTERM_CVT_STEPS);
   12370  1.1  mrg   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
   12371  1.1  mrg     {
   12372  1.1  mrg       intermediate_mode = insn_data[icode1].operand[0].mode;
   12373  1.1  mrg       if (VECTOR_BOOLEAN_TYPE_P (prev_type))
   12374  1.1  mrg 	intermediate_type
   12375  1.1  mrg 	  = vect_double_mask_nunits (prev_type, intermediate_mode);
   12376  1.1  mrg       else
   12377  1.1  mrg 	intermediate_type
   12378  1.1  mrg 	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
   12379  1.1  mrg       if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
   12380  1.1  mrg 	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
   12381  1.1  mrg 	  && SCALAR_INT_MODE_P (prev_mode)
   12382  1.1  mrg 	  && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
   12383  1.1  mrg 	  && n_elts < BITS_PER_UNIT)
   12384  1.1  mrg 	interm_optab = vec_pack_sbool_trunc_optab;
   12385  1.1  mrg       else
   12386  1.1  mrg 	interm_optab
   12387  1.1  mrg 	  = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
   12388  1.1  mrg 				 optab_default);
   12389  1.1  mrg       if (!interm_optab
   12390  1.1  mrg 	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
   12391  1.1  mrg 	  || insn_data[icode1].operand[0].mode != intermediate_mode
   12392  1.1  mrg 	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
   12393  1.1  mrg 	      == CODE_FOR_nothing))
   12394  1.1  mrg 	break;
   12395  1.1  mrg 
   12396  1.1  mrg       interm_types->quick_push (intermediate_type);
   12397  1.1  mrg       (*multi_step_cvt)++;
   12398  1.1  mrg 
   12399  1.1  mrg       if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
   12400  1.1  mrg 	{
   12401  1.1  mrg 	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
   12402  1.1  mrg 	    return true;
   12403  1.1  mrg 	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
   12404  1.1  mrg 			TYPE_VECTOR_SUBPARTS (narrow_vectype)))
   12405  1.1  mrg 	    return true;
   12406  1.1  mrg 	}
   12407  1.1  mrg 
   12408  1.1  mrg       prev_mode = intermediate_mode;
   12409  1.1  mrg       prev_type = intermediate_type;
   12410  1.1  mrg       optab1 = interm_optab;
   12411  1.1  mrg     }
   12412  1.1  mrg 
   12413  1.1  mrg   interm_types->release ();
   12414  1.1  mrg   return false;
   12415  1.1  mrg }
   12416  1.1  mrg 
   12417  1.1  mrg /* Generate and return a vector mask of MASK_TYPE such that
   12418  1.1  mrg    mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
   12419  1.1  mrg    Add the statements to SEQ.  */
   12420  1.1  mrg 
   12421  1.1  mrg tree
   12422  1.1  mrg vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
   12423  1.1  mrg 		tree end_index, const char *name)
   12424  1.1  mrg {
   12425  1.1  mrg   tree cmp_type = TREE_TYPE (start_index);
   12426  1.1  mrg   gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
   12427  1.1  mrg 						       cmp_type, mask_type,
   12428  1.1  mrg 						       OPTIMIZE_FOR_SPEED));
   12429  1.1  mrg   gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
   12430  1.1  mrg 					    start_index, end_index,
   12431  1.1  mrg 					    build_zero_cst (mask_type));
   12432  1.1  mrg   tree tmp;
   12433  1.1  mrg   if (name)
   12434  1.1  mrg     tmp = make_temp_ssa_name (mask_type, NULL, name);
   12435  1.1  mrg   else
   12436  1.1  mrg     tmp = make_ssa_name (mask_type);
   12437  1.1  mrg   gimple_call_set_lhs (call, tmp);
   12438  1.1  mrg   gimple_seq_add_stmt (seq, call);
   12439  1.1  mrg   return tmp;
   12440  1.1  mrg }
   12441  1.1  mrg 
   12442  1.1  mrg /* Generate a vector mask of type MASK_TYPE for which index I is false iff
   12443  1.1  mrg    J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */
   12444  1.1  mrg 
   12445  1.1  mrg tree
   12446  1.1  mrg vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
   12447  1.1  mrg 		    tree end_index)
   12448  1.1  mrg {
   12449  1.1  mrg   tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
   12450  1.1  mrg   return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
   12451  1.1  mrg }
   12452  1.1  mrg 
   12453  1.1  mrg /* Try to compute the vector types required to vectorize STMT_INFO,
   12454  1.1  mrg    returning true on success and false if vectorization isn't possible.
   12455  1.1  mrg    If GROUP_SIZE is nonzero and we're performing BB vectorization,
   12456  1.1  mrg    take sure that the number of elements in the vectors is no bigger
   12457  1.1  mrg    than GROUP_SIZE.
   12458  1.1  mrg 
   12459  1.1  mrg    On success:
   12460  1.1  mrg 
   12461  1.1  mrg    - Set *STMT_VECTYPE_OUT to:
   12462  1.1  mrg      - NULL_TREE if the statement doesn't need to be vectorized;
   12463  1.1  mrg      - the equivalent of STMT_VINFO_VECTYPE otherwise.
   12464  1.1  mrg 
   12465  1.1  mrg    - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
   12466  1.1  mrg      number of units needed to vectorize STMT_INFO, or NULL_TREE if the
   12467  1.1  mrg      statement does not help to determine the overall number of units.  */
   12468  1.1  mrg 
   12469  1.1  mrg opt_result
   12470  1.1  mrg vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
   12471  1.1  mrg 				tree *stmt_vectype_out,
   12472  1.1  mrg 				tree *nunits_vectype_out,
   12473  1.1  mrg 				unsigned int group_size)
   12474  1.1  mrg {
   12475  1.1  mrg   gimple *stmt = stmt_info->stmt;
   12476  1.1  mrg 
   12477  1.1  mrg   /* For BB vectorization, we should always have a group size once we've
   12478  1.1  mrg      constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
   12479  1.1  mrg      are tentative requests during things like early data reference
   12480  1.1  mrg      analysis and pattern recognition.  */
   12481  1.1  mrg   if (is_a <bb_vec_info> (vinfo))
   12482  1.1  mrg     gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
   12483  1.1  mrg   else
   12484  1.1  mrg     group_size = 0;
   12485  1.1  mrg 
   12486  1.1  mrg   *stmt_vectype_out = NULL_TREE;
   12487  1.1  mrg   *nunits_vectype_out = NULL_TREE;
   12488  1.1  mrg 
   12489  1.1  mrg   if (gimple_get_lhs (stmt) == NULL_TREE
   12490  1.1  mrg       /* MASK_STORE has no lhs, but is ok.  */
   12491  1.1  mrg       && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
   12492  1.1  mrg     {
   12493  1.1  mrg       if (is_a <gcall *> (stmt))
   12494  1.1  mrg 	{
   12495  1.1  mrg 	  /* Ignore calls with no lhs.  These must be calls to
   12496  1.1  mrg 	     #pragma omp simd functions, and what vectorization factor
   12497  1.1  mrg 	     it really needs can't be determined until
   12498  1.1  mrg 	     vectorizable_simd_clone_call.  */
   12499  1.1  mrg 	  if (dump_enabled_p ())
   12500  1.1  mrg 	    dump_printf_loc (MSG_NOTE, vect_location,
   12501  1.1  mrg 			     "defer to SIMD clone analysis.\n");
   12502  1.1  mrg 	  return opt_result::success ();
   12503  1.1  mrg 	}
   12504  1.1  mrg 
   12505  1.1  mrg       return opt_result::failure_at (stmt,
   12506  1.1  mrg 				     "not vectorized: irregular stmt.%G", stmt);
   12507  1.1  mrg     }
   12508  1.1  mrg 
   12509  1.1  mrg   tree vectype;
   12510  1.1  mrg   tree scalar_type = NULL_TREE;
   12511  1.1  mrg   if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
   12512  1.1  mrg     {
   12513  1.1  mrg       vectype = STMT_VINFO_VECTYPE (stmt_info);
   12514  1.1  mrg       if (dump_enabled_p ())
   12515  1.1  mrg 	dump_printf_loc (MSG_NOTE, vect_location,
   12516  1.1  mrg 			 "precomputed vectype: %T\n", vectype);
   12517  1.1  mrg     }
   12518  1.1  mrg   else if (vect_use_mask_type_p (stmt_info))
   12519  1.1  mrg     {
   12520  1.1  mrg       unsigned int precision = stmt_info->mask_precision;
   12521  1.1  mrg       scalar_type = build_nonstandard_integer_type (precision, 1);
   12522  1.1  mrg       vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
   12523  1.1  mrg       if (!vectype)
   12524  1.1  mrg 	return opt_result::failure_at (stmt, "not vectorized: unsupported"
   12525  1.1  mrg 				       " data-type %T\n", scalar_type);
   12526  1.1  mrg       if (dump_enabled_p ())
   12527  1.1  mrg 	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
   12528  1.1  mrg     }
   12529  1.1  mrg   else
   12530  1.1  mrg     {
   12531  1.1  mrg       if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
   12532  1.1  mrg 	scalar_type = TREE_TYPE (DR_REF (dr));
   12533  1.1  mrg       else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
   12534  1.1  mrg 	scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
   12535  1.1  mrg       else
   12536  1.1  mrg 	scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
   12537  1.1  mrg 
   12538  1.1  mrg       if (dump_enabled_p ())
   12539  1.1  mrg 	{
   12540  1.1  mrg 	  if (group_size)
   12541  1.1  mrg 	    dump_printf_loc (MSG_NOTE, vect_location,
   12542  1.1  mrg 			     "get vectype for scalar type (group size %d):"
   12543  1.1  mrg 			     " %T\n", group_size, scalar_type);
   12544  1.1  mrg 	  else
   12545  1.1  mrg 	    dump_printf_loc (MSG_NOTE, vect_location,
   12546  1.1  mrg 			     "get vectype for scalar type: %T\n", scalar_type);
   12547  1.1  mrg 	}
   12548  1.1  mrg       vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
   12549  1.1  mrg       if (!vectype)
   12550  1.1  mrg 	return opt_result::failure_at (stmt,
   12551  1.1  mrg 				       "not vectorized:"
   12552  1.1  mrg 				       " unsupported data-type %T\n",
   12553  1.1  mrg 				       scalar_type);
   12554  1.1  mrg 
   12555  1.1  mrg       if (dump_enabled_p ())
   12556  1.1  mrg 	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
   12557  1.1  mrg     }
   12558  1.1  mrg 
   12559  1.1  mrg   if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
   12560  1.1  mrg     return opt_result::failure_at (stmt,
   12561  1.1  mrg 				   "not vectorized: vector stmt in loop:%G",
   12562  1.1  mrg 				   stmt);
   12563  1.1  mrg 
   12564  1.1  mrg   *stmt_vectype_out = vectype;
   12565  1.1  mrg 
   12566  1.1  mrg   /* Don't try to compute scalar types if the stmt produces a boolean
   12567  1.1  mrg      vector; use the existing vector type instead.  */
   12568  1.1  mrg   tree nunits_vectype = vectype;
   12569  1.1  mrg   if (!VECTOR_BOOLEAN_TYPE_P (vectype))
   12570  1.1  mrg     {
   12571  1.1  mrg       /* The number of units is set according to the smallest scalar
   12572  1.1  mrg 	 type (or the largest vector size, but we only support one
   12573  1.1  mrg 	 vector size per vectorization).  */
   12574  1.1  mrg       scalar_type = vect_get_smallest_scalar_type (stmt_info,
   12575  1.1  mrg 						   TREE_TYPE (vectype));
   12576  1.1  mrg       if (scalar_type != TREE_TYPE (vectype))
   12577  1.1  mrg 	{
   12578  1.1  mrg 	  if (dump_enabled_p ())
   12579  1.1  mrg 	    dump_printf_loc (MSG_NOTE, vect_location,
   12580  1.1  mrg 			     "get vectype for smallest scalar type: %T\n",
   12581  1.1  mrg 			     scalar_type);
   12582  1.1  mrg 	  nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
   12583  1.1  mrg 							group_size);
   12584  1.1  mrg 	  if (!nunits_vectype)
   12585  1.1  mrg 	    return opt_result::failure_at
   12586  1.1  mrg 	      (stmt, "not vectorized: unsupported data-type %T\n",
   12587  1.1  mrg 	       scalar_type);
   12588  1.1  mrg 	  if (dump_enabled_p ())
   12589  1.1  mrg 	    dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
   12590  1.1  mrg 			     nunits_vectype);
   12591  1.1  mrg 	}
   12592  1.1  mrg     }
   12593  1.1  mrg 
   12594  1.1  mrg   if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
   12595  1.1  mrg 		   TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
   12596  1.1  mrg     return opt_result::failure_at (stmt,
   12597  1.1  mrg 				   "Not vectorized: Incompatible number "
   12598  1.1  mrg 				   "of vector subparts between %T and %T\n",
   12599  1.1  mrg 				   nunits_vectype, *stmt_vectype_out);
   12600  1.1  mrg 
   12601  1.1  mrg   if (dump_enabled_p ())
   12602  1.1  mrg     {
   12603  1.1  mrg       dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
   12604  1.1  mrg       dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
   12605  1.1  mrg       dump_printf (MSG_NOTE, "\n");
   12606  1.1  mrg     }
   12607  1.1  mrg 
   12608  1.1  mrg   *nunits_vectype_out = nunits_vectype;
   12609  1.1  mrg   return opt_result::success ();
   12610  1.1  mrg }
   12611  1.1  mrg 
/* Generate and return a statement sequence that sets the vector length
   LEN as follows:

   min_of_start_and_end = min (START_INDEX, END_INDEX);
   left_len = END_INDEX - min_of_start_and_end;
   rhs = min (left_len, LEN_LIMIT);
   LEN = rhs;

   Note: the cost of the code generated by this function is modeled
   by vect_estimate_min_profitable_iters, so changes here may need
   corresponding changes there.  */
   12622  1.1  mrg 
   12623  1.1  mrg gimple_seq
   12624  1.1  mrg vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
   12625  1.1  mrg {
   12626  1.1  mrg   gimple_seq stmts = NULL;
   12627  1.1  mrg   tree len_type = TREE_TYPE (len);
   12628  1.1  mrg   gcc_assert (TREE_TYPE (start_index) == len_type);
   12629  1.1  mrg 
   12630  1.1  mrg   tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
   12631  1.1  mrg   tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
   12632  1.1  mrg   tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
   12633  1.1  mrg   gimple* stmt = gimple_build_assign (len, rhs);
   12634  1.1  mrg   gimple_seq_add_stmt (&stmts, stmt);
   12635  1.1  mrg 
   12636  1.1  mrg   return stmts;
   12637  1.1  mrg }
   12638  1.1  mrg 
   12639