/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2022 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit (at) il.ibm.com>
   and Ira Rosen <irar (at) il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "explow.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"
#include "regs.h"
#include "attribs.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (class _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
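
/* Illustrative example (added note, not from the original sources):
   during outer-loop vectorization LOOP_VINFO_LOOP is the outer loop,
   so stmt_in_inner_loop_p returns true exactly for the stmts of the
   innermost loop, e.g. for S1 below:

       for (i = 0; i < n; i++)      // loop being vectorized
         for (j = 0; j < m; j++)    // loop->inner
           a[i][j] = b[i][j];       // S1: stmt_in_inner_loop_p -> true
*/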

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

static unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind,
                  stmt_vec_info stmt_info, slp_tree node,
                  tree vectype, int misalign,
                  enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_scatter_store;

  stmt_info_for_cost si
    = { count, kind, where, stmt_info, node, vectype, misalign };
  body_cost_vec->safe_push (si);

  return (unsigned)
      (builtin_vectorization_cost (kind, vectype, misalign) * count);
}

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  tree vectype, int misalign,
                  enum vect_cost_model_location where)
{
  return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
                           vectype, misalign, where);
}

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, slp_tree node,
                  tree vectype, int misalign,
                  enum vect_cost_model_location where)
{
  return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
                           vectype, misalign, where);
}

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind,
                  enum vect_cost_model_location where)
{
  gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
              || kind == scalar_stmt);
  return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
                           NULL_TREE, 0, where);
}

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}
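
/* Sketch of the array-of-vectors scheme used by the helpers below
   (added illustration; shapes assumed, not taken from the sources):
   for a group of NELEMS interleaved accesses vectorized with
   load/store-lanes, create_vector_array builds a temporary

       vectype vect_array[NELEMS];

   read_vector_array then emits "vect = vect_array[n]" and
   write_vector_array emits "vect_array[n] = vect" before *GSI.  */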

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT_INFO and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (vec_info *vinfo,
                   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT_INFO.  */

static void
write_vector_array (vec_info *vinfo,
                    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                    tree vect, tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Add a clobber of variable VAR to the vectorization of STMT_INFO.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
                       gimple_stmt_iterator *gsi, tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
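
/* Overview (added note): the marking phase below is a classic worklist
   algorithm.  vect_mark_stmts_to_be_vectorized seeds the worklist with
   stmts that are relevant by themselves (vect_stmt_relevant_p), then
   repeatedly pops a stmt and propagates its relevance to the defining
   stmts of its uses (process_use -> vect_mark_relevant).  */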

/* Function vect_mark_relevant.

   Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
                    enum vect_relevant relevant, bool live_p)
{
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "mark relevant %d, live %d: %G", relevant, live_p,
                     stmt_info->stmt);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
         pattern that can potentially be vectorized.  Don't mark the stmt
         as relevant/live because it's not going to be vectorized.
         Instead mark the pattern-stmt that replaces it.  */

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "last stmt in pattern. don't mark"
                         " relevant/live.\n");
      stmt_vec_info old_stmt_info = stmt_info;
      stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt_info);
}


/* Function is_simple_and_all_uses_invariant

   Return true if STMT_INFO is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
                                  loop_vec_info loop_vinfo)
{
  tree op;
  ssa_op_iter iter;

  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &dt))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (dt != vect_external_def && dt != vect_constant_def)
        return false;
    }
  return true;
}
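
/* Note (added): relevance only ever increases.  vect_mark_relevant
   updates STMT_VINFO_RELEVANT with "if (relevant > ...)", so e.g. a
   stmt already marked vect_used_in_scope is never demoted to
   vect_used_only_live, and a stmt is re-queued only when its relevance
   or liveness actually changed.  */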

/* Function vect_stmt_relevant_p.

   Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
   is "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt_info->stmt)
      && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt_info->stmt)
        && !gimple_clobber_p (stmt_info->stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)  */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}
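
/* Illustrative example (added): in

       for (i = 0; i < n; i++)
         last = a[i];
       ... = last;   // after the loop

   the stmt defining LAST has a use outside the loop, so *LIVE_P is
   set, but it has no vdefs and is not a control stmt, so *RELEVANT
   stays vect_unused_in_scope and is then upgraded to
   vect_used_only_live unless all of the stmt's uses are invariant.  */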

/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT_INFO.  Check if USE is
   used in STMT_INFO for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
{
  tree operand;

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || !gimple_assign_copy_p (assign))
    {
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      if (call && gimple_call_internal_p (call))
        {
          internal_fn ifn = gimple_call_internal_fn (call);
          int mask_index = internal_fn_mask_index (ifn);
          if (mask_index >= 0
              && use == gimple_call_arg (call, mask_index))
            return true;
          int stored_value_index = internal_fn_stored_value_index (ifn);
          if (stored_value_index >= 0
              && use == gimple_call_arg (call, stored_value_index))
            return true;
          if (internal_gather_scatter_fn_p (ifn)
              && use == gimple_call_arg (call, 1))
            return true;
        }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (assign);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
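
/* Illustrative example (added): for "a[i_1] = x_2" the use of i_1
   only feeds the address computation, so
   exist_non_indexing_operands_for_use_p (i_1, ...) is false, while
   for the stored value x_2 it is true.  Masks, stored values and
   gather offsets of internal calls are likewise real (non-indexing)
   operands, as handled above.  */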

/*
   Function process_use.

   Inputs:
   - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
       STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array
     indexing), which does not need to be directly vectorized, then the
     liveness/relevance of the respective DEF_STMT is left unchanged.
   - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction
     stmt, we skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
     "relevant" will be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static opt_result
process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
             enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
             bool force)
{
  stmt_vec_info dstmt_vinfo;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
    return opt_result::success ();

  if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
    return opt_result::failure_at (stmt_vinfo->stmt,
                                   "not vectorized:"
                                   " unsupported use in stmt.\n");

  if (!dstmt_vinfo)
    return opt_result::success ();

  basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
  basic_block bb = gimple_bb (stmt_vinfo->stmt);

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
     We have to force the stmt live since the epilogue loop needs it to
     continue computing the reduction.  */
  if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
      return opt_result::success ();
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = dstmt_vinfo
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...              */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = dstmt_vinfo
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
                      || STMT_VINFO_DEF_TYPE (stmt_vinfo)
                         == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
        case vect_used_only_live:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
           && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
           && ! STMT_VINFO_LIVE_P (stmt_vinfo)
           && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
                                      loop_latch_edge (bb->loop_father))
               == use))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "induction value on backedge.\n");
      return opt_result::success ();
    }


  vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
  return opt_result::success ();
}
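
/* Summary of the cross-nest translations above (added for reference,
   derived from the two switches in process_use):

     inner-loop use of an outer-loop def (case 3a):
       vect_used_in_outer_by_reduction  -> vect_used_by_reduction
       vect_used_in_outer               -> vect_used_in_scope
     outer-loop use of an inner-loop def (case 3b):
       vect_used_by_reduction,
       vect_used_only_live              -> vect_used_in_outer_by_reduction
       vect_used_in_scope               -> vect_used_in_outer  */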

/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

opt_result
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int i;
  basic_block bb;
  bool live_p;
  enum vect_relevant relevant;

  DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");

  auto_vec<stmt_vec_info, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
                             phi_info->stmt);

          if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi_info, relevant, live_p);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          if (is_gimple_debug (gsi_stmt (si)))
            continue;
          stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "init: stmt relevant? %G", stmt_info->stmt);

          if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
        }
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt_vec_info stmt_vinfo = worklist.pop ();
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "worklist: examine stmt: %G", stmt_vinfo->stmt);

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines
         it (DEF_STMT) as relevant/irrelevant according to the relevance
         property of STMT.  */
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
         propagated as is to the DEF_STMTs of its USEs.

         One exception is when STMT has been identified as defining a
         reduction variable; in this case we set the relevance to
         vect_used_by_reduction.  This is because we distinguish between two
         kinds of relevant stmts - those that are used by a reduction
         computation, and those that are (also) used by a regular computation.
         This allows us later on to identify stmts that are used solely by a
         reduction, and therefore the order of the results that they produce
         does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
        {
        case vect_reduction_def:
          gcc_assert (relevant != vect_unused_in_scope);
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_in_scope
              && relevant != vect_used_by_reduction
              && relevant != vect_used_only_live)
            return opt_result::failure_at
              (stmt_vinfo->stmt, "unsupported use of reduction.\n");
          break;

        case vect_nested_cycle:
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_in_outer_by_reduction
              && relevant != vect_used_in_outer)
            return opt_result::failure_at
              (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
          break;

        case vect_double_reduction_def:
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_by_reduction
              && relevant != vect_used_only_live)
            return opt_result::failure_at
              (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
          break;

        default:
          break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (assign);
              tree op = gimple_assign_rhs1 (assign);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  opt_result res
                    = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
                                   loop_vinfo, relevant, &worklist, false);
                  if (!res)
                    return res;
                  res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
                                     loop_vinfo, relevant, &worklist, false);
                  if (!res)
                    return res;
                  i = 2;
                }
              for (; i < gimple_num_ops (assign); i++)
                {
                  op = gimple_op (assign, i);
                  if (TREE_CODE (op) == SSA_NAME)
                    {
                      opt_result res
                        = process_use (stmt_vinfo, op, loop_vinfo, relevant,
                                       &worklist, false);
                      if (!res)
                        return res;
                    }
                }
            }
          else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
            {
              for (i = 0; i < gimple_call_num_args (call); i++)
                {
                  tree arg = gimple_call_arg (call, i);
                  opt_result res
                    = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
                                   &worklist, false);
                  if (!res)
                    return res;
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            opt_result res
              = process_use (stmt_vinfo, op, loop_vinfo, relevant,
                             &worklist, false);
            if (!res)
              return res;
          }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
        {
          gather_scatter_info gs_info;
          if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
            gcc_unreachable ();
          opt_result res
            = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
                           &worklist, true);
          if (!res)
            {
              if (fatal)
                *fatal = false;
              return res;
            }
        }
    } /* while worklist */

  return opt_result::success ();
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (vec_info *,
                        stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        int ndts,
                        slp_tree node,
                        stmt_vector_for_cost *cost_vec,
                        vect_cost_for_stmt kind = vector_stmt)
{
  int inside_cost = 0, prologue_cost = 0;

  gcc_assert (cost_vec != NULL);

  /* ??? Somehow we need to fix this at the callers.  */
  if (node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);

  if (!node)
    /* Cost the "broadcast" of a scalar operand into a vector operand.
       Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
       cost model.  */
    for (int i = 0; i < ndts; i++)
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
        prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
                                           stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
                                   stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
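
/* Worked example (added, illustrative numbers): a non-SLP stmt with
   NCOPIES == 2 and one constant operand records one scalar_to_vec in
   the prologue and two copies of KIND in the loop body, so the
   preliminary estimate is
   1 * cost (scalar_to_vec) + 2 * cost (vector_stmt).  */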

/* Model cost for type demotion and promotion operations.  PWR is
   normally zero for single-step promotions and demotions.  It will be
   one if two-step promotion/demotion is required, and so on.  NCOPIES
   is the number of vector results (and thus number of instructions)
   for the narrowest end of the operation chain.  Each additional
   step doubles the number of instructions required.  If WIDEN_ARITH
   is true the stmt is doing widening arithmetic.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt,
                                    unsigned int ncopies, int pwr,
                                    stmt_vector_for_cost *cost_vec,
                                    bool widen_arith)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  for (i = 0; i < pwr + 1; i++)
    {
      inside_cost += record_stmt_cost (cost_vec, ncopies,
                                       widen_arith
                                       ? vector_stmt : vec_promote_demote,
                                       stmt_info, 0, vect_body);
      ncopies *= 2;
    }

  /* FORNOW: Assuming maximum 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
                                         stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
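
/* Worked example (added, illustrative numbers): for a two-step
   demotion (PWR == 1) with NCOPIES == 2 at the narrow end, the loop
   above records 2 + 4 = 6 vec_promote_demote stmts, since each step
   away from the narrowest type doubles the instruction count.  */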

/* Returns true if the current function returns DECL.  */

static bool
cfun_returns (tree decl)
{
  edge_iterator ei;
  edge e;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
      if (!ret)
        continue;
      if (gimple_return_retval (ret) == decl)
        return true;
      /* We often end up with an aggregate copy to the result decl,
         handle that case as well.  First skip intermediate clobbers
         though.  */
      gimple *def = ret;
      do
        {
          def = SSA_NAME_DEF_STMT (gimple_vuse (def));
        }
      while (gimple_clobber_p (def));
      if (is_a <gassign *> (def)
          && gimple_assign_lhs (def) == gimple_return_retval (ret)
          && gimple_assign_rhs1 (def) == decl)
        return true;
    }
  return false;
}
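
/* Illustrative GIMPLE shape matched by the aggregate-copy case above
   (added; names invented):

       <retval> = D.1234;      // gimple_assign with rhs1 == DECL
       return <retval>;
*/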

/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

static void
vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
                       vect_memory_access_type memory_access_type,
                       dr_alignment_support alignment_support_scheme,
                       int misalignment,
                       vec_load_store_type vls_type, slp_tree slp_node,
                       stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  stmt_vec_info first_stmt_info = stmt_info;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (!slp_node)
        prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
                                           stmt_info, 0, vect_prologue);
    }

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses high and low interleave or shuffle operations for each
         needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .\n",
                         group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
                                       ncopies * assumed_nunits,
                                       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
                         misalignment, &inside_cost, cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
                                       ncopies * assumed_nunits,
                                       vec_to_scalar, stmt_info, 0, vect_body);
    }

  /* When vectorizing a store into the function result assign
     a penalty if the function returns in a multi-register location.
     In this case we assume we'll end up with having to spill the
     vector result and do piecewise loads as a conservative estimate.  */
  tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
  if (base
      && (TREE_CODE (base) == RESULT_DECL
          || (DECL_P (base) && cfun_returns (base)))
      && !aggregate_value_p (base, cfun->decl))
    {
      rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
      /* ??? Handle PARALLEL in some way.  */
      if (REG_P (reg))
        {
          int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
          /* Assume that a single reg-reg move is possible and cheap,
             do not account for vector to gp register move cost.  */
          if (nregs > 1)
            {
              /* Spill.  */
              prologue_cost += record_stmt_cost (cost_vec, ncopies,
                                                 vector_store,
                                                 stmt_info, 0, vect_epilogue);
              /* Loads.  */
              prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
                                                 scalar_load,
                                                 stmt_info, 0, vect_epilogue);
            }
        }
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
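
/* Worked example (added, illustrative numbers): a contiguous-permute
   store group with GROUP_SIZE == 4 and NCOPIES == 1 costs
   1 * ceil_log2 (4) * 4 = 8 vec_perm operations on top of the stores
   themselves.  */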

/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
                     dr_alignment_support alignment_support_scheme,
                     int misalignment,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          misalignment, vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

static void
vect_model_load_cost (vec_info *vinfo,
                      stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
                      vect_memory_access_type memory_access_type,
                      dr_alignment_support alignment_support_scheme,
                      int misalignment,
                      gather_scatter_info *gs_info,
                      slp_tree slp_node,
                      stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  gcc_assert (cost_vec);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
    {
      /* If the load is permuted then the alignment is determined by
         the first group element not by the first scalar stmt DR.  */
      stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      /* Record the cost for the permutation.  */
      unsigned n_perms, n_loads;
      vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
                                    vf, true, &n_perms, &n_loads);
      inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
                                       first_stmt_info, 0, vect_body);

      /* And adjust the number of loads performed.  This handles
         redundancies as well as loads that are later dead.  */
      ncopies = n_loads;
    }

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  stmt_vec_info first_stmt_info = stmt_info;
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* An IFN_LOAD_LANES will load all its vector results, regardless of which
     ones we actually need.  Account for the cost of unused results.  */
  if (first_stmt_p && !slp_node && memory_access_type == VMAT_LOAD_STORE_LANES)
    {
      unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
      stmt_vec_info next_stmt_info = first_stmt_info;
      do
        {
          gaps -= 1;
          next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
        }
      while (next_stmt_info);
      if (gaps)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "vect_model_load_cost: %d unused vectors.\n",
                             gaps);
          vect_get_load_cost (vinfo, stmt_info, ncopies * gaps,
                              alignment_support_scheme, misalignment, false,
                              &inside_cost, &prologue_cost,
                              cost_vec, cost_vec, true);
        }
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses even and odd extract operations or shuffle operations
         for each needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
                                       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      if (memory_access_type == VMAT_GATHER_SCATTER
          && gs_info->ifn == IFN_LAST && !gs_info->decl)
        /* For emulated gathers N offset vector element extracts
           (we assume the scalar scaling and ptr + offset add is consumed by
           the load).  */
        inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
                                         vec_to_scalar, stmt_info, 0,
                                         vect_body);
      /* N scalar loads plus gathering them into a vector.  */
      inside_cost += record_stmt_cost (cost_vec,
                                       ncopies * assumed_nunits,
                                       scalar_load, stmt_info, 0, vect_body);
    }
  else if (memory_access_type == VMAT_INVARIANT)
    {
      /* Invariant loads will ideally be hoisted and splat to a vector.  */
      prologue_cost += record_stmt_cost (cost_vec, 1,
                                         scalar_load, stmt_info, 0,
                                         vect_prologue);
      prologue_cost += record_stmt_cost (cost_vec, 1,
                                         scalar_to_vec, stmt_info, 0,
                                         vect_prologue);
    }
  else
    vect_get_load_cost (vinfo, stmt_info, ncopies,
                        alignment_support_scheme, misalignment, first_stmt_p,
                        &inside_cost, &prologue_cost,
                        cost_vec, cost_vec, true);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP
      || (memory_access_type == VMAT_GATHER_SCATTER
          && gs_info->ifn == IFN_LAST && !gs_info->decl))
    inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
                                     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
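
/* Worked example (added, illustrative numbers): an emulated gather
   (gs_info->ifn == IFN_LAST and no builtin decl) with NCOPIES == 1
   and four elements per vector costs 4 vec_to_scalar offset extracts,
   4 scalar_load ops and 1 vec_construct to assemble the result.  */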

/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
                    dr_alignment_support alignment_support_scheme,
                    int misalignment,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          misalignment, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT_VINFO.  */

static void
vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
                    gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
  else
    vinfo->insert_on_entry (stmt_vinfo, new_stmt);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "created new init_stmt: %G", new_stmt);
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT_INFO.  */

tree
vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
                  gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push something to an SSA name with initial
     'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          /* Scalar boolean value should be transformed into
             all zeros or all ones value before building a vector.  */
          if (VECTOR_BOOLEAN_TYPE_P (type))
            {
              tree true_val = build_all_ones_cst (TREE_TYPE (type));
              tree false_val = build_zero_cst (TREE_TYPE (type));

              if (CONSTANT_CLASS_P (val))
                val = integer_zerop (val) ? false_val : true_val;
              else
                {
                  new_temp = make_ssa_name (TREE_TYPE (type));
                  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
                                                   val, true_val, false_val);
                  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
                  val = new_temp;
                }
            }
          else
            {
              gimple_seq stmts = NULL;
              if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
                val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
                                    TREE_TYPE (type), val);
              else
                /* ??? Condition vectorization expects us to do
                   promotion of invariant/external defs.  */
                val = gimple_convert (&stmts, TREE_TYPE (type), val);
              for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
                   !gsi_end_p (gsi2); )
                {
                  init_stmt = gsi_stmt (gsi2);
                  gsi_remove (&gsi2, false);
                  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
                }
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
  return new_temp;
}
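
/* Illustrative result (added; SSA names invented): for TYPE == V4SI
   and VAL == x_1, vect_init_vector emits

       cst_2 = {x_1, x_1, x_1, x_1};

   at GSI, or in the loop preheader when GSI is NULL, and returns
   cst_2.  */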
*/ 1409 1.1 mrg if (VECTOR_BOOLEAN_TYPE_P (type)) 1410 1.1 mrg { 1411 1.1 mrg tree true_val = build_all_ones_cst (TREE_TYPE (type)); 1412 1.1 mrg tree false_val = build_zero_cst (TREE_TYPE (type)); 1413 1.1 mrg 1414 1.1 mrg if (CONSTANT_CLASS_P (val)) 1415 1.1 mrg val = integer_zerop (val) ? false_val : true_val; 1416 1.1 mrg else 1417 1.1 mrg { 1418 1.1 mrg new_temp = make_ssa_name (TREE_TYPE (type)); 1419 1.1 mrg init_stmt = gimple_build_assign (new_temp, COND_EXPR, 1420 1.1 mrg val, true_val, false_val); 1421 1.1 mrg vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi); 1422 1.1 mrg val = new_temp; 1423 1.1 mrg } 1424 1.1 mrg } 1425 1.1 mrg else 1426 1.1 mrg { 1427 1.1 mrg gimple_seq stmts = NULL; 1428 1.1 mrg if (! INTEGRAL_TYPE_P (TREE_TYPE (val))) 1429 1.1 mrg val = gimple_build (&stmts, VIEW_CONVERT_EXPR, 1430 1.1 mrg TREE_TYPE (type), val); 1431 1.1 mrg else 1432 1.1 mrg /* ??? Condition vectorization expects us to do 1433 1.1 mrg promotion of invariant/external defs. */ 1434 1.1 mrg val = gimple_convert (&stmts, TREE_TYPE (type), val); 1435 1.1 mrg for (gimple_stmt_iterator gsi2 = gsi_start (stmts); 1436 1.1 mrg !gsi_end_p (gsi2); ) 1437 1.1 mrg { 1438 1.1 mrg init_stmt = gsi_stmt (gsi2); 1439 1.1 mrg gsi_remove (&gsi2, false); 1440 1.1 mrg vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi); 1441 1.1 mrg } 1442 1.1 mrg } 1443 1.1 mrg } 1444 1.1 mrg val = build_vector_from_val (type, val); 1445 1.1 mrg } 1446 1.1 mrg 1447 1.1 mrg new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_"); 1448 1.1 mrg init_stmt = gimple_build_assign (new_temp, val); 1449 1.1 mrg vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi); 1450 1.1 mrg return new_temp; 1451 1.1 mrg } 1452 1.1 mrg 1453 1.1 mrg 1454 1.1 mrg /* Function vect_get_vec_defs_for_operand. 1455 1.1 mrg 1456 1.1 mrg OP is an operand in STMT_VINFO. This function returns a vector of 1457 1.1 mrg NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO. 1458 1.1 mrg 1459 1.1 mrg In the case that OP is an SSA_NAME which is defined in the loop, then 1460 1.1 mrg STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs. 1461 1.1 mrg 1462 1.1 mrg In case OP is an invariant or constant, a new stmt that creates a vector def 1463 1.1 mrg needs to be introduced. VECTYPE may be used to specify a required type for 1464 1.1 mrg vector invariant. 
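   As an illustration, when vectorizing

     for (i = 0; i < n; i++)
       a[i] = b[i] + x;

   with a V4SI vector type and NCOPIES == 2, the invariant X yields a
   single preheader definition cst_ = { x, x, x, x } whose SSA name is
   pushed into *VEC_OPRNDS twice.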
*/ 1465 1.1 mrg 1466 1.1 mrg void 1467 1.1 mrg vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo, 1468 1.1 mrg unsigned ncopies, 1469 1.1 mrg tree op, vec<tree> *vec_oprnds, tree vectype) 1470 1.1 mrg { 1471 1.1 mrg gimple *def_stmt; 1472 1.1 mrg enum vect_def_type dt; 1473 1.1 mrg bool is_simple_use; 1474 1.1 mrg loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); 1475 1.1 mrg 1476 1.1 mrg if (dump_enabled_p ()) 1477 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, 1478 1.1 mrg "vect_get_vec_defs_for_operand: %T\n", op); 1479 1.1 mrg 1480 1.1 mrg stmt_vec_info def_stmt_info; 1481 1.1 mrg is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt, 1482 1.1 mrg &def_stmt_info, &def_stmt); 1483 1.1 mrg gcc_assert (is_simple_use); 1484 1.1 mrg if (def_stmt && dump_enabled_p ()) 1485 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt); 1486 1.1 mrg 1487 1.1 mrg vec_oprnds->create (ncopies); 1488 1.1 mrg if (dt == vect_constant_def || dt == vect_external_def) 1489 1.1 mrg { 1490 1.1 mrg tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo); 1491 1.1 mrg tree vector_type; 1492 1.1 mrg 1493 1.1 mrg if (vectype) 1494 1.1 mrg vector_type = vectype; 1495 1.1 mrg else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op)) 1496 1.1 mrg && VECTOR_BOOLEAN_TYPE_P (stmt_vectype)) 1497 1.1 mrg vector_type = truth_type_for (stmt_vectype); 1498 1.1 mrg else 1499 1.1 mrg vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op)); 1500 1.1 mrg 1501 1.1 mrg gcc_assert (vector_type); 1502 1.1 mrg tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL); 1503 1.1 mrg while (ncopies--) 1504 1.1 mrg vec_oprnds->quick_push (vop); 1505 1.1 mrg } 1506 1.1 mrg else 1507 1.1 mrg { 1508 1.1 mrg def_stmt_info = vect_stmt_to_vectorize (def_stmt_info); 1509 1.1 mrg gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies); 1510 1.1 mrg for (unsigned i = 0; i < ncopies; ++i) 1511 1.1 mrg vec_oprnds->quick_push (gimple_get_lhs 1512 1.1 mrg (STMT_VINFO_VEC_STMTS (def_stmt_info)[i])); 1513 1.1 mrg } 1514 1.1 mrg } 1515 1.1 mrg 1516 1.1 mrg 1517 1.1 mrg /* Get vectorized definitions for OP0 and OP1. 
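   OP2 and OP3 are handled the same way; each OPn may be null, in which
   case the corresponding VEC_OPRNDSn vector is left untouched.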
*/ 1518 1.1 mrg 1519 1.1 mrg void 1520 1.1 mrg vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node, 1521 1.1 mrg unsigned ncopies, 1522 1.1 mrg tree op0, vec<tree> *vec_oprnds0, tree vectype0, 1523 1.1 mrg tree op1, vec<tree> *vec_oprnds1, tree vectype1, 1524 1.1 mrg tree op2, vec<tree> *vec_oprnds2, tree vectype2, 1525 1.1 mrg tree op3, vec<tree> *vec_oprnds3, tree vectype3) 1526 1.1 mrg { 1527 1.1 mrg if (slp_node) 1528 1.1 mrg { 1529 1.1 mrg if (op0) 1530 1.1 mrg vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0); 1531 1.1 mrg if (op1) 1532 1.1 mrg vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1); 1533 1.1 mrg if (op2) 1534 1.1 mrg vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2); 1535 1.1 mrg if (op3) 1536 1.1 mrg vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3); 1537 1.1 mrg } 1538 1.1 mrg else 1539 1.1 mrg { 1540 1.1 mrg if (op0) 1541 1.1 mrg vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, 1542 1.1 mrg op0, vec_oprnds0, vectype0); 1543 1.1 mrg if (op1) 1544 1.1 mrg vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, 1545 1.1 mrg op1, vec_oprnds1, vectype1); 1546 1.1 mrg if (op2) 1547 1.1 mrg vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, 1548 1.1 mrg op2, vec_oprnds2, vectype2); 1549 1.1 mrg if (op3) 1550 1.1 mrg vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, 1551 1.1 mrg op3, vec_oprnds3, vectype3); 1552 1.1 mrg } 1553 1.1 mrg } 1554 1.1 mrg 1555 1.1 mrg void 1556 1.1 mrg vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node, 1557 1.1 mrg unsigned ncopies, 1558 1.1 mrg tree op0, vec<tree> *vec_oprnds0, 1559 1.1 mrg tree op1, vec<tree> *vec_oprnds1, 1560 1.1 mrg tree op2, vec<tree> *vec_oprnds2, 1561 1.1 mrg tree op3, vec<tree> *vec_oprnds3) 1562 1.1 mrg { 1563 1.1 mrg vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, 1564 1.1 mrg op0, vec_oprnds0, NULL_TREE, 1565 1.1 mrg op1, vec_oprnds1, NULL_TREE, 1566 1.1 mrg op2, vec_oprnds2, NULL_TREE, 1567 1.1 mrg op3, vec_oprnds3, NULL_TREE); 1568 1.1 mrg } 1569 1.1 mrg 1570 1.1 mrg /* Helper function called by vect_finish_replace_stmt and 1571 1.1 mrg vect_finish_stmt_generation. Set the location of the new 1572 1.1 mrg statement and create and return a stmt_vec_info for it. */ 1573 1.1 mrg 1574 1.1 mrg static void 1575 1.1 mrg vect_finish_stmt_generation_1 (vec_info *, 1576 1.1 mrg stmt_vec_info stmt_info, gimple *vec_stmt) 1577 1.1 mrg { 1578 1.1 mrg if (dump_enabled_p ()) 1579 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt); 1580 1.1 mrg 1581 1.1 mrg if (stmt_info) 1582 1.1 mrg { 1583 1.1 mrg gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt)); 1584 1.1 mrg 1585 1.1 mrg /* While EH edges will generally prevent vectorization, stmt might 1586 1.1 mrg e.g. be in a must-not-throw region. Ensure newly created stmts 1587 1.1 mrg that could throw are part of the same region. */ 1588 1.1 mrg int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt); 1589 1.1 mrg if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt)) 1590 1.1 mrg add_stmt_to_eh_lp (vec_stmt, lp_nr); 1591 1.1 mrg } 1592 1.1 mrg else 1593 1.1 mrg gcc_assert (!stmt_could_throw_p (cfun, vec_stmt)); 1594 1.1 mrg } 1595 1.1 mrg 1596 1.1 mrg /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT, 1597 1.1 mrg which sets the same scalar result as STMT_INFO did. Create and return a 1598 1.1 mrg stmt_vec_info for VEC_STMT. 
*/ 1599 1.1 mrg 1600 1.1 mrg void 1601 1.1 mrg vect_finish_replace_stmt (vec_info *vinfo, 1602 1.1 mrg stmt_vec_info stmt_info, gimple *vec_stmt) 1603 1.1 mrg { 1604 1.1 mrg gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt; 1605 1.1 mrg gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt)); 1606 1.1 mrg 1607 1.1 mrg gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt); 1608 1.1 mrg gsi_replace (&gsi, vec_stmt, true); 1609 1.1 mrg 1610 1.1 mrg vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt); 1611 1.1 mrg } 1612 1.1 mrg 1613 1.1 mrg /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it 1614 1.1 mrg before *GSI. Create and return a stmt_vec_info for VEC_STMT. */ 1615 1.1 mrg 1616 1.1 mrg void 1617 1.1 mrg vect_finish_stmt_generation (vec_info *vinfo, 1618 1.1 mrg stmt_vec_info stmt_info, gimple *vec_stmt, 1619 1.1 mrg gimple_stmt_iterator *gsi) 1620 1.1 mrg { 1621 1.1 mrg gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL); 1622 1.1 mrg 1623 1.1 mrg if (!gsi_end_p (*gsi) 1624 1.1 mrg && gimple_has_mem_ops (vec_stmt)) 1625 1.1 mrg { 1626 1.1 mrg gimple *at_stmt = gsi_stmt (*gsi); 1627 1.1 mrg tree vuse = gimple_vuse (at_stmt); 1628 1.1 mrg if (vuse && TREE_CODE (vuse) == SSA_NAME) 1629 1.1 mrg { 1630 1.1 mrg tree vdef = gimple_vdef (at_stmt); 1631 1.1 mrg gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt)); 1632 1.1 mrg gimple_set_modified (vec_stmt, true); 1633 1.1 mrg /* If we have an SSA vuse and insert a store, update virtual 1634 1.1 mrg SSA form to avoid triggering the renamer. Do so only 1635 1.1 mrg if we can easily see all uses - which is what almost always 1636 1.1 mrg happens with the way vectorized stmts are inserted. */ 1637 1.1 mrg if ((vdef && TREE_CODE (vdef) == SSA_NAME) 1638 1.1 mrg && ((is_gimple_assign (vec_stmt) 1639 1.1 mrg && !is_gimple_reg (gimple_assign_lhs (vec_stmt))) 1640 1.1 mrg || (is_gimple_call (vec_stmt) 1641 1.1 mrg && !(gimple_call_flags (vec_stmt) 1642 1.1 mrg & (ECF_CONST|ECF_PURE|ECF_NOVOPS))))) 1643 1.1 mrg { 1644 1.1 mrg tree new_vdef = copy_ssa_name (vuse, vec_stmt); 1645 1.1 mrg gimple_set_vdef (vec_stmt, new_vdef); 1646 1.1 mrg SET_USE (gimple_vuse_op (at_stmt), new_vdef); 1647 1.1 mrg } 1648 1.1 mrg } 1649 1.1 mrg } 1650 1.1 mrg gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT); 1651 1.1 mrg vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt); 1652 1.1 mrg } 1653 1.1 mrg 1654 1.1 mrg /* We want to vectorize a call to combined function CFN with function 1655 1.1 mrg decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN 1656 1.1 mrg as the types of all inputs. Check whether this is possible using 1657 1.1 mrg an internal function, returning its code if so or IFN_LAST if not. */ 1658 1.1 mrg 1659 1.1 mrg static internal_fn 1660 1.1 mrg vectorizable_internal_function (combined_fn cfn, tree fndecl, 1661 1.1 mrg tree vectype_out, tree vectype_in) 1662 1.1 mrg { 1663 1.1 mrg internal_fn ifn; 1664 1.1 mrg if (internal_fn_p (cfn)) 1665 1.1 mrg ifn = as_internal_fn (cfn); 1666 1.1 mrg else 1667 1.1 mrg ifn = associated_internal_fn (fndecl); 1668 1.1 mrg if (ifn != IFN_LAST && direct_internal_fn_p (ifn)) 1669 1.1 mrg { 1670 1.1 mrg const direct_internal_fn_info &info = direct_internal_fn (ifn); 1671 1.1 mrg if (info.vectorizable) 1672 1.1 mrg { 1673 1.1 mrg tree type0 = (info.type0 < 0 ? vectype_out : vectype_in); 1674 1.1 mrg tree type1 = (info.type1 < 0 ? 
vectype_out : vectype_in); 1675 1.1 mrg if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1), 1676 1.1 mrg OPTIMIZE_FOR_SPEED)) 1677 1.1 mrg return ifn; 1678 1.1 mrg } 1679 1.1 mrg } 1680 1.1 mrg return IFN_LAST; 1681 1.1 mrg } 1682 1.1 mrg 1683 1.1 mrg 1684 1.1 mrg static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info, 1685 1.1 mrg gimple_stmt_iterator *); 1686 1.1 mrg 1687 1.1 mrg /* Check whether a load or store statement in the loop described by 1688 1.1 mrg LOOP_VINFO is possible in a loop using partial vectors. This is 1689 1.1 mrg testing whether the vectorizer pass has the appropriate support, 1690 1.1 mrg as well as whether the target does. 1691 1.1 mrg 1692 1.1 mrg VLS_TYPE says whether the statement is a load or store and VECTYPE 1693 1.1 mrg is the type of the vector being loaded or stored. SLP_NODE is the SLP 1694 1.1 mrg node that contains the statement, or null if none. MEMORY_ACCESS_TYPE 1695 1.1 mrg says how the load or store is going to be implemented and GROUP_SIZE 1696 1.1 mrg is the number of load or store statements in the containing group. 1697 1.1 mrg If the access is a gather load or scatter store, GS_INFO describes 1698 1.1 mrg its arguments. If the load or store is conditional, SCALAR_MASK is the 1699 1.1 mrg condition under which it occurs. 1700 1.1 mrg 1701 1.1 mrg Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial 1702 1.1 mrg vectors is not supported, otherwise record the required rgroup control 1703 1.1 mrg types. */ 1704 1.1 mrg 1705 1.1 mrg static void 1706 1.1 mrg check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype, 1707 1.1 mrg slp_tree slp_node, 1708 1.1 mrg vec_load_store_type vls_type, 1709 1.1 mrg int group_size, 1710 1.1 mrg vect_memory_access_type 1711 1.1 mrg memory_access_type, 1712 1.1 mrg gather_scatter_info *gs_info, 1713 1.1 mrg tree scalar_mask) 1714 1.1 mrg { 1715 1.1 mrg /* Invariant loads need no special support. */ 1716 1.1 mrg if (memory_access_type == VMAT_INVARIANT) 1717 1.1 mrg return; 1718 1.1 mrg 1719 1.1 mrg unsigned int nvectors; 1720 1.1 mrg if (slp_node) 1721 1.1 mrg nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); 1722 1.1 mrg else 1723 1.1 mrg nvectors = vect_get_num_copies (loop_vinfo, vectype); 1724 1.1 mrg 1725 1.1 mrg vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); 1726 1.1 mrg machine_mode vecmode = TYPE_MODE (vectype); 1727 1.1 mrg bool is_load = (vls_type == VLS_LOAD); 1728 1.1 mrg if (memory_access_type == VMAT_LOAD_STORE_LANES) 1729 1.1 mrg { 1730 1.1 mrg if (is_load 1731 1.1 mrg ? !vect_load_lanes_supported (vectype, group_size, true) 1732 1.1 mrg : !vect_store_lanes_supported (vectype, group_size, true)) 1733 1.1 mrg { 1734 1.1 mrg if (dump_enabled_p ()) 1735 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1736 1.1 mrg "can't operate on partial vectors because" 1737 1.1 mrg " the target doesn't have an appropriate" 1738 1.1 mrg " load/store-lanes instruction.\n"); 1739 1.1 mrg LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; 1740 1.1 mrg return; 1741 1.1 mrg } 1742 1.1 mrg vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, 1743 1.1 mrg scalar_mask); 1744 1.1 mrg return; 1745 1.1 mrg } 1746 1.1 mrg 1747 1.1 mrg if (memory_access_type == VMAT_GATHER_SCATTER) 1748 1.1 mrg { 1749 1.1 mrg internal_fn ifn = (is_load 1750 1.1 mrg ? 
IFN_MASK_GATHER_LOAD 1751 1.1 mrg : IFN_MASK_SCATTER_STORE); 1752 1.1 mrg if (!internal_gather_scatter_fn_supported_p (ifn, vectype, 1753 1.1 mrg gs_info->memory_type, 1754 1.1 mrg gs_info->offset_vectype, 1755 1.1 mrg gs_info->scale)) 1756 1.1 mrg { 1757 1.1 mrg if (dump_enabled_p ()) 1758 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1759 1.1 mrg "can't operate on partial vectors because" 1760 1.1 mrg " the target doesn't have an appropriate" 1761 1.1 mrg " gather load or scatter store instruction.\n"); 1762 1.1 mrg LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; 1763 1.1 mrg return; 1764 1.1 mrg } 1765 1.1 mrg vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, 1766 1.1 mrg scalar_mask); 1767 1.1 mrg return; 1768 1.1 mrg } 1769 1.1 mrg 1770 1.1 mrg if (memory_access_type != VMAT_CONTIGUOUS 1771 1.1 mrg && memory_access_type != VMAT_CONTIGUOUS_PERMUTE) 1772 1.1 mrg { 1773 1.1 mrg /* Element X of the data must come from iteration i * VF + X of the 1774 1.1 mrg scalar loop. We need more work to support other mappings. */ 1775 1.1 mrg if (dump_enabled_p ()) 1776 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1777 1.1 mrg "can't operate on partial vectors because an" 1778 1.1 mrg " access isn't contiguous.\n"); 1779 1.1 mrg LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; 1780 1.1 mrg return; 1781 1.1 mrg } 1782 1.1 mrg 1783 1.1 mrg if (!VECTOR_MODE_P (vecmode)) 1784 1.1 mrg { 1785 1.1 mrg if (dump_enabled_p ()) 1786 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1787 1.1 mrg "can't operate on partial vectors when emulating" 1788 1.1 mrg " vector operations.\n"); 1789 1.1 mrg LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; 1790 1.1 mrg return; 1791 1.1 mrg } 1792 1.1 mrg 1793 1.1 mrg /* We might load more scalars than we need for permuting SLP loads. 1794 1.1 mrg We checked in get_group_load_store_type that the extra elements 1795 1.1 mrg don't leak into a new vector. */ 1796 1.1 mrg auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits) 1797 1.1 mrg { 1798 1.1 mrg unsigned int nvectors; 1799 1.1 mrg if (can_div_away_from_zero_p (size, nunits, &nvectors)) 1800 1.1 mrg return nvectors; 1801 1.1 mrg gcc_unreachable (); 1802 1.1 mrg }; 1803 1.1 mrg 1804 1.1 mrg poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); 1805 1.1 mrg poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 1806 1.1 mrg machine_mode mask_mode; 1807 1.1 mrg bool using_partial_vectors_p = false; 1808 1.1 mrg if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode) 1809 1.1 mrg && can_vec_mask_load_store_p (vecmode, mask_mode, is_load)) 1810 1.1 mrg { 1811 1.1 mrg nvectors = group_memory_nvectors (group_size * vf, nunits); 1812 1.1 mrg vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask); 1813 1.1 mrg using_partial_vectors_p = true; 1814 1.1 mrg } 1815 1.1 mrg 1816 1.1 mrg machine_mode vmode; 1817 1.1 mrg if (get_len_load_store_mode (vecmode, is_load).exists (&vmode)) 1818 1.1 mrg { 1819 1.1 mrg nvectors = group_memory_nvectors (group_size * vf, nunits); 1820 1.1 mrg vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo); 1821 1.1 mrg unsigned factor = (vecmode == vmode) ? 
1 : GET_MODE_UNIT_SIZE (vecmode); 1822 1.1 mrg vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor); 1823 1.1 mrg using_partial_vectors_p = true; 1824 1.1 mrg } 1825 1.1 mrg 1826 1.1 mrg if (!using_partial_vectors_p) 1827 1.1 mrg { 1828 1.1 mrg if (dump_enabled_p ()) 1829 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1830 1.1 mrg "can't operate on partial vectors because the" 1831 1.1 mrg " target doesn't have the appropriate partial" 1832 1.1 mrg " vectorization load or store.\n"); 1833 1.1 mrg LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; 1834 1.1 mrg } 1835 1.1 mrg } 1836 1.1 mrg 1837 1.1 mrg /* Return the mask input to a masked load or store. VEC_MASK is the vectorized 1838 1.1 mrg form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask 1839 1.1 mrg that needs to be applied to all loads and stores in a vectorized loop. 1840 1.1 mrg Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked, 1841 1.1 mrg otherwise return VEC_MASK & LOOP_MASK. 1842 1.1 mrg 1843 1.1 mrg MASK_TYPE is the type of both masks. If new statements are needed, 1844 1.1 mrg insert them before GSI. */ 1845 1.1 mrg 1846 1.1 mrg static tree 1847 1.1 mrg prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask, 1848 1.1 mrg tree vec_mask, gimple_stmt_iterator *gsi) 1849 1.1 mrg { 1850 1.1 mrg gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask))); 1851 1.1 mrg if (!loop_mask) 1852 1.1 mrg return vec_mask; 1853 1.1 mrg 1854 1.1 mrg gcc_assert (TREE_TYPE (loop_mask) == mask_type); 1855 1.1 mrg 1856 1.1 mrg if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask })) 1857 1.1 mrg return vec_mask; 1858 1.1 mrg 1859 1.1 mrg tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and"); 1860 1.1 mrg gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR, 1861 1.1 mrg vec_mask, loop_mask); 1862 1.1 mrg 1863 1.1 mrg gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT); 1864 1.1 mrg return and_res; 1865 1.1 mrg } 1866 1.1 mrg 1867 1.1 mrg /* Determine whether we can use a gather load or scatter store to vectorize 1868 1.1 mrg strided load or store STMT_INFO by truncating the current offset to a 1869 1.1 mrg smaller width. We need to be able to construct an offset vector: 1870 1.1 mrg 1871 1.1 mrg { 0, X, X*2, X*3, ... } 1872 1.1 mrg 1873 1.1 mrg without loss of precision, where X is STMT_INFO's DR_STEP. 1874 1.1 mrg 1875 1.1 mrg Return true if this is possible, describing the gather load or scatter 1876 1.1 mrg store in GS_INFO. MASKED_P is true if the load or store is conditional. */ 1877 1.1 mrg 1878 1.1 mrg static bool 1879 1.1 mrg vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info, 1880 1.1 mrg loop_vec_info loop_vinfo, bool masked_p, 1881 1.1 mrg gather_scatter_info *gs_info) 1882 1.1 mrg { 1883 1.1 mrg dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); 1884 1.1 mrg data_reference *dr = dr_info->dr; 1885 1.1 mrg tree step = DR_STEP (dr); 1886 1.1 mrg if (TREE_CODE (step) != INTEGER_CST) 1887 1.1 mrg { 1888 1.1 mrg /* ??? Perhaps we could use range information here? */ 1889 1.1 mrg if (dump_enabled_p ()) 1890 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, 1891 1.1 mrg "cannot truncate variable step.\n"); 1892 1.1 mrg return false; 1893 1.1 mrg } 1894 1.1 mrg 1895 1.1 mrg /* Get the number of bits in an element. 
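   (E.g. 32 bits for a V4SI vector type.)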
*/ 1896 1.1 mrg tree vectype = STMT_VINFO_VECTYPE (stmt_info); 1897 1.1 mrg scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype)); 1898 1.1 mrg unsigned int element_bits = GET_MODE_BITSIZE (element_mode); 1899 1.1 mrg 1900 1.1 mrg /* Set COUNT to the upper limit on the number of elements - 1. 1901 1.1 mrg Start with the maximum vectorization factor. */ 1902 1.1 mrg unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1; 1903 1.1 mrg 1904 1.1 mrg /* Try lowering COUNT to the number of scalar latch iterations. */ 1905 1.1 mrg class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 1906 1.1 mrg widest_int max_iters; 1907 1.1 mrg if (max_loop_iterations (loop, &max_iters) 1908 1.1 mrg && max_iters < count) 1909 1.1 mrg count = max_iters.to_shwi (); 1910 1.1 mrg 1911 1.1 mrg /* Try scales of 1 and the element size. */ 1912 1.1 mrg int scales[] = { 1, vect_get_scalar_dr_size (dr_info) }; 1913 1.1 mrg wi::overflow_type overflow = wi::OVF_NONE; 1914 1.1 mrg for (int i = 0; i < 2; ++i) 1915 1.1 mrg { 1916 1.1 mrg int scale = scales[i]; 1917 1.1 mrg widest_int factor; 1918 1.1 mrg if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor)) 1919 1.1 mrg continue; 1920 1.1 mrg 1921 1.1 mrg /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */ 1922 1.1 mrg widest_int range = wi::mul (count, factor, SIGNED, &overflow); 1923 1.1 mrg if (overflow) 1924 1.1 mrg continue; 1925 1.1 mrg signop sign = range >= 0 ? UNSIGNED : SIGNED; 1926 1.1 mrg unsigned int min_offset_bits = wi::min_precision (range, sign); 1927 1.1 mrg 1928 1.1 mrg /* Find the narrowest viable offset type. */ 1929 1.1 mrg unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits); 1930 1.1 mrg tree offset_type = build_nonstandard_integer_type (offset_bits, 1931 1.1 mrg sign == UNSIGNED); 1932 1.1 mrg 1933 1.1 mrg /* See whether the target supports the operation with an offset 1934 1.1 mrg no narrower than OFFSET_TYPE. */ 1935 1.1 mrg tree memory_type = TREE_TYPE (DR_REF (dr)); 1936 1.1 mrg if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p, 1937 1.1 mrg vectype, memory_type, offset_type, scale, 1938 1.1 mrg &gs_info->ifn, &gs_info->offset_vectype) 1939 1.1 mrg || gs_info->ifn == IFN_LAST) 1940 1.1 mrg continue; 1941 1.1 mrg 1942 1.1 mrg gs_info->decl = NULL_TREE; 1943 1.1 mrg /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET, 1944 1.1 mrg but we don't need to store that here. */ 1945 1.1 mrg gs_info->base = NULL_TREE; 1946 1.1 mrg gs_info->element_type = TREE_TYPE (vectype); 1947 1.1 mrg gs_info->offset = fold_convert (offset_type, step); 1948 1.1 mrg gs_info->offset_dt = vect_constant_def; 1949 1.1 mrg gs_info->scale = scale; 1950 1.1 mrg gs_info->memory_type = memory_type; 1951 1.1 mrg return true; 1952 1.1 mrg } 1953 1.1 mrg 1954 1.1 mrg if (overflow && dump_enabled_p ()) 1955 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, 1956 1.1 mrg "truncating gather/scatter offset to %d bits" 1957 1.1 mrg " might change its value.\n", element_bits); 1958 1.1 mrg 1959 1.1 mrg return false; 1960 1.1 mrg } 1961 1.1 mrg 1962 1.1 mrg /* Return true if we can use gather/scatter internal functions to 1963 1.1 mrg vectorize STMT_INFO, which is a grouped or strided load or store. 1964 1.1 mrg MASKED_P is true if load or store is conditional. When returning 1965 1.1 mrg true, fill in GS_INFO with the information required to perform the 1966 1.1 mrg operation. 
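   If vect_check_gather_scatter finds no supported internal function,
   we fall back to truncating the offset as described above.  Otherwise
   the offset is widened to the element type of GS_INFO->offset_vectype;
   the assertion below guarantees that this widening cannot lose offset
   bits.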
*/ 1967 1.1 mrg 1968 1.1 mrg static bool 1969 1.1 mrg vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info, 1970 1.1 mrg loop_vec_info loop_vinfo, bool masked_p, 1971 1.1 mrg gather_scatter_info *gs_info) 1972 1.1 mrg { 1973 1.1 mrg if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info) 1974 1.1 mrg || gs_info->ifn == IFN_LAST) 1975 1.1 mrg return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo, 1976 1.1 mrg masked_p, gs_info); 1977 1.1 mrg 1978 1.1 mrg tree old_offset_type = TREE_TYPE (gs_info->offset); 1979 1.1 mrg tree new_offset_type = TREE_TYPE (gs_info->offset_vectype); 1980 1.1 mrg 1981 1.1 mrg gcc_assert (TYPE_PRECISION (new_offset_type) 1982 1.1 mrg >= TYPE_PRECISION (old_offset_type)); 1983 1.1 mrg gs_info->offset = fold_convert (new_offset_type, gs_info->offset); 1984 1.1 mrg 1985 1.1 mrg if (dump_enabled_p ()) 1986 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, 1987 1.1 mrg "using gather/scatter for strided/grouped access," 1988 1.1 mrg " scale = %d\n", gs_info->scale); 1989 1.1 mrg 1990 1.1 mrg return true; 1991 1.1 mrg } 1992 1.1 mrg 1993 1.1 mrg /* STMT_INFO is a non-strided load or store, meaning that it accesses 1994 1.1 mrg elements with a known constant step. Return -1 if that step 1995 1.1 mrg is negative, 0 if it is zero, and 1 if it is greater than zero. */ 1996 1.1 mrg 1997 1.1 mrg static int 1998 1.1 mrg compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info) 1999 1.1 mrg { 2000 1.1 mrg dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); 2001 1.1 mrg return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step, 2002 1.1 mrg size_zero_node); 2003 1.1 mrg } 2004 1.1 mrg 2005 1.1 mrg /* If the target supports a permute mask that reverses the elements in 2006 1.1 mrg a vector of type VECTYPE, return that mask, otherwise return null. */ 2007 1.1 mrg 2008 1.1 mrg static tree 2009 1.1 mrg perm_mask_for_reverse (tree vectype) 2010 1.1 mrg { 2011 1.1 mrg poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); 2012 1.1 mrg 2013 1.1 mrg /* The encoding has a single stepped pattern. */ 2014 1.1 mrg vec_perm_builder sel (nunits, 1, 3); 2015 1.1 mrg for (int i = 0; i < 3; ++i) 2016 1.1 mrg sel.quick_push (nunits - 1 - i); 2017 1.1 mrg 2018 1.1 mrg vec_perm_indices indices (sel, 1, nunits); 2019 1.1 mrg if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices)) 2020 1.1 mrg return NULL_TREE; 2021 1.1 mrg return vect_gen_perm_mask_checked (vectype, indices); 2022 1.1 mrg } 2023 1.1 mrg 2024 1.1 mrg /* A subroutine of get_load_store_type, with a subset of the same 2025 1.1 mrg arguments. Handle the case where STMT_INFO is a load or store that 2026 1.1 mrg accesses consecutive elements with a negative step. Sets *POFFSET 2027 1.1 mrg to the offset to be applied to the DR for the first access. 
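   For example, for a V4SI access (four 4-byte elements) with a step of
   -4, the first vector access must start three elements before the
   DR's address, so *POFFSET is set to (-4 + 1) * 4 == -12 bytes.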
*/ 2028 1.1 mrg 2029 1.1 mrg static vect_memory_access_type 2030 1.1 mrg get_negative_load_store_type (vec_info *vinfo, 2031 1.1 mrg stmt_vec_info stmt_info, tree vectype, 2032 1.1 mrg vec_load_store_type vls_type, 2033 1.1 mrg unsigned int ncopies, poly_int64 *poffset) 2034 1.1 mrg { 2035 1.1 mrg dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); 2036 1.1 mrg dr_alignment_support alignment_support_scheme; 2037 1.1 mrg 2038 1.1 mrg if (ncopies > 1) 2039 1.1 mrg { 2040 1.1 mrg if (dump_enabled_p ()) 2041 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2042 1.1 mrg "multiple types with negative step.\n"); 2043 1.1 mrg return VMAT_ELEMENTWISE; 2044 1.1 mrg } 2045 1.1 mrg 2046 1.1 mrg /* For backward running DRs the first access in vectype actually is 2047 1.1 mrg N-1 elements before the address of the DR. */ 2048 1.1 mrg *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1) 2049 1.1 mrg * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))); 2050 1.1 mrg 2051 1.1 mrg int misalignment = dr_misalignment (dr_info, vectype, *poffset); 2052 1.1 mrg alignment_support_scheme 2053 1.1 mrg = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment); 2054 1.1 mrg if (alignment_support_scheme != dr_aligned 2055 1.1 mrg && alignment_support_scheme != dr_unaligned_supported) 2056 1.1 mrg { 2057 1.1 mrg if (dump_enabled_p ()) 2058 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2059 1.1 mrg "negative step but alignment required.\n"); 2060 1.1 mrg *poffset = 0; 2061 1.1 mrg return VMAT_ELEMENTWISE; 2062 1.1 mrg } 2063 1.1 mrg 2064 1.1 mrg if (vls_type == VLS_STORE_INVARIANT) 2065 1.1 mrg { 2066 1.1 mrg if (dump_enabled_p ()) 2067 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, 2068 1.1 mrg "negative step with invariant source;" 2069 1.1 mrg " no permute needed.\n"); 2070 1.1 mrg return VMAT_CONTIGUOUS_DOWN; 2071 1.1 mrg } 2072 1.1 mrg 2073 1.1 mrg if (!perm_mask_for_reverse (vectype)) 2074 1.1 mrg { 2075 1.1 mrg if (dump_enabled_p ()) 2076 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2077 1.1 mrg "negative step and reversing not supported.\n"); 2078 1.1 mrg *poffset = 0; 2079 1.1 mrg return VMAT_ELEMENTWISE; 2080 1.1 mrg } 2081 1.1 mrg 2082 1.1 mrg return VMAT_CONTIGUOUS_REVERSE; 2083 1.1 mrg } 2084 1.1 mrg 2085 1.1 mrg /* STMT_INFO is either a masked or unconditional store. Return the value 2086 1.1 mrg being stored. */ 2087 1.1 mrg 2088 1.1 mrg tree 2089 1.1 mrg vect_get_store_rhs (stmt_vec_info stmt_info) 2090 1.1 mrg { 2091 1.1 mrg if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt)) 2092 1.1 mrg { 2093 1.1 mrg gcc_assert (gimple_assign_single_p (assign)); 2094 1.1 mrg return gimple_assign_rhs1 (assign); 2095 1.1 mrg } 2096 1.1 mrg if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt)) 2097 1.1 mrg { 2098 1.1 mrg internal_fn ifn = gimple_call_internal_fn (call); 2099 1.1 mrg int index = internal_fn_stored_value_index (ifn); 2100 1.1 mrg gcc_assert (index >= 0); 2101 1.1 mrg return gimple_call_arg (call, index); 2102 1.1 mrg } 2103 1.1 mrg gcc_unreachable (); 2104 1.1 mrg } 2105 1.1 mrg 2106 1.1 mrg /* Function VECTOR_VECTOR_COMPOSITION_TYPE 2107 1.1 mrg 2108 1.1 mrg This function returns a vector type which can be composed with NETLS pieces, 2109 1.1 mrg whose type is recorded in PTYPE. VTYPE should be a vector type, and has the 2110 1.1 mrg same vector size as the return vector. 
It first checks whether the target supports constructing such a vector from
2111 1.1 mrg    pieces-size vector modes; if the target fails to, it then checks
2112 1.1 mrg    pieces-size scalar modes for the construction.  It returns NULL_TREE
2113 1.1 mrg    if no available composition can be found.
2114 1.1 mrg 
2115 1.1 mrg    For example, for (vtype=V16QI, nelts=4), we can probably get:
2116 1.1 mrg      - V16QI with PTYPE V4QI.
2117 1.1 mrg      - V4SI with PTYPE SI.
2118 1.1 mrg      - NULL_TREE.  */
2119 1.1 mrg 
2120 1.1 mrg static tree
2121 1.1 mrg vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2122 1.1 mrg {
2123 1.1 mrg   gcc_assert (VECTOR_TYPE_P (vtype));
2124 1.1 mrg   gcc_assert (known_gt (nelts, 0U));
2125 1.1 mrg 
2126 1.1 mrg   machine_mode vmode = TYPE_MODE (vtype);
2127 1.1 mrg   if (!VECTOR_MODE_P (vmode))
2128 1.1 mrg     return NULL_TREE;
2129 1.1 mrg 
2130 1.1 mrg   poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2131 1.1 mrg   unsigned int pbsize;
2132 1.1 mrg   if (constant_multiple_p (vbsize, nelts, &pbsize))
2133 1.1 mrg     {
2134 1.1 mrg       /* First check if vec_init optab supports construction from
2135 1.1 mrg 	 vector pieces directly.  */
2136 1.1 mrg       scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2137 1.1 mrg       poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2138 1.1 mrg       machine_mode rmode;
2139 1.1 mrg       if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2140 1.1 mrg 	  && (convert_optab_handler (vec_init_optab, vmode, rmode)
2141 1.1 mrg 	      != CODE_FOR_nothing))
2142 1.1 mrg 	{
2143 1.1 mrg 	  *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2144 1.1 mrg 	  return vtype;
2145 1.1 mrg 	}
2146 1.1 mrg 
2147 1.1 mrg       /* Otherwise check whether an integer type of the same piece size
2148 1.1 mrg 	 exists and whether vec_init supports construction from it.  */
2149 1.1 mrg       if (int_mode_for_size (pbsize, 0).exists (&elmode)
2150 1.1 mrg 	  && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2151 1.1 mrg 	  && (convert_optab_handler (vec_init_optab, rmode, elmode)
2152 1.1 mrg 	      != CODE_FOR_nothing))
2153 1.1 mrg 	{
2154 1.1 mrg 	  *ptype = build_nonstandard_integer_type (pbsize, 1);
2155 1.1 mrg 	  return build_vector_type (*ptype, nelts);
2156 1.1 mrg 	}
2157 1.1 mrg     }
2158 1.1 mrg 
2159 1.1 mrg   return NULL_TREE;
2160 1.1 mrg }
2161 1.1 mrg 
2162 1.1 mrg /* A subroutine of get_load_store_type, with a subset of the same
2163 1.1 mrg    arguments.  Handle the case where STMT_INFO is part of a grouped load
2164 1.1 mrg    or store.
2165 1.1 mrg 
2166 1.1 mrg    For stores, the statements in the group are all consecutive
2167 1.1 mrg    and there is no gap at the end.  For loads, the statements in the
2168 1.1 mrg    group might not be consecutive; there can be gaps between statements
2169 1.1 mrg    as well as at the end.  */
2170 1.1 mrg 
2171 1.1 mrg static bool
2172 1.1 mrg get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2173 1.1 mrg 			   tree vectype, slp_tree slp_node,
2174 1.1 mrg 			   bool masked_p, vec_load_store_type vls_type,
2175 1.1 mrg 			   vect_memory_access_type *memory_access_type,
2176 1.1 mrg 			   poly_int64 *poffset,
2177 1.1 mrg 			   dr_alignment_support *alignment_support_scheme,
2178 1.1 mrg 			   int *misalignment,
2179 1.1 mrg 			   gather_scatter_info *gs_info)
2180 1.1 mrg {
2181 1.1 mrg   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2182 1.1 mrg   class loop *loop = loop_vinfo ?
LOOP_VINFO_LOOP (loop_vinfo) : NULL; 2183 1.1 mrg stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); 2184 1.1 mrg dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info); 2185 1.1 mrg unsigned int group_size = DR_GROUP_SIZE (first_stmt_info); 2186 1.1 mrg bool single_element_p = (stmt_info == first_stmt_info 2187 1.1 mrg && !DR_GROUP_NEXT_ELEMENT (stmt_info)); 2188 1.1 mrg unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info); 2189 1.1 mrg poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); 2190 1.1 mrg 2191 1.1 mrg /* True if the vectorized statements would access beyond the last 2192 1.1 mrg statement in the group. */ 2193 1.1 mrg bool overrun_p = false; 2194 1.1 mrg 2195 1.1 mrg /* True if we can cope with such overrun by peeling for gaps, so that 2196 1.1 mrg there is at least one final scalar iteration after the vector loop. */ 2197 1.1 mrg bool can_overrun_p = (!masked_p 2198 1.1 mrg && vls_type == VLS_LOAD 2199 1.1 mrg && loop_vinfo 2200 1.1 mrg && !loop->inner); 2201 1.1 mrg 2202 1.1 mrg /* There can only be a gap at the end of the group if the stride is 2203 1.1 mrg known at compile time. */ 2204 1.1 mrg gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0); 2205 1.1 mrg 2206 1.1 mrg /* Stores can't yet have gaps. */ 2207 1.1 mrg gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0); 2208 1.1 mrg 2209 1.1 mrg if (slp_node) 2210 1.1 mrg { 2211 1.1 mrg /* For SLP vectorization we directly vectorize a subchain 2212 1.1 mrg without permutation. */ 2213 1.1 mrg if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()) 2214 1.1 mrg first_dr_info 2215 1.1 mrg = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]); 2216 1.1 mrg if (STMT_VINFO_STRIDED_P (first_stmt_info)) 2217 1.1 mrg { 2218 1.1 mrg /* Try to use consecutive accesses of DR_GROUP_SIZE elements, 2219 1.1 mrg separated by the stride, until we have a complete vector. 2220 1.1 mrg Fall back to scalar accesses if that isn't possible. */ 2221 1.1 mrg if (multiple_p (nunits, group_size)) 2222 1.1 mrg *memory_access_type = VMAT_STRIDED_SLP; 2223 1.1 mrg else 2224 1.1 mrg *memory_access_type = VMAT_ELEMENTWISE; 2225 1.1 mrg } 2226 1.1 mrg else 2227 1.1 mrg { 2228 1.1 mrg overrun_p = loop_vinfo && gap != 0; 2229 1.1 mrg if (overrun_p && vls_type != VLS_LOAD) 2230 1.1 mrg { 2231 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2232 1.1 mrg "Grouped store with gaps requires" 2233 1.1 mrg " non-consecutive accesses\n"); 2234 1.1 mrg return false; 2235 1.1 mrg } 2236 1.1 mrg /* An overrun is fine if the trailing elements are smaller 2237 1.1 mrg than the alignment boundary B. Every vector access will 2238 1.1 mrg be a multiple of B and so we are guaranteed to access a 2239 1.1 mrg non-gap element in the same B-sized block. */ 2240 1.1 mrg if (overrun_p 2241 1.1 mrg && gap < (vect_known_alignment_in_bytes (first_dr_info, 2242 1.1 mrg vectype) 2243 1.1 mrg / vect_get_scalar_dr_size (first_dr_info))) 2244 1.1 mrg overrun_p = false; 2245 1.1 mrg 2246 1.1 mrg /* If the gap splits the vector in half and the target 2247 1.1 mrg can do half-vector operations avoid the epilogue peeling 2248 1.1 mrg by simply loading half of the vector only. Usually 2249 1.1 mrg the construction with an upper zero half will be elided. 
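		 For instance, a V8HI load group with GROUP_SIZE == 8
		 and a gap of 4 trailing elements can be done as a
		 single half-vector load, provided the target supports
		 composing V8HI from two V4HI pieces.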
*/ 2250 1.1 mrg dr_alignment_support alss; 2251 1.1 mrg int misalign = dr_misalignment (first_dr_info, vectype); 2252 1.1 mrg tree half_vtype; 2253 1.1 mrg if (overrun_p 2254 1.1 mrg && !masked_p 2255 1.1 mrg && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info, 2256 1.1 mrg vectype, misalign))) 2257 1.1 mrg == dr_aligned 2258 1.1 mrg || alss == dr_unaligned_supported) 2259 1.1 mrg && known_eq (nunits, (group_size - gap) * 2) 2260 1.1 mrg && known_eq (nunits, group_size) 2261 1.1 mrg && (vector_vector_composition_type (vectype, 2, &half_vtype) 2262 1.1 mrg != NULL_TREE)) 2263 1.1 mrg overrun_p = false; 2264 1.1 mrg 2265 1.1 mrg if (overrun_p && !can_overrun_p) 2266 1.1 mrg { 2267 1.1 mrg if (dump_enabled_p ()) 2268 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2269 1.1 mrg "Peeling for outer loop is not supported\n"); 2270 1.1 mrg return false; 2271 1.1 mrg } 2272 1.1 mrg int cmp = compare_step_with_zero (vinfo, stmt_info); 2273 1.1 mrg if (cmp < 0) 2274 1.1 mrg { 2275 1.1 mrg if (single_element_p) 2276 1.1 mrg /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is 2277 1.1 mrg only correct for single element "interleaving" SLP. */ 2278 1.1 mrg *memory_access_type = get_negative_load_store_type 2279 1.1 mrg (vinfo, stmt_info, vectype, vls_type, 1, poffset); 2280 1.1 mrg else 2281 1.1 mrg { 2282 1.1 mrg /* Try to use consecutive accesses of DR_GROUP_SIZE elements, 2283 1.1 mrg separated by the stride, until we have a complete vector. 2284 1.1 mrg Fall back to scalar accesses if that isn't possible. */ 2285 1.1 mrg if (multiple_p (nunits, group_size)) 2286 1.1 mrg *memory_access_type = VMAT_STRIDED_SLP; 2287 1.1 mrg else 2288 1.1 mrg *memory_access_type = VMAT_ELEMENTWISE; 2289 1.1 mrg } 2290 1.1 mrg } 2291 1.1 mrg else 2292 1.1 mrg { 2293 1.1 mrg gcc_assert (!loop_vinfo || cmp > 0); 2294 1.1 mrg *memory_access_type = VMAT_CONTIGUOUS; 2295 1.1 mrg } 2296 1.1 mrg 2297 1.1 mrg /* When we have a contiguous access across loop iterations 2298 1.1 mrg but the access in the loop doesn't cover the full vector 2299 1.1 mrg we can end up with no gap recorded but still excess 2300 1.1 mrg elements accessed, see PR103116. Make sure we peel for 2301 1.1 mrg gaps if necessary and sufficient and give up if not. */ 2302 1.1 mrg if (loop_vinfo 2303 1.1 mrg && *memory_access_type == VMAT_CONTIGUOUS 2304 1.1 mrg && SLP_TREE_LOAD_PERMUTATION (slp_node).exists () 2305 1.1 mrg && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo), 2306 1.1 mrg nunits)) 2307 1.1 mrg { 2308 1.1 mrg unsigned HOST_WIDE_INT cnunits, cvf; 2309 1.1 mrg if (!can_overrun_p 2310 1.1 mrg || !nunits.is_constant (&cnunits) 2311 1.1 mrg || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf) 2312 1.1 mrg /* Peeling for gaps assumes that a single scalar iteration 2313 1.1 mrg is enough to make sure the last vector iteration doesn't 2314 1.1 mrg access excess elements. 2315 1.1 mrg ??? Enhancements include peeling multiple iterations 2316 1.1 mrg or using masked loads with a static mask. 
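		 For example, with GROUP_SIZE == 3, a vectorization
		 factor of 4 and CNUNITS == 8, the test below computes
		 (3 * 4) % 8 + 3 == 7 < 8, so one peeled scalar
		 iteration cannot cover the excess access and we give up.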
*/ 2317 1.1 mrg 		  || (group_size * cvf) % cnunits + group_size < cnunits)
2318 1.1 mrg 		{
2319 1.1 mrg 		  if (dump_enabled_p ())
2320 1.1 mrg 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2321 1.1 mrg 				     "peeling for gaps insufficient for "
2322 1.1 mrg 				     "access\n");
2323 1.1 mrg 		  return false;
2324 1.1 mrg 		}
2325 1.1 mrg 	      overrun_p = true;
2326 1.1 mrg 	    }
2327 1.1 mrg 	}
2328 1.1 mrg     }
2329 1.1 mrg   else
2330 1.1 mrg     {
2331 1.1 mrg       /* We can always handle this case using elementwise accesses,
2332 1.1 mrg 	 but see if something more efficient is available.  */
2333 1.1 mrg       *memory_access_type = VMAT_ELEMENTWISE;
2334 1.1 mrg 
2335 1.1 mrg       /* If there is a gap at the end of the group then these optimizations
2336 1.1 mrg 	 would access excess elements in the last iteration.  */
2337 1.1 mrg       bool would_overrun_p = (gap != 0);
2338 1.1 mrg       /* An overrun is fine if the trailing elements are smaller than the
2339 1.1 mrg 	 alignment boundary B.  Every vector access will be a multiple of B
2340 1.1 mrg 	 and so we are guaranteed to access a non-gap element in the
2341 1.1 mrg 	 same B-sized block.  */
2342 1.1 mrg       if (would_overrun_p
2343 1.1 mrg 	  && !masked_p
2344 1.1 mrg 	  && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2345 1.1 mrg 		    / vect_get_scalar_dr_size (first_dr_info)))
2346 1.1 mrg 	would_overrun_p = false;
2347 1.1 mrg 
2348 1.1 mrg       if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2349 1.1 mrg 	  && (can_overrun_p || !would_overrun_p)
2350 1.1 mrg 	  && compare_step_with_zero (vinfo, stmt_info) > 0)
2351 1.1 mrg 	{
2352 1.1 mrg 	  /* First cope with the degenerate case of a single-element
2353 1.1 mrg 	     vector.  */
2354 1.1 mrg 	  if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2355 1.1 mrg 	    ;
2356 1.1 mrg 
2357 1.1 mrg 	  /* Otherwise try using LOAD/STORE_LANES.  */
2358 1.1 mrg 	  else if (vls_type == VLS_LOAD
2359 1.1 mrg 		   ? vect_load_lanes_supported (vectype, group_size, masked_p)
2360 1.1 mrg 		   : vect_store_lanes_supported (vectype, group_size,
2361 1.1 mrg 						 masked_p))
2362 1.1 mrg 	    {
2363 1.1 mrg 	      *memory_access_type = VMAT_LOAD_STORE_LANES;
2364 1.1 mrg 	      overrun_p = would_overrun_p;
2365 1.1 mrg 	    }
2366 1.1 mrg 
2367 1.1 mrg 	  /* If that fails, try using permuting loads.  */
2368 1.1 mrg 	  else if (vls_type == VLS_LOAD
2369 1.1 mrg 		   ? vect_grouped_load_supported (vectype, single_element_p,
2370 1.1 mrg 						  group_size)
2371 1.1 mrg 		   : vect_grouped_store_supported (vectype, group_size))
2372 1.1 mrg 	    {
2373 1.1 mrg 	      *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2374 1.1 mrg 	      overrun_p = would_overrun_p;
2375 1.1 mrg 	    }
2376 1.1 mrg 	}
2377 1.1 mrg 
2378 1.1 mrg       /* As a last resort, try using a gather load or scatter store.
2379 1.1 mrg 
2380 1.1 mrg 	 ??? Although the code can handle all group sizes correctly,
2381 1.1 mrg 	 it probably isn't a win to use separate strided accesses based
2382 1.1 mrg 	 on nearby locations.  Or, even if it's a win over scalar code,
2383 1.1 mrg 	 it might not be a win over vectorizing at a lower VF, if that
2384 1.1 mrg 	 allows us to use contiguous accesses.
*/ 2385 1.1 mrg if (*memory_access_type == VMAT_ELEMENTWISE 2386 1.1 mrg && single_element_p 2387 1.1 mrg && loop_vinfo 2388 1.1 mrg && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo, 2389 1.1 mrg masked_p, gs_info)) 2390 1.1 mrg *memory_access_type = VMAT_GATHER_SCATTER; 2391 1.1 mrg } 2392 1.1 mrg 2393 1.1 mrg if (*memory_access_type == VMAT_GATHER_SCATTER 2394 1.1 mrg || *memory_access_type == VMAT_ELEMENTWISE) 2395 1.1 mrg { 2396 1.1 mrg *alignment_support_scheme = dr_unaligned_supported; 2397 1.1 mrg *misalignment = DR_MISALIGNMENT_UNKNOWN; 2398 1.1 mrg } 2399 1.1 mrg else 2400 1.1 mrg { 2401 1.1 mrg *misalignment = dr_misalignment (first_dr_info, vectype, *poffset); 2402 1.1 mrg *alignment_support_scheme 2403 1.1 mrg = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype, 2404 1.1 mrg *misalignment); 2405 1.1 mrg } 2406 1.1 mrg 2407 1.1 mrg if (vls_type != VLS_LOAD && first_stmt_info == stmt_info) 2408 1.1 mrg { 2409 1.1 mrg /* STMT is the leader of the group. Check the operands of all the 2410 1.1 mrg stmts of the group. */ 2411 1.1 mrg stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info); 2412 1.1 mrg while (next_stmt_info) 2413 1.1 mrg { 2414 1.1 mrg tree op = vect_get_store_rhs (next_stmt_info); 2415 1.1 mrg enum vect_def_type dt; 2416 1.1 mrg if (!vect_is_simple_use (op, vinfo, &dt)) 2417 1.1 mrg { 2418 1.1 mrg if (dump_enabled_p ()) 2419 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2420 1.1 mrg "use not simple.\n"); 2421 1.1 mrg return false; 2422 1.1 mrg } 2423 1.1 mrg next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info); 2424 1.1 mrg } 2425 1.1 mrg } 2426 1.1 mrg 2427 1.1 mrg if (overrun_p) 2428 1.1 mrg { 2429 1.1 mrg gcc_assert (can_overrun_p); 2430 1.1 mrg if (dump_enabled_p ()) 2431 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2432 1.1 mrg "Data access with gaps requires scalar " 2433 1.1 mrg "epilogue loop\n"); 2434 1.1 mrg LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true; 2435 1.1 mrg } 2436 1.1 mrg 2437 1.1 mrg return true; 2438 1.1 mrg } 2439 1.1 mrg 2440 1.1 mrg /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true 2441 1.1 mrg if there is a memory access type that the vectorized form can use, 2442 1.1 mrg storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers 2443 1.1 mrg or scatters, fill in GS_INFO accordingly. In addition 2444 1.1 mrg *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if 2445 1.1 mrg the target does not support the alignment scheme. *MISALIGNMENT 2446 1.1 mrg is set according to the alignment of the access (including 2447 1.1 mrg DR_MISALIGNMENT_UNKNOWN when it is unknown). 2448 1.1 mrg 2449 1.1 mrg SLP says whether we're performing SLP rather than loop vectorization. 2450 1.1 mrg MASKED_P is true if the statement is conditional on a vectorized mask. 2451 1.1 mrg VECTYPE is the vector type that the vectorized statements will use. 2452 1.1 mrg NCOPIES is the number of vector statements that will be needed. 
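   (For instance, a V4SI statement in a loop vectorized with VF == 8
   needs NCOPIES == 2.)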
*/ 2453 1.1 mrg 2454 1.1 mrg static bool 2455 1.1 mrg get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, 2456 1.1 mrg tree vectype, slp_tree slp_node, 2457 1.1 mrg bool masked_p, vec_load_store_type vls_type, 2458 1.1 mrg unsigned int ncopies, 2459 1.1 mrg vect_memory_access_type *memory_access_type, 2460 1.1 mrg poly_int64 *poffset, 2461 1.1 mrg dr_alignment_support *alignment_support_scheme, 2462 1.1 mrg int *misalignment, 2463 1.1 mrg gather_scatter_info *gs_info) 2464 1.1 mrg { 2465 1.1 mrg loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); 2466 1.1 mrg poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); 2467 1.1 mrg *misalignment = DR_MISALIGNMENT_UNKNOWN; 2468 1.1 mrg *poffset = 0; 2469 1.1 mrg if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) 2470 1.1 mrg { 2471 1.1 mrg *memory_access_type = VMAT_GATHER_SCATTER; 2472 1.1 mrg if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)) 2473 1.1 mrg gcc_unreachable (); 2474 1.1 mrg else if (!vect_is_simple_use (gs_info->offset, vinfo, 2475 1.1 mrg &gs_info->offset_dt, 2476 1.1 mrg &gs_info->offset_vectype)) 2477 1.1 mrg { 2478 1.1 mrg if (dump_enabled_p ()) 2479 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2480 1.1 mrg "%s index use not simple.\n", 2481 1.1 mrg vls_type == VLS_LOAD ? "gather" : "scatter"); 2482 1.1 mrg return false; 2483 1.1 mrg } 2484 1.1 mrg else if (gs_info->ifn == IFN_LAST && !gs_info->decl) 2485 1.1 mrg { 2486 1.1 mrg if (vls_type != VLS_LOAD) 2487 1.1 mrg { 2488 1.1 mrg if (dump_enabled_p ()) 2489 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2490 1.1 mrg "unsupported emulated scatter.\n"); 2491 1.1 mrg return false; 2492 1.1 mrg } 2493 1.1 mrg else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant () 2494 1.1 mrg || !TYPE_VECTOR_SUBPARTS 2495 1.1 mrg (gs_info->offset_vectype).is_constant () 2496 1.1 mrg || VECTOR_BOOLEAN_TYPE_P (gs_info->offset_vectype) 2497 1.1 mrg || !constant_multiple_p (TYPE_VECTOR_SUBPARTS 2498 1.1 mrg (gs_info->offset_vectype), 2499 1.1 mrg TYPE_VECTOR_SUBPARTS (vectype))) 2500 1.1 mrg { 2501 1.1 mrg if (dump_enabled_p ()) 2502 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2503 1.1 mrg "unsupported vector types for emulated " 2504 1.1 mrg "gather.\n"); 2505 1.1 mrg return false; 2506 1.1 mrg } 2507 1.1 mrg } 2508 1.1 mrg /* Gather-scatter accesses perform only component accesses, alignment 2509 1.1 mrg is irrelevant for them. */ 2510 1.1 mrg *alignment_support_scheme = dr_unaligned_supported; 2511 1.1 mrg } 2512 1.1 mrg else if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) 2513 1.1 mrg { 2514 1.1 mrg if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node, 2515 1.1 mrg masked_p, 2516 1.1 mrg vls_type, memory_access_type, poffset, 2517 1.1 mrg alignment_support_scheme, 2518 1.1 mrg misalignment, gs_info)) 2519 1.1 mrg return false; 2520 1.1 mrg } 2521 1.1 mrg else if (STMT_VINFO_STRIDED_P (stmt_info)) 2522 1.1 mrg { 2523 1.1 mrg gcc_assert (!slp_node); 2524 1.1 mrg if (loop_vinfo 2525 1.1 mrg && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo, 2526 1.1 mrg masked_p, gs_info)) 2527 1.1 mrg *memory_access_type = VMAT_GATHER_SCATTER; 2528 1.1 mrg else 2529 1.1 mrg *memory_access_type = VMAT_ELEMENTWISE; 2530 1.1 mrg /* Alignment is irrelevant here. 
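	 (Strided accesses are implemented as separate element or gather
	 accesses, so no whole-vector alignment requirement applies.)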
*/ 2531 1.1 mrg *alignment_support_scheme = dr_unaligned_supported; 2532 1.1 mrg } 2533 1.1 mrg else 2534 1.1 mrg { 2535 1.1 mrg int cmp = compare_step_with_zero (vinfo, stmt_info); 2536 1.1 mrg if (cmp == 0) 2537 1.1 mrg { 2538 1.1 mrg gcc_assert (vls_type == VLS_LOAD); 2539 1.1 mrg *memory_access_type = VMAT_INVARIANT; 2540 1.1 mrg /* Invariant accesses perform only component accesses, alignment 2541 1.1 mrg is irrelevant for them. */ 2542 1.1 mrg *alignment_support_scheme = dr_unaligned_supported; 2543 1.1 mrg } 2544 1.1 mrg else 2545 1.1 mrg { 2546 1.1 mrg if (cmp < 0) 2547 1.1 mrg *memory_access_type = get_negative_load_store_type 2548 1.1 mrg (vinfo, stmt_info, vectype, vls_type, ncopies, poffset); 2549 1.1 mrg else 2550 1.1 mrg *memory_access_type = VMAT_CONTIGUOUS; 2551 1.1 mrg *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info), 2552 1.1 mrg vectype, *poffset); 2553 1.1 mrg *alignment_support_scheme 2554 1.1 mrg = vect_supportable_dr_alignment (vinfo, 2555 1.1 mrg STMT_VINFO_DR_INFO (stmt_info), 2556 1.1 mrg vectype, *misalignment); 2557 1.1 mrg } 2558 1.1 mrg } 2559 1.1 mrg 2560 1.1 mrg if ((*memory_access_type == VMAT_ELEMENTWISE 2561 1.1 mrg || *memory_access_type == VMAT_STRIDED_SLP) 2562 1.1 mrg && !nunits.is_constant ()) 2563 1.1 mrg { 2564 1.1 mrg if (dump_enabled_p ()) 2565 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2566 1.1 mrg "Not using elementwise accesses due to variable " 2567 1.1 mrg "vectorization factor.\n"); 2568 1.1 mrg return false; 2569 1.1 mrg } 2570 1.1 mrg 2571 1.1 mrg if (*alignment_support_scheme == dr_unaligned_unsupported) 2572 1.1 mrg { 2573 1.1 mrg if (dump_enabled_p ()) 2574 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2575 1.1 mrg "unsupported unaligned access\n"); 2576 1.1 mrg return false; 2577 1.1 mrg } 2578 1.1 mrg 2579 1.1 mrg /* FIXME: At the moment the cost model seems to underestimate the 2580 1.1 mrg cost of using elementwise accesses. This check preserves the 2581 1.1 mrg traditional behavior until that can be fixed. */ 2582 1.1 mrg stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); 2583 1.1 mrg if (!first_stmt_info) 2584 1.1 mrg first_stmt_info = stmt_info; 2585 1.1 mrg if (*memory_access_type == VMAT_ELEMENTWISE 2586 1.1 mrg && !STMT_VINFO_STRIDED_P (first_stmt_info) 2587 1.1 mrg && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info) 2588 1.1 mrg && !DR_GROUP_NEXT_ELEMENT (stmt_info) 2589 1.1 mrg && !pow2p_hwi (DR_GROUP_SIZE (stmt_info)))) 2590 1.1 mrg { 2591 1.1 mrg if (dump_enabled_p ()) 2592 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2593 1.1 mrg "not falling back to elementwise accesses\n"); 2594 1.1 mrg return false; 2595 1.1 mrg } 2596 1.1 mrg return true; 2597 1.1 mrg } 2598 1.1 mrg 2599 1.1 mrg /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing 2600 1.1 mrg conditional operation STMT_INFO. When returning true, store the mask 2601 1.1 mrg in *MASK, the type of its definition in *MASK_DT_OUT, the type of the 2602 1.1 mrg vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding 2603 1.1 mrg to the mask in *MASK_NODE if MASK_NODE is not NULL. 
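   The mask must have the same number of lanes as the data vector: a
   V4SI data vector, for example, needs a four-lane boolean mask.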
*/ 2604 1.1 mrg 2605 1.1 mrg static bool 2606 1.1 mrg vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info, 2607 1.1 mrg slp_tree slp_node, unsigned mask_index, 2608 1.1 mrg tree *mask, slp_tree *mask_node, 2609 1.1 mrg vect_def_type *mask_dt_out, tree *mask_vectype_out) 2610 1.1 mrg { 2611 1.1 mrg enum vect_def_type mask_dt; 2612 1.1 mrg tree mask_vectype; 2613 1.1 mrg slp_tree mask_node_1; 2614 1.1 mrg if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index, 2615 1.1 mrg mask, &mask_node_1, &mask_dt, &mask_vectype)) 2616 1.1 mrg { 2617 1.1 mrg if (dump_enabled_p ()) 2618 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2619 1.1 mrg "mask use not simple.\n"); 2620 1.1 mrg return false; 2621 1.1 mrg } 2622 1.1 mrg 2623 1.1 mrg if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask))) 2624 1.1 mrg { 2625 1.1 mrg if (dump_enabled_p ()) 2626 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2627 1.1 mrg "mask argument is not a boolean.\n"); 2628 1.1 mrg return false; 2629 1.1 mrg } 2630 1.1 mrg 2631 1.1 mrg /* If the caller is not prepared for adjusting an external/constant 2632 1.1 mrg SLP mask vector type fail. */ 2633 1.1 mrg if (slp_node 2634 1.1 mrg && !mask_node 2635 1.1 mrg && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def) 2636 1.1 mrg { 2637 1.1 mrg if (dump_enabled_p ()) 2638 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2639 1.1 mrg "SLP mask argument is not vectorized.\n"); 2640 1.1 mrg return false; 2641 1.1 mrg } 2642 1.1 mrg 2643 1.1 mrg tree vectype = STMT_VINFO_VECTYPE (stmt_info); 2644 1.1 mrg if (!mask_vectype) 2645 1.1 mrg mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype)); 2646 1.1 mrg 2647 1.1 mrg if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)) 2648 1.1 mrg { 2649 1.1 mrg if (dump_enabled_p ()) 2650 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2651 1.1 mrg "could not find an appropriate vector mask type.\n"); 2652 1.1 mrg return false; 2653 1.1 mrg } 2654 1.1 mrg 2655 1.1 mrg if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype), 2656 1.1 mrg TYPE_VECTOR_SUBPARTS (vectype))) 2657 1.1 mrg { 2658 1.1 mrg if (dump_enabled_p ()) 2659 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2660 1.1 mrg "vector mask type %T" 2661 1.1 mrg " does not match vector data type %T.\n", 2662 1.1 mrg mask_vectype, vectype); 2663 1.1 mrg 2664 1.1 mrg return false; 2665 1.1 mrg } 2666 1.1 mrg 2667 1.1 mrg *mask_dt_out = mask_dt; 2668 1.1 mrg *mask_vectype_out = mask_vectype; 2669 1.1 mrg if (mask_node) 2670 1.1 mrg *mask_node = mask_node_1; 2671 1.1 mrg return true; 2672 1.1 mrg } 2673 1.1 mrg 2674 1.1 mrg /* Return true if stored value RHS is suitable for vectorizing store 2675 1.1 mrg statement STMT_INFO. When returning true, store the type of the 2676 1.1 mrg definition in *RHS_DT_OUT, the type of the vectorized store value in 2677 1.1 mrg *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */ 2678 1.1 mrg 2679 1.1 mrg static bool 2680 1.1 mrg vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info, 2681 1.1 mrg slp_tree slp_node, tree rhs, 2682 1.1 mrg vect_def_type *rhs_dt_out, tree *rhs_vectype_out, 2683 1.1 mrg vec_load_store_type *vls_type_out) 2684 1.1 mrg { 2685 1.1 mrg /* In the case this is a store from a constant make sure 2686 1.1 mrg native_encode_expr can handle it. 
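   (The probe below passes a null buffer and a size limit of 64 bytes;
   a return value of 0 means the constant has no byte encoding we can
   use.)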
*/ 2687 1.1 mrg if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0) 2688 1.1 mrg { 2689 1.1 mrg if (dump_enabled_p ()) 2690 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2691 1.1 mrg "cannot encode constant as a byte sequence.\n"); 2692 1.1 mrg return false; 2693 1.1 mrg } 2694 1.1 mrg 2695 1.1 mrg unsigned op_no = 0; 2696 1.1 mrg if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt)) 2697 1.1 mrg { 2698 1.1 mrg if (gimple_call_internal_p (call) 2699 1.1 mrg && internal_store_fn_p (gimple_call_internal_fn (call))) 2700 1.1 mrg op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call)); 2701 1.1 mrg } 2702 1.1 mrg 2703 1.1 mrg enum vect_def_type rhs_dt; 2704 1.1 mrg tree rhs_vectype; 2705 1.1 mrg slp_tree slp_op; 2706 1.1 mrg if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no, 2707 1.1 mrg &rhs, &slp_op, &rhs_dt, &rhs_vectype)) 2708 1.1 mrg { 2709 1.1 mrg if (dump_enabled_p ()) 2710 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2711 1.1 mrg "use not simple.\n"); 2712 1.1 mrg return false; 2713 1.1 mrg } 2714 1.1 mrg 2715 1.1 mrg tree vectype = STMT_VINFO_VECTYPE (stmt_info); 2716 1.1 mrg if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype)) 2717 1.1 mrg { 2718 1.1 mrg if (dump_enabled_p ()) 2719 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2720 1.1 mrg "incompatible vector types.\n"); 2721 1.1 mrg return false; 2722 1.1 mrg } 2723 1.1 mrg 2724 1.1 mrg *rhs_dt_out = rhs_dt; 2725 1.1 mrg *rhs_vectype_out = rhs_vectype; 2726 1.1 mrg if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def) 2727 1.1 mrg *vls_type_out = VLS_STORE_INVARIANT; 2728 1.1 mrg else 2729 1.1 mrg *vls_type_out = VLS_STORE; 2730 1.1 mrg return true; 2731 1.1 mrg } 2732 1.1 mrg 2733 1.1 mrg /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO. 2734 1.1 mrg Note that we support masks with floating-point type, in which case the 2735 1.1 mrg floats are interpreted as a bitmask. */ 2736 1.1 mrg 2737 1.1 mrg static tree 2738 1.1 mrg vect_build_all_ones_mask (vec_info *vinfo, 2739 1.1 mrg stmt_vec_info stmt_info, tree masktype) 2740 1.1 mrg { 2741 1.1 mrg if (TREE_CODE (masktype) == INTEGER_TYPE) 2742 1.1 mrg return build_int_cst (masktype, -1); 2743 1.1 mrg else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE) 2744 1.1 mrg { 2745 1.1 mrg tree mask = build_int_cst (TREE_TYPE (masktype), -1); 2746 1.1 mrg mask = build_vector_from_val (masktype, mask); 2747 1.1 mrg return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL); 2748 1.1 mrg } 2749 1.1 mrg else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype))) 2750 1.1 mrg { 2751 1.1 mrg REAL_VALUE_TYPE r; 2752 1.1 mrg long tmp[6]; 2753 1.1 mrg for (int j = 0; j < 6; ++j) 2754 1.1 mrg tmp[j] = -1; 2755 1.1 mrg real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype))); 2756 1.1 mrg tree mask = build_real (TREE_TYPE (masktype), r); 2757 1.1 mrg mask = build_vector_from_val (masktype, mask); 2758 1.1 mrg return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL); 2759 1.1 mrg } 2760 1.1 mrg gcc_unreachable (); 2761 1.1 mrg } 2762 1.1 mrg 2763 1.1 mrg /* Build an all-zero merge value of type VECTYPE while vectorizing 2764 1.1 mrg STMT_INFO as a gather load. 
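   The merge value provides the lanes that the gather mask switches
   off, e.g. { 0.0, 0.0, 0.0, 0.0 } for a V4SF gather.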
*/ 2765 1.1 mrg 2766 1.1 mrg static tree 2767 1.1 mrg vect_build_zero_merge_argument (vec_info *vinfo, 2768 1.1 mrg stmt_vec_info stmt_info, tree vectype) 2769 1.1 mrg { 2770 1.1 mrg tree merge; 2771 1.1 mrg if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE) 2772 1.1 mrg merge = build_int_cst (TREE_TYPE (vectype), 0); 2773 1.1 mrg else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype))) 2774 1.1 mrg { 2775 1.1 mrg REAL_VALUE_TYPE r; 2776 1.1 mrg long tmp[6]; 2777 1.1 mrg for (int j = 0; j < 6; ++j) 2778 1.1 mrg tmp[j] = 0; 2779 1.1 mrg real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype))); 2780 1.1 mrg merge = build_real (TREE_TYPE (vectype), r); 2781 1.1 mrg } 2782 1.1 mrg else 2783 1.1 mrg gcc_unreachable (); 2784 1.1 mrg merge = build_vector_from_val (vectype, merge); 2785 1.1 mrg return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL); 2786 1.1 mrg } 2787 1.1 mrg 2788 1.1 mrg /* Build a gather load call while vectorizing STMT_INFO. Insert new 2789 1.1 mrg instructions before GSI and add them to VEC_STMT. GS_INFO describes 2790 1.1 mrg the gather load operation. If the load is conditional, MASK is the 2791 1.1 mrg unvectorized condition and MASK_DT is its definition type, otherwise 2792 1.1 mrg MASK is null. */ 2793 1.1 mrg 2794 1.1 mrg static void 2795 1.1 mrg vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info, 2796 1.1 mrg gimple_stmt_iterator *gsi, 2797 1.1 mrg gimple **vec_stmt, 2798 1.1 mrg gather_scatter_info *gs_info, 2799 1.1 mrg tree mask) 2800 1.1 mrg { 2801 1.1 mrg loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); 2802 1.1 mrg class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 2803 1.1 mrg tree vectype = STMT_VINFO_VECTYPE (stmt_info); 2804 1.1 mrg poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); 2805 1.1 mrg int ncopies = vect_get_num_copies (loop_vinfo, vectype); 2806 1.1 mrg edge pe = loop_preheader_edge (loop); 2807 1.1 mrg enum { NARROW, NONE, WIDEN } modifier; 2808 1.1 mrg poly_uint64 gather_off_nunits 2809 1.1 mrg = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype); 2810 1.1 mrg 2811 1.1 mrg tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl)); 2812 1.1 mrg tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl)); 2813 1.1 mrg tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 2814 1.1 mrg tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 2815 1.1 mrg tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 2816 1.1 mrg tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 2817 1.1 mrg tree scaletype = TREE_VALUE (arglist); 2818 1.1 mrg tree real_masktype = masktype; 2819 1.1 mrg gcc_checking_assert (types_compatible_p (srctype, rettype) 2820 1.1 mrg && (!mask 2821 1.1 mrg || TREE_CODE (masktype) == INTEGER_TYPE 2822 1.1 mrg || types_compatible_p (srctype, masktype))); 2823 1.1 mrg if (mask) 2824 1.1 mrg masktype = truth_type_for (srctype); 2825 1.1 mrg 2826 1.1 mrg tree mask_halftype = masktype; 2827 1.1 mrg tree perm_mask = NULL_TREE; 2828 1.1 mrg tree mask_perm_mask = NULL_TREE; 2829 1.1 mrg if (known_eq (nunits, gather_off_nunits)) 2830 1.1 mrg modifier = NONE; 2831 1.1 mrg else if (known_eq (nunits * 2, gather_off_nunits)) 2832 1.1 mrg { 2833 1.1 mrg modifier = WIDEN; 2834 1.1 mrg 2835 1.1 mrg /* Currently widening gathers and scatters are only supported for 2836 1.1 mrg fixed-length vectors. 
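As an illustration, suppose the data vector has 4 lanes and the offset vector 8: COUNT below is then 8 and the selector becomes { 4, 5, 6, 7, 4, 5, 6, 7 }, so odd-numbered copies of the statement consume the high half of each offset vector.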
*/ 2837 1.1 mrg int count = gather_off_nunits.to_constant (); 2838 1.1 mrg vec_perm_builder sel (count, count, 1); 2839 1.1 mrg for (int i = 0; i < count; ++i) 2840 1.1 mrg sel.quick_push (i | (count / 2)); 2841 1.1 mrg 2842 1.1 mrg vec_perm_indices indices (sel, 1, count); 2843 1.1 mrg perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype, 2844 1.1 mrg indices); 2845 1.1 mrg } 2846 1.1 mrg else if (known_eq (nunits, gather_off_nunits * 2)) 2847 1.1 mrg { 2848 1.1 mrg modifier = NARROW; 2849 1.1 mrg 2850 1.1 mrg /* Currently narrowing gathers and scatters are only supported for 2851 1.1 mrg fixed-length vectors. */ 2852 1.1 mrg int count = nunits.to_constant (); 2853 1.1 mrg vec_perm_builder sel (count, count, 1); 2854 1.1 mrg sel.quick_grow (count); 2855 1.1 mrg for (int i = 0; i < count; ++i) 2856 1.1 mrg sel[i] = i < count / 2 ? i : i + count / 2; 2857 1.1 mrg vec_perm_indices indices (sel, 2, count); 2858 1.1 mrg perm_mask = vect_gen_perm_mask_checked (vectype, indices); 2859 1.1 mrg 2860 1.1 mrg ncopies *= 2; 2861 1.1 mrg 2862 1.1 mrg if (mask && VECTOR_TYPE_P (real_masktype)) 2863 1.1 mrg { 2864 1.1 mrg for (int i = 0; i < count; ++i) 2865 1.1 mrg sel[i] = i | (count / 2); 2866 1.1 mrg indices.new_vector (sel, 2, count); 2867 1.1 mrg mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices); 2868 1.1 mrg } 2869 1.1 mrg else if (mask) 2870 1.1 mrg mask_halftype = truth_type_for (gs_info->offset_vectype); 2871 1.1 mrg } 2872 1.1 mrg else 2873 1.1 mrg gcc_unreachable (); 2874 1.1 mrg 2875 1.1 mrg tree scalar_dest = gimple_get_lhs (stmt_info->stmt); 2876 1.1 mrg tree vec_dest = vect_create_destination_var (scalar_dest, vectype); 2877 1.1 mrg 2878 1.1 mrg tree ptr = fold_convert (ptrtype, gs_info->base); 2879 1.1 mrg if (!is_gimple_min_invariant (ptr)) 2880 1.1 mrg { 2881 1.1 mrg gimple_seq seq; 2882 1.1 mrg ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE); 2883 1.1 mrg basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq); 2884 1.1 mrg gcc_assert (!new_bb); 2885 1.1 mrg } 2886 1.1 mrg 2887 1.1 mrg tree scale = build_int_cst (scaletype, gs_info->scale); 2888 1.1 mrg 2889 1.1 mrg tree vec_oprnd0 = NULL_TREE; 2890 1.1 mrg tree vec_mask = NULL_TREE; 2891 1.1 mrg tree src_op = NULL_TREE; 2892 1.1 mrg tree mask_op = NULL_TREE; 2893 1.1 mrg tree prev_res = NULL_TREE; 2894 1.1 mrg 2895 1.1 mrg if (!mask) 2896 1.1 mrg { 2897 1.1 mrg src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype); 2898 1.1 mrg mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype); 2899 1.1 mrg } 2900 1.1 mrg 2901 1.1 mrg auto_vec<tree> vec_oprnds0; 2902 1.1 mrg auto_vec<tree> vec_masks; 2903 1.1 mrg vect_get_vec_defs_for_operand (vinfo, stmt_info, 2904 1.1 mrg modifier == WIDEN ? ncopies / 2 : ncopies, 2905 1.1 mrg gs_info->offset, &vec_oprnds0); 2906 1.1 mrg if (mask) 2907 1.1 mrg vect_get_vec_defs_for_operand (vinfo, stmt_info, 2908 1.1 mrg modifier == NARROW ? ncopies / 2 : ncopies, 2909 1.1 mrg mask, &vec_masks, masktype); 2910 1.1 mrg for (int j = 0; j < ncopies; ++j) 2911 1.1 mrg { 2912 1.1 mrg tree op, var; 2913 1.1 mrg if (modifier == WIDEN && (j & 1)) 2914 1.1 mrg op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0, 2915 1.1 mrg perm_mask, stmt_info, gsi); 2916 1.1 mrg else 2917 1.1 mrg op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? 
j / 2 : j]; 2918 1.1 mrg 2919 1.1 mrg if (!useless_type_conversion_p (idxtype, TREE_TYPE (op))) 2920 1.1 mrg { 2921 1.1 mrg gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)), 2922 1.1 mrg TYPE_VECTOR_SUBPARTS (idxtype))); 2923 1.1 mrg var = vect_get_new_ssa_name (idxtype, vect_simple_var); 2924 1.1 mrg op = build1 (VIEW_CONVERT_EXPR, idxtype, op); 2925 1.1 mrg gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op); 2926 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 2927 1.1 mrg op = var; 2928 1.1 mrg } 2929 1.1 mrg 2930 1.1 mrg if (mask) 2931 1.1 mrg { 2932 1.1 mrg if (mask_perm_mask && (j & 1)) 2933 1.1 mrg mask_op = permute_vec_elements (vinfo, mask_op, mask_op, 2934 1.1 mrg mask_perm_mask, stmt_info, gsi); 2935 1.1 mrg else 2936 1.1 mrg { 2937 1.1 mrg if (modifier == NARROW) 2938 1.1 mrg { 2939 1.1 mrg if ((j & 1) == 0) 2940 1.1 mrg vec_mask = vec_masks[j / 2]; 2941 1.1 mrg } 2942 1.1 mrg else 2943 1.1 mrg vec_mask = vec_masks[j]; 2944 1.1 mrg 2945 1.1 mrg mask_op = vec_mask; 2946 1.1 mrg if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask))) 2947 1.1 mrg { 2948 1.1 mrg poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)); 2949 1.1 mrg poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype); 2950 1.1 mrg gcc_assert (known_eq (sub1, sub2)); 2951 1.1 mrg var = vect_get_new_ssa_name (masktype, vect_simple_var); 2952 1.1 mrg mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op); 2953 1.1 mrg gassign *new_stmt 2954 1.1 mrg = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op); 2955 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 2956 1.1 mrg mask_op = var; 2957 1.1 mrg } 2958 1.1 mrg } 2959 1.1 mrg if (modifier == NARROW && !VECTOR_TYPE_P (real_masktype)) 2960 1.1 mrg { 2961 1.1 mrg var = vect_get_new_ssa_name (mask_halftype, vect_simple_var); 2962 1.1 mrg gassign *new_stmt 2963 1.1 mrg = gimple_build_assign (var, (j & 1) ? 
VEC_UNPACK_HI_EXPR 2964 1.1 mrg : VEC_UNPACK_LO_EXPR, 2965 1.1 mrg mask_op); 2966 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 2967 1.1 mrg mask_op = var; 2968 1.1 mrg } 2969 1.1 mrg src_op = mask_op; 2970 1.1 mrg } 2971 1.1 mrg 2972 1.1 mrg tree mask_arg = mask_op; 2973 1.1 mrg if (masktype != real_masktype) 2974 1.1 mrg { 2975 1.1 mrg tree utype, optype = TREE_TYPE (mask_op); 2976 1.1 mrg if (VECTOR_TYPE_P (real_masktype) 2977 1.1 mrg || TYPE_MODE (real_masktype) == TYPE_MODE (optype)) 2978 1.1 mrg utype = real_masktype; 2979 1.1 mrg else 2980 1.1 mrg utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1); 2981 1.1 mrg var = vect_get_new_ssa_name (utype, vect_scalar_var); 2982 1.1 mrg mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op); 2983 1.1 mrg gassign *new_stmt 2984 1.1 mrg = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg); 2985 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 2986 1.1 mrg mask_arg = var; 2987 1.1 mrg if (!useless_type_conversion_p (real_masktype, utype)) 2988 1.1 mrg { 2989 1.1 mrg gcc_assert (TYPE_PRECISION (utype) 2990 1.1 mrg <= TYPE_PRECISION (real_masktype)); 2991 1.1 mrg var = vect_get_new_ssa_name (real_masktype, vect_scalar_var); 2992 1.1 mrg new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg); 2993 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 2994 1.1 mrg mask_arg = var; 2995 1.1 mrg } 2996 1.1 mrg src_op = build_zero_cst (srctype); 2997 1.1 mrg } 2998 1.1 mrg gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op, 2999 1.1 mrg mask_arg, scale); 3000 1.1 mrg 3001 1.1 mrg if (!useless_type_conversion_p (vectype, rettype)) 3002 1.1 mrg { 3003 1.1 mrg gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 3004 1.1 mrg TYPE_VECTOR_SUBPARTS (rettype))); 3005 1.1 mrg op = vect_get_new_ssa_name (rettype, vect_simple_var); 3006 1.1 mrg gimple_call_set_lhs (new_stmt, op); 3007 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 3008 1.1 mrg var = make_ssa_name (vec_dest); 3009 1.1 mrg op = build1 (VIEW_CONVERT_EXPR, vectype, op); 3010 1.1 mrg new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op); 3011 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 3012 1.1 mrg } 3013 1.1 mrg else 3014 1.1 mrg { 3015 1.1 mrg var = make_ssa_name (vec_dest, new_stmt); 3016 1.1 mrg gimple_call_set_lhs (new_stmt, var); 3017 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 3018 1.1 mrg } 3019 1.1 mrg 3020 1.1 mrg if (modifier == NARROW) 3021 1.1 mrg { 3022 1.1 mrg if ((j & 1) == 0) 3023 1.1 mrg { 3024 1.1 mrg prev_res = var; 3025 1.1 mrg continue; 3026 1.1 mrg } 3027 1.1 mrg var = permute_vec_elements (vinfo, prev_res, var, perm_mask, 3028 1.1 mrg stmt_info, gsi); 3029 1.1 mrg new_stmt = SSA_NAME_DEF_STMT (var); 3030 1.1 mrg } 3031 1.1 mrg 3032 1.1 mrg STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); 3033 1.1 mrg } 3034 1.1 mrg *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; 3035 1.1 mrg } 3036 1.1 mrg 3037 1.1 mrg /* Prepare the base and offset in GS_INFO for vectorization. 3038 1.1 mrg Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET 3039 1.1 mrg to the vectorized offset argument for the first copy of STMT_INFO. 3040 1.1 mrg STMT_INFO is the statement described by GS_INFO and LOOP is the 3041 1.1 mrg containing loop. 
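The base becomes a loop-invariant SSA name, materialized on the preheader edge if necessary; the offset gets one vectorized definition per copy (for SLP, the defs of the node's first child).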
*/ 3042 1.1 mrg 3043 1.1 mrg static void 3044 1.1 mrg vect_get_gather_scatter_ops (loop_vec_info loop_vinfo, 3045 1.1 mrg class loop *loop, stmt_vec_info stmt_info, 3046 1.1 mrg slp_tree slp_node, gather_scatter_info *gs_info, 3047 1.1 mrg tree *dataref_ptr, vec<tree> *vec_offset) 3048 1.1 mrg { 3049 1.1 mrg gimple_seq stmts = NULL; 3050 1.1 mrg *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE); 3051 1.1 mrg if (stmts != NULL) 3052 1.1 mrg { 3053 1.1 mrg basic_block new_bb; 3054 1.1 mrg edge pe = loop_preheader_edge (loop); 3055 1.1 mrg new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); 3056 1.1 mrg gcc_assert (!new_bb); 3057 1.1 mrg } 3058 1.1 mrg if (slp_node) 3059 1.1 mrg vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset); 3060 1.1 mrg else 3061 1.1 mrg { 3062 1.1 mrg unsigned ncopies 3063 1.1 mrg = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype); 3064 1.1 mrg vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies, 3065 1.1 mrg gs_info->offset, vec_offset, 3066 1.1 mrg gs_info->offset_vectype); 3067 1.1 mrg } 3068 1.1 mrg } 3069 1.1 mrg 3070 1.1 mrg /* Prepare to implement a grouped or strided load or store using 3071 1.1 mrg the gather load or scatter store operation described by GS_INFO. 3072 1.1 mrg STMT_INFO is the load or store statement. 3073 1.1 mrg 3074 1.1 mrg Set *DATAREF_BUMP to the amount that should be added to the base 3075 1.1 mrg address after each copy of the vectorized statement. Set *VEC_OFFSET 3076 1.1 mrg to an invariant offset vector in which element I has the value 3077 1.1 mrg I * DR_STEP / SCALE. */ 3078 1.1 mrg 3079 1.1 mrg static void 3080 1.1 mrg vect_get_strided_load_store_ops (stmt_vec_info stmt_info, 3081 1.1 mrg loop_vec_info loop_vinfo, 3082 1.1 mrg gather_scatter_info *gs_info, 3083 1.1 mrg tree *dataref_bump, tree *vec_offset) 3084 1.1 mrg { 3085 1.1 mrg struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); 3086 1.1 mrg tree vectype = STMT_VINFO_VECTYPE (stmt_info); 3087 1.1 mrg 3088 1.1 mrg tree bump = size_binop (MULT_EXPR, 3089 1.1 mrg fold_convert (sizetype, unshare_expr (DR_STEP (dr))), 3090 1.1 mrg size_int (TYPE_VECTOR_SUBPARTS (vectype))); 3091 1.1 mrg *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump); 3092 1.1 mrg 3093 1.1 mrg /* The offset given in GS_INFO can have pointer type, so use the element 3094 1.1 mrg type of the vector instead. */ 3095 1.1 mrg tree offset_type = TREE_TYPE (gs_info->offset_vectype); 3096 1.1 mrg 3097 1.1 mrg /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */ 3098 1.1 mrg tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)), 3099 1.1 mrg ssize_int (gs_info->scale)); 3100 1.1 mrg step = fold_convert (offset_type, step); 3101 1.1 mrg 3102 1.1 mrg /* Create {0, X, X*2, X*3, ...}. */ 3103 1.1 mrg tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype, 3104 1.1 mrg build_zero_cst (offset_type), step); 3105 1.1 mrg *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset); 3106 1.1 mrg } 3107 1.1 mrg 3108 1.1 mrg /* Return the amount that should be added to a vector pointer to move 3109 1.1 mrg to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference 3110 1.1 mrg being vectorized and MEMORY_ACCESS_TYPE describes the type of 3111 1.1 mrg vectorization. 
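For example, for a contiguous increasing access with a 16-byte AGGR_TYPE the increment is 16 bytes, and it is negated when DR_STEP is negative so that the pointer walks downwards.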
*/ 3112 1.1 mrg 3113 1.1 mrg static tree 3114 1.1 mrg vect_get_data_ptr_increment (vec_info *vinfo, 3115 1.1 mrg dr_vec_info *dr_info, tree aggr_type, 3116 1.1 mrg vect_memory_access_type memory_access_type) 3117 1.1 mrg { 3118 1.1 mrg if (memory_access_type == VMAT_INVARIANT) 3119 1.1 mrg return size_zero_node; 3120 1.1 mrg 3121 1.1 mrg tree iv_step = TYPE_SIZE_UNIT (aggr_type); 3122 1.1 mrg tree step = vect_dr_behavior (vinfo, dr_info)->step; 3123 1.1 mrg if (tree_int_cst_sgn (step) == -1) 3124 1.1 mrg iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step); 3125 1.1 mrg return iv_step; 3126 1.1 mrg } 3127 1.1 mrg 3128 1.1 mrg /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */ 3129 1.1 mrg 3130 1.1 mrg static bool 3131 1.1 mrg vectorizable_bswap (vec_info *vinfo, 3132 1.1 mrg stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, 3133 1.1 mrg gimple **vec_stmt, slp_tree slp_node, 3134 1.1 mrg slp_tree *slp_op, 3135 1.1 mrg tree vectype_in, stmt_vector_for_cost *cost_vec) 3136 1.1 mrg { 3137 1.1 mrg tree op, vectype; 3138 1.1 mrg gcall *stmt = as_a <gcall *> (stmt_info->stmt); 3139 1.1 mrg loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); 3140 1.1 mrg unsigned ncopies; 3141 1.1 mrg 3142 1.1 mrg op = gimple_call_arg (stmt, 0); 3143 1.1 mrg vectype = STMT_VINFO_VECTYPE (stmt_info); 3144 1.1 mrg poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); 3145 1.1 mrg 3146 1.1 mrg /* Multiple types in SLP are handled by creating the appropriate number of 3147 1.1 mrg vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 3148 1.1 mrg case of SLP. */ 3149 1.1 mrg if (slp_node) 3150 1.1 mrg ncopies = 1; 3151 1.1 mrg else 3152 1.1 mrg ncopies = vect_get_num_copies (loop_vinfo, vectype); 3153 1.1 mrg 3154 1.1 mrg gcc_assert (ncopies >= 1); 3155 1.1 mrg 3156 1.1 mrg tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in); 3157 1.1 mrg if (! char_vectype) 3158 1.1 mrg return false; 3159 1.1 mrg 3160 1.1 mrg poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype); 3161 1.1 mrg unsigned word_bytes; 3162 1.1 mrg if (!constant_multiple_p (num_bytes, nunits, &word_bytes)) 3163 1.1 mrg return false; 3164 1.1 mrg 3165 1.1 mrg /* The encoding uses one stepped pattern for each byte in the word. */ 3166 1.1 mrg vec_perm_builder elts (num_bytes, word_bytes, 3); 3167 1.1 mrg for (unsigned i = 0; i < 3; ++i) 3168 1.1 mrg for (unsigned j = 0; j < word_bytes; ++j) 3169 1.1 mrg elts.quick_push ((i + 1) * word_bytes - j - 1); 3170 1.1 mrg 3171 1.1 mrg vec_perm_indices indices (elts, 1, num_bytes); 3172 1.1 mrg if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices)) 3173 1.1 mrg return false; 3174 1.1 mrg 3175 1.1 mrg if (! vec_stmt) 3176 1.1 mrg { 3177 1.1 mrg if (slp_node 3178 1.1 mrg && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in)) 3179 1.1 mrg { 3180 1.1 mrg if (dump_enabled_p ()) 3181 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3182 1.1 mrg "incompatible vector types for invariants\n"); 3183 1.1 mrg return false; 3184 1.1 mrg } 3185 1.1 mrg 3186 1.1 mrg STMT_VINFO_TYPE (stmt_info) = call_vec_info_type; 3187 1.1 mrg DUMP_VECT_SCOPE ("vectorizable_bswap"); 3188 1.1 mrg record_stmt_cost (cost_vec, 3189 1.1 mrg 1, vector_stmt, stmt_info, 0, vect_prologue); 3190 1.1 mrg record_stmt_cost (cost_vec, 3191 1.1 mrg slp_node 3192 1.1 mrg ? 
SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies, 3193 1.1 mrg vec_perm, stmt_info, 0, vect_body); 3194 1.1 mrg return true; 3195 1.1 mrg } 3196 1.1 mrg 3197 1.1 mrg tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices); 3198 1.1 mrg 3199 1.1 mrg /* Transform. */ 3200 1.1 mrg vec<tree> vec_oprnds = vNULL; 3201 1.1 mrg vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, 3202 1.1 mrg op, &vec_oprnds); 3203 1.1 mrg /* Arguments are ready. Create the new vector stmt. */ 3204 1.1 mrg unsigned i; 3205 1.1 mrg tree vop; 3206 1.1 mrg FOR_EACH_VEC_ELT (vec_oprnds, i, vop) 3207 1.1 mrg { 3208 1.1 mrg gimple *new_stmt; 3209 1.1 mrg tree tem = make_ssa_name (char_vectype); 3210 1.1 mrg new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR, 3211 1.1 mrg char_vectype, vop)); 3212 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 3213 1.1 mrg tree tem2 = make_ssa_name (char_vectype); 3214 1.1 mrg new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR, 3215 1.1 mrg tem, tem, bswap_vconst); 3216 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 3217 1.1 mrg tem = make_ssa_name (vectype); 3218 1.1 mrg new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR, 3219 1.1 mrg vectype, tem2)); 3220 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 3221 1.1 mrg if (slp_node) 3222 1.1 mrg SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 3223 1.1 mrg else 3224 1.1 mrg STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); 3225 1.1 mrg } 3226 1.1 mrg 3227 1.1 mrg if (!slp_node) 3228 1.1 mrg *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; 3229 1.1 mrg 3230 1.1 mrg vec_oprnds.release (); 3231 1.1 mrg return true; 3232 1.1 mrg } 3233 1.1 mrg 3234 1.1 mrg /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have 3235 1.1 mrg integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT 3236 1.1 mrg in a single step. On success, store the binary pack code in 3237 1.1 mrg *CONVERT_CODE. */ 3238 1.1 mrg 3239 1.1 mrg static bool 3240 1.1 mrg simple_integer_narrowing (tree vectype_out, tree vectype_in, 3241 1.1 mrg tree_code *convert_code) 3242 1.1 mrg { 3243 1.1 mrg if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out)) 3244 1.1 mrg || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in))) 3245 1.1 mrg return false; 3246 1.1 mrg 3247 1.1 mrg tree_code code; 3248 1.1 mrg int multi_step_cvt = 0; 3249 1.1 mrg auto_vec <tree, 8> interm_types; 3250 1.1 mrg if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in, 3251 1.1 mrg &code, &multi_step_cvt, &interm_types) 3252 1.1 mrg || multi_step_cvt) 3253 1.1 mrg return false; 3254 1.1 mrg 3255 1.1 mrg *convert_code = code; 3256 1.1 mrg return true; 3257 1.1 mrg } 3258 1.1 mrg 3259 1.1 mrg /* Function vectorizable_call. 3260 1.1 mrg 3261 1.1 mrg Check if STMT_INFO performs a function call that can be vectorized. 3262 1.1 mrg If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized 3263 1.1 mrg stmt to replace it, put it in VEC_STMT, and insert it at GSI. 3264 1.1 mrg Return true if STMT_INFO is vectorizable in this way. 
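For example, a call to sqrtf in the loop body may become a single IFN_SQRT call on a vector operand such as V4SF, assuming the target provides the corresponding vector optab.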
*/ 3265 1.1 mrg 3266 1.1 mrg static bool 3267 1.1 mrg vectorizable_call (vec_info *vinfo, 3268 1.1 mrg stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, 3269 1.1 mrg gimple **vec_stmt, slp_tree slp_node, 3270 1.1 mrg stmt_vector_for_cost *cost_vec) 3271 1.1 mrg { 3272 1.1 mrg gcall *stmt; 3273 1.1 mrg tree vec_dest; 3274 1.1 mrg tree scalar_dest; 3275 1.1 mrg tree op; 3276 1.1 mrg tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE; 3277 1.1 mrg tree vectype_out, vectype_in; 3278 1.1 mrg poly_uint64 nunits_in; 3279 1.1 mrg poly_uint64 nunits_out; 3280 1.1 mrg loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); 3281 1.1 mrg bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo); 3282 1.1 mrg tree fndecl, new_temp, rhs_type; 3283 1.1 mrg enum vect_def_type dt[4] 3284 1.1 mrg = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type, 3285 1.1 mrg vect_unknown_def_type }; 3286 1.1 mrg tree vectypes[ARRAY_SIZE (dt)] = {}; 3287 1.1 mrg slp_tree slp_op[ARRAY_SIZE (dt)] = {}; 3288 1.1 mrg int ndts = ARRAY_SIZE (dt); 3289 1.1 mrg int ncopies, j; 3290 1.1 mrg auto_vec<tree, 8> vargs; 3291 1.1 mrg enum { NARROW, NONE, WIDEN } modifier; 3292 1.1 mrg size_t i, nargs; 3293 1.1 mrg tree lhs; 3294 1.1 mrg 3295 1.1 mrg if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 3296 1.1 mrg return false; 3297 1.1 mrg 3298 1.1 mrg if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def 3299 1.1 mrg && ! vec_stmt) 3300 1.1 mrg return false; 3301 1.1 mrg 3302 1.1 mrg /* Is STMT_INFO a vectorizable call? */ 3303 1.1 mrg stmt = dyn_cast <gcall *> (stmt_info->stmt); 3304 1.1 mrg if (!stmt) 3305 1.1 mrg return false; 3306 1.1 mrg 3307 1.1 mrg if (gimple_call_internal_p (stmt) 3308 1.1 mrg && (internal_load_fn_p (gimple_call_internal_fn (stmt)) 3309 1.1 mrg || internal_store_fn_p (gimple_call_internal_fn (stmt)))) 3310 1.1 mrg /* Handled by vectorizable_load and vectorizable_store. */ 3311 1.1 mrg return false; 3312 1.1 mrg 3313 1.1 mrg if (gimple_call_lhs (stmt) == NULL_TREE 3314 1.1 mrg || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME) 3315 1.1 mrg return false; 3316 1.1 mrg 3317 1.1 mrg gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt)); 3318 1.1 mrg 3319 1.1 mrg vectype_out = STMT_VINFO_VECTYPE (stmt_info); 3320 1.1 mrg 3321 1.1 mrg /* Process function arguments. */ 3322 1.1 mrg rhs_type = NULL_TREE; 3323 1.1 mrg vectype_in = NULL_TREE; 3324 1.1 mrg nargs = gimple_call_num_args (stmt); 3325 1.1 mrg 3326 1.1 mrg /* Bail out if the function has more than four arguments; we do not have 3327 1.1 mrg interesting builtin functions to vectorize with more than two arguments, 3328 1.1 mrg except for fma. Having no arguments is also not good. */ 3329 1.1 mrg if (nargs == 0 || nargs > 4) 3330 1.1 mrg return false; 3331 1.1 mrg 3332 1.1 mrg /* Ignore the arguments of IFN_GOMP_SIMD_LANE; they are magic. 
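Its single argument is the loop's simduid decl, an artificial marker that only identifies the containing simd loop and carries no value worth vectorizing.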
*/ 3333 1.1 mrg combined_fn cfn = gimple_call_combined_fn (stmt); 3334 1.1 mrg if (cfn == CFN_GOMP_SIMD_LANE) 3335 1.1 mrg { 3336 1.1 mrg nargs = 0; 3337 1.1 mrg rhs_type = unsigned_type_node; 3338 1.1 mrg } 3339 1.1 mrg 3340 1.1 mrg int mask_opno = -1; 3341 1.1 mrg if (internal_fn_p (cfn)) 3342 1.1 mrg mask_opno = internal_fn_mask_index (as_internal_fn (cfn)); 3343 1.1 mrg 3344 1.1 mrg for (i = 0; i < nargs; i++) 3345 1.1 mrg { 3346 1.1 mrg if ((int) i == mask_opno) 3347 1.1 mrg { 3348 1.1 mrg if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno, 3349 1.1 mrg &op, &slp_op[i], &dt[i], &vectypes[i])) 3350 1.1 mrg return false; 3351 1.1 mrg continue; 3352 1.1 mrg } 3353 1.1 mrg 3354 1.1 mrg if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 3355 1.1 mrg i, &op, &slp_op[i], &dt[i], &vectypes[i])) 3356 1.1 mrg { 3357 1.1 mrg if (dump_enabled_p ()) 3358 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3359 1.1 mrg "use not simple.\n"); 3360 1.1 mrg return false; 3361 1.1 mrg } 3362 1.1 mrg 3363 1.1 mrg /* We can only handle calls with arguments of the same type. */ 3364 1.1 mrg if (rhs_type 3365 1.1 mrg && !types_compatible_p (rhs_type, TREE_TYPE (op))) 3366 1.1 mrg { 3367 1.1 mrg if (dump_enabled_p ()) 3368 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3369 1.1 mrg "argument types differ.\n"); 3370 1.1 mrg return false; 3371 1.1 mrg } 3372 1.1 mrg if (!rhs_type) 3373 1.1 mrg rhs_type = TREE_TYPE (op); 3374 1.1 mrg 3375 1.1 mrg if (!vectype_in) 3376 1.1 mrg vectype_in = vectypes[i]; 3377 1.1 mrg else if (vectypes[i] 3378 1.1 mrg && !types_compatible_p (vectypes[i], vectype_in)) 3379 1.1 mrg { 3380 1.1 mrg if (dump_enabled_p ()) 3381 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3382 1.1 mrg "argument vector types differ.\n"); 3383 1.1 mrg return false; 3384 1.1 mrg } 3385 1.1 mrg } 3386 1.1 mrg /* If all arguments are external or constant defs, infer the vector type 3387 1.1 mrg from the scalar type. */ 3388 1.1 mrg if (!vectype_in) 3389 1.1 mrg vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node); 3390 1.1 mrg if (vec_stmt) 3391 1.1 mrg gcc_assert (vectype_in); 3392 1.1 mrg if (!vectype_in) 3393 1.1 mrg { 3394 1.1 mrg if (dump_enabled_p ()) 3395 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3396 1.1 mrg "no vectype for scalar type %T\n", rhs_type); 3397 1.1 mrg 3398 1.1 mrg return false; 3399 1.1 mrg } 3400 1.1 mrg /* FORNOW: we don't yet support mixtures of vector sizes for calls, 3401 1.1 mrg just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz* 3402 1.1 mrg are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed 3403 1.1 mrg by a pack of the two vectors into an SI vector. We would need 3404 1.1 mrg separate code to handle direct VnDI->VnSI IFN_CTZs. 
*/ 3405 1.1 mrg if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out)) 3406 1.1 mrg { 3407 1.1 mrg if (dump_enabled_p ()) 3408 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3409 1.1 mrg "mismatched vector sizes %T and %T\n", 3410 1.1 mrg vectype_in, vectype_out); 3411 1.1 mrg return false; 3412 1.1 mrg } 3413 1.1 mrg 3414 1.1 mrg if (VECTOR_BOOLEAN_TYPE_P (vectype_out) 3415 1.1 mrg != VECTOR_BOOLEAN_TYPE_P (vectype_in)) 3416 1.1 mrg { 3417 1.1 mrg if (dump_enabled_p ()) 3418 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3419 1.1 mrg "mixed mask and nonmask vector types\n"); 3420 1.1 mrg return false; 3421 1.1 mrg } 3422 1.1 mrg 3423 1.1 mrg if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out)) 3424 1.1 mrg { 3425 1.1 mrg if (dump_enabled_p ()) 3426 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3427 1.1 mrg "use emulated vector type for call\n"); 3428 1.1 mrg return false; 3429 1.1 mrg } 3430 1.1 mrg 3431 1.1 mrg /* FORNOW */ 3432 1.1 mrg nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in); 3433 1.1 mrg nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); 3434 1.1 mrg if (known_eq (nunits_in * 2, nunits_out)) 3435 1.1 mrg modifier = NARROW; 3436 1.1 mrg else if (known_eq (nunits_out, nunits_in)) 3437 1.1 mrg modifier = NONE; 3438 1.1 mrg else if (known_eq (nunits_out * 2, nunits_in)) 3439 1.1 mrg modifier = WIDEN; 3440 1.1 mrg else 3441 1.1 mrg return false; 3442 1.1 mrg 3443 1.1 mrg /* We only handle functions that do not read or clobber memory. */ 3444 1.1 mrg if (gimple_vuse (stmt)) 3445 1.1 mrg { 3446 1.1 mrg if (dump_enabled_p ()) 3447 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3448 1.1 mrg "function reads from or writes to memory.\n"); 3449 1.1 mrg return false; 3450 1.1 mrg } 3451 1.1 mrg 3452 1.1 mrg /* For now, we only vectorize functions if a target specific builtin 3453 1.1 mrg is available. TODO -- in some cases, it might be profitable to 3454 1.1 mrg insert the calls for pieces of the vector, in order to be able 3455 1.1 mrg to vectorize other operations in the loop. */ 3456 1.1 mrg fndecl = NULL_TREE; 3457 1.1 mrg internal_fn ifn = IFN_LAST; 3458 1.1 mrg tree callee = gimple_call_fndecl (stmt); 3459 1.1 mrg 3460 1.1 mrg /* First try using an internal function. */ 3461 1.1 mrg tree_code convert_code = ERROR_MARK; 3462 1.1 mrg if (cfn != CFN_LAST 3463 1.1 mrg && (modifier == NONE 3464 1.1 mrg || (modifier == NARROW 3465 1.1 mrg && simple_integer_narrowing (vectype_out, vectype_in, 3466 1.1 mrg &convert_code)))) 3467 1.1 mrg ifn = vectorizable_internal_function (cfn, callee, vectype_out, 3468 1.1 mrg vectype_in); 3469 1.1 mrg 3470 1.1 mrg /* If that fails, try asking for a target-specific built-in function. 
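For example, the targetm.vectorize.builtin_vectorized_function hook may map a math call such as exp on a vector of doubles to a target-provided builtin (e.g. one from a vector math library); the mapping is entirely up to the target.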
*/ 3471 1.1 mrg if (ifn == IFN_LAST) 3472 1.1 mrg { 3473 1.1 mrg if (cfn != CFN_LAST) 3474 1.1 mrg fndecl = targetm.vectorize.builtin_vectorized_function 3475 1.1 mrg (cfn, vectype_out, vectype_in); 3476 1.1 mrg else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD)) 3477 1.1 mrg fndecl = targetm.vectorize.builtin_md_vectorized_function 3478 1.1 mrg (callee, vectype_out, vectype_in); 3479 1.1 mrg } 3480 1.1 mrg 3481 1.1 mrg if (ifn == IFN_LAST && !fndecl) 3482 1.1 mrg { 3483 1.1 mrg if (cfn == CFN_GOMP_SIMD_LANE 3484 1.1 mrg && !slp_node 3485 1.1 mrg && loop_vinfo 3486 1.1 mrg && LOOP_VINFO_LOOP (loop_vinfo)->simduid 3487 1.1 mrg && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME 3488 1.1 mrg && LOOP_VINFO_LOOP (loop_vinfo)->simduid 3489 1.1 mrg == SSA_NAME_VAR (gimple_call_arg (stmt, 0))) 3490 1.1 mrg { 3491 1.1 mrg /* We can handle IFN_GOMP_SIMD_LANE by returning a 3492 1.1 mrg { 0, 1, 2, ... vf - 1 } vector. */ 3493 1.1 mrg gcc_assert (nargs == 0); 3494 1.1 mrg } 3495 1.1 mrg else if (modifier == NONE 3496 1.1 mrg && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16) 3497 1.1 mrg || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32) 3498 1.1 mrg || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64) 3499 1.1 mrg || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128))) 3500 1.1 mrg return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node, 3501 1.1 mrg slp_op, vectype_in, cost_vec); 3502 1.1 mrg else 3503 1.1 mrg { 3504 1.1 mrg if (dump_enabled_p ()) 3505 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3506 1.1 mrg "function is not vectorizable.\n"); 3507 1.1 mrg return false; 3508 1.1 mrg } 3509 1.1 mrg } 3510 1.1 mrg 3511 1.1 mrg if (slp_node) 3512 1.1 mrg ncopies = 1; 3513 1.1 mrg else if (modifier == NARROW && ifn == IFN_LAST) 3514 1.1 mrg ncopies = vect_get_num_copies (loop_vinfo, vectype_out); 3515 1.1 mrg else 3516 1.1 mrg ncopies = vect_get_num_copies (loop_vinfo, vectype_in); 3517 1.1 mrg 3518 1.1 mrg /* Sanity check: make sure that at least one copy of the vectorized stmt 3519 1.1 mrg needs to be generated. */ 3520 1.1 mrg gcc_assert (ncopies >= 1); 3521 1.1 mrg 3522 1.1 mrg int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info); 3523 1.1 mrg internal_fn cond_fn = get_conditional_internal_fn (ifn); 3524 1.1 mrg vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL); 3525 1.1 mrg if (!vec_stmt) /* transformation not required. */ 3526 1.1 mrg { 3527 1.1 mrg if (slp_node) 3528 1.1 mrg for (i = 0; i < nargs; ++i) 3529 1.1 mrg if (!vect_maybe_update_slp_op_vectype (slp_op[i], 3530 1.1 mrg vectypes[i] 3531 1.1 mrg ? 
vectypes[i] : vectype_in)) 3532 1.1 mrg { 3533 1.1 mrg if (dump_enabled_p ()) 3534 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3535 1.1 mrg "incompatible vector types for invariants\n"); 3536 1.1 mrg return false; 3537 1.1 mrg } 3538 1.1 mrg STMT_VINFO_TYPE (stmt_info) = call_vec_info_type; 3539 1.1 mrg DUMP_VECT_SCOPE ("vectorizable_call"); 3540 1.1 mrg vect_model_simple_cost (vinfo, stmt_info, 3541 1.1 mrg ncopies, dt, ndts, slp_node, cost_vec); 3542 1.1 mrg if (ifn != IFN_LAST && modifier == NARROW && !slp_node) 3543 1.1 mrg record_stmt_cost (cost_vec, ncopies / 2, 3544 1.1 mrg vec_promote_demote, stmt_info, 0, vect_body); 3545 1.1 mrg 3546 1.1 mrg if (loop_vinfo 3547 1.1 mrg && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) 3548 1.1 mrg && (reduc_idx >= 0 || mask_opno >= 0)) 3549 1.1 mrg { 3550 1.1 mrg if (reduc_idx >= 0 3551 1.1 mrg && (cond_fn == IFN_LAST 3552 1.1 mrg || !direct_internal_fn_supported_p (cond_fn, vectype_out, 3553 1.1 mrg OPTIMIZE_FOR_SPEED))) 3554 1.1 mrg { 3555 1.1 mrg if (dump_enabled_p ()) 3556 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3557 1.1 mrg "can't use a fully-masked loop because no" 3558 1.1 mrg " conditional operation is available.\n"); 3559 1.1 mrg LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; 3560 1.1 mrg } 3561 1.1 mrg else 3562 1.1 mrg { 3563 1.1 mrg unsigned int nvectors 3564 1.1 mrg = (slp_node 3565 1.1 mrg ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) 3566 1.1 mrg : ncopies); 3567 1.1 mrg tree scalar_mask = NULL_TREE; 3568 1.1 mrg if (mask_opno >= 0) 3569 1.1 mrg scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno); 3570 1.1 mrg vect_record_loop_mask (loop_vinfo, masks, nvectors, 3571 1.1 mrg vectype_out, scalar_mask); 3572 1.1 mrg } 3573 1.1 mrg } 3574 1.1 mrg return true; 3575 1.1 mrg } 3576 1.1 mrg 3577 1.1 mrg /* Transform. */ 3578 1.1 mrg 3579 1.1 mrg if (dump_enabled_p ()) 3580 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n"); 3581 1.1 mrg 3582 1.1 mrg /* Handle def. */ 3583 1.1 mrg scalar_dest = gimple_call_lhs (stmt); 3584 1.1 mrg vec_dest = vect_create_destination_var (scalar_dest, vectype_out); 3585 1.1 mrg 3586 1.1 mrg bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); 3587 1.1 mrg unsigned int vect_nargs = nargs; 3588 1.1 mrg if (masked_loop_p && reduc_idx >= 0) 3589 1.1 mrg { 3590 1.1 mrg ifn = cond_fn; 3591 1.1 mrg vect_nargs += 2; 3592 1.1 mrg } 3593 1.1 mrg 3594 1.1 mrg if (modifier == NONE || ifn != IFN_LAST) 3595 1.1 mrg { 3596 1.1 mrg tree prev_res = NULL_TREE; 3597 1.1 mrg vargs.safe_grow (vect_nargs, true); 3598 1.1 mrg auto_vec<vec<tree> > vec_defs (nargs); 3599 1.1 mrg for (j = 0; j < ncopies; ++j) 3600 1.1 mrg { 3601 1.1 mrg /* Build argument list for the vectorized call. */ 3602 1.1 mrg if (slp_node) 3603 1.1 mrg { 3604 1.1 mrg vec<tree> vec_oprnds0; 3605 1.1 mrg 3606 1.1 mrg vect_get_slp_defs (vinfo, slp_node, &vec_defs); 3607 1.1 mrg vec_oprnds0 = vec_defs[0]; 3608 1.1 mrg 3609 1.1 mrg /* Arguments are ready. Create the new vector stmt. */ 3610 1.1 mrg FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0) 3611 1.1 mrg { 3612 1.1 mrg int varg = 0; 3613 1.1 mrg if (masked_loop_p && reduc_idx >= 0) 3614 1.1 mrg { 3615 1.1 mrg unsigned int vec_num = vec_oprnds0.length (); 3616 1.1 mrg /* Always true for SLP. 
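In the SLP path the NCOPIES loop degenerates to a single iteration and the loop mask is indexed by the SLP statement number I instead.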
*/ 3617 1.1 mrg gcc_assert (ncopies == 1); 3618 1.1 mrg vargs[varg++] = vect_get_loop_mask (gsi, masks, vec_num, 3619 1.1 mrg vectype_out, i); 3620 1.1 mrg } 3621 1.1 mrg size_t k; 3622 1.1 mrg for (k = 0; k < nargs; k++) 3623 1.1 mrg { 3624 1.1 mrg vec<tree> vec_oprndsk = vec_defs[k]; 3625 1.1 mrg vargs[varg++] = vec_oprndsk[i]; 3626 1.1 mrg } 3627 1.1 mrg if (masked_loop_p && reduc_idx >= 0) 3628 1.1 mrg vargs[varg++] = vargs[reduc_idx + 1]; 3629 1.1 mrg gimple *new_stmt; 3630 1.1 mrg if (modifier == NARROW) 3631 1.1 mrg { 3632 1.1 mrg /* We don't define any narrowing conditional functions 3633 1.1 mrg at present. */ 3634 1.1 mrg gcc_assert (mask_opno < 0); 3635 1.1 mrg tree half_res = make_ssa_name (vectype_in); 3636 1.1 mrg gcall *call 3637 1.1 mrg = gimple_build_call_internal_vec (ifn, vargs); 3638 1.1 mrg gimple_call_set_lhs (call, half_res); 3639 1.1 mrg gimple_call_set_nothrow (call, true); 3640 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, call, gsi); 3641 1.1 mrg if ((i & 1) == 0) 3642 1.1 mrg { 3643 1.1 mrg prev_res = half_res; 3644 1.1 mrg continue; 3645 1.1 mrg } 3646 1.1 mrg new_temp = make_ssa_name (vec_dest); 3647 1.1 mrg new_stmt = gimple_build_assign (new_temp, convert_code, 3648 1.1 mrg prev_res, half_res); 3649 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, 3650 1.1 mrg new_stmt, gsi); 3651 1.1 mrg } 3652 1.1 mrg else 3653 1.1 mrg { 3654 1.1 mrg if (mask_opno >= 0 && masked_loop_p) 3655 1.1 mrg { 3656 1.1 mrg unsigned int vec_num = vec_oprnds0.length (); 3657 1.1 mrg /* Always true for SLP. */ 3658 1.1 mrg gcc_assert (ncopies == 1); 3659 1.1 mrg tree mask = vect_get_loop_mask (gsi, masks, vec_num, 3660 1.1 mrg vectype_out, i); 3661 1.1 mrg vargs[mask_opno] = prepare_vec_mask 3662 1.1 mrg (loop_vinfo, TREE_TYPE (mask), mask, 3663 1.1 mrg vargs[mask_opno], gsi); 3664 1.1 mrg } 3665 1.1 mrg 3666 1.1 mrg gcall *call; 3667 1.1 mrg if (ifn != IFN_LAST) 3668 1.1 mrg call = gimple_build_call_internal_vec (ifn, vargs); 3669 1.1 mrg else 3670 1.1 mrg call = gimple_build_call_vec (fndecl, vargs); 3671 1.1 mrg new_temp = make_ssa_name (vec_dest, call); 3672 1.1 mrg gimple_call_set_lhs (call, new_temp); 3673 1.1 mrg gimple_call_set_nothrow (call, true); 3674 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, call, gsi); 3675 1.1 mrg new_stmt = call; 3676 1.1 mrg } 3677 1.1 mrg SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 3678 1.1 mrg } 3679 1.1 mrg continue; 3680 1.1 mrg } 3681 1.1 mrg 3682 1.1 mrg int varg = 0; 3683 1.1 mrg if (masked_loop_p && reduc_idx >= 0) 3684 1.1 mrg vargs[varg++] = vect_get_loop_mask (gsi, masks, ncopies, 3685 1.1 mrg vectype_out, j); 3686 1.1 mrg for (i = 0; i < nargs; i++) 3687 1.1 mrg { 3688 1.1 mrg op = gimple_call_arg (stmt, i); 3689 1.1 mrg if (j == 0) 3690 1.1 mrg { 3691 1.1 mrg vec_defs.quick_push (vNULL); 3692 1.1 mrg vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, 3693 1.1 mrg op, &vec_defs[i], 3694 1.1 mrg vectypes[i]); 3695 1.1 mrg } 3696 1.1 mrg vargs[varg++] = vec_defs[i][j]; 3697 1.1 mrg } 3698 1.1 mrg if (masked_loop_p && reduc_idx >= 0) 3699 1.1 mrg vargs[varg++] = vargs[reduc_idx + 1]; 3700 1.1 mrg 3701 1.1 mrg if (mask_opno >= 0 && masked_loop_p) 3702 1.1 mrg { 3703 1.1 mrg tree mask = vect_get_loop_mask (gsi, masks, ncopies, 3704 1.1 mrg vectype_out, j); 3705 1.1 mrg vargs[mask_opno] 3706 1.1 mrg = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask, 3707 1.1 mrg vargs[mask_opno], gsi); 3708 1.1 mrg } 3709 1.1 mrg 3710 1.1 mrg gimple *new_stmt; 3711 1.1 mrg if (cfn == CFN_GOMP_SIMD_LANE) 3712 1.1 
mrg { 3713 1.1 mrg tree cst = build_index_vector (vectype_out, j * nunits_out, 1); 3714 1.1 mrg tree new_var 3715 1.1 mrg = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_"); 3716 1.1 mrg gimple *init_stmt = gimple_build_assign (new_var, cst); 3717 1.1 mrg vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL); 3718 1.1 mrg new_temp = make_ssa_name (vec_dest); 3719 1.1 mrg new_stmt = gimple_build_assign (new_temp, new_var); 3720 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 3721 1.1 mrg } 3722 1.1 mrg else if (modifier == NARROW) 3723 1.1 mrg { 3724 1.1 mrg /* We don't define any narrowing conditional functions at 3725 1.1 mrg present. */ 3726 1.1 mrg gcc_assert (mask_opno < 0); 3727 1.1 mrg tree half_res = make_ssa_name (vectype_in); 3728 1.1 mrg gcall *call = gimple_build_call_internal_vec (ifn, vargs); 3729 1.1 mrg gimple_call_set_lhs (call, half_res); 3730 1.1 mrg gimple_call_set_nothrow (call, true); 3731 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, call, gsi); 3732 1.1 mrg if ((j & 1) == 0) 3733 1.1 mrg { 3734 1.1 mrg prev_res = half_res; 3735 1.1 mrg continue; 3736 1.1 mrg } 3737 1.1 mrg new_temp = make_ssa_name (vec_dest); 3738 1.1 mrg new_stmt = gimple_build_assign (new_temp, convert_code, 3739 1.1 mrg prev_res, half_res); 3740 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 3741 1.1 mrg } 3742 1.1 mrg else 3743 1.1 mrg { 3744 1.1 mrg gcall *call; 3745 1.1 mrg if (ifn != IFN_LAST) 3746 1.1 mrg call = gimple_build_call_internal_vec (ifn, vargs); 3747 1.1 mrg else 3748 1.1 mrg call = gimple_build_call_vec (fndecl, vargs); 3749 1.1 mrg new_temp = make_ssa_name (vec_dest, call); 3750 1.1 mrg gimple_call_set_lhs (call, new_temp); 3751 1.1 mrg gimple_call_set_nothrow (call, true); 3752 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, call, gsi); 3753 1.1 mrg new_stmt = call; 3754 1.1 mrg } 3755 1.1 mrg 3756 1.1 mrg if (j == (modifier == NARROW ? 1 : 0)) 3757 1.1 mrg *vec_stmt = new_stmt; 3758 1.1 mrg STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); 3759 1.1 mrg } 3760 1.1 mrg for (i = 0; i < nargs; i++) 3761 1.1 mrg { 3762 1.1 mrg vec<tree> vec_oprndsi = vec_defs[i]; 3763 1.1 mrg vec_oprndsi.release (); 3764 1.1 mrg } 3765 1.1 mrg } 3766 1.1 mrg else if (modifier == NARROW) 3767 1.1 mrg { 3768 1.1 mrg auto_vec<vec<tree> > vec_defs (nargs); 3769 1.1 mrg /* We don't define any narrowing conditional functions at present. */ 3770 1.1 mrg gcc_assert (mask_opno < 0); 3771 1.1 mrg for (j = 0; j < ncopies; ++j) 3772 1.1 mrg { 3773 1.1 mrg /* Build argument list for the vectorized call. */ 3774 1.1 mrg if (j == 0) 3775 1.1 mrg vargs.create (nargs * 2); 3776 1.1 mrg else 3777 1.1 mrg vargs.truncate (0); 3778 1.1 mrg 3779 1.1 mrg if (slp_node) 3780 1.1 mrg { 3781 1.1 mrg vec<tree> vec_oprnds0; 3782 1.1 mrg 3783 1.1 mrg vect_get_slp_defs (vinfo, slp_node, &vec_defs); 3784 1.1 mrg vec_oprnds0 = vec_defs[0]; 3785 1.1 mrg 3786 1.1 mrg /* Arguments are ready. Create the new vector stmt. 
*/ 3787 1.1 mrg for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2) 3788 1.1 mrg { 3789 1.1 mrg size_t k; 3790 1.1 mrg vargs.truncate (0); 3791 1.1 mrg for (k = 0; k < nargs; k++) 3792 1.1 mrg { 3793 1.1 mrg vec<tree> vec_oprndsk = vec_defs[k]; 3794 1.1 mrg vargs.quick_push (vec_oprndsk[i]); 3795 1.1 mrg vargs.quick_push (vec_oprndsk[i + 1]); 3796 1.1 mrg } 3797 1.1 mrg gcall *call; 3798 1.1 mrg if (ifn != IFN_LAST) 3799 1.1 mrg call = gimple_build_call_internal_vec (ifn, vargs); 3800 1.1 mrg else 3801 1.1 mrg call = gimple_build_call_vec (fndecl, vargs); 3802 1.1 mrg new_temp = make_ssa_name (vec_dest, call); 3803 1.1 mrg gimple_call_set_lhs (call, new_temp); 3804 1.1 mrg gimple_call_set_nothrow (call, true); 3805 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, call, gsi); 3806 1.1 mrg SLP_TREE_VEC_STMTS (slp_node).quick_push (call); 3807 1.1 mrg } 3808 1.1 mrg continue; 3809 1.1 mrg } 3810 1.1 mrg 3811 1.1 mrg for (i = 0; i < nargs; i++) 3812 1.1 mrg { 3813 1.1 mrg op = gimple_call_arg (stmt, i); 3814 1.1 mrg if (j == 0) 3815 1.1 mrg { 3816 1.1 mrg vec_defs.quick_push (vNULL); 3817 1.1 mrg vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies, 3818 1.1 mrg op, &vec_defs[i], vectypes[i]); 3819 1.1 mrg } 3820 1.1 mrg vec_oprnd0 = vec_defs[i][2*j]; 3821 1.1 mrg vec_oprnd1 = vec_defs[i][2*j+1]; 3822 1.1 mrg 3823 1.1 mrg vargs.quick_push (vec_oprnd0); 3824 1.1 mrg vargs.quick_push (vec_oprnd1); 3825 1.1 mrg } 3826 1.1 mrg 3827 1.1 mrg gcall *new_stmt = gimple_build_call_vec (fndecl, vargs); 3828 1.1 mrg new_temp = make_ssa_name (vec_dest, new_stmt); 3829 1.1 mrg gimple_call_set_lhs (new_stmt, new_temp); 3830 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 3831 1.1 mrg 3832 1.1 mrg STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); 3833 1.1 mrg } 3834 1.1 mrg 3835 1.1 mrg if (!slp_node) 3836 1.1 mrg *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; 3837 1.1 mrg 3838 1.1 mrg for (i = 0; i < nargs; i++) 3839 1.1 mrg { 3840 1.1 mrg vec<tree> vec_oprndsi = vec_defs[i]; 3841 1.1 mrg vec_oprndsi.release (); 3842 1.1 mrg } 3843 1.1 mrg } 3844 1.1 mrg else 3845 1.1 mrg /* No current target implements this case. */ 3846 1.1 mrg return false; 3847 1.1 mrg 3848 1.1 mrg vargs.release (); 3849 1.1 mrg 3850 1.1 mrg /* The call in STMT might prevent it from being removed in dce. 3851 1.1 mrg We however cannot remove it here, due to the way the ssa name 3852 1.1 mrg it defines is mapped to the new definition. So just replace 3853 1.1 mrg rhs of the statement with something harmless. */ 3854 1.1 mrg 3855 1.1 mrg if (slp_node) 3856 1.1 mrg return true; 3857 1.1 mrg 3858 1.1 mrg stmt_info = vect_orig_stmt (stmt_info); 3859 1.1 mrg lhs = gimple_get_lhs (stmt_info->stmt); 3860 1.1 mrg 3861 1.1 mrg gassign *new_stmt 3862 1.1 mrg = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs))); 3863 1.1 mrg vinfo->replace_stmt (gsi, stmt_info, new_stmt); 3864 1.1 mrg 3865 1.1 mrg return true; 3866 1.1 mrg } 3867 1.1 mrg 3868 1.1 mrg 3869 1.1 mrg struct simd_call_arg_info 3870 1.1 mrg { 3871 1.1 mrg tree vectype; 3872 1.1 mrg tree op; 3873 1.1 mrg HOST_WIDE_INT linear_step; 3874 1.1 mrg enum vect_def_type dt; 3875 1.1 mrg unsigned int align; 3876 1.1 mrg bool simd_lane_linear; 3877 1.1 mrg }; 3878 1.1 mrg 3879 1.1 mrg /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME, 3880 1.1 mrg is linear within simd lane (but not within whole loop), note it in 3881 1.1 mrg *ARGINFO. 
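An illustrative GIMPLE shape (identifiers invented) that is recognized below: _1 = GOMP_SIMD_LANE (simduid.0_5); _2 = (sizetype) _1; _3 = _2 * 4; op_6 = &arr + _3; this records &arr as the base and 4 as the linear step.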
*/ 3882 1.1 mrg 3883 1.1 mrg static void 3884 1.1 mrg vect_simd_lane_linear (tree op, class loop *loop, 3885 1.1 mrg struct simd_call_arg_info *arginfo) 3886 1.1 mrg { 3887 1.1 mrg gimple *def_stmt = SSA_NAME_DEF_STMT (op); 3888 1.1 mrg 3889 1.1 mrg if (!is_gimple_assign (def_stmt) 3890 1.1 mrg || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR 3891 1.1 mrg || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt))) 3892 1.1 mrg return; 3893 1.1 mrg 3894 1.1 mrg tree base = gimple_assign_rhs1 (def_stmt); 3895 1.1 mrg HOST_WIDE_INT linear_step = 0; 3896 1.1 mrg tree v = gimple_assign_rhs2 (def_stmt); 3897 1.1 mrg while (TREE_CODE (v) == SSA_NAME) 3898 1.1 mrg { 3899 1.1 mrg tree t; 3900 1.1 mrg def_stmt = SSA_NAME_DEF_STMT (v); 3901 1.1 mrg if (is_gimple_assign (def_stmt)) 3902 1.1 mrg switch (gimple_assign_rhs_code (def_stmt)) 3903 1.1 mrg { 3904 1.1 mrg case PLUS_EXPR: 3905 1.1 mrg t = gimple_assign_rhs2 (def_stmt); 3906 1.1 mrg if (linear_step || TREE_CODE (t) != INTEGER_CST) 3907 1.1 mrg return; 3908 1.1 mrg base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t); 3909 1.1 mrg v = gimple_assign_rhs1 (def_stmt); 3910 1.1 mrg continue; 3911 1.1 mrg case MULT_EXPR: 3912 1.1 mrg t = gimple_assign_rhs2 (def_stmt); 3913 1.1 mrg if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t)) 3914 1.1 mrg return; 3915 1.1 mrg linear_step = tree_to_shwi (t); 3916 1.1 mrg v = gimple_assign_rhs1 (def_stmt); 3917 1.1 mrg continue; 3918 1.1 mrg CASE_CONVERT: 3919 1.1 mrg t = gimple_assign_rhs1 (def_stmt); 3920 1.1 mrg if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE 3921 1.1 mrg || (TYPE_PRECISION (TREE_TYPE (v)) 3922 1.1 mrg < TYPE_PRECISION (TREE_TYPE (t)))) 3923 1.1 mrg return; 3924 1.1 mrg if (!linear_step) 3925 1.1 mrg linear_step = 1; 3926 1.1 mrg v = t; 3927 1.1 mrg continue; 3928 1.1 mrg default: 3929 1.1 mrg return; 3930 1.1 mrg } 3931 1.1 mrg else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE) 3932 1.1 mrg && loop->simduid 3933 1.1 mrg && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME 3934 1.1 mrg && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0)) 3935 1.1 mrg == loop->simduid)) 3936 1.1 mrg { 3937 1.1 mrg if (!linear_step) 3938 1.1 mrg linear_step = 1; 3939 1.1 mrg arginfo->linear_step = linear_step; 3940 1.1 mrg arginfo->op = base; 3941 1.1 mrg arginfo->simd_lane_linear = true; 3942 1.1 mrg return; 3943 1.1 mrg } 3944 1.1 mrg } 3945 1.1 mrg } 3946 1.1 mrg 3947 1.1 mrg /* Return the number of elements in vector type VECTYPE, which is associated 3948 1.1 mrg with a SIMD clone. At present these vectors always have a constant 3949 1.1 mrg length. */ 3950 1.1 mrg 3951 1.1 mrg static unsigned HOST_WIDE_INT 3952 1.1 mrg simd_clone_subparts (tree vectype) 3953 1.1 mrg { 3954 1.1 mrg return TYPE_VECTOR_SUBPARTS (vectype).to_constant (); 3955 1.1 mrg } 3956 1.1 mrg 3957 1.1 mrg /* Function vectorizable_simd_clone_call. 3958 1.1 mrg 3959 1.1 mrg Check if STMT_INFO performs a function call that can be vectorized 3960 1.1 mrg by calling a simd clone of the function. 3961 1.1 mrg If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized 3962 1.1 mrg stmt to replace it, put it in VEC_STMT, and insert it at GSI. 3963 1.1 mrg Return true if STMT_INFO is vectorizable in this way. 
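For example, given #pragma omp declare simd on a function F, a scalar call F (x_1) inside an omp simd loop can become a single call to one of F's simd clones, taking a whole vector of X values and producing a vector of results; choosing the clone happens below.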
*/ 3964 1.1 mrg 3965 1.1 mrg static bool 3966 1.1 mrg vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, 3967 1.1 mrg gimple_stmt_iterator *gsi, 3968 1.1 mrg gimple **vec_stmt, slp_tree slp_node, 3969 1.1 mrg stmt_vector_for_cost *) 3970 1.1 mrg { 3971 1.1 mrg tree vec_dest; 3972 1.1 mrg tree scalar_dest; 3973 1.1 mrg tree op, type; 3974 1.1 mrg tree vec_oprnd0 = NULL_TREE; 3975 1.1 mrg tree vectype; 3976 1.1 mrg poly_uint64 nunits; 3977 1.1 mrg loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); 3978 1.1 mrg bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo); 3979 1.1 mrg class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL; 3980 1.1 mrg tree fndecl, new_temp; 3981 1.1 mrg int ncopies, j; 3982 1.1 mrg auto_vec<simd_call_arg_info> arginfo; 3983 1.1 mrg vec<tree> vargs = vNULL; 3984 1.1 mrg size_t i, nargs; 3985 1.1 mrg tree lhs, rtype, ratype; 3986 1.1 mrg vec<constructor_elt, va_gc> *ret_ctor_elts = NULL; 3987 1.1 mrg 3988 1.1 mrg /* Is STMT a vectorizable call? */ 3989 1.1 mrg gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt); 3990 1.1 mrg if (!stmt) 3991 1.1 mrg return false; 3992 1.1 mrg 3993 1.1 mrg fndecl = gimple_call_fndecl (stmt); 3994 1.1 mrg if (fndecl == NULL_TREE) 3995 1.1 mrg return false; 3996 1.1 mrg 3997 1.1 mrg struct cgraph_node *node = cgraph_node::get (fndecl); 3998 1.1 mrg if (node == NULL || node->simd_clones == NULL) 3999 1.1 mrg return false; 4000 1.1 mrg 4001 1.1 mrg if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 4002 1.1 mrg return false; 4003 1.1 mrg 4004 1.1 mrg if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def 4005 1.1 mrg && ! vec_stmt) 4006 1.1 mrg return false; 4007 1.1 mrg 4008 1.1 mrg if (gimple_call_lhs (stmt) 4009 1.1 mrg && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME) 4010 1.1 mrg return false; 4011 1.1 mrg 4012 1.1 mrg gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt)); 4013 1.1 mrg 4014 1.1 mrg vectype = STMT_VINFO_VECTYPE (stmt_info); 4015 1.1 mrg 4016 1.1 mrg if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info)) 4017 1.1 mrg return false; 4018 1.1 mrg 4019 1.1 mrg /* FORNOW */ 4020 1.1 mrg if (slp_node) 4021 1.1 mrg return false; 4022 1.1 mrg 4023 1.1 mrg /* Process function arguments. */ 4024 1.1 mrg nargs = gimple_call_num_args (stmt); 4025 1.1 mrg 4026 1.1 mrg /* Bail out if the function has zero arguments. 
*/ 4027 1.1 mrg if (nargs == 0) 4028 1.1 mrg return false; 4029 1.1 mrg 4030 1.1 mrg arginfo.reserve (nargs, true); 4031 1.1 mrg 4032 1.1 mrg for (i = 0; i < nargs; i++) 4033 1.1 mrg { 4034 1.1 mrg simd_call_arg_info thisarginfo; 4035 1.1 mrg affine_iv iv; 4036 1.1 mrg 4037 1.1 mrg thisarginfo.linear_step = 0; 4038 1.1 mrg thisarginfo.align = 0; 4039 1.1 mrg thisarginfo.op = NULL_TREE; 4040 1.1 mrg thisarginfo.simd_lane_linear = false; 4041 1.1 mrg 4042 1.1 mrg op = gimple_call_arg (stmt, i); 4043 1.1 mrg if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt, 4044 1.1 mrg &thisarginfo.vectype) 4045 1.1 mrg || thisarginfo.dt == vect_uninitialized_def) 4046 1.1 mrg { 4047 1.1 mrg if (dump_enabled_p ()) 4048 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4049 1.1 mrg "use not simple.\n"); 4050 1.1 mrg return false; 4051 1.1 mrg } 4052 1.1 mrg 4053 1.1 mrg if (thisarginfo.dt == vect_constant_def 4054 1.1 mrg || thisarginfo.dt == vect_external_def) 4055 1.1 mrg gcc_assert (thisarginfo.vectype == NULL_TREE); 4056 1.1 mrg else 4057 1.1 mrg { 4058 1.1 mrg gcc_assert (thisarginfo.vectype != NULL_TREE); 4059 1.1 mrg if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype)) 4060 1.1 mrg { 4061 1.1 mrg if (dump_enabled_p ()) 4062 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4063 1.1 mrg "vector mask arguments are not supported\n"); 4064 1.1 mrg return false; 4065 1.1 mrg } 4066 1.1 mrg } 4067 1.1 mrg 4068 1.1 mrg /* For linear arguments, the analyze phase should have saved 4069 1.1 mrg the base and step in STMT_VINFO_SIMD_CLONE_INFO. */ 4070 1.1 mrg if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length () 4071 1.1 mrg && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]) 4072 1.1 mrg { 4073 1.1 mrg gcc_assert (vec_stmt); 4074 1.1 mrg thisarginfo.linear_step 4075 1.1 mrg = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]); 4076 1.1 mrg thisarginfo.op 4077 1.1 mrg = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1]; 4078 1.1 mrg thisarginfo.simd_lane_linear 4079 1.1 mrg = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3] 4080 1.1 mrg == boolean_true_node); 4081 1.1 mrg /* If loop has been peeled for alignment, we need to adjust it. */ 4082 1.1 mrg tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo); 4083 1.1 mrg tree n2 = LOOP_VINFO_NITERS (loop_vinfo); 4084 1.1 mrg if (n1 != n2 && !thisarginfo.simd_lane_linear) 4085 1.1 mrg { 4086 1.1 mrg tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2); 4087 1.1 mrg tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]; 4088 1.1 mrg tree opt = TREE_TYPE (thisarginfo.op); 4089 1.1 mrg bias = fold_convert (TREE_TYPE (step), bias); 4090 1.1 mrg bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step); 4091 1.1 mrg thisarginfo.op 4092 1.1 mrg = fold_build2 (POINTER_TYPE_P (opt) 4093 1.1 mrg ? 
POINTER_PLUS_EXPR : PLUS_EXPR, opt, 4094 1.1 mrg thisarginfo.op, bias); 4095 1.1 mrg } 4096 1.1 mrg } 4097 1.1 mrg else if (!vec_stmt 4098 1.1 mrg && thisarginfo.dt != vect_constant_def 4099 1.1 mrg && thisarginfo.dt != vect_external_def 4100 1.1 mrg && loop_vinfo 4101 1.1 mrg && TREE_CODE (op) == SSA_NAME 4102 1.1 mrg && simple_iv (loop, loop_containing_stmt (stmt), op, 4103 1.1 mrg &iv, false) 4104 1.1 mrg && tree_fits_shwi_p (iv.step)) 4105 1.1 mrg { 4106 1.1 mrg thisarginfo.linear_step = tree_to_shwi (iv.step); 4107 1.1 mrg thisarginfo.op = iv.base; 4108 1.1 mrg } 4109 1.1 mrg else if ((thisarginfo.dt == vect_constant_def 4110 1.1 mrg || thisarginfo.dt == vect_external_def) 4111 1.1 mrg && POINTER_TYPE_P (TREE_TYPE (op))) 4112 1.1 mrg thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT; 4113 1.1 mrg /* Addresses of array elements indexed by GOMP_SIMD_LANE are 4114 1.1 mrg linear too. */ 4115 1.1 mrg if (POINTER_TYPE_P (TREE_TYPE (op)) 4116 1.1 mrg && !thisarginfo.linear_step 4117 1.1 mrg && !vec_stmt 4118 1.1 mrg && thisarginfo.dt != vect_constant_def 4119 1.1 mrg && thisarginfo.dt != vect_external_def 4120 1.1 mrg && loop_vinfo 4121 1.1 mrg && !slp_node 4122 1.1 mrg && TREE_CODE (op) == SSA_NAME) 4123 1.1 mrg vect_simd_lane_linear (op, loop, &thisarginfo); 4124 1.1 mrg 4125 1.1 mrg arginfo.quick_push (thisarginfo); 4126 1.1 mrg } 4127 1.1 mrg 4128 1.1 mrg poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 4129 1.1 mrg if (!vf.is_constant ()) 4130 1.1 mrg { 4131 1.1 mrg if (dump_enabled_p ()) 4132 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4133 1.1 mrg "not considering SIMD clones; not yet supported" 4134 1.1 mrg " for variable-width vectors.\n"); 4135 1.1 mrg return false; 4136 1.1 mrg } 4137 1.1 mrg 4138 1.1 mrg unsigned int badness = 0; 4139 1.1 mrg struct cgraph_node *bestn = NULL; 4140 1.1 mrg if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ()) 4141 1.1 mrg bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]); 4142 1.1 mrg else 4143 1.1 mrg for (struct cgraph_node *n = node->simd_clones; n != NULL; 4144 1.1 mrg n = n->simdclone->next_clone) 4145 1.1 mrg { 4146 1.1 mrg unsigned int this_badness = 0; 4147 1.1 mrg unsigned int num_calls; 4148 1.1 mrg if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls) 4149 1.1 mrg || n->simdclone->nargs != nargs) 4150 1.1 mrg continue; 4151 1.1 mrg if (num_calls != 1) 4152 1.1 mrg this_badness += exact_log2 (num_calls) * 4096; 4153 1.1 mrg if (n->simdclone->inbranch) 4154 1.1 mrg this_badness += 8192; 4155 1.1 mrg int target_badness = targetm.simd_clone.usable (n); 4156 1.1 mrg if (target_badness < 0) 4157 1.1 mrg continue; 4158 1.1 mrg this_badness += target_badness * 512; 4159 1.1 mrg /* FORNOW: Have to add code to add the mask argument. 
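Until that is done, inbranch clones, which expect such a mask argument, are skipped here.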
*/ 4160 1.1 mrg if (n->simdclone->inbranch) 4161 1.1 mrg continue; 4162 1.1 mrg for (i = 0; i < nargs; i++) 4163 1.1 mrg { 4164 1.1 mrg switch (n->simdclone->args[i].arg_type) 4165 1.1 mrg { 4166 1.1 mrg case SIMD_CLONE_ARG_TYPE_VECTOR: 4167 1.1 mrg if (!useless_type_conversion_p 4168 1.1 mrg (n->simdclone->args[i].orig_type, 4169 1.1 mrg TREE_TYPE (gimple_call_arg (stmt, i)))) 4170 1.1 mrg i = -1; 4171 1.1 mrg else if (arginfo[i].dt == vect_constant_def 4172 1.1 mrg || arginfo[i].dt == vect_external_def 4173 1.1 mrg || arginfo[i].linear_step) 4174 1.1 mrg this_badness += 64; 4175 1.1 mrg break; 4176 1.1 mrg case SIMD_CLONE_ARG_TYPE_UNIFORM: 4177 1.1 mrg if (arginfo[i].dt != vect_constant_def 4178 1.1 mrg && arginfo[i].dt != vect_external_def) 4179 1.1 mrg i = -1; 4180 1.1 mrg break; 4181 1.1 mrg case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP: 4182 1.1 mrg case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP: 4183 1.1 mrg if (arginfo[i].dt == vect_constant_def 4184 1.1 mrg || arginfo[i].dt == vect_external_def 4185 1.1 mrg || (arginfo[i].linear_step 4186 1.1 mrg != n->simdclone->args[i].linear_step)) 4187 1.1 mrg i = -1; 4188 1.1 mrg break; 4189 1.1 mrg case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP: 4190 1.1 mrg case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP: 4191 1.1 mrg case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP: 4192 1.1 mrg case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP: 4193 1.1 mrg case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP: 4194 1.1 mrg case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP: 4195 1.1 mrg /* FORNOW */ 4196 1.1 mrg i = -1; 4197 1.1 mrg break; 4198 1.1 mrg case SIMD_CLONE_ARG_TYPE_MASK: 4199 1.1 mrg gcc_unreachable (); 4200 1.1 mrg } 4201 1.1 mrg if (i == (size_t) -1) 4202 1.1 mrg break; 4203 1.1 mrg if (n->simdclone->args[i].alignment > arginfo[i].align) 4204 1.1 mrg { 4205 1.1 mrg i = -1; 4206 1.1 mrg break; 4207 1.1 mrg } 4208 1.1 mrg if (arginfo[i].align) 4209 1.1 mrg this_badness += (exact_log2 (arginfo[i].align) 4210 1.1 mrg - exact_log2 (n->simdclone->args[i].alignment)); 4211 1.1 mrg } 4212 1.1 mrg if (i == (size_t) -1) 4213 1.1 mrg continue; 4214 1.1 mrg if (bestn == NULL || this_badness < badness) 4215 1.1 mrg { 4216 1.1 mrg bestn = n; 4217 1.1 mrg badness = this_badness; 4218 1.1 mrg } 4219 1.1 mrg } 4220 1.1 mrg 4221 1.1 mrg if (bestn == NULL) 4222 1.1 mrg return false; 4223 1.1 mrg 4224 1.1 mrg for (i = 0; i < nargs; i++) 4225 1.1 mrg if ((arginfo[i].dt == vect_constant_def 4226 1.1 mrg || arginfo[i].dt == vect_external_def) 4227 1.1 mrg && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR) 4228 1.1 mrg { 4229 1.1 mrg tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i)); 4230 1.1 mrg arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type, 4231 1.1 mrg slp_node); 4232 1.1 mrg if (arginfo[i].vectype == NULL 4233 1.1 mrg || !constant_multiple_p (bestn->simdclone->simdlen, 4234 1.1 mrg simd_clone_subparts (arginfo[i].vectype))) 4235 1.1 mrg return false; 4236 1.1 mrg } 4237 1.1 mrg 4238 1.1 mrg fndecl = bestn->decl; 4239 1.1 mrg nunits = bestn->simdclone->simdlen; 4240 1.1 mrg ncopies = vector_unroll_factor (vf, nunits); 4241 1.1 mrg 4242 1.1 mrg /* If the function isn't const, only allow it in simd loops where user 4243 1.1 mrg has asserted that at least nunits consecutive iterations can be 4244 1.1 mrg performed using SIMD instructions. 
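     Such an assertion is recorded in loop->safelen, e.g. set by a
     (hypothetical) construct like

       #pragma omp simd safelen(16)
       for (i = 0; i < n; i++)
	 a[i] = foo (a[i]);

     Any simd construct whose safelen is at least nunits permits the
     clone call here.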
*/ 4245 1.1 mrg if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits)) 4246 1.1 mrg && gimple_vuse (stmt)) 4247 1.1 mrg return false; 4248 1.1 mrg 4249 1.1 mrg /* Sanity check: make sure that at least one copy of the vectorized stmt 4250 1.1 mrg needs to be generated. */ 4251 1.1 mrg gcc_assert (ncopies >= 1); 4252 1.1 mrg 4253 1.1 mrg if (!vec_stmt) /* transformation not required. */ 4254 1.1 mrg { 4255 1.1 mrg STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl); 4256 1.1 mrg for (i = 0; i < nargs; i++) 4257 1.1 mrg if ((bestn->simdclone->args[i].arg_type 4258 1.1 mrg == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP) 4259 1.1 mrg || (bestn->simdclone->args[i].arg_type 4260 1.1 mrg == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP)) 4261 1.1 mrg { 4262 1.1 mrg STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3 4263 1.1 mrg + 1, 4264 1.1 mrg true); 4265 1.1 mrg STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op); 4266 1.1 mrg tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op)) 4267 1.1 mrg ? size_type_node : TREE_TYPE (arginfo[i].op); 4268 1.1 mrg tree ls = build_int_cst (lst, arginfo[i].linear_step); 4269 1.1 mrg STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls); 4270 1.1 mrg tree sll = arginfo[i].simd_lane_linear 4271 1.1 mrg ? boolean_true_node : boolean_false_node; 4272 1.1 mrg STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll); 4273 1.1 mrg } 4274 1.1 mrg STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type; 4275 1.1 mrg DUMP_VECT_SCOPE ("vectorizable_simd_clone_call"); 4276 1.1 mrg /* vect_model_simple_cost (vinfo, stmt_info, ncopies, 4277 1.1 mrg dt, slp_node, cost_vec); */ 4278 1.1 mrg return true; 4279 1.1 mrg } 4280 1.1 mrg 4281 1.1 mrg /* Transform. */ 4282 1.1 mrg 4283 1.1 mrg if (dump_enabled_p ()) 4284 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n"); 4285 1.1 mrg 4286 1.1 mrg /* Handle def. */ 4287 1.1 mrg scalar_dest = gimple_call_lhs (stmt); 4288 1.1 mrg vec_dest = NULL_TREE; 4289 1.1 mrg rtype = NULL_TREE; 4290 1.1 mrg ratype = NULL_TREE; 4291 1.1 mrg if (scalar_dest) 4292 1.1 mrg { 4293 1.1 mrg vec_dest = vect_create_destination_var (scalar_dest, vectype); 4294 1.1 mrg rtype = TREE_TYPE (TREE_TYPE (fndecl)); 4295 1.1 mrg if (TREE_CODE (rtype) == ARRAY_TYPE) 4296 1.1 mrg { 4297 1.1 mrg ratype = rtype; 4298 1.1 mrg rtype = TREE_TYPE (ratype); 4299 1.1 mrg } 4300 1.1 mrg } 4301 1.1 mrg 4302 1.1 mrg auto_vec<vec<tree> > vec_oprnds; 4303 1.1 mrg auto_vec<unsigned> vec_oprnds_i; 4304 1.1 mrg vec_oprnds.safe_grow_cleared (nargs, true); 4305 1.1 mrg vec_oprnds_i.safe_grow_cleared (nargs, true); 4306 1.1 mrg for (j = 0; j < ncopies; ++j) 4307 1.1 mrg { 4308 1.1 mrg /* Build argument list for the vectorized call. 
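     Each argument is expanded according to the clone's argument type:
     vector arguments may need to be split with BIT_FIELD_REFs or
     combined with a CONSTRUCTOR when the clone's vector type has a
     different width, uniform arguments are passed through unchanged,
     and linear arguments get a scalar adjusted by the accumulated
     step.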
*/ 4309 1.1 mrg if (j == 0) 4310 1.1 mrg vargs.create (nargs); 4311 1.1 mrg else 4312 1.1 mrg vargs.truncate (0); 4313 1.1 mrg 4314 1.1 mrg for (i = 0; i < nargs; i++) 4315 1.1 mrg { 4316 1.1 mrg unsigned int k, l, m, o; 4317 1.1 mrg tree atype; 4318 1.1 mrg op = gimple_call_arg (stmt, i); 4319 1.1 mrg switch (bestn->simdclone->args[i].arg_type) 4320 1.1 mrg { 4321 1.1 mrg case SIMD_CLONE_ARG_TYPE_VECTOR: 4322 1.1 mrg atype = bestn->simdclone->args[i].vector_type; 4323 1.1 mrg o = vector_unroll_factor (nunits, 4324 1.1 mrg simd_clone_subparts (atype)); 4325 1.1 mrg for (m = j * o; m < (j + 1) * o; m++) 4326 1.1 mrg { 4327 1.1 mrg if (simd_clone_subparts (atype) 4328 1.1 mrg < simd_clone_subparts (arginfo[i].vectype)) 4329 1.1 mrg { 4330 1.1 mrg poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype)); 4331 1.1 mrg k = (simd_clone_subparts (arginfo[i].vectype) 4332 1.1 mrg / simd_clone_subparts (atype)); 4333 1.1 mrg gcc_assert ((k & (k - 1)) == 0); 4334 1.1 mrg if (m == 0) 4335 1.1 mrg { 4336 1.1 mrg vect_get_vec_defs_for_operand (vinfo, stmt_info, 4337 1.1 mrg ncopies * o / k, op, 4338 1.1 mrg &vec_oprnds[i]); 4339 1.1 mrg vec_oprnds_i[i] = 0; 4340 1.1 mrg vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++]; 4341 1.1 mrg } 4342 1.1 mrg else 4343 1.1 mrg { 4344 1.1 mrg vec_oprnd0 = arginfo[i].op; 4345 1.1 mrg if ((m & (k - 1)) == 0) 4346 1.1 mrg vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++]; 4347 1.1 mrg } 4348 1.1 mrg arginfo[i].op = vec_oprnd0; 4349 1.1 mrg vec_oprnd0 4350 1.1 mrg = build3 (BIT_FIELD_REF, atype, vec_oprnd0, 4351 1.1 mrg bitsize_int (prec), 4352 1.1 mrg bitsize_int ((m & (k - 1)) * prec)); 4353 1.1 mrg gassign *new_stmt 4354 1.1 mrg = gimple_build_assign (make_ssa_name (atype), 4355 1.1 mrg vec_oprnd0); 4356 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, 4357 1.1 mrg new_stmt, gsi); 4358 1.1 mrg vargs.safe_push (gimple_assign_lhs (new_stmt)); 4359 1.1 mrg } 4360 1.1 mrg else 4361 1.1 mrg { 4362 1.1 mrg k = (simd_clone_subparts (atype) 4363 1.1 mrg / simd_clone_subparts (arginfo[i].vectype)); 4364 1.1 mrg gcc_assert ((k & (k - 1)) == 0); 4365 1.1 mrg vec<constructor_elt, va_gc> *ctor_elts; 4366 1.1 mrg if (k != 1) 4367 1.1 mrg vec_alloc (ctor_elts, k); 4368 1.1 mrg else 4369 1.1 mrg ctor_elts = NULL; 4370 1.1 mrg for (l = 0; l < k; l++) 4371 1.1 mrg { 4372 1.1 mrg if (m == 0 && l == 0) 4373 1.1 mrg { 4374 1.1 mrg vect_get_vec_defs_for_operand (vinfo, stmt_info, 4375 1.1 mrg k * o * ncopies, 4376 1.1 mrg op, 4377 1.1 mrg &vec_oprnds[i]); 4378 1.1 mrg vec_oprnds_i[i] = 0; 4379 1.1 mrg vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++]; 4380 1.1 mrg } 4381 1.1 mrg else 4382 1.1 mrg vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++]; 4383 1.1 mrg arginfo[i].op = vec_oprnd0; 4384 1.1 mrg if (k == 1) 4385 1.1 mrg break; 4386 1.1 mrg CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, 4387 1.1 mrg vec_oprnd0); 4388 1.1 mrg } 4389 1.1 mrg if (k == 1) 4390 1.1 mrg if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0), 4391 1.1 mrg atype)) 4392 1.1 mrg { 4393 1.1 mrg vec_oprnd0 4394 1.1 mrg = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0); 4395 1.1 mrg gassign *new_stmt 4396 1.1 mrg = gimple_build_assign (make_ssa_name (atype), 4397 1.1 mrg vec_oprnd0); 4398 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, 4399 1.1 mrg new_stmt, gsi); 4400 1.1 mrg vargs.safe_push (gimple_assign_lhs (new_stmt)); 4401 1.1 mrg } 4402 1.1 mrg else 4403 1.1 mrg vargs.safe_push (vec_oprnd0); 4404 1.1 mrg else 4405 1.1 mrg { 4406 1.1 mrg vec_oprnd0 = build_constructor (atype, ctor_elts); 4407 1.1 mrg gassign 
*new_stmt 4408 1.1 mrg = gimple_build_assign (make_ssa_name (atype), 4409 1.1 mrg vec_oprnd0); 4410 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, 4411 1.1 mrg new_stmt, gsi); 4412 1.1 mrg vargs.safe_push (gimple_assign_lhs (new_stmt)); 4413 1.1 mrg } 4414 1.1 mrg } 4415 1.1 mrg } 4416 1.1 mrg break; 4417 1.1 mrg case SIMD_CLONE_ARG_TYPE_UNIFORM: 4418 1.1 mrg vargs.safe_push (op); 4419 1.1 mrg break; 4420 1.1 mrg case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP: 4421 1.1 mrg case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP: 4422 1.1 mrg if (j == 0) 4423 1.1 mrg { 4424 1.1 mrg gimple_seq stmts; 4425 1.1 mrg arginfo[i].op 4426 1.1 mrg = force_gimple_operand (unshare_expr (arginfo[i].op), 4427 1.1 mrg &stmts, true, NULL_TREE); 4428 1.1 mrg if (stmts != NULL) 4429 1.1 mrg { 4430 1.1 mrg basic_block new_bb; 4431 1.1 mrg edge pe = loop_preheader_edge (loop); 4432 1.1 mrg new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); 4433 1.1 mrg gcc_assert (!new_bb); 4434 1.1 mrg } 4435 1.1 mrg if (arginfo[i].simd_lane_linear) 4436 1.1 mrg { 4437 1.1 mrg vargs.safe_push (arginfo[i].op); 4438 1.1 mrg break; 4439 1.1 mrg } 4440 1.1 mrg tree phi_res = copy_ssa_name (op); 4441 1.1 mrg gphi *new_phi = create_phi_node (phi_res, loop->header); 4442 1.1 mrg add_phi_arg (new_phi, arginfo[i].op, 4443 1.1 mrg loop_preheader_edge (loop), UNKNOWN_LOCATION); 4444 1.1 mrg enum tree_code code 4445 1.1 mrg = POINTER_TYPE_P (TREE_TYPE (op)) 4446 1.1 mrg ? POINTER_PLUS_EXPR : PLUS_EXPR; 4447 1.1 mrg tree type = POINTER_TYPE_P (TREE_TYPE (op)) 4448 1.1 mrg ? sizetype : TREE_TYPE (op); 4449 1.1 mrg poly_widest_int cst 4450 1.1 mrg = wi::mul (bestn->simdclone->args[i].linear_step, 4451 1.1 mrg ncopies * nunits); 4452 1.1 mrg tree tcst = wide_int_to_tree (type, cst); 4453 1.1 mrg tree phi_arg = copy_ssa_name (op); 4454 1.1 mrg gassign *new_stmt 4455 1.1 mrg = gimple_build_assign (phi_arg, code, phi_res, tcst); 4456 1.1 mrg gimple_stmt_iterator si = gsi_after_labels (loop->header); 4457 1.1 mrg gsi_insert_after (&si, new_stmt, GSI_NEW_STMT); 4458 1.1 mrg add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop), 4459 1.1 mrg UNKNOWN_LOCATION); 4460 1.1 mrg arginfo[i].op = phi_res; 4461 1.1 mrg vargs.safe_push (phi_res); 4462 1.1 mrg } 4463 1.1 mrg else 4464 1.1 mrg { 4465 1.1 mrg enum tree_code code 4466 1.1 mrg = POINTER_TYPE_P (TREE_TYPE (op)) 4467 1.1 mrg ? POINTER_PLUS_EXPR : PLUS_EXPR; 4468 1.1 mrg tree type = POINTER_TYPE_P (TREE_TYPE (op)) 4469 1.1 mrg ? 
sizetype : TREE_TYPE (op); 4470 1.1 mrg poly_widest_int cst 4471 1.1 mrg = wi::mul (bestn->simdclone->args[i].linear_step, 4472 1.1 mrg j * nunits); 4473 1.1 mrg tree tcst = wide_int_to_tree (type, cst); 4474 1.1 mrg new_temp = make_ssa_name (TREE_TYPE (op)); 4475 1.1 mrg gassign *new_stmt 4476 1.1 mrg = gimple_build_assign (new_temp, code, 4477 1.1 mrg arginfo[i].op, tcst); 4478 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 4479 1.1 mrg vargs.safe_push (new_temp); 4480 1.1 mrg } 4481 1.1 mrg break; 4482 1.1 mrg case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP: 4483 1.1 mrg case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP: 4484 1.1 mrg case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP: 4485 1.1 mrg case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP: 4486 1.1 mrg case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP: 4487 1.1 mrg case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP: 4488 1.1 mrg default: 4489 1.1 mrg gcc_unreachable (); 4490 1.1 mrg } 4491 1.1 mrg } 4492 1.1 mrg 4493 1.1 mrg gcall *new_call = gimple_build_call_vec (fndecl, vargs); 4494 1.1 mrg if (vec_dest) 4495 1.1 mrg { 4496 1.1 mrg gcc_assert (ratype 4497 1.1 mrg || known_eq (simd_clone_subparts (rtype), nunits)); 4498 1.1 mrg if (ratype) 4499 1.1 mrg new_temp = create_tmp_var (ratype); 4500 1.1 mrg else if (useless_type_conversion_p (vectype, rtype)) 4501 1.1 mrg new_temp = make_ssa_name (vec_dest, new_call); 4502 1.1 mrg else 4503 1.1 mrg new_temp = make_ssa_name (rtype, new_call); 4504 1.1 mrg gimple_call_set_lhs (new_call, new_temp); 4505 1.1 mrg } 4506 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi); 4507 1.1 mrg gimple *new_stmt = new_call; 4508 1.1 mrg 4509 1.1 mrg if (vec_dest) 4510 1.1 mrg { 4511 1.1 mrg if (!multiple_p (simd_clone_subparts (vectype), nunits)) 4512 1.1 mrg { 4513 1.1 mrg unsigned int k, l; 4514 1.1 mrg poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype)); 4515 1.1 mrg poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype)); 4516 1.1 mrg k = vector_unroll_factor (nunits, 4517 1.1 mrg simd_clone_subparts (vectype)); 4518 1.1 mrg gcc_assert ((k & (k - 1)) == 0); 4519 1.1 mrg for (l = 0; l < k; l++) 4520 1.1 mrg { 4521 1.1 mrg tree t; 4522 1.1 mrg if (ratype) 4523 1.1 mrg { 4524 1.1 mrg t = build_fold_addr_expr (new_temp); 4525 1.1 mrg t = build2 (MEM_REF, vectype, t, 4526 1.1 mrg build_int_cst (TREE_TYPE (t), l * bytes)); 4527 1.1 mrg } 4528 1.1 mrg else 4529 1.1 mrg t = build3 (BIT_FIELD_REF, vectype, new_temp, 4530 1.1 mrg bitsize_int (prec), bitsize_int (l * prec)); 4531 1.1 mrg new_stmt = gimple_build_assign (make_ssa_name (vectype), t); 4532 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 4533 1.1 mrg 4534 1.1 mrg if (j == 0 && l == 0) 4535 1.1 mrg *vec_stmt = new_stmt; 4536 1.1 mrg STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); 4537 1.1 mrg } 4538 1.1 mrg 4539 1.1 mrg if (ratype) 4540 1.1 mrg vect_clobber_variable (vinfo, stmt_info, gsi, new_temp); 4541 1.1 mrg continue; 4542 1.1 mrg } 4543 1.1 mrg else if (!multiple_p (nunits, simd_clone_subparts (vectype))) 4544 1.1 mrg { 4545 1.1 mrg unsigned int k = (simd_clone_subparts (vectype) 4546 1.1 mrg / simd_clone_subparts (rtype)); 4547 1.1 mrg gcc_assert ((k & (k - 1)) == 0); 4548 1.1 mrg if ((j & (k - 1)) == 0) 4549 1.1 mrg vec_alloc (ret_ctor_elts, k); 4550 1.1 mrg if (ratype) 4551 1.1 mrg { 4552 1.1 mrg unsigned int m, o; 4553 1.1 mrg o = vector_unroll_factor (nunits, 4554 1.1 mrg simd_clone_subparts (rtype)); 4555 1.1 mrg for (m = 0; m < o; m++) 4556 1.1 mrg { 4557 
1.1 mrg tree tem = build4 (ARRAY_REF, rtype, new_temp, 4558 1.1 mrg size_int (m), NULL_TREE, NULL_TREE); 4559 1.1 mrg new_stmt = gimple_build_assign (make_ssa_name (rtype), 4560 1.1 mrg tem); 4561 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, 4562 1.1 mrg new_stmt, gsi); 4563 1.1 mrg CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, 4564 1.1 mrg gimple_assign_lhs (new_stmt)); 4565 1.1 mrg } 4566 1.1 mrg vect_clobber_variable (vinfo, stmt_info, gsi, new_temp); 4567 1.1 mrg } 4568 1.1 mrg else 4569 1.1 mrg CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp); 4570 1.1 mrg if ((j & (k - 1)) != k - 1) 4571 1.1 mrg continue; 4572 1.1 mrg vec_oprnd0 = build_constructor (vectype, ret_ctor_elts); 4573 1.1 mrg new_stmt 4574 1.1 mrg = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0); 4575 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 4576 1.1 mrg 4577 1.1 mrg if ((unsigned) j == k - 1) 4578 1.1 mrg *vec_stmt = new_stmt; 4579 1.1 mrg STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); 4580 1.1 mrg continue; 4581 1.1 mrg } 4582 1.1 mrg else if (ratype) 4583 1.1 mrg { 4584 1.1 mrg tree t = build_fold_addr_expr (new_temp); 4585 1.1 mrg t = build2 (MEM_REF, vectype, t, 4586 1.1 mrg build_int_cst (TREE_TYPE (t), 0)); 4587 1.1 mrg new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t); 4588 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 4589 1.1 mrg vect_clobber_variable (vinfo, stmt_info, gsi, new_temp); 4590 1.1 mrg } 4591 1.1 mrg else if (!useless_type_conversion_p (vectype, rtype)) 4592 1.1 mrg { 4593 1.1 mrg vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp); 4594 1.1 mrg new_stmt 4595 1.1 mrg = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0); 4596 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 4597 1.1 mrg } 4598 1.1 mrg } 4599 1.1 mrg 4600 1.1 mrg if (j == 0) 4601 1.1 mrg *vec_stmt = new_stmt; 4602 1.1 mrg STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); 4603 1.1 mrg } 4604 1.1 mrg 4605 1.1 mrg for (i = 0; i < nargs; ++i) 4606 1.1 mrg { 4607 1.1 mrg vec<tree> oprndsi = vec_oprnds[i]; 4608 1.1 mrg oprndsi.release (); 4609 1.1 mrg } 4610 1.1 mrg vargs.release (); 4611 1.1 mrg 4612 1.1 mrg /* The call in STMT might prevent it from being removed in dce. 4613 1.1 mrg We however cannot remove it here, due to the way the ssa name 4614 1.1 mrg it defines is mapped to the new definition. So just replace 4615 1.1 mrg rhs of the statement with something harmless. */ 4616 1.1 mrg 4617 1.1 mrg if (slp_node) 4618 1.1 mrg return true; 4619 1.1 mrg 4620 1.1 mrg gimple *new_stmt; 4621 1.1 mrg if (scalar_dest) 4622 1.1 mrg { 4623 1.1 mrg type = TREE_TYPE (scalar_dest); 4624 1.1 mrg lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt); 4625 1.1 mrg new_stmt = gimple_build_assign (lhs, build_zero_cst (type)); 4626 1.1 mrg } 4627 1.1 mrg else 4628 1.1 mrg new_stmt = gimple_build_nop (); 4629 1.1 mrg vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt); 4630 1.1 mrg unlink_stmt_vdef (stmt); 4631 1.1 mrg 4632 1.1 mrg return true; 4633 1.1 mrg } 4634 1.1 mrg 4635 1.1 mrg 4636 1.1 mrg /* Function vect_gen_widened_results_half 4637 1.1 mrg 4638 1.1 mrg Create a vector stmt whose code, type, number of arguments, and result 4639 1.1 mrg variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are 4640 1.1 mrg VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI. 
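   For a unary CODE, VEC_OPRND1 is ignored.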
4641 1.1 mrg In the case that CODE is a CALL_EXPR, this means that a call to DECL 4642 1.1 mrg needs to be created (DECL is a function-decl of a target-builtin). 4643 1.1 mrg STMT_INFO is the original scalar stmt that we are vectorizing. */ 4644 1.1 mrg 4645 1.1 mrg static gimple * 4646 1.1 mrg vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code, 4647 1.1 mrg tree vec_oprnd0, tree vec_oprnd1, int op_type, 4648 1.1 mrg tree vec_dest, gimple_stmt_iterator *gsi, 4649 1.1 mrg stmt_vec_info stmt_info) 4650 1.1 mrg { 4651 1.1 mrg gimple *new_stmt; 4652 1.1 mrg tree new_temp; 4653 1.1 mrg 4654 1.1 mrg /* Generate half of the widened result: */ 4655 1.1 mrg gcc_assert (op_type == TREE_CODE_LENGTH (code)); 4656 1.1 mrg if (op_type != binary_op) 4657 1.1 mrg vec_oprnd1 = NULL; 4658 1.1 mrg new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1); 4659 1.1 mrg new_temp = make_ssa_name (vec_dest, new_stmt); 4660 1.1 mrg gimple_assign_set_lhs (new_stmt, new_temp); 4661 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 4662 1.1 mrg 4663 1.1 mrg return new_stmt; 4664 1.1 mrg } 4665 1.1 mrg 4666 1.1 mrg 4667 1.1 mrg /* Create vectorized demotion statements for vector operands from VEC_OPRNDS. 4668 1.1 mrg For multi-step conversions store the resulting vectors and call the function 4669 1.1 mrg recursively. */ 4670 1.1 mrg 4671 1.1 mrg static void 4672 1.1 mrg vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds, 4673 1.1 mrg int multi_step_cvt, 4674 1.1 mrg stmt_vec_info stmt_info, 4675 1.1 mrg vec<tree> &vec_dsts, 4676 1.1 mrg gimple_stmt_iterator *gsi, 4677 1.1 mrg slp_tree slp_node, enum tree_code code) 4678 1.1 mrg { 4679 1.1 mrg unsigned int i; 4680 1.1 mrg tree vop0, vop1, new_tmp, vec_dest; 4681 1.1 mrg 4682 1.1 mrg vec_dest = vec_dsts.pop (); 4683 1.1 mrg 4684 1.1 mrg for (i = 0; i < vec_oprnds->length (); i += 2) 4685 1.1 mrg { 4686 1.1 mrg /* Create demotion operation. */ 4687 1.1 mrg vop0 = (*vec_oprnds)[i]; 4688 1.1 mrg vop1 = (*vec_oprnds)[i + 1]; 4689 1.1 mrg gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1); 4690 1.1 mrg new_tmp = make_ssa_name (vec_dest, new_stmt); 4691 1.1 mrg gimple_assign_set_lhs (new_stmt, new_tmp); 4692 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 4693 1.1 mrg 4694 1.1 mrg if (multi_step_cvt) 4695 1.1 mrg /* Store the resulting vector for next recursive call. */ 4696 1.1 mrg (*vec_oprnds)[i/2] = new_tmp; 4697 1.1 mrg else 4698 1.1 mrg { 4699 1.1 mrg /* This is the last step of the conversion sequence. Store the 4700 1.1 mrg vectors in SLP_NODE or in vector info of the scalar statement 4701 1.1 mrg (or in STMT_VINFO_RELATED_STMT chain). */ 4702 1.1 mrg if (slp_node) 4703 1.1 mrg SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 4704 1.1 mrg else 4705 1.1 mrg STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); 4706 1.1 mrg } 4707 1.1 mrg } 4708 1.1 mrg 4709 1.1 mrg /* For multi-step demotion operations we first generate demotion operations 4710 1.1 mrg from the source type to the intermediate types, and then combine the 4711 1.1 mrg results (stored in VEC_OPRNDS) in demotion operation to the destination 4712 1.1 mrg type. */ 4713 1.1 mrg if (multi_step_cvt) 4714 1.1 mrg { 4715 1.1 mrg /* At each level of recursion we have half of the operands we had at the 4716 1.1 mrg previous level. 
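     E.g., with MULTI_STEP_CVT == 1 and four input vectors, this level
     produces two intermediate vectors, and the recursive call packs
     those into the single final vector.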
*/ 4717 1.1 mrg vec_oprnds->truncate ((i+1)/2); 4718 1.1 mrg vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds, 4719 1.1 mrg multi_step_cvt - 1, 4720 1.1 mrg stmt_info, vec_dsts, gsi, 4721 1.1 mrg slp_node, VEC_PACK_TRUNC_EXPR); 4722 1.1 mrg } 4723 1.1 mrg 4724 1.1 mrg vec_dsts.quick_push (vec_dest); 4725 1.1 mrg } 4726 1.1 mrg 4727 1.1 mrg 4728 1.1 mrg /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0 4729 1.1 mrg and VEC_OPRNDS1, for a binary operation associated with scalar statement 4730 1.1 mrg STMT_INFO. For multi-step conversions store the resulting vectors and 4731 1.1 mrg call the function recursively. */ 4732 1.1 mrg 4733 1.1 mrg static void 4734 1.1 mrg vect_create_vectorized_promotion_stmts (vec_info *vinfo, 4735 1.1 mrg vec<tree> *vec_oprnds0, 4736 1.1 mrg vec<tree> *vec_oprnds1, 4737 1.1 mrg stmt_vec_info stmt_info, tree vec_dest, 4738 1.1 mrg gimple_stmt_iterator *gsi, 4739 1.1 mrg enum tree_code code1, 4740 1.1 mrg enum tree_code code2, int op_type) 4741 1.1 mrg { 4742 1.1 mrg int i; 4743 1.1 mrg tree vop0, vop1, new_tmp1, new_tmp2; 4744 1.1 mrg gimple *new_stmt1, *new_stmt2; 4745 1.1 mrg vec<tree> vec_tmp = vNULL; 4746 1.1 mrg 4747 1.1 mrg vec_tmp.create (vec_oprnds0->length () * 2); 4748 1.1 mrg FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0) 4749 1.1 mrg { 4750 1.1 mrg if (op_type == binary_op) 4751 1.1 mrg vop1 = (*vec_oprnds1)[i]; 4752 1.1 mrg else 4753 1.1 mrg vop1 = NULL_TREE; 4754 1.1 mrg 4755 1.1 mrg /* Generate the two halves of promotion operation. */ 4756 1.1 mrg new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1, 4757 1.1 mrg op_type, vec_dest, gsi, 4758 1.1 mrg stmt_info); 4759 1.1 mrg new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1, 4760 1.1 mrg op_type, vec_dest, gsi, 4761 1.1 mrg stmt_info); 4762 1.1 mrg if (is_gimple_call (new_stmt1)) 4763 1.1 mrg { 4764 1.1 mrg new_tmp1 = gimple_call_lhs (new_stmt1); 4765 1.1 mrg new_tmp2 = gimple_call_lhs (new_stmt2); 4766 1.1 mrg } 4767 1.1 mrg else 4768 1.1 mrg { 4769 1.1 mrg new_tmp1 = gimple_assign_lhs (new_stmt1); 4770 1.1 mrg new_tmp2 = gimple_assign_lhs (new_stmt2); 4771 1.1 mrg } 4772 1.1 mrg 4773 1.1 mrg /* Store the results for the next step. */ 4774 1.1 mrg vec_tmp.quick_push (new_tmp1); 4775 1.1 mrg vec_tmp.quick_push (new_tmp2); 4776 1.1 mrg } 4777 1.1 mrg 4778 1.1 mrg vec_oprnds0->release (); 4779 1.1 mrg *vec_oprnds0 = vec_tmp; 4780 1.1 mrg } 4781 1.1 mrg 4782 1.1 mrg /* Create vectorized promotion stmts for widening stmts using only half the 4783 1.1 mrg potential vector size for input. */ 4784 1.1 mrg static void 4785 1.1 mrg vect_create_half_widening_stmts (vec_info *vinfo, 4786 1.1 mrg vec<tree> *vec_oprnds0, 4787 1.1 mrg vec<tree> *vec_oprnds1, 4788 1.1 mrg stmt_vec_info stmt_info, tree vec_dest, 4789 1.1 mrg gimple_stmt_iterator *gsi, 4790 1.1 mrg enum tree_code code1, 4791 1.1 mrg int op_type) 4792 1.1 mrg { 4793 1.1 mrg int i; 4794 1.1 mrg tree vop0, vop1; 4795 1.1 mrg gimple *new_stmt1; 4796 1.1 mrg gimple *new_stmt2; 4797 1.1 mrg gimple *new_stmt3; 4798 1.1 mrg vec<tree> vec_tmp = vNULL; 4799 1.1 mrg 4800 1.1 mrg vec_tmp.create (vec_oprnds0->length ()); 4801 1.1 mrg FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0) 4802 1.1 mrg { 4803 1.1 mrg tree new_tmp1, new_tmp2, new_tmp3, out_type; 4804 1.1 mrg 4805 1.1 mrg gcc_assert (op_type == binary_op); 4806 1.1 mrg vop1 = (*vec_oprnds1)[i]; 4807 1.1 mrg 4808 1.1 mrg /* Widen the first vector input. 
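	 It has the same number of lanes as VEC_DEST but narrower
	 elements, so a NOP_EXPR conversion to the output type is all
	 that is needed.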
*/ 4809 1.1 mrg out_type = TREE_TYPE (vec_dest); 4810 1.1 mrg new_tmp1 = make_ssa_name (out_type); 4811 1.1 mrg new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0); 4812 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi); 4813 1.1 mrg if (VECTOR_TYPE_P (TREE_TYPE (vop1))) 4814 1.1 mrg { 4815 1.1 mrg /* Widen the second vector input. */ 4816 1.1 mrg new_tmp2 = make_ssa_name (out_type); 4817 1.1 mrg new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1); 4818 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi); 4819 1.1 mrg /* Perform the operation. With both vector inputs widened. */ 4820 1.1 mrg new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, new_tmp2); 4821 1.1 mrg } 4822 1.1 mrg else 4823 1.1 mrg { 4824 1.1 mrg /* Perform the operation. With the single vector input widened. */ 4825 1.1 mrg new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, vop1); 4826 1.1 mrg } 4827 1.1 mrg 4828 1.1 mrg new_tmp3 = make_ssa_name (vec_dest, new_stmt3); 4829 1.1 mrg gimple_assign_set_lhs (new_stmt3, new_tmp3); 4830 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi); 4831 1.1 mrg 4832 1.1 mrg /* Store the results for the next step. */ 4833 1.1 mrg vec_tmp.quick_push (new_tmp3); 4834 1.1 mrg } 4835 1.1 mrg 4836 1.1 mrg vec_oprnds0->release (); 4837 1.1 mrg *vec_oprnds0 = vec_tmp; 4838 1.1 mrg } 4839 1.1 mrg 4840 1.1 mrg 4841 1.1 mrg /* Check if STMT_INFO performs a conversion operation that can be vectorized. 4842 1.1 mrg If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized 4843 1.1 mrg stmt to replace it, put it in VEC_STMT, and insert it at GSI. 4844 1.1 mrg Return true if STMT_INFO is vectorizable in this way. */ 4845 1.1 mrg 4846 1.1 mrg static bool 4847 1.1 mrg vectorizable_conversion (vec_info *vinfo, 4848 1.1 mrg stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, 4849 1.1 mrg gimple **vec_stmt, slp_tree slp_node, 4850 1.1 mrg stmt_vector_for_cost *cost_vec) 4851 1.1 mrg { 4852 1.1 mrg tree vec_dest; 4853 1.1 mrg tree scalar_dest; 4854 1.1 mrg tree op0, op1 = NULL_TREE; 4855 1.1 mrg loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); 4856 1.1 mrg enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK; 4857 1.1 mrg enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK; 4858 1.1 mrg tree new_temp; 4859 1.1 mrg enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; 4860 1.1 mrg int ndts = 2; 4861 1.1 mrg poly_uint64 nunits_in; 4862 1.1 mrg poly_uint64 nunits_out; 4863 1.1 mrg tree vectype_out, vectype_in; 4864 1.1 mrg int ncopies, i; 4865 1.1 mrg tree lhs_type, rhs_type; 4866 1.1 mrg enum { NARROW, NONE, WIDEN } modifier; 4867 1.1 mrg vec<tree> vec_oprnds0 = vNULL; 4868 1.1 mrg vec<tree> vec_oprnds1 = vNULL; 4869 1.1 mrg tree vop0; 4870 1.1 mrg bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo); 4871 1.1 mrg int multi_step_cvt = 0; 4872 1.1 mrg vec<tree> interm_types = vNULL; 4873 1.1 mrg tree intermediate_type, cvt_type = NULL_TREE; 4874 1.1 mrg int op_type; 4875 1.1 mrg unsigned short fltsz; 4876 1.1 mrg 4877 1.1 mrg /* Is STMT a vectorizable conversion? */ 4878 1.1 mrg 4879 1.1 mrg if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 4880 1.1 mrg return false; 4881 1.1 mrg 4882 1.1 mrg if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def 4883 1.1 mrg && ! 
vec_stmt) 4884 1.1 mrg return false; 4885 1.1 mrg 4886 1.1 mrg gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt); 4887 1.1 mrg if (!stmt) 4888 1.1 mrg return false; 4889 1.1 mrg 4890 1.1 mrg if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME) 4891 1.1 mrg return false; 4892 1.1 mrg 4893 1.1 mrg code = gimple_assign_rhs_code (stmt); 4894 1.1 mrg if (!CONVERT_EXPR_CODE_P (code) 4895 1.1 mrg && code != FIX_TRUNC_EXPR 4896 1.1 mrg && code != FLOAT_EXPR 4897 1.1 mrg && code != WIDEN_PLUS_EXPR 4898 1.1 mrg && code != WIDEN_MINUS_EXPR 4899 1.1 mrg && code != WIDEN_MULT_EXPR 4900 1.1 mrg && code != WIDEN_LSHIFT_EXPR) 4901 1.1 mrg return false; 4902 1.1 mrg 4903 1.1 mrg bool widen_arith = (code == WIDEN_PLUS_EXPR 4904 1.1 mrg || code == WIDEN_MINUS_EXPR 4905 1.1 mrg || code == WIDEN_MULT_EXPR 4906 1.1 mrg || code == WIDEN_LSHIFT_EXPR); 4907 1.1 mrg op_type = TREE_CODE_LENGTH (code); 4908 1.1 mrg 4909 1.1 mrg /* Check types of lhs and rhs. */ 4910 1.1 mrg scalar_dest = gimple_assign_lhs (stmt); 4911 1.1 mrg lhs_type = TREE_TYPE (scalar_dest); 4912 1.1 mrg vectype_out = STMT_VINFO_VECTYPE (stmt_info); 4913 1.1 mrg 4914 1.1 mrg /* Check the operands of the operation. */ 4915 1.1 mrg slp_tree slp_op0, slp_op1 = NULL; 4916 1.1 mrg if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 4917 1.1 mrg 0, &op0, &slp_op0, &dt[0], &vectype_in)) 4918 1.1 mrg { 4919 1.1 mrg if (dump_enabled_p ()) 4920 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4921 1.1 mrg "use not simple.\n"); 4922 1.1 mrg return false; 4923 1.1 mrg } 4924 1.1 mrg 4925 1.1 mrg rhs_type = TREE_TYPE (op0); 4926 1.1 mrg if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR) 4927 1.1 mrg && !((INTEGRAL_TYPE_P (lhs_type) 4928 1.1 mrg && INTEGRAL_TYPE_P (rhs_type)) 4929 1.1 mrg || (SCALAR_FLOAT_TYPE_P (lhs_type) 4930 1.1 mrg && SCALAR_FLOAT_TYPE_P (rhs_type)))) 4931 1.1 mrg return false; 4932 1.1 mrg 4933 1.1 mrg if (!VECTOR_BOOLEAN_TYPE_P (vectype_out) 4934 1.1 mrg && ((INTEGRAL_TYPE_P (lhs_type) 4935 1.1 mrg && !type_has_mode_precision_p (lhs_type)) 4936 1.1 mrg || (INTEGRAL_TYPE_P (rhs_type) 4937 1.1 mrg && !type_has_mode_precision_p (rhs_type)))) 4938 1.1 mrg { 4939 1.1 mrg if (dump_enabled_p ()) 4940 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4941 1.1 mrg "type conversion to/from bit-precision unsupported." 4942 1.1 mrg "\n"); 4943 1.1 mrg return false; 4944 1.1 mrg } 4945 1.1 mrg 4946 1.1 mrg if (op_type == binary_op) 4947 1.1 mrg { 4948 1.1 mrg gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR 4949 1.1 mrg || code == WIDEN_PLUS_EXPR || code == WIDEN_MINUS_EXPR); 4950 1.1 mrg 4951 1.1 mrg op1 = gimple_assign_rhs2 (stmt); 4952 1.1 mrg tree vectype1_in; 4953 1.1 mrg if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, 4954 1.1 mrg &op1, &slp_op1, &dt[1], &vectype1_in)) 4955 1.1 mrg { 4956 1.1 mrg if (dump_enabled_p ()) 4957 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4958 1.1 mrg "use not simple.\n"); 4959 1.1 mrg return false; 4960 1.1 mrg } 4961 1.1 mrg /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of 4962 1.1 mrg OP1. */ 4963 1.1 mrg if (!vectype_in) 4964 1.1 mrg vectype_in = vectype1_in; 4965 1.1 mrg } 4966 1.1 mrg 4967 1.1 mrg /* If op0 is an external or constant def, infer the vector type 4968 1.1 mrg from the scalar type. 
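     An external or constant operand has no vectorized def stmt to
     take the vector type from.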
*/ 4969 1.1 mrg if (!vectype_in) 4970 1.1 mrg vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node); 4971 1.1 mrg if (vec_stmt) 4972 1.1 mrg gcc_assert (vectype_in); 4973 1.1 mrg if (!vectype_in) 4974 1.1 mrg { 4975 1.1 mrg if (dump_enabled_p ()) 4976 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4977 1.1 mrg "no vectype for scalar type %T\n", rhs_type); 4978 1.1 mrg 4979 1.1 mrg return false; 4980 1.1 mrg } 4981 1.1 mrg 4982 1.1 mrg if (VECTOR_BOOLEAN_TYPE_P (vectype_out) 4983 1.1 mrg && !VECTOR_BOOLEAN_TYPE_P (vectype_in)) 4984 1.1 mrg { 4985 1.1 mrg if (dump_enabled_p ()) 4986 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4987 1.1 mrg "can't convert between boolean and non " 4988 1.1 mrg "boolean vectors %T\n", rhs_type); 4989 1.1 mrg 4990 1.1 mrg return false; 4991 1.1 mrg } 4992 1.1 mrg 4993 1.1 mrg nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in); 4994 1.1 mrg nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); 4995 1.1 mrg if (known_eq (nunits_out, nunits_in)) 4996 1.1 mrg if (widen_arith) 4997 1.1 mrg modifier = WIDEN; 4998 1.1 mrg else 4999 1.1 mrg modifier = NONE; 5000 1.1 mrg else if (multiple_p (nunits_out, nunits_in)) 5001 1.1 mrg modifier = NARROW; 5002 1.1 mrg else 5003 1.1 mrg { 5004 1.1 mrg gcc_checking_assert (multiple_p (nunits_in, nunits_out)); 5005 1.1 mrg modifier = WIDEN; 5006 1.1 mrg } 5007 1.1 mrg 5008 1.1 mrg /* Multiple types in SLP are handled by creating the appropriate number of 5009 1.1 mrg vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 5010 1.1 mrg case of SLP. */ 5011 1.1 mrg if (slp_node) 5012 1.1 mrg ncopies = 1; 5013 1.1 mrg else if (modifier == NARROW) 5014 1.1 mrg ncopies = vect_get_num_copies (loop_vinfo, vectype_out); 5015 1.1 mrg else 5016 1.1 mrg ncopies = vect_get_num_copies (loop_vinfo, vectype_in); 5017 1.1 mrg 5018 1.1 mrg /* Sanity check: make sure that at least one copy of the vectorized stmt 5019 1.1 mrg needs to be generated. */ 5020 1.1 mrg gcc_assert (ncopies >= 1); 5021 1.1 mrg 5022 1.1 mrg bool found_mode = false; 5023 1.1 mrg scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type); 5024 1.1 mrg scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type); 5025 1.1 mrg opt_scalar_mode rhs_mode_iter; 5026 1.1 mrg 5027 1.1 mrg /* Supportable by target? */ 5028 1.1 mrg switch (modifier) 5029 1.1 mrg { 5030 1.1 mrg case NONE: 5031 1.1 mrg if (code != FIX_TRUNC_EXPR 5032 1.1 mrg && code != FLOAT_EXPR 5033 1.1 mrg && !CONVERT_EXPR_CODE_P (code)) 5034 1.1 mrg return false; 5035 1.1 mrg if (supportable_convert_operation (code, vectype_out, vectype_in, &code1)) 5036 1.1 mrg break; 5037 1.1 mrg /* FALLTHRU */ 5038 1.1 mrg unsupported: 5039 1.1 mrg if (dump_enabled_p ()) 5040 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5041 1.1 mrg "conversion not supported by target.\n"); 5042 1.1 mrg return false; 5043 1.1 mrg 5044 1.1 mrg case WIDEN: 5045 1.1 mrg if (known_eq (nunits_in, nunits_out)) 5046 1.1 mrg { 5047 1.1 mrg if (!supportable_half_widening_operation (code, vectype_out, 5048 1.1 mrg vectype_in, &code1)) 5049 1.1 mrg goto unsupported; 5050 1.1 mrg gcc_assert (!(multi_step_cvt && op_type == binary_op)); 5051 1.1 mrg break; 5052 1.1 mrg } 5053 1.1 mrg if (supportable_widening_operation (vinfo, code, stmt_info, 5054 1.1 mrg vectype_out, vectype_in, &code1, 5055 1.1 mrg &code2, &multi_step_cvt, 5056 1.1 mrg &interm_types)) 5057 1.1 mrg { 5058 1.1 mrg /* Binary widening operation can only be supported directly by the 5059 1.1 mrg architecture. 
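	     Hence MULTI_STEP_CVT must be zero here, which the
	     assertion below checks.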
*/ 5060 1.1 mrg gcc_assert (!(multi_step_cvt && op_type == binary_op)); 5061 1.1 mrg break; 5062 1.1 mrg } 5063 1.1 mrg 5064 1.1 mrg if (code != FLOAT_EXPR 5065 1.1 mrg || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode)) 5066 1.1 mrg goto unsupported; 5067 1.1 mrg 5068 1.1 mrg fltsz = GET_MODE_SIZE (lhs_mode); 5069 1.1 mrg FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode) 5070 1.1 mrg { 5071 1.1 mrg rhs_mode = rhs_mode_iter.require (); 5072 1.1 mrg if (GET_MODE_SIZE (rhs_mode) > fltsz) 5073 1.1 mrg break; 5074 1.1 mrg 5075 1.1 mrg cvt_type 5076 1.1 mrg = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0); 5077 1.1 mrg cvt_type = get_same_sized_vectype (cvt_type, vectype_in); 5078 1.1 mrg if (cvt_type == NULL_TREE) 5079 1.1 mrg goto unsupported; 5080 1.1 mrg 5081 1.1 mrg if (GET_MODE_SIZE (rhs_mode) == fltsz) 5082 1.1 mrg { 5083 1.1 mrg if (!supportable_convert_operation (code, vectype_out, 5084 1.1 mrg cvt_type, &codecvt1)) 5085 1.1 mrg goto unsupported; 5086 1.1 mrg } 5087 1.1 mrg else if (!supportable_widening_operation (vinfo, code, stmt_info, 5088 1.1 mrg vectype_out, cvt_type, 5089 1.1 mrg &codecvt1, &codecvt2, 5090 1.1 mrg &multi_step_cvt, 5091 1.1 mrg &interm_types)) 5092 1.1 mrg continue; 5093 1.1 mrg else 5094 1.1 mrg gcc_assert (multi_step_cvt == 0); 5095 1.1 mrg 5096 1.1 mrg if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info, 5097 1.1 mrg cvt_type, 5098 1.1 mrg vectype_in, &code1, &code2, 5099 1.1 mrg &multi_step_cvt, &interm_types)) 5100 1.1 mrg { 5101 1.1 mrg found_mode = true; 5102 1.1 mrg break; 5103 1.1 mrg } 5104 1.1 mrg } 5105 1.1 mrg 5106 1.1 mrg if (!found_mode) 5107 1.1 mrg goto unsupported; 5108 1.1 mrg 5109 1.1 mrg if (GET_MODE_SIZE (rhs_mode) == fltsz) 5110 1.1 mrg codecvt2 = ERROR_MARK; 5111 1.1 mrg else 5112 1.1 mrg { 5113 1.1 mrg multi_step_cvt++; 5114 1.1 mrg interm_types.safe_push (cvt_type); 5115 1.1 mrg cvt_type = NULL_TREE; 5116 1.1 mrg } 5117 1.1 mrg break; 5118 1.1 mrg 5119 1.1 mrg case NARROW: 5120 1.1 mrg gcc_assert (op_type == unary_op); 5121 1.1 mrg if (supportable_narrowing_operation (code, vectype_out, vectype_in, 5122 1.1 mrg &code1, &multi_step_cvt, 5123 1.1 mrg &interm_types)) 5124 1.1 mrg break; 5125 1.1 mrg 5126 1.1 mrg if (code != FIX_TRUNC_EXPR 5127 1.1 mrg || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode)) 5128 1.1 mrg goto unsupported; 5129 1.1 mrg 5130 1.1 mrg cvt_type 5131 1.1 mrg = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0); 5132 1.1 mrg cvt_type = get_same_sized_vectype (cvt_type, vectype_in); 5133 1.1 mrg if (cvt_type == NULL_TREE) 5134 1.1 mrg goto unsupported; 5135 1.1 mrg if (!supportable_convert_operation (code, cvt_type, vectype_in, 5136 1.1 mrg &codecvt1)) 5137 1.1 mrg goto unsupported; 5138 1.1 mrg if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type, 5139 1.1 mrg &code1, &multi_step_cvt, 5140 1.1 mrg &interm_types)) 5141 1.1 mrg break; 5142 1.1 mrg goto unsupported; 5143 1.1 mrg 5144 1.1 mrg default: 5145 1.1 mrg gcc_unreachable (); 5146 1.1 mrg } 5147 1.1 mrg 5148 1.1 mrg if (!vec_stmt) /* transformation not required. 
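			 Just record the chosen strategy and its cost;
			 the vector statements are created by a later
			 call with VEC_STMT set.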
*/ 5149 1.1 mrg { 5150 1.1 mrg if (slp_node 5151 1.1 mrg && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in) 5152 1.1 mrg || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in))) 5153 1.1 mrg { 5154 1.1 mrg if (dump_enabled_p ()) 5155 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5156 1.1 mrg "incompatible vector types for invariants\n"); 5157 1.1 mrg return false; 5158 1.1 mrg } 5159 1.1 mrg DUMP_VECT_SCOPE ("vectorizable_conversion"); 5160 1.1 mrg if (modifier == NONE) 5161 1.1 mrg { 5162 1.1 mrg STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type; 5163 1.1 mrg vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node, 5164 1.1 mrg cost_vec); 5165 1.1 mrg } 5166 1.1 mrg else if (modifier == NARROW) 5167 1.1 mrg { 5168 1.1 mrg STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type; 5169 1.1 mrg /* The final packing step produces one vector result per copy. */ 5170 1.1 mrg unsigned int nvectors 5171 1.1 mrg = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies); 5172 1.1 mrg vect_model_promotion_demotion_cost (stmt_info, dt, nvectors, 5173 1.1 mrg multi_step_cvt, cost_vec, 5174 1.1 mrg widen_arith); 5175 1.1 mrg } 5176 1.1 mrg else 5177 1.1 mrg { 5178 1.1 mrg STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type; 5179 1.1 mrg /* The initial unpacking step produces two vector results 5180 1.1 mrg per copy. MULTI_STEP_CVT is 0 for a single conversion, 5181 1.1 mrg so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */ 5182 1.1 mrg unsigned int nvectors 5183 1.1 mrg = (slp_node 5184 1.1 mrg ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt 5185 1.1 mrg : ncopies * 2); 5186 1.1 mrg vect_model_promotion_demotion_cost (stmt_info, dt, nvectors, 5187 1.1 mrg multi_step_cvt, cost_vec, 5188 1.1 mrg widen_arith); 5189 1.1 mrg } 5190 1.1 mrg interm_types.release (); 5191 1.1 mrg return true; 5192 1.1 mrg } 5193 1.1 mrg 5194 1.1 mrg /* Transform. */ 5195 1.1 mrg if (dump_enabled_p ()) 5196 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, 5197 1.1 mrg "transform conversion. ncopies = %d.\n", ncopies); 5198 1.1 mrg 5199 1.1 mrg if (op_type == binary_op) 5200 1.1 mrg { 5201 1.1 mrg if (CONSTANT_CLASS_P (op0)) 5202 1.1 mrg op0 = fold_convert (TREE_TYPE (op1), op0); 5203 1.1 mrg else if (CONSTANT_CLASS_P (op1)) 5204 1.1 mrg op1 = fold_convert (TREE_TYPE (op0), op1); 5205 1.1 mrg } 5206 1.1 mrg 5207 1.1 mrg /* In case of multi-step conversion, we first generate conversion operations 5208 1.1 mrg to the intermediate types, and then from that types to the final one. 5209 1.1 mrg We create vector destinations for the intermediate type (TYPES) received 5210 1.1 mrg from supportable_*_operation, and store them in the correct order 5211 1.1 mrg for future use in vect_create_vectorized_*_stmts (). */ 5212 1.1 mrg auto_vec<tree> vec_dsts (multi_step_cvt + 1); 5213 1.1 mrg vec_dest = vect_create_destination_var (scalar_dest, 5214 1.1 mrg (cvt_type && modifier == WIDEN) 5215 1.1 mrg ? 
cvt_type : vectype_out); 5216 1.1 mrg vec_dsts.quick_push (vec_dest); 5217 1.1 mrg 5218 1.1 mrg if (multi_step_cvt) 5219 1.1 mrg { 5220 1.1 mrg for (i = interm_types.length () - 1; 5221 1.1 mrg interm_types.iterate (i, &intermediate_type); i--) 5222 1.1 mrg { 5223 1.1 mrg vec_dest = vect_create_destination_var (scalar_dest, 5224 1.1 mrg intermediate_type); 5225 1.1 mrg vec_dsts.quick_push (vec_dest); 5226 1.1 mrg } 5227 1.1 mrg } 5228 1.1 mrg 5229 1.1 mrg if (cvt_type) 5230 1.1 mrg vec_dest = vect_create_destination_var (scalar_dest, 5231 1.1 mrg modifier == WIDEN 5232 1.1 mrg ? vectype_out : cvt_type); 5233 1.1 mrg 5234 1.1 mrg int ninputs = 1; 5235 1.1 mrg if (!slp_node) 5236 1.1 mrg { 5237 1.1 mrg if (modifier == WIDEN) 5238 1.1 mrg ; 5239 1.1 mrg else if (modifier == NARROW) 5240 1.1 mrg { 5241 1.1 mrg if (multi_step_cvt) 5242 1.1 mrg ninputs = vect_pow2 (multi_step_cvt); 5243 1.1 mrg ninputs *= 2; 5244 1.1 mrg } 5245 1.1 mrg } 5246 1.1 mrg 5247 1.1 mrg switch (modifier) 5248 1.1 mrg { 5249 1.1 mrg case NONE: 5250 1.1 mrg vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, 5251 1.1 mrg op0, &vec_oprnds0); 5252 1.1 mrg FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) 5253 1.1 mrg { 5254 1.1 mrg /* Arguments are ready, create the new vector stmt. */ 5255 1.1 mrg gcc_assert (TREE_CODE_LENGTH (code1) == unary_op); 5256 1.1 mrg gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0); 5257 1.1 mrg new_temp = make_ssa_name (vec_dest, new_stmt); 5258 1.1 mrg gimple_assign_set_lhs (new_stmt, new_temp); 5259 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 5260 1.1 mrg 5261 1.1 mrg if (slp_node) 5262 1.1 mrg SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 5263 1.1 mrg else 5264 1.1 mrg STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); 5265 1.1 mrg } 5266 1.1 mrg break; 5267 1.1 mrg 5268 1.1 mrg case WIDEN: 5269 1.1 mrg /* In case the vectorization factor (VF) is bigger than the number 5270 1.1 mrg of elements that we can fit in a vectype (nunits), we have to 5271 1.1 mrg generate more than one vector stmt - i.e - we need to "unroll" 5272 1.1 mrg the vector stmt by a factor VF/nunits. */ 5273 1.1 mrg vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs, 5274 1.1 mrg op0, &vec_oprnds0, 5275 1.1 mrg code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1, 5276 1.1 mrg &vec_oprnds1); 5277 1.1 mrg if (code == WIDEN_LSHIFT_EXPR) 5278 1.1 mrg { 5279 1.1 mrg int oprnds_size = vec_oprnds0.length (); 5280 1.1 mrg vec_oprnds1.create (oprnds_size); 5281 1.1 mrg for (i = 0; i < oprnds_size; ++i) 5282 1.1 mrg vec_oprnds1.quick_push (op1); 5283 1.1 mrg } 5284 1.1 mrg /* Arguments are ready. Create the new vector stmts. 
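	 Work from the intermediate types (I == MULTI_STEP_CVT) down
	 to I == 0, substituting CODECVT1/CODECVT2 on the last step
	 when CODECVT2 is set.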
*/ 5285 1.1 mrg for (i = multi_step_cvt; i >= 0; i--) 5286 1.1 mrg { 5287 1.1 mrg tree this_dest = vec_dsts[i]; 5288 1.1 mrg enum tree_code c1 = code1, c2 = code2; 5289 1.1 mrg if (i == 0 && codecvt2 != ERROR_MARK) 5290 1.1 mrg { 5291 1.1 mrg c1 = codecvt1; 5292 1.1 mrg c2 = codecvt2; 5293 1.1 mrg } 5294 1.1 mrg if (known_eq (nunits_out, nunits_in)) 5295 1.1 mrg vect_create_half_widening_stmts (vinfo, &vec_oprnds0, 5296 1.1 mrg &vec_oprnds1, stmt_info, 5297 1.1 mrg this_dest, gsi, 5298 1.1 mrg c1, op_type); 5299 1.1 mrg else 5300 1.1 mrg vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0, 5301 1.1 mrg &vec_oprnds1, stmt_info, 5302 1.1 mrg this_dest, gsi, 5303 1.1 mrg c1, c2, op_type); 5304 1.1 mrg } 5305 1.1 mrg 5306 1.1 mrg FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) 5307 1.1 mrg { 5308 1.1 mrg gimple *new_stmt; 5309 1.1 mrg if (cvt_type) 5310 1.1 mrg { 5311 1.1 mrg gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op); 5312 1.1 mrg new_temp = make_ssa_name (vec_dest); 5313 1.1 mrg new_stmt = gimple_build_assign (new_temp, codecvt1, vop0); 5314 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 5315 1.1 mrg } 5316 1.1 mrg else 5317 1.1 mrg new_stmt = SSA_NAME_DEF_STMT (vop0); 5318 1.1 mrg 5319 1.1 mrg if (slp_node) 5320 1.1 mrg SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 5321 1.1 mrg else 5322 1.1 mrg STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); 5323 1.1 mrg } 5324 1.1 mrg break; 5325 1.1 mrg 5326 1.1 mrg case NARROW: 5327 1.1 mrg /* In case the vectorization factor (VF) is bigger than the number 5328 1.1 mrg of elements that we can fit in a vectype (nunits), we have to 5329 1.1 mrg generate more than one vector stmt - i.e - we need to "unroll" 5330 1.1 mrg the vector stmt by a factor VF/nunits. */ 5331 1.1 mrg vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs, 5332 1.1 mrg op0, &vec_oprnds0); 5333 1.1 mrg /* Arguments are ready. Create the new vector stmts. */ 5334 1.1 mrg if (cvt_type) 5335 1.1 mrg FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) 5336 1.1 mrg { 5337 1.1 mrg gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op); 5338 1.1 mrg new_temp = make_ssa_name (vec_dest); 5339 1.1 mrg gassign *new_stmt 5340 1.1 mrg = gimple_build_assign (new_temp, codecvt1, vop0); 5341 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 5342 1.1 mrg vec_oprnds0[i] = new_temp; 5343 1.1 mrg } 5344 1.1 mrg 5345 1.1 mrg vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0, 5346 1.1 mrg multi_step_cvt, 5347 1.1 mrg stmt_info, vec_dsts, gsi, 5348 1.1 mrg slp_node, code1); 5349 1.1 mrg break; 5350 1.1 mrg } 5351 1.1 mrg if (!slp_node) 5352 1.1 mrg *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; 5353 1.1 mrg 5354 1.1 mrg vec_oprnds0.release (); 5355 1.1 mrg vec_oprnds1.release (); 5356 1.1 mrg interm_types.release (); 5357 1.1 mrg 5358 1.1 mrg return true; 5359 1.1 mrg } 5360 1.1 mrg 5361 1.1 mrg /* Return true if we can assume from the scalar form of STMT_INFO that 5362 1.1 mrg neither the scalar nor the vector forms will generate code. STMT_INFO 5363 1.1 mrg is known not to involve a data reference. 
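   E.g., a cast between int and unsigned int satisfies
   tree_nop_conversion_p and generates no code in either form.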
*/ 5364 1.1 mrg 5365 1.1 mrg bool 5366 1.1 mrg vect_nop_conversion_p (stmt_vec_info stmt_info) 5367 1.1 mrg { 5368 1.1 mrg gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt); 5369 1.1 mrg if (!stmt) 5370 1.1 mrg return false; 5371 1.1 mrg 5372 1.1 mrg tree lhs = gimple_assign_lhs (stmt); 5373 1.1 mrg tree_code code = gimple_assign_rhs_code (stmt); 5374 1.1 mrg tree rhs = gimple_assign_rhs1 (stmt); 5375 1.1 mrg 5376 1.1 mrg if (code == SSA_NAME || code == VIEW_CONVERT_EXPR) 5377 1.1 mrg return true; 5378 1.1 mrg 5379 1.1 mrg if (CONVERT_EXPR_CODE_P (code)) 5380 1.1 mrg return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)); 5381 1.1 mrg 5382 1.1 mrg return false; 5383 1.1 mrg } 5384 1.1 mrg 5385 1.1 mrg /* Function vectorizable_assignment. 5386 1.1 mrg 5387 1.1 mrg Check if STMT_INFO performs an assignment (copy) that can be vectorized. 5388 1.1 mrg If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized 5389 1.1 mrg stmt to replace it, put it in VEC_STMT, and insert it at GSI. 5390 1.1 mrg Return true if STMT_INFO is vectorizable in this way. */ 5391 1.1 mrg 5392 1.1 mrg static bool 5393 1.1 mrg vectorizable_assignment (vec_info *vinfo, 5394 1.1 mrg stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, 5395 1.1 mrg gimple **vec_stmt, slp_tree slp_node, 5396 1.1 mrg stmt_vector_for_cost *cost_vec) 5397 1.1 mrg { 5398 1.1 mrg tree vec_dest; 5399 1.1 mrg tree scalar_dest; 5400 1.1 mrg tree op; 5401 1.1 mrg loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); 5402 1.1 mrg tree new_temp; 5403 1.1 mrg enum vect_def_type dt[1] = {vect_unknown_def_type}; 5404 1.1 mrg int ndts = 1; 5405 1.1 mrg int ncopies; 5406 1.1 mrg int i; 5407 1.1 mrg vec<tree> vec_oprnds = vNULL; 5408 1.1 mrg tree vop; 5409 1.1 mrg bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo); 5410 1.1 mrg enum tree_code code; 5411 1.1 mrg tree vectype_in; 5412 1.1 mrg 5413 1.1 mrg if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 5414 1.1 mrg return false; 5415 1.1 mrg 5416 1.1 mrg if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def 5417 1.1 mrg && ! vec_stmt) 5418 1.1 mrg return false; 5419 1.1 mrg 5420 1.1 mrg /* Is vectorizable assignment? */ 5421 1.1 mrg gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt); 5422 1.1 mrg if (!stmt) 5423 1.1 mrg return false; 5424 1.1 mrg 5425 1.1 mrg scalar_dest = gimple_assign_lhs (stmt); 5426 1.1 mrg if (TREE_CODE (scalar_dest) != SSA_NAME) 5427 1.1 mrg return false; 5428 1.1 mrg 5429 1.1 mrg if (STMT_VINFO_DATA_REF (stmt_info)) 5430 1.1 mrg return false; 5431 1.1 mrg 5432 1.1 mrg code = gimple_assign_rhs_code (stmt); 5433 1.1 mrg if (!(gimple_assign_single_p (stmt) 5434 1.1 mrg || code == PAREN_EXPR 5435 1.1 mrg || CONVERT_EXPR_CODE_P (code))) 5436 1.1 mrg return false; 5437 1.1 mrg 5438 1.1 mrg tree vectype = STMT_VINFO_VECTYPE (stmt_info); 5439 1.1 mrg poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); 5440 1.1 mrg 5441 1.1 mrg /* Multiple types in SLP are handled by creating the appropriate number of 5442 1.1 mrg vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 5443 1.1 mrg case of SLP. 
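     Otherwise NCOPIES is the vectorization factor divided by the
     number of lanes in VECTYPE.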
*/ 5444 1.1 mrg if (slp_node) 5445 1.1 mrg ncopies = 1; 5446 1.1 mrg else 5447 1.1 mrg ncopies = vect_get_num_copies (loop_vinfo, vectype); 5448 1.1 mrg 5449 1.1 mrg gcc_assert (ncopies >= 1); 5450 1.1 mrg 5451 1.1 mrg slp_tree slp_op; 5452 1.1 mrg if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op, 5453 1.1 mrg &dt[0], &vectype_in)) 5454 1.1 mrg { 5455 1.1 mrg if (dump_enabled_p ()) 5456 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5457 1.1 mrg "use not simple.\n"); 5458 1.1 mrg return false; 5459 1.1 mrg } 5460 1.1 mrg if (!vectype_in) 5461 1.1 mrg vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node); 5462 1.1 mrg 5463 1.1 mrg /* We can handle NOP_EXPR conversions that do not change the number 5464 1.1 mrg of elements or the vector size. */ 5465 1.1 mrg if ((CONVERT_EXPR_CODE_P (code) 5466 1.1 mrg || code == VIEW_CONVERT_EXPR) 5467 1.1 mrg && (!vectype_in 5468 1.1 mrg || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits) 5469 1.1 mrg || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)), 5470 1.1 mrg GET_MODE_SIZE (TYPE_MODE (vectype_in))))) 5471 1.1 mrg return false; 5472 1.1 mrg 5473 1.1 mrg if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in)) 5474 1.1 mrg { 5475 1.1 mrg if (dump_enabled_p ()) 5476 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5477 1.1 mrg "can't convert between boolean and non " 5478 1.1 mrg "boolean vectors %T\n", TREE_TYPE (op)); 5479 1.1 mrg 5480 1.1 mrg return false; 5481 1.1 mrg } 5482 1.1 mrg 5483 1.1 mrg /* We do not handle bit-precision changes. */ 5484 1.1 mrg if ((CONVERT_EXPR_CODE_P (code) 5485 1.1 mrg || code == VIEW_CONVERT_EXPR) 5486 1.1 mrg && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) 5487 1.1 mrg && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)) 5488 1.1 mrg || !type_has_mode_precision_p (TREE_TYPE (op))) 5489 1.1 mrg /* But a conversion that does not change the bit-pattern is ok. */ 5490 1.1 mrg && !((TYPE_PRECISION (TREE_TYPE (scalar_dest)) 5491 1.1 mrg > TYPE_PRECISION (TREE_TYPE (op))) 5492 1.1 mrg && TYPE_UNSIGNED (TREE_TYPE (op)))) 5493 1.1 mrg { 5494 1.1 mrg if (dump_enabled_p ()) 5495 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5496 1.1 mrg "type conversion to/from bit-precision " 5497 1.1 mrg "unsupported.\n"); 5498 1.1 mrg return false; 5499 1.1 mrg } 5500 1.1 mrg 5501 1.1 mrg if (!vec_stmt) /* transformation not required. */ 5502 1.1 mrg { 5503 1.1 mrg if (slp_node 5504 1.1 mrg && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in)) 5505 1.1 mrg { 5506 1.1 mrg if (dump_enabled_p ()) 5507 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5508 1.1 mrg "incompatible vector types for invariants\n"); 5509 1.1 mrg return false; 5510 1.1 mrg } 5511 1.1 mrg STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type; 5512 1.1 mrg DUMP_VECT_SCOPE ("vectorizable_assignment"); 5513 1.1 mrg if (!vect_nop_conversion_p (stmt_info)) 5514 1.1 mrg vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node, 5515 1.1 mrg cost_vec); 5516 1.1 mrg return true; 5517 1.1 mrg } 5518 1.1 mrg 5519 1.1 mrg /* Transform. */ 5520 1.1 mrg if (dump_enabled_p ()) 5521 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n"); 5522 1.1 mrg 5523 1.1 mrg /* Handle def. */ 5524 1.1 mrg vec_dest = vect_create_destination_var (scalar_dest, vectype); 5525 1.1 mrg 5526 1.1 mrg /* Handle use. 
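     Get the vectorized definitions of OP.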
*/ 5527 1.1 mrg vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds); 5528 1.1 mrg 5529 1.1 mrg /* Arguments are ready. create the new vector stmt. */ 5530 1.1 mrg FOR_EACH_VEC_ELT (vec_oprnds, i, vop) 5531 1.1 mrg { 5532 1.1 mrg if (CONVERT_EXPR_CODE_P (code) 5533 1.1 mrg || code == VIEW_CONVERT_EXPR) 5534 1.1 mrg vop = build1 (VIEW_CONVERT_EXPR, vectype, vop); 5535 1.1 mrg gassign *new_stmt = gimple_build_assign (vec_dest, vop); 5536 1.1 mrg new_temp = make_ssa_name (vec_dest, new_stmt); 5537 1.1 mrg gimple_assign_set_lhs (new_stmt, new_temp); 5538 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 5539 1.1 mrg if (slp_node) 5540 1.1 mrg SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 5541 1.1 mrg else 5542 1.1 mrg STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); 5543 1.1 mrg } 5544 1.1 mrg if (!slp_node) 5545 1.1 mrg *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; 5546 1.1 mrg 5547 1.1 mrg vec_oprnds.release (); 5548 1.1 mrg return true; 5549 1.1 mrg } 5550 1.1 mrg 5551 1.1 mrg 5552 1.1 mrg /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE 5553 1.1 mrg either as shift by a scalar or by a vector. */ 5554 1.1 mrg 5555 1.1 mrg bool 5556 1.1 mrg vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type) 5557 1.1 mrg { 5558 1.1 mrg 5559 1.1 mrg machine_mode vec_mode; 5560 1.1 mrg optab optab; 5561 1.1 mrg int icode; 5562 1.1 mrg tree vectype; 5563 1.1 mrg 5564 1.1 mrg vectype = get_vectype_for_scalar_type (vinfo, scalar_type); 5565 1.1 mrg if (!vectype) 5566 1.1 mrg return false; 5567 1.1 mrg 5568 1.1 mrg optab = optab_for_tree_code (code, vectype, optab_scalar); 5569 1.1 mrg if (!optab 5570 1.1 mrg || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing) 5571 1.1 mrg { 5572 1.1 mrg optab = optab_for_tree_code (code, vectype, optab_vector); 5573 1.1 mrg if (!optab 5574 1.1 mrg || (optab_handler (optab, TYPE_MODE (vectype)) 5575 1.1 mrg == CODE_FOR_nothing)) 5576 1.1 mrg return false; 5577 1.1 mrg } 5578 1.1 mrg 5579 1.1 mrg vec_mode = TYPE_MODE (vectype); 5580 1.1 mrg icode = (int) optab_handler (optab, vec_mode); 5581 1.1 mrg if (icode == CODE_FOR_nothing) 5582 1.1 mrg return false; 5583 1.1 mrg 5584 1.1 mrg return true; 5585 1.1 mrg } 5586 1.1 mrg 5587 1.1 mrg 5588 1.1 mrg /* Function vectorizable_shift. 5589 1.1 mrg 5590 1.1 mrg Check if STMT_INFO performs a shift operation that can be vectorized. 5591 1.1 mrg If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized 5592 1.1 mrg stmt to replace it, put it in VEC_STMT, and insert it at GSI. 5593 1.1 mrg Return true if STMT_INFO is vectorizable in this way. 
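   The shift amount may be a scalar, in which case it is used for all
   lanes, or a vector with one amount per lane; the two forms are
   supported by different optabs (optab_scalar vs. optab_vector).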
*/ 5594 1.1 mrg 5595 1.1 mrg static bool 5596 1.1 mrg vectorizable_shift (vec_info *vinfo, 5597 1.1 mrg stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, 5598 1.1 mrg gimple **vec_stmt, slp_tree slp_node, 5599 1.1 mrg stmt_vector_for_cost *cost_vec) 5600 1.1 mrg { 5601 1.1 mrg tree vec_dest; 5602 1.1 mrg tree scalar_dest; 5603 1.1 mrg tree op0, op1 = NULL; 5604 1.1 mrg tree vec_oprnd1 = NULL_TREE; 5605 1.1 mrg tree vectype; 5606 1.1 mrg loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); 5607 1.1 mrg enum tree_code code; 5608 1.1 mrg machine_mode vec_mode; 5609 1.1 mrg tree new_temp; 5610 1.1 mrg optab optab; 5611 1.1 mrg int icode; 5612 1.1 mrg machine_mode optab_op2_mode; 5613 1.1 mrg enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; 5614 1.1 mrg int ndts = 2; 5615 1.1 mrg poly_uint64 nunits_in; 5616 1.1 mrg poly_uint64 nunits_out; 5617 1.1 mrg tree vectype_out; 5618 1.1 mrg tree op1_vectype; 5619 1.1 mrg int ncopies; 5620 1.1 mrg int i; 5621 1.1 mrg vec<tree> vec_oprnds0 = vNULL; 5622 1.1 mrg vec<tree> vec_oprnds1 = vNULL; 5623 1.1 mrg tree vop0, vop1; 5624 1.1 mrg unsigned int k; 5625 1.1 mrg bool scalar_shift_arg = true; 5626 1.1 mrg bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo); 5627 1.1 mrg bool incompatible_op1_vectype_p = false; 5628 1.1 mrg 5629 1.1 mrg if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 5630 1.1 mrg return false; 5631 1.1 mrg 5632 1.1 mrg if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def 5633 1.1 mrg && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle 5634 1.1 mrg && ! vec_stmt) 5635 1.1 mrg return false; 5636 1.1 mrg 5637 1.1 mrg /* Is STMT a vectorizable binary/unary operation? */ 5638 1.1 mrg gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt); 5639 1.1 mrg if (!stmt) 5640 1.1 mrg return false; 5641 1.1 mrg 5642 1.1 mrg if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME) 5643 1.1 mrg return false; 5644 1.1 mrg 5645 1.1 mrg code = gimple_assign_rhs_code (stmt); 5646 1.1 mrg 5647 1.1 mrg if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR 5648 1.1 mrg || code == RROTATE_EXPR)) 5649 1.1 mrg return false; 5650 1.1 mrg 5651 1.1 mrg scalar_dest = gimple_assign_lhs (stmt); 5652 1.1 mrg vectype_out = STMT_VINFO_VECTYPE (stmt_info); 5653 1.1 mrg if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))) 5654 1.1 mrg { 5655 1.1 mrg if (dump_enabled_p ()) 5656 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5657 1.1 mrg "bit-precision shifts not supported.\n"); 5658 1.1 mrg return false; 5659 1.1 mrg } 5660 1.1 mrg 5661 1.1 mrg slp_tree slp_op0; 5662 1.1 mrg if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 5663 1.1 mrg 0, &op0, &slp_op0, &dt[0], &vectype)) 5664 1.1 mrg { 5665 1.1 mrg if (dump_enabled_p ()) 5666 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5667 1.1 mrg "use not simple.\n"); 5668 1.1 mrg return false; 5669 1.1 mrg } 5670 1.1 mrg /* If op0 is an external or constant def, infer the vector type 5671 1.1 mrg from the scalar type. 
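   E.g. (sketch): for  x_4 = n_7(D) << 2  where n_7(D) is a function
   parameter, the def is external and carries no vectype, so one is
   derived from TREE_TYPE (op0) by get_vectype_for_scalar_type below.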
*/
5672 1.1 mrg if (!vectype)
5673 1.1 mrg vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5674 1.1 mrg if (vec_stmt)
5675 1.1 mrg gcc_assert (vectype);
5676 1.1 mrg if (!vectype)
5677 1.1 mrg {
5678 1.1 mrg if (dump_enabled_p ())
5679 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5680 1.1 mrg "no vectype for scalar type\n");
5681 1.1 mrg return false;
5682 1.1 mrg }
5683 1.1 mrg
5684 1.1 mrg nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5685 1.1 mrg nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5686 1.1 mrg if (maybe_ne (nunits_out, nunits_in))
5687 1.1 mrg return false;
5688 1.1 mrg
5689 1.1 mrg stmt_vec_info op1_def_stmt_info;
5690 1.1 mrg slp_tree slp_op1;
5691 1.1 mrg if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5692 1.1 mrg &dt[1], &op1_vectype, &op1_def_stmt_info))
5693 1.1 mrg {
5694 1.1 mrg if (dump_enabled_p ())
5695 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5696 1.1 mrg "use not simple.\n");
5697 1.1 mrg return false;
5698 1.1 mrg }
5699 1.1 mrg
5700 1.1 mrg /* Multiple types in SLP are handled by creating the appropriate number of
5701 1.1 mrg vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5702 1.1 mrg case of SLP. */
5703 1.1 mrg if (slp_node)
5704 1.1 mrg ncopies = 1;
5705 1.1 mrg else
5706 1.1 mrg ncopies = vect_get_num_copies (loop_vinfo, vectype);
5707 1.1 mrg
5708 1.1 mrg gcc_assert (ncopies >= 1);
5709 1.1 mrg
5710 1.1 mrg /* Determine whether the shift amount is a vector or a scalar. If the
5711 1.1 mrg shift/rotate amount is a vector, use the vector/vector shift optabs. */
5712 1.1 mrg
5713 1.1 mrg if ((dt[1] == vect_internal_def
5714 1.1 mrg || dt[1] == vect_induction_def
5715 1.1 mrg || dt[1] == vect_nested_cycle)
5716 1.1 mrg && !slp_node)
5717 1.1 mrg scalar_shift_arg = false;
5718 1.1 mrg else if (dt[1] == vect_constant_def
5719 1.1 mrg || dt[1] == vect_external_def
5720 1.1 mrg || dt[1] == vect_internal_def)
5721 1.1 mrg {
5722 1.1 mrg /* In SLP, we need to check whether the shift count is the same for
5723 1.1 mrg all stmts; in loops, if it is a constant or invariant, it is
5724 1.1 mrg always a scalar shift. */
5725 1.1 mrg if (slp_node)
5726 1.1 mrg {
5727 1.1 mrg vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5728 1.1 mrg stmt_vec_info slpstmt_info;
5729 1.1 mrg
5730 1.1 mrg FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5731 1.1 mrg {
5732 1.1 mrg gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5733 1.1 mrg if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5734 1.1 mrg scalar_shift_arg = false;
5735 1.1 mrg }
5736 1.1 mrg
5737 1.1 mrg /* For internal SLP defs we have to make sure we see scalar stmts
5738 1.1 mrg for all vector elements.
5739 1.1 mrg ??? For different vectors we could resort to a different
5740 1.1 mrg scalar shift operand but code-generation below simply always
5741 1.1 mrg takes the first. */
5742 1.1 mrg if (dt[1] == vect_internal_def
5743 1.1 mrg && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5744 1.1 mrg stmts.length ()))
5745 1.1 mrg scalar_shift_arg = false;
5746 1.1 mrg }
5747 1.1 mrg
5748 1.1 mrg /* If the shift amount is computed by a pattern stmt we cannot
5749 1.1 mrg use the scalar amount directly, thus give up and use a vector
5750 1.1 mrg shift.
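   (For the SLP uniformity check above, a sketch with hypothetical
   lanes:
     a[0] = b[0] << 3;  a[1] = b[1] << 3;   equal counts: scalar shift
     a[0] = b[0] << 3;  a[1] = b[1] << 5;   differing counts: vector shift
   the second group clears scalar_shift_arg and takes the
   vector/vector path.)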
*/ 5751 1.1 mrg if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info)) 5752 1.1 mrg scalar_shift_arg = false; 5753 1.1 mrg } 5754 1.1 mrg else 5755 1.1 mrg { 5756 1.1 mrg if (dump_enabled_p ()) 5757 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5758 1.1 mrg "operand mode requires invariant argument.\n"); 5759 1.1 mrg return false; 5760 1.1 mrg } 5761 1.1 mrg 5762 1.1 mrg /* Vector shifted by vector. */ 5763 1.1 mrg bool was_scalar_shift_arg = scalar_shift_arg; 5764 1.1 mrg if (!scalar_shift_arg) 5765 1.1 mrg { 5766 1.1 mrg optab = optab_for_tree_code (code, vectype, optab_vector); 5767 1.1 mrg if (dump_enabled_p ()) 5768 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, 5769 1.1 mrg "vector/vector shift/rotate found.\n"); 5770 1.1 mrg 5771 1.1 mrg if (!op1_vectype) 5772 1.1 mrg op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1), 5773 1.1 mrg slp_op1); 5774 1.1 mrg incompatible_op1_vectype_p 5775 1.1 mrg = (op1_vectype == NULL_TREE 5776 1.1 mrg || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype), 5777 1.1 mrg TYPE_VECTOR_SUBPARTS (vectype)) 5778 1.1 mrg || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype)); 5779 1.1 mrg if (incompatible_op1_vectype_p 5780 1.1 mrg && (!slp_node 5781 1.1 mrg || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def 5782 1.1 mrg || slp_op1->refcnt != 1)) 5783 1.1 mrg { 5784 1.1 mrg if (dump_enabled_p ()) 5785 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5786 1.1 mrg "unusable type for last operand in" 5787 1.1 mrg " vector/vector shift/rotate.\n"); 5788 1.1 mrg return false; 5789 1.1 mrg } 5790 1.1 mrg } 5791 1.1 mrg /* See if the machine has a vector shifted by scalar insn and if not 5792 1.1 mrg then see if it has a vector shifted by vector insn. */ 5793 1.1 mrg else 5794 1.1 mrg { 5795 1.1 mrg optab = optab_for_tree_code (code, vectype, optab_scalar); 5796 1.1 mrg if (optab 5797 1.1 mrg && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing) 5798 1.1 mrg { 5799 1.1 mrg if (dump_enabled_p ()) 5800 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, 5801 1.1 mrg "vector/scalar shift/rotate found.\n"); 5802 1.1 mrg } 5803 1.1 mrg else 5804 1.1 mrg { 5805 1.1 mrg optab = optab_for_tree_code (code, vectype, optab_vector); 5806 1.1 mrg if (optab 5807 1.1 mrg && (optab_handler (optab, TYPE_MODE (vectype)) 5808 1.1 mrg != CODE_FOR_nothing)) 5809 1.1 mrg { 5810 1.1 mrg scalar_shift_arg = false; 5811 1.1 mrg 5812 1.1 mrg if (dump_enabled_p ()) 5813 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, 5814 1.1 mrg "vector/vector shift/rotate found.\n"); 5815 1.1 mrg 5816 1.1 mrg if (!op1_vectype) 5817 1.1 mrg op1_vectype = get_vectype_for_scalar_type (vinfo, 5818 1.1 mrg TREE_TYPE (op1), 5819 1.1 mrg slp_op1); 5820 1.1 mrg 5821 1.1 mrg /* Unlike the other binary operators, shifts/rotates have 5822 1.1 mrg the rhs being int, instead of the same type as the lhs, 5823 1.1 mrg so make sure the scalar is the right type if we are 5824 1.1 mrg dealing with vectors of long long/long/short/char. 
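   E.g. (sketch): for  v_3 = w_2 << n_1  with long long w_2 and int n_1,
   TREE_TYPE (vectype) is long long but op1 is int, so the
   tree_nop_conversion_p test below flags the operand as needing a
   conversion before it can be used as a vector shift amount.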
*/ 5825 1.1 mrg incompatible_op1_vectype_p 5826 1.1 mrg = (!op1_vectype 5827 1.1 mrg || !tree_nop_conversion_p (TREE_TYPE (vectype), 5828 1.1 mrg TREE_TYPE (op1))); 5829 1.1 mrg if (incompatible_op1_vectype_p 5830 1.1 mrg && dt[1] == vect_internal_def) 5831 1.1 mrg { 5832 1.1 mrg if (dump_enabled_p ()) 5833 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5834 1.1 mrg "unusable type for last operand in" 5835 1.1 mrg " vector/vector shift/rotate.\n"); 5836 1.1 mrg return false; 5837 1.1 mrg } 5838 1.1 mrg } 5839 1.1 mrg } 5840 1.1 mrg } 5841 1.1 mrg 5842 1.1 mrg /* Supportable by target? */ 5843 1.1 mrg if (!optab) 5844 1.1 mrg { 5845 1.1 mrg if (dump_enabled_p ()) 5846 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5847 1.1 mrg "no optab.\n"); 5848 1.1 mrg return false; 5849 1.1 mrg } 5850 1.1 mrg vec_mode = TYPE_MODE (vectype); 5851 1.1 mrg icode = (int) optab_handler (optab, vec_mode); 5852 1.1 mrg if (icode == CODE_FOR_nothing) 5853 1.1 mrg { 5854 1.1 mrg if (dump_enabled_p ()) 5855 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5856 1.1 mrg "op not supported by target.\n"); 5857 1.1 mrg return false; 5858 1.1 mrg } 5859 1.1 mrg /* vector lowering cannot optimize vector shifts using word arithmetic. */ 5860 1.1 mrg if (vect_emulated_vector_p (vectype)) 5861 1.1 mrg return false; 5862 1.1 mrg 5863 1.1 mrg if (!vec_stmt) /* transformation not required. */ 5864 1.1 mrg { 5865 1.1 mrg if (slp_node 5866 1.1 mrg && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype) 5867 1.1 mrg || ((!scalar_shift_arg || dt[1] == vect_internal_def) 5868 1.1 mrg && (!incompatible_op1_vectype_p 5869 1.1 mrg || dt[1] == vect_constant_def) 5870 1.1 mrg && !vect_maybe_update_slp_op_vectype 5871 1.1 mrg (slp_op1, 5872 1.1 mrg incompatible_op1_vectype_p ? vectype : op1_vectype)))) 5873 1.1 mrg { 5874 1.1 mrg if (dump_enabled_p ()) 5875 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5876 1.1 mrg "incompatible vector types for invariants\n"); 5877 1.1 mrg return false; 5878 1.1 mrg } 5879 1.1 mrg /* Now adjust the constant shift amount in place. */ 5880 1.1 mrg if (slp_node 5881 1.1 mrg && incompatible_op1_vectype_p 5882 1.1 mrg && dt[1] == vect_constant_def) 5883 1.1 mrg { 5884 1.1 mrg for (unsigned i = 0; 5885 1.1 mrg i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i) 5886 1.1 mrg { 5887 1.1 mrg SLP_TREE_SCALAR_OPS (slp_op1)[i] 5888 1.1 mrg = fold_convert (TREE_TYPE (vectype), 5889 1.1 mrg SLP_TREE_SCALAR_OPS (slp_op1)[i]); 5890 1.1 mrg gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i]) 5891 1.1 mrg == INTEGER_CST)); 5892 1.1 mrg } 5893 1.1 mrg } 5894 1.1 mrg STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type; 5895 1.1 mrg DUMP_VECT_SCOPE ("vectorizable_shift"); 5896 1.1 mrg vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, 5897 1.1 mrg scalar_shift_arg ? 1 : ndts, slp_node, cost_vec); 5898 1.1 mrg return true; 5899 1.1 mrg } 5900 1.1 mrg 5901 1.1 mrg /* Transform. */ 5902 1.1 mrg 5903 1.1 mrg if (dump_enabled_p ()) 5904 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, 5905 1.1 mrg "transform binary/unary operation.\n"); 5906 1.1 mrg 5907 1.1 mrg if (incompatible_op1_vectype_p && !slp_node) 5908 1.1 mrg { 5909 1.1 mrg gcc_assert (!scalar_shift_arg && was_scalar_shift_arg); 5910 1.1 mrg op1 = fold_convert (TREE_TYPE (vectype), op1); 5911 1.1 mrg if (dt[1] != vect_constant_def) 5912 1.1 mrg op1 = vect_init_vector (vinfo, stmt_info, op1, 5913 1.1 mrg TREE_TYPE (vectype), NULL); 5914 1.1 mrg } 5915 1.1 mrg 5916 1.1 mrg /* Handle def. 
*/ 5917 1.1 mrg vec_dest = vect_create_destination_var (scalar_dest, vectype); 5918 1.1 mrg 5919 1.1 mrg if (scalar_shift_arg && dt[1] != vect_internal_def) 5920 1.1 mrg { 5921 1.1 mrg /* Vector shl and shr insn patterns can be defined with scalar 5922 1.1 mrg operand 2 (shift operand). In this case, use constant or loop 5923 1.1 mrg invariant op1 directly, without extending it to vector mode 5924 1.1 mrg first. */ 5925 1.1 mrg optab_op2_mode = insn_data[icode].operand[2].mode; 5926 1.1 mrg if (!VECTOR_MODE_P (optab_op2_mode)) 5927 1.1 mrg { 5928 1.1 mrg if (dump_enabled_p ()) 5929 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, 5930 1.1 mrg "operand 1 using scalar mode.\n"); 5931 1.1 mrg vec_oprnd1 = op1; 5932 1.1 mrg vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies); 5933 1.1 mrg vec_oprnds1.quick_push (vec_oprnd1); 5934 1.1 mrg /* Store vec_oprnd1 for every vector stmt to be created. 5935 1.1 mrg We check during the analysis that all the shift arguments 5936 1.1 mrg are the same. 5937 1.1 mrg TODO: Allow different constants for different vector 5938 1.1 mrg stmts generated for an SLP instance. */ 5939 1.1 mrg for (k = 0; 5940 1.1 mrg k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++) 5941 1.1 mrg vec_oprnds1.quick_push (vec_oprnd1); 5942 1.1 mrg } 5943 1.1 mrg } 5944 1.1 mrg else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p) 5945 1.1 mrg { 5946 1.1 mrg if (was_scalar_shift_arg) 5947 1.1 mrg { 5948 1.1 mrg /* If the argument was the same in all lanes create 5949 1.1 mrg the correctly typed vector shift amount directly. */ 5950 1.1 mrg op1 = fold_convert (TREE_TYPE (vectype), op1); 5951 1.1 mrg op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype), 5952 1.1 mrg !loop_vinfo ? gsi : NULL); 5953 1.1 mrg vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype, 5954 1.1 mrg !loop_vinfo ? gsi : NULL); 5955 1.1 mrg vec_oprnds1.create (slp_node->vec_stmts_size); 5956 1.1 mrg for (k = 0; k < slp_node->vec_stmts_size; k++) 5957 1.1 mrg vec_oprnds1.quick_push (vec_oprnd1); 5958 1.1 mrg } 5959 1.1 mrg else if (dt[1] == vect_constant_def) 5960 1.1 mrg /* The constant shift amount has been adjusted in place. */ 5961 1.1 mrg ; 5962 1.1 mrg else 5963 1.1 mrg gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype)); 5964 1.1 mrg } 5965 1.1 mrg 5966 1.1 mrg /* vec_oprnd1 is available if operand 1 should be of a scalar-type 5967 1.1 mrg (a special case for certain kind of vector shifts); otherwise, 5968 1.1 mrg operand 1 should be of a vector type (the usual case). */ 5969 1.1 mrg vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, 5970 1.1 mrg op0, &vec_oprnds0, 5971 1.1 mrg vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1); 5972 1.1 mrg 5973 1.1 mrg /* Arguments are ready. Create the new vector stmt. */ 5974 1.1 mrg FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) 5975 1.1 mrg { 5976 1.1 mrg /* For internal defs where we need to use a scalar shift arg 5977 1.1 mrg extract the first lane. 
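   A sketch of the code emitted for that case (made-up names, int
   elements):
     amt_10 = BIT_FIELD_REF <vect_cnt.5_9, 32, 0>;
     vect_x.6_11 = vect_a.4_8 << amt_10;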
*/ 5978 1.1 mrg if (scalar_shift_arg && dt[1] == vect_internal_def) 5979 1.1 mrg { 5980 1.1 mrg vop1 = vec_oprnds1[0]; 5981 1.1 mrg new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1))); 5982 1.1 mrg gassign *new_stmt 5983 1.1 mrg = gimple_build_assign (new_temp, 5984 1.1 mrg build3 (BIT_FIELD_REF, TREE_TYPE (new_temp), 5985 1.1 mrg vop1, 5986 1.1 mrg TYPE_SIZE (TREE_TYPE (new_temp)), 5987 1.1 mrg bitsize_zero_node)); 5988 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 5989 1.1 mrg vop1 = new_temp; 5990 1.1 mrg } 5991 1.1 mrg else 5992 1.1 mrg vop1 = vec_oprnds1[i]; 5993 1.1 mrg gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1); 5994 1.1 mrg new_temp = make_ssa_name (vec_dest, new_stmt); 5995 1.1 mrg gimple_assign_set_lhs (new_stmt, new_temp); 5996 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 5997 1.1 mrg if (slp_node) 5998 1.1 mrg SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 5999 1.1 mrg else 6000 1.1 mrg STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); 6001 1.1 mrg } 6002 1.1 mrg 6003 1.1 mrg if (!slp_node) 6004 1.1 mrg *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; 6005 1.1 mrg 6006 1.1 mrg vec_oprnds0.release (); 6007 1.1 mrg vec_oprnds1.release (); 6008 1.1 mrg 6009 1.1 mrg return true; 6010 1.1 mrg } 6011 1.1 mrg 6012 1.1 mrg 6013 1.1 mrg /* Function vectorizable_operation. 6014 1.1 mrg 6015 1.1 mrg Check if STMT_INFO performs a binary, unary or ternary operation that can 6016 1.1 mrg be vectorized. 6017 1.1 mrg If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized 6018 1.1 mrg stmt to replace it, put it in VEC_STMT, and insert it at GSI. 6019 1.1 mrg Return true if STMT_INFO is vectorizable in this way. */ 6020 1.1 mrg 6021 1.1 mrg static bool 6022 1.1 mrg vectorizable_operation (vec_info *vinfo, 6023 1.1 mrg stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, 6024 1.1 mrg gimple **vec_stmt, slp_tree slp_node, 6025 1.1 mrg stmt_vector_for_cost *cost_vec) 6026 1.1 mrg { 6027 1.1 mrg tree vec_dest; 6028 1.1 mrg tree scalar_dest; 6029 1.1 mrg tree op0, op1 = NULL_TREE, op2 = NULL_TREE; 6030 1.1 mrg tree vectype; 6031 1.1 mrg loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); 6032 1.1 mrg enum tree_code code, orig_code; 6033 1.1 mrg machine_mode vec_mode; 6034 1.1 mrg tree new_temp; 6035 1.1 mrg int op_type; 6036 1.1 mrg optab optab; 6037 1.1 mrg bool target_support_p; 6038 1.1 mrg enum vect_def_type dt[3] 6039 1.1 mrg = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type}; 6040 1.1 mrg int ndts = 3; 6041 1.1 mrg poly_uint64 nunits_in; 6042 1.1 mrg poly_uint64 nunits_out; 6043 1.1 mrg tree vectype_out; 6044 1.1 mrg int ncopies, vec_num; 6045 1.1 mrg int i; 6046 1.1 mrg vec<tree> vec_oprnds0 = vNULL; 6047 1.1 mrg vec<tree> vec_oprnds1 = vNULL; 6048 1.1 mrg vec<tree> vec_oprnds2 = vNULL; 6049 1.1 mrg tree vop0, vop1, vop2; 6050 1.1 mrg bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo); 6051 1.1 mrg 6052 1.1 mrg if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 6053 1.1 mrg return false; 6054 1.1 mrg 6055 1.1 mrg if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def 6056 1.1 mrg && ! vec_stmt) 6057 1.1 mrg return false; 6058 1.1 mrg 6059 1.1 mrg /* Is STMT a vectorizable binary/unary operation? */ 6060 1.1 mrg gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt); 6061 1.1 mrg if (!stmt) 6062 1.1 mrg return false; 6063 1.1 mrg 6064 1.1 mrg /* Loads and stores are handled in vectorizable_{load,store}. 
*/
6065 1.1 mrg if (STMT_VINFO_DATA_REF (stmt_info))
6066 1.1 mrg return false;
6067 1.1 mrg
6068 1.1 mrg orig_code = code = gimple_assign_rhs_code (stmt);
6069 1.1 mrg
6070 1.1 mrg /* Shifts are handled in vectorizable_shift. */
6071 1.1 mrg if (code == LSHIFT_EXPR
6072 1.1 mrg || code == RSHIFT_EXPR
6073 1.1 mrg || code == LROTATE_EXPR
6074 1.1 mrg || code == RROTATE_EXPR)
6075 1.1 mrg return false;
6076 1.1 mrg
6077 1.1 mrg /* Comparisons are handled in vectorizable_comparison. */
6078 1.1 mrg if (TREE_CODE_CLASS (code) == tcc_comparison)
6079 1.1 mrg return false;
6080 1.1 mrg
6081 1.1 mrg /* Conditions are handled in vectorizable_condition. */
6082 1.1 mrg if (code == COND_EXPR)
6083 1.1 mrg return false;
6084 1.1 mrg
6085 1.1 mrg /* For pointer addition and subtraction, we should use the normal
6086 1.1 mrg plus and minus for the vector operation. */
6087 1.1 mrg if (code == POINTER_PLUS_EXPR)
6088 1.1 mrg code = PLUS_EXPR;
6089 1.1 mrg if (code == POINTER_DIFF_EXPR)
6090 1.1 mrg code = MINUS_EXPR;
6091 1.1 mrg
6092 1.1 mrg /* Support only unary, binary or ternary operations. */
6093 1.1 mrg op_type = TREE_CODE_LENGTH (code);
6094 1.1 mrg if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6095 1.1 mrg {
6096 1.1 mrg if (dump_enabled_p ())
6097 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6098 1.1 mrg "num. args = %d (not unary/binary/ternary op).\n",
6099 1.1 mrg op_type);
6100 1.1 mrg return false;
6101 1.1 mrg }
6102 1.1 mrg
6103 1.1 mrg scalar_dest = gimple_assign_lhs (stmt);
6104 1.1 mrg vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6105 1.1 mrg
6106 1.1 mrg /* Most operations cannot handle bit-precision types without extra
6107 1.1 mrg truncations. */
6108 1.1 mrg bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6109 1.1 mrg if (!mask_op_p
6110 1.1 mrg && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6111 1.1 mrg /* Exceptions are bitwise binary operations. */
6112 1.1 mrg && code != BIT_IOR_EXPR
6113 1.1 mrg && code != BIT_XOR_EXPR
6114 1.1 mrg && code != BIT_AND_EXPR)
6115 1.1 mrg {
6116 1.1 mrg if (dump_enabled_p ())
6117 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6118 1.1 mrg "bit-precision arithmetic not supported.\n");
6119 1.1 mrg return false;
6120 1.1 mrg }
6121 1.1 mrg
6122 1.1 mrg slp_tree slp_op0;
6123 1.1 mrg if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6124 1.1 mrg 0, &op0, &slp_op0, &dt[0], &vectype))
6125 1.1 mrg {
6126 1.1 mrg if (dump_enabled_p ())
6127 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6128 1.1 mrg "use not simple.\n");
6129 1.1 mrg return false;
6130 1.1 mrg }
6131 1.1 mrg bool is_invariant = (dt[0] == vect_external_def
6132 1.1 mrg || dt[0] == vect_constant_def);
6133 1.1 mrg /* If op0 is an external or constant def, infer the vector type
6134 1.1 mrg from the scalar type. */
6135 1.1 mrg if (!vectype)
6136 1.1 mrg {
6137 1.1 mrg /* For boolean type we cannot determine vectype by
6138 1.1 mrg invariant value (don't know whether it is a vector
6139 1.1 mrg of booleans or vector of integers). We use output
6140 1.1 mrg vectype because operations on boolean don't change
6141 1.1 mrg type.
*/ 6142 1.1 mrg if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0))) 6143 1.1 mrg { 6144 1.1 mrg if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest))) 6145 1.1 mrg { 6146 1.1 mrg if (dump_enabled_p ()) 6147 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6148 1.1 mrg "not supported operation on bool value.\n"); 6149 1.1 mrg return false; 6150 1.1 mrg } 6151 1.1 mrg vectype = vectype_out; 6152 1.1 mrg } 6153 1.1 mrg else 6154 1.1 mrg vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), 6155 1.1 mrg slp_node); 6156 1.1 mrg } 6157 1.1 mrg if (vec_stmt) 6158 1.1 mrg gcc_assert (vectype); 6159 1.1 mrg if (!vectype) 6160 1.1 mrg { 6161 1.1 mrg if (dump_enabled_p ()) 6162 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6163 1.1 mrg "no vectype for scalar type %T\n", 6164 1.1 mrg TREE_TYPE (op0)); 6165 1.1 mrg 6166 1.1 mrg return false; 6167 1.1 mrg } 6168 1.1 mrg 6169 1.1 mrg nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); 6170 1.1 mrg nunits_in = TYPE_VECTOR_SUBPARTS (vectype); 6171 1.1 mrg if (maybe_ne (nunits_out, nunits_in)) 6172 1.1 mrg return false; 6173 1.1 mrg 6174 1.1 mrg tree vectype2 = NULL_TREE, vectype3 = NULL_TREE; 6175 1.1 mrg slp_tree slp_op1 = NULL, slp_op2 = NULL; 6176 1.1 mrg if (op_type == binary_op || op_type == ternary_op) 6177 1.1 mrg { 6178 1.1 mrg if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 6179 1.1 mrg 1, &op1, &slp_op1, &dt[1], &vectype2)) 6180 1.1 mrg { 6181 1.1 mrg if (dump_enabled_p ()) 6182 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6183 1.1 mrg "use not simple.\n"); 6184 1.1 mrg return false; 6185 1.1 mrg } 6186 1.1 mrg is_invariant &= (dt[1] == vect_external_def 6187 1.1 mrg || dt[1] == vect_constant_def); 6188 1.1 mrg if (vectype2 6189 1.1 mrg && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2))) 6190 1.1 mrg return false; 6191 1.1 mrg } 6192 1.1 mrg if (op_type == ternary_op) 6193 1.1 mrg { 6194 1.1 mrg if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 6195 1.1 mrg 2, &op2, &slp_op2, &dt[2], &vectype3)) 6196 1.1 mrg { 6197 1.1 mrg if (dump_enabled_p ()) 6198 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6199 1.1 mrg "use not simple.\n"); 6200 1.1 mrg return false; 6201 1.1 mrg } 6202 1.1 mrg is_invariant &= (dt[2] == vect_external_def 6203 1.1 mrg || dt[2] == vect_constant_def); 6204 1.1 mrg if (vectype3 6205 1.1 mrg && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3))) 6206 1.1 mrg return false; 6207 1.1 mrg } 6208 1.1 mrg 6209 1.1 mrg /* Multiple types in SLP are handled by creating the appropriate number of 6210 1.1 mrg vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 6211 1.1 mrg case of SLP. */ 6212 1.1 mrg if (slp_node) 6213 1.1 mrg { 6214 1.1 mrg ncopies = 1; 6215 1.1 mrg vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); 6216 1.1 mrg } 6217 1.1 mrg else 6218 1.1 mrg { 6219 1.1 mrg ncopies = vect_get_num_copies (loop_vinfo, vectype); 6220 1.1 mrg vec_num = 1; 6221 1.1 mrg } 6222 1.1 mrg 6223 1.1 mrg gcc_assert (ncopies >= 1); 6224 1.1 mrg 6225 1.1 mrg /* Reject attempts to combine mask types with nonmask types, e.g. if 6226 1.1 mrg we have an AND between a (nonmask) boolean loaded from memory and 6227 1.1 mrg a (mask) boolean result of a comparison. 6228 1.1 mrg 6229 1.1 mrg TODO: We could easily fix these cases up using pattern statements. 
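   A sketch of a rejected case (hypothetical types):  _1 loaded from a
   bool field is a nonmask vector of bytes, _2 = a_4 < b_5 is a mask
   vector, and  _3 = _1 & _2  would mix the two representations.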
*/
6230 1.1 mrg if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6231 1.1 mrg || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6232 1.1 mrg || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6233 1.1 mrg {
6234 1.1 mrg if (dump_enabled_p ())
6235 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6236 1.1 mrg "mixed mask and nonmask vector types\n");
6237 1.1 mrg return false;
6238 1.1 mrg }
6239 1.1 mrg
6240 1.1 mrg /* Supportable by target? */
6241 1.1 mrg
6242 1.1 mrg vec_mode = TYPE_MODE (vectype);
6243 1.1 mrg if (code == MULT_HIGHPART_EXPR)
6244 1.1 mrg target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6245 1.1 mrg else
6246 1.1 mrg {
6247 1.1 mrg optab = optab_for_tree_code (code, vectype, optab_default);
6248 1.1 mrg if (!optab)
6249 1.1 mrg {
6250 1.1 mrg if (dump_enabled_p ())
6251 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6252 1.1 mrg "no optab.\n");
6253 1.1 mrg return false;
6254 1.1 mrg }
6255 1.1 mrg target_support_p = (optab_handler (optab, vec_mode)
6256 1.1 mrg != CODE_FOR_nothing);
6257 1.1 mrg }
6258 1.1 mrg
6259 1.1 mrg bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6260 1.1 mrg if (!target_support_p)
6261 1.1 mrg {
6262 1.1 mrg if (dump_enabled_p ())
6263 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6264 1.1 mrg "op not supported by target.\n");
6265 1.1 mrg /* Check only during analysis. */
6266 1.1 mrg if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6267 1.1 mrg || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6268 1.1 mrg return false;
6269 1.1 mrg if (dump_enabled_p ())
6270 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location,
6271 1.1 mrg "proceeding using word mode.\n");
6272 1.1 mrg using_emulated_vectors_p = true;
6273 1.1 mrg }
6274 1.1 mrg
6275 1.1 mrg if (using_emulated_vectors_p
6276 1.1 mrg && !vect_can_vectorize_without_simd_p (code))
6277 1.1 mrg {
6278 1.1 mrg if (dump_enabled_p ())
6279 1.1 mrg dump_printf (MSG_NOTE, "using word mode not possible.\n");
6280 1.1 mrg return false;
6281 1.1 mrg }
6282 1.1 mrg
6283 1.1 mrg /* ??? We should instead expand the operations here, instead of
6284 1.1 mrg relying on vector lowering which has this hard cap on the number
6285 1.1 mrg of vector elements below which it performs elementwise operations. */
6286 1.1 mrg if (using_emulated_vectors_p
6287 1.1 mrg && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
6288 1.1 mrg && ((BITS_PER_WORD / vector_element_bits (vectype)) < 4
6289 1.1 mrg || maybe_lt (nunits_out, 4U)))
6290 1.1 mrg {
6291 1.1 mrg if (dump_enabled_p ())
6292 1.1 mrg dump_printf (MSG_NOTE, "not using word mode for +- and less than "
6293 1.1 mrg "four vector elements\n");
6294 1.1 mrg return false;
6295 1.1 mrg }
6296 1.1 mrg
6297 1.1 mrg int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6298 1.1 mrg vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6299 1.1 mrg internal_fn cond_fn = get_conditional_internal_fn (code);
6300 1.1 mrg
6301 1.1 mrg /* If operating on inactive elements could generate spurious traps,
6302 1.1 mrg we need to restrict the operation to active lanes. Note that this
6303 1.1 mrg specifically doesn't apply to unhoisted invariants, since they
6304 1.1 mrg operate on the same value for every lane.
6305 1.1 mrg 6306 1.1 mrg Similarly, if this operation is part of a reduction, a fully-masked 6307 1.1 mrg loop should only change the active lanes of the reduction chain, 6308 1.1 mrg keeping the inactive lanes as-is. */ 6309 1.1 mrg bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt)) 6310 1.1 mrg || reduc_idx >= 0); 6311 1.1 mrg 6312 1.1 mrg if (!vec_stmt) /* transformation not required. */ 6313 1.1 mrg { 6314 1.1 mrg if (loop_vinfo 6315 1.1 mrg && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) 6316 1.1 mrg && mask_out_inactive) 6317 1.1 mrg { 6318 1.1 mrg if (cond_fn == IFN_LAST 6319 1.1 mrg || !direct_internal_fn_supported_p (cond_fn, vectype, 6320 1.1 mrg OPTIMIZE_FOR_SPEED)) 6321 1.1 mrg { 6322 1.1 mrg if (dump_enabled_p ()) 6323 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6324 1.1 mrg "can't use a fully-masked loop because no" 6325 1.1 mrg " conditional operation is available.\n"); 6326 1.1 mrg LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; 6327 1.1 mrg } 6328 1.1 mrg else 6329 1.1 mrg vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num, 6330 1.1 mrg vectype, NULL); 6331 1.1 mrg } 6332 1.1 mrg 6333 1.1 mrg /* Put types on constant and invariant SLP children. */ 6334 1.1 mrg if (slp_node 6335 1.1 mrg && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype) 6336 1.1 mrg || !vect_maybe_update_slp_op_vectype (slp_op1, vectype) 6337 1.1 mrg || !vect_maybe_update_slp_op_vectype (slp_op2, vectype))) 6338 1.1 mrg { 6339 1.1 mrg if (dump_enabled_p ()) 6340 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6341 1.1 mrg "incompatible vector types for invariants\n"); 6342 1.1 mrg return false; 6343 1.1 mrg } 6344 1.1 mrg 6345 1.1 mrg STMT_VINFO_TYPE (stmt_info) = op_vec_info_type; 6346 1.1 mrg DUMP_VECT_SCOPE ("vectorizable_operation"); 6347 1.1 mrg vect_model_simple_cost (vinfo, stmt_info, 6348 1.1 mrg ncopies, dt, ndts, slp_node, cost_vec); 6349 1.1 mrg if (using_emulated_vectors_p) 6350 1.1 mrg { 6351 1.1 mrg /* The above vect_model_simple_cost call handles constants 6352 1.1 mrg in the prologue and (mis-)costs one of the stmts as 6353 1.1 mrg vector stmt. See tree-vect-generic.cc:do_plus_minus/do_negate 6354 1.1 mrg for the actual lowering that will be applied. */ 6355 1.1 mrg unsigned n 6356 1.1 mrg = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies; 6357 1.1 mrg switch (code) 6358 1.1 mrg { 6359 1.1 mrg case PLUS_EXPR: 6360 1.1 mrg n *= 5; 6361 1.1 mrg break; 6362 1.1 mrg case MINUS_EXPR: 6363 1.1 mrg n *= 6; 6364 1.1 mrg break; 6365 1.1 mrg case NEGATE_EXPR: 6366 1.1 mrg n *= 4; 6367 1.1 mrg break; 6368 1.1 mrg default:; 6369 1.1 mrg } 6370 1.1 mrg record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info, 0, vect_body); 6371 1.1 mrg } 6372 1.1 mrg return true; 6373 1.1 mrg } 6374 1.1 mrg 6375 1.1 mrg /* Transform. */ 6376 1.1 mrg 6377 1.1 mrg if (dump_enabled_p ()) 6378 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, 6379 1.1 mrg "transform binary/unary operation.\n"); 6380 1.1 mrg 6381 1.1 mrg bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); 6382 1.1 mrg 6383 1.1 mrg /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as 6384 1.1 mrg vectors with unsigned elements, but the result is signed. So, we 6385 1.1 mrg need to compute the MINUS_EXPR into vectype temporary and 6386 1.1 mrg VIEW_CONVERT_EXPR it into the final vectype_out result. 
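   A sketch of that (invented names): for  d_4 = p_2 - q_3  on pointers,
     vect_t.8_9 = vect_p.6_7 - vect_q.7_8;
     vect_d.9_10 = VIEW_CONVERT_EXPR<vector(2) long int>(vect_t.8_9);
   where the subtraction is carried out in the unsigned vectype and the
   result reinterpreted as the signed vectype_out.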
*/ 6387 1.1 mrg tree vec_cvt_dest = NULL_TREE; 6388 1.1 mrg if (orig_code == POINTER_DIFF_EXPR) 6389 1.1 mrg { 6390 1.1 mrg vec_dest = vect_create_destination_var (scalar_dest, vectype); 6391 1.1 mrg vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out); 6392 1.1 mrg } 6393 1.1 mrg /* Handle def. */ 6394 1.1 mrg else 6395 1.1 mrg vec_dest = vect_create_destination_var (scalar_dest, vectype_out); 6396 1.1 mrg 6397 1.1 mrg /* In case the vectorization factor (VF) is bigger than the number 6398 1.1 mrg of elements that we can fit in a vectype (nunits), we have to generate 6399 1.1 mrg more than one vector stmt - i.e - we need to "unroll" the 6400 1.1 mrg vector stmt by a factor VF/nunits. In doing so, we record a pointer 6401 1.1 mrg from one copy of the vector stmt to the next, in the field 6402 1.1 mrg STMT_VINFO_RELATED_STMT. This is necessary in order to allow following 6403 1.1 mrg stages to find the correct vector defs to be used when vectorizing 6404 1.1 mrg stmts that use the defs of the current stmt. The example below 6405 1.1 mrg illustrates the vectorization process when VF=16 and nunits=4 (i.e., 6406 1.1 mrg we need to create 4 vectorized stmts): 6407 1.1 mrg 6408 1.1 mrg before vectorization: 6409 1.1 mrg RELATED_STMT VEC_STMT 6410 1.1 mrg S1: x = memref - - 6411 1.1 mrg S2: z = x + 1 - - 6412 1.1 mrg 6413 1.1 mrg step 1: vectorize stmt S1 (done in vectorizable_load. See more details 6414 1.1 mrg there): 6415 1.1 mrg RELATED_STMT VEC_STMT 6416 1.1 mrg VS1_0: vx0 = memref0 VS1_1 - 6417 1.1 mrg VS1_1: vx1 = memref1 VS1_2 - 6418 1.1 mrg VS1_2: vx2 = memref2 VS1_3 - 6419 1.1 mrg VS1_3: vx3 = memref3 - - 6420 1.1 mrg S1: x = load - VS1_0 6421 1.1 mrg S2: z = x + 1 - - 6422 1.1 mrg 6423 1.1 mrg step2: vectorize stmt S2 (done here): 6424 1.1 mrg To vectorize stmt S2 we first need to find the relevant vector 6425 1.1 mrg def for the first operand 'x'. This is, as usual, obtained from 6426 1.1 mrg the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt 6427 1.1 mrg that defines 'x' (S1). This way we find the stmt VS1_0, and the 6428 1.1 mrg relevant vector def 'vx0'. Having found 'vx0' we can generate 6429 1.1 mrg the vector stmt VS2_0, and as usual, record it in the 6430 1.1 mrg STMT_VINFO_VEC_STMT of stmt S2. 6431 1.1 mrg When creating the second copy (VS2_1), we obtain the relevant vector 6432 1.1 mrg def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of 6433 1.1 mrg stmt VS1_0. This way we find the stmt VS1_1 and the relevant 6434 1.1 mrg vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a 6435 1.1 mrg pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0. 6436 1.1 mrg Similarly when creating stmts VS2_2 and VS2_3. This is the resulting 6437 1.1 mrg chain of stmts and pointers: 6438 1.1 mrg RELATED_STMT VEC_STMT 6439 1.1 mrg VS1_0: vx0 = memref0 VS1_1 - 6440 1.1 mrg VS1_1: vx1 = memref1 VS1_2 - 6441 1.1 mrg VS1_2: vx2 = memref2 VS1_3 - 6442 1.1 mrg VS1_3: vx3 = memref3 - - 6443 1.1 mrg S1: x = load - VS1_0 6444 1.1 mrg VS2_0: vz0 = vx0 + v1 VS2_1 - 6445 1.1 mrg VS2_1: vz1 = vx1 + v1 VS2_2 - 6446 1.1 mrg VS2_2: vz2 = vx2 + v1 VS2_3 - 6447 1.1 mrg VS2_3: vz3 = vx3 + v1 - - 6448 1.1 mrg S2: z = x + 1 - VS2_0 */ 6449 1.1 mrg 6450 1.1 mrg vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, 6451 1.1 mrg op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2); 6452 1.1 mrg /* Arguments are ready. Create the new vector stmt. 
*/ 6453 1.1 mrg FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) 6454 1.1 mrg { 6455 1.1 mrg gimple *new_stmt = NULL; 6456 1.1 mrg vop1 = ((op_type == binary_op || op_type == ternary_op) 6457 1.1 mrg ? vec_oprnds1[i] : NULL_TREE); 6458 1.1 mrg vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE); 6459 1.1 mrg if (masked_loop_p && mask_out_inactive) 6460 1.1 mrg { 6461 1.1 mrg tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies, 6462 1.1 mrg vectype, i); 6463 1.1 mrg auto_vec<tree> vops (5); 6464 1.1 mrg vops.quick_push (mask); 6465 1.1 mrg vops.quick_push (vop0); 6466 1.1 mrg if (vop1) 6467 1.1 mrg vops.quick_push (vop1); 6468 1.1 mrg if (vop2) 6469 1.1 mrg vops.quick_push (vop2); 6470 1.1 mrg if (reduc_idx >= 0) 6471 1.1 mrg { 6472 1.1 mrg /* Perform the operation on active elements only and take 6473 1.1 mrg inactive elements from the reduction chain input. */ 6474 1.1 mrg gcc_assert (!vop2); 6475 1.1 mrg vops.quick_push (reduc_idx == 1 ? vop1 : vop0); 6476 1.1 mrg } 6477 1.1 mrg else 6478 1.1 mrg { 6479 1.1 mrg auto else_value = targetm.preferred_else_value 6480 1.1 mrg (cond_fn, vectype, vops.length () - 1, &vops[1]); 6481 1.1 mrg vops.quick_push (else_value); 6482 1.1 mrg } 6483 1.1 mrg gcall *call = gimple_build_call_internal_vec (cond_fn, vops); 6484 1.1 mrg new_temp = make_ssa_name (vec_dest, call); 6485 1.1 mrg gimple_call_set_lhs (call, new_temp); 6486 1.1 mrg gimple_call_set_nothrow (call, true); 6487 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, call, gsi); 6488 1.1 mrg new_stmt = call; 6489 1.1 mrg } 6490 1.1 mrg else 6491 1.1 mrg { 6492 1.1 mrg tree mask = NULL_TREE; 6493 1.1 mrg /* When combining two masks check if either of them is elsewhere 6494 1.1 mrg combined with a loop mask, if that's the case we can mark that the 6495 1.1 mrg new combined mask doesn't need to be combined with a loop mask. */ 6496 1.1 mrg if (masked_loop_p 6497 1.1 mrg && code == BIT_AND_EXPR 6498 1.1 mrg && VECTOR_BOOLEAN_TYPE_P (vectype)) 6499 1.1 mrg { 6500 1.1 mrg if (loop_vinfo->scalar_cond_masked_set.contains ({ op0, 6501 1.1 mrg ncopies})) 6502 1.1 mrg { 6503 1.1 mrg mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies, 6504 1.1 mrg vectype, i); 6505 1.1 mrg 6506 1.1 mrg vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask, 6507 1.1 mrg vop0, gsi); 6508 1.1 mrg } 6509 1.1 mrg 6510 1.1 mrg if (loop_vinfo->scalar_cond_masked_set.contains ({ op1, 6511 1.1 mrg ncopies })) 6512 1.1 mrg { 6513 1.1 mrg mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies, 6514 1.1 mrg vectype, i); 6515 1.1 mrg 6516 1.1 mrg vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask, 6517 1.1 mrg vop1, gsi); 6518 1.1 mrg } 6519 1.1 mrg } 6520 1.1 mrg 6521 1.1 mrg new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2); 6522 1.1 mrg new_temp = make_ssa_name (vec_dest, new_stmt); 6523 1.1 mrg gimple_assign_set_lhs (new_stmt, new_temp); 6524 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 6525 1.1 mrg if (using_emulated_vectors_p) 6526 1.1 mrg suppress_warning (new_stmt, OPT_Wvector_operation_performance); 6527 1.1 mrg 6528 1.1 mrg /* Enter the combined value into the vector cond hash so we don't 6529 1.1 mrg AND it with a loop mask again. 
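   E.g. (hypothetical): if  _1 = a_5 < b_6  was seen elsewhere ANDed
   with the loop mask, prepare_vec_mask above folds the loop mask into
   this AND, and recording { new_temp, mask } lets later users reuse
   the already-masked value instead of applying the loop mask twice.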
*/ 6530 1.1 mrg if (mask) 6531 1.1 mrg loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask }); 6532 1.1 mrg 6533 1.1 mrg if (vec_cvt_dest) 6534 1.1 mrg { 6535 1.1 mrg new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp); 6536 1.1 mrg new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR, 6537 1.1 mrg new_temp); 6538 1.1 mrg new_temp = make_ssa_name (vec_cvt_dest, new_stmt); 6539 1.1 mrg gimple_assign_set_lhs (new_stmt, new_temp); 6540 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, 6541 1.1 mrg new_stmt, gsi); 6542 1.1 mrg } 6543 1.1 mrg } 6544 1.1 mrg if (slp_node) 6545 1.1 mrg SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 6546 1.1 mrg else 6547 1.1 mrg STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); 6548 1.1 mrg } 6549 1.1 mrg 6550 1.1 mrg if (!slp_node) 6551 1.1 mrg *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; 6552 1.1 mrg 6553 1.1 mrg vec_oprnds0.release (); 6554 1.1 mrg vec_oprnds1.release (); 6555 1.1 mrg vec_oprnds2.release (); 6556 1.1 mrg 6557 1.1 mrg return true; 6558 1.1 mrg } 6559 1.1 mrg 6560 1.1 mrg /* A helper function to ensure data reference DR_INFO's base alignment. */ 6561 1.1 mrg 6562 1.1 mrg static void 6563 1.1 mrg ensure_base_align (dr_vec_info *dr_info) 6564 1.1 mrg { 6565 1.1 mrg /* Alignment is only analyzed for the first element of a DR group, 6566 1.1 mrg use that to look at base alignment we need to enforce. */ 6567 1.1 mrg if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt)) 6568 1.1 mrg dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt)); 6569 1.1 mrg 6570 1.1 mrg gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED); 6571 1.1 mrg 6572 1.1 mrg if (dr_info->base_misaligned) 6573 1.1 mrg { 6574 1.1 mrg tree base_decl = dr_info->base_decl; 6575 1.1 mrg 6576 1.1 mrg // We should only be able to increase the alignment of a base object if 6577 1.1 mrg // we know what its new alignment should be at compile time. 6578 1.1 mrg unsigned HOST_WIDE_INT align_base_to = 6579 1.1 mrg DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT; 6580 1.1 mrg 6581 1.1 mrg if (decl_in_symtab_p (base_decl)) 6582 1.1 mrg symtab_node::get (base_decl)->increase_alignment (align_base_to); 6583 1.1 mrg else if (DECL_ALIGN (base_decl) < align_base_to) 6584 1.1 mrg { 6585 1.1 mrg SET_DECL_ALIGN (base_decl, align_base_to); 6586 1.1 mrg DECL_USER_ALIGN (base_decl) = 1; 6587 1.1 mrg } 6588 1.1 mrg dr_info->base_misaligned = false; 6589 1.1 mrg } 6590 1.1 mrg } 6591 1.1 mrg 6592 1.1 mrg 6593 1.1 mrg /* Function get_group_alias_ptr_type. 6594 1.1 mrg 6595 1.1 mrg Return the alias type for the group starting at FIRST_STMT_INFO. 
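   E.g. (sketch): if one access in the group is through  int *  and
   another through a union'd  float *  view, the alias sets differ and
   ptr_type_node is returned; otherwise the alias pointer type of the
   first data reference is used for the whole group.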
*/ 6596 1.1 mrg 6597 1.1 mrg static tree 6598 1.1 mrg get_group_alias_ptr_type (stmt_vec_info first_stmt_info) 6599 1.1 mrg { 6600 1.1 mrg struct data_reference *first_dr, *next_dr; 6601 1.1 mrg 6602 1.1 mrg first_dr = STMT_VINFO_DATA_REF (first_stmt_info); 6603 1.1 mrg stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info); 6604 1.1 mrg while (next_stmt_info) 6605 1.1 mrg { 6606 1.1 mrg next_dr = STMT_VINFO_DATA_REF (next_stmt_info); 6607 1.1 mrg if (get_alias_set (DR_REF (first_dr)) 6608 1.1 mrg != get_alias_set (DR_REF (next_dr))) 6609 1.1 mrg { 6610 1.1 mrg if (dump_enabled_p ()) 6611 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, 6612 1.1 mrg "conflicting alias set types.\n"); 6613 1.1 mrg return ptr_type_node; 6614 1.1 mrg } 6615 1.1 mrg next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info); 6616 1.1 mrg } 6617 1.1 mrg return reference_alias_ptr_type (DR_REF (first_dr)); 6618 1.1 mrg } 6619 1.1 mrg 6620 1.1 mrg 6621 1.1 mrg /* Function scan_operand_equal_p. 6622 1.1 mrg 6623 1.1 mrg Helper function for check_scan_store. Compare two references 6624 1.1 mrg with .GOMP_SIMD_LANE bases. */ 6625 1.1 mrg 6626 1.1 mrg static bool 6627 1.1 mrg scan_operand_equal_p (tree ref1, tree ref2) 6628 1.1 mrg { 6629 1.1 mrg tree ref[2] = { ref1, ref2 }; 6630 1.1 mrg poly_int64 bitsize[2], bitpos[2]; 6631 1.1 mrg tree offset[2], base[2]; 6632 1.1 mrg for (int i = 0; i < 2; ++i) 6633 1.1 mrg { 6634 1.1 mrg machine_mode mode; 6635 1.1 mrg int unsignedp, reversep, volatilep = 0; 6636 1.1 mrg base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i], 6637 1.1 mrg &offset[i], &mode, &unsignedp, 6638 1.1 mrg &reversep, &volatilep); 6639 1.1 mrg if (reversep || volatilep || maybe_ne (bitpos[i], 0)) 6640 1.1 mrg return false; 6641 1.1 mrg if (TREE_CODE (base[i]) == MEM_REF 6642 1.1 mrg && offset[i] == NULL_TREE 6643 1.1 mrg && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME) 6644 1.1 mrg { 6645 1.1 mrg gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0)); 6646 1.1 mrg if (is_gimple_assign (def_stmt) 6647 1.1 mrg && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR 6648 1.1 mrg && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR 6649 1.1 mrg && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME) 6650 1.1 mrg { 6651 1.1 mrg if (maybe_ne (mem_ref_offset (base[i]), 0)) 6652 1.1 mrg return false; 6653 1.1 mrg base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0); 6654 1.1 mrg offset[i] = gimple_assign_rhs2 (def_stmt); 6655 1.1 mrg } 6656 1.1 mrg } 6657 1.1 mrg } 6658 1.1 mrg 6659 1.1 mrg if (!operand_equal_p (base[0], base[1], 0)) 6660 1.1 mrg return false; 6661 1.1 mrg if (maybe_ne (bitsize[0], bitsize[1])) 6662 1.1 mrg return false; 6663 1.1 mrg if (offset[0] != offset[1]) 6664 1.1 mrg { 6665 1.1 mrg if (!offset[0] || !offset[1]) 6666 1.1 mrg return false; 6667 1.1 mrg if (!operand_equal_p (offset[0], offset[1], 0)) 6668 1.1 mrg { 6669 1.1 mrg tree step[2]; 6670 1.1 mrg for (int i = 0; i < 2; ++i) 6671 1.1 mrg { 6672 1.1 mrg step[i] = integer_one_node; 6673 1.1 mrg if (TREE_CODE (offset[i]) == SSA_NAME) 6674 1.1 mrg { 6675 1.1 mrg gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]); 6676 1.1 mrg if (is_gimple_assign (def_stmt) 6677 1.1 mrg && gimple_assign_rhs_code (def_stmt) == MULT_EXPR 6678 1.1 mrg && (TREE_CODE (gimple_assign_rhs2 (def_stmt)) 6679 1.1 mrg == INTEGER_CST)) 6680 1.1 mrg { 6681 1.1 mrg step[i] = gimple_assign_rhs2 (def_stmt); 6682 1.1 mrg offset[i] = gimple_assign_rhs1 (def_stmt); 6683 1.1 mrg } 6684 1.1 mrg } 6685 1.1 mrg else if (TREE_CODE 
(offset[i]) == MULT_EXPR)
6686 1.1 mrg {
6687 1.1 mrg step[i] = TREE_OPERAND (offset[i], 1);
6688 1.1 mrg offset[i] = TREE_OPERAND (offset[i], 0);
6689 1.1 mrg }
6690 1.1 mrg tree rhs1 = NULL_TREE;
6691 1.1 mrg if (TREE_CODE (offset[i]) == SSA_NAME)
6692 1.1 mrg {
6693 1.1 mrg gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6694 1.1 mrg if (gimple_assign_cast_p (def_stmt))
6695 1.1 mrg rhs1 = gimple_assign_rhs1 (def_stmt);
6696 1.1 mrg }
6697 1.1 mrg else if (CONVERT_EXPR_P (offset[i]))
6698 1.1 mrg rhs1 = TREE_OPERAND (offset[i], 0);
6699 1.1 mrg if (rhs1
6700 1.1 mrg && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6701 1.1 mrg && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6702 1.1 mrg && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6703 1.1 mrg >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6704 1.1 mrg offset[i] = rhs1;
6705 1.1 mrg }
6706 1.1 mrg if (!operand_equal_p (offset[0], offset[1], 0)
6707 1.1 mrg || !operand_equal_p (step[0], step[1], 0))
6708 1.1 mrg return false;
6709 1.1 mrg }
6710 1.1 mrg }
6711 1.1 mrg return true;
6712 1.1 mrg }
6713 1.1 mrg
6714 1.1 mrg
6715 1.1 mrg enum scan_store_kind {
6716 1.1 mrg /* Normal permutation. */
6717 1.1 mrg scan_store_kind_perm,
6718 1.1 mrg
6719 1.1 mrg /* Whole vector left shift permutation with zero init. */
6720 1.1 mrg scan_store_kind_lshift_zero,
6721 1.1 mrg
6722 1.1 mrg /* Whole vector left shift permutation and VEC_COND_EXPR. */
6723 1.1 mrg scan_store_kind_lshift_cond
6724 1.1 mrg };
6725 1.1 mrg
6726 1.1 mrg /* Function scan_store_can_perm_p.
6727 1.1 mrg
6728 1.1 mrg Verify if we can perform the needed permutations or whole vector shifts.
6729 1.1 mrg Return -1 on failure, otherwise exact log2 of vectype's nunits.
6730 1.1 mrg USE_WHOLE_VECTOR is a vector of enum scan_store_kind recording which
6731 1.1 mrg operation to do at each step. */
6732 1.1 mrg
6733 1.1 mrg static int
6734 1.1 mrg scan_store_can_perm_p (tree vectype, tree init,
6735 1.1 mrg vec<enum scan_store_kind> *use_whole_vector = NULL)
6736 1.1 mrg {
6737 1.1 mrg enum machine_mode vec_mode = TYPE_MODE (vectype);
6738 1.1 mrg unsigned HOST_WIDE_INT nunits;
6739 1.1 mrg if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6740 1.1 mrg return -1;
6741 1.1 mrg int units_log2 = exact_log2 (nunits);
6742 1.1 mrg if (units_log2 <= 0)
6743 1.1 mrg return -1;
6744 1.1 mrg
6745 1.1 mrg int i;
6746 1.1 mrg enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6747 1.1 mrg for (i = 0; i <= units_log2; ++i)
6748 1.1 mrg {
6749 1.1 mrg unsigned HOST_WIDE_INT j, k;
6750 1.1 mrg enum scan_store_kind kind = scan_store_kind_perm;
6751 1.1 mrg vec_perm_builder sel (nunits, nunits, 1);
6752 1.1 mrg sel.quick_grow (nunits);
6753 1.1 mrg if (i == units_log2)
6754 1.1 mrg {
6755 1.1 mrg for (j = 0; j < nunits; ++j)
6756 1.1 mrg sel[j] = nunits - 1;
6757 1.1 mrg }
6758 1.1 mrg else
6759 1.1 mrg {
6760 1.1 mrg for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6761 1.1 mrg sel[j] = j;
6762 1.1 mrg for (k = 0; j < nunits; ++j, ++k)
6763 1.1 mrg sel[j] = nunits + k;
6764 1.1 mrg }
6765 1.1 mrg vec_perm_indices indices (sel, i == units_log2 ?
1 : 2, nunits); 6766 1.1 mrg if (!can_vec_perm_const_p (vec_mode, indices)) 6767 1.1 mrg { 6768 1.1 mrg if (i == units_log2) 6769 1.1 mrg return -1; 6770 1.1 mrg 6771 1.1 mrg if (whole_vector_shift_kind == scan_store_kind_perm) 6772 1.1 mrg { 6773 1.1 mrg if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing) 6774 1.1 mrg return -1; 6775 1.1 mrg whole_vector_shift_kind = scan_store_kind_lshift_zero; 6776 1.1 mrg /* Whole vector shifts shift in zeros, so if init is all zero 6777 1.1 mrg constant, there is no need to do anything further. */ 6778 1.1 mrg if ((TREE_CODE (init) != INTEGER_CST 6779 1.1 mrg && TREE_CODE (init) != REAL_CST) 6780 1.1 mrg || !initializer_zerop (init)) 6781 1.1 mrg { 6782 1.1 mrg tree masktype = truth_type_for (vectype); 6783 1.1 mrg if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST)) 6784 1.1 mrg return -1; 6785 1.1 mrg whole_vector_shift_kind = scan_store_kind_lshift_cond; 6786 1.1 mrg } 6787 1.1 mrg } 6788 1.1 mrg kind = whole_vector_shift_kind; 6789 1.1 mrg } 6790 1.1 mrg if (use_whole_vector) 6791 1.1 mrg { 6792 1.1 mrg if (kind != scan_store_kind_perm && use_whole_vector->is_empty ()) 6793 1.1 mrg use_whole_vector->safe_grow_cleared (i, true); 6794 1.1 mrg if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ()) 6795 1.1 mrg use_whole_vector->safe_push (kind); 6796 1.1 mrg } 6797 1.1 mrg } 6798 1.1 mrg 6799 1.1 mrg return units_log2; 6800 1.1 mrg } 6801 1.1 mrg 6802 1.1 mrg 6803 1.1 mrg /* Function check_scan_store. 6804 1.1 mrg 6805 1.1 mrg Check magic stores for #pragma omp scan {in,ex}clusive reductions. */ 6806 1.1 mrg 6807 1.1 mrg static bool 6808 1.1 mrg check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype, 6809 1.1 mrg enum vect_def_type rhs_dt, bool slp, tree mask, 6810 1.1 mrg vect_memory_access_type memory_access_type) 6811 1.1 mrg { 6812 1.1 mrg loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); 6813 1.1 mrg dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); 6814 1.1 mrg tree ref_type; 6815 1.1 mrg 6816 1.1 mrg gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1); 6817 1.1 mrg if (slp 6818 1.1 mrg || mask 6819 1.1 mrg || memory_access_type != VMAT_CONTIGUOUS 6820 1.1 mrg || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR 6821 1.1 mrg || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0)) 6822 1.1 mrg || loop_vinfo == NULL 6823 1.1 mrg || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) 6824 1.1 mrg || STMT_VINFO_GROUPED_ACCESS (stmt_info) 6825 1.1 mrg || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info)) 6826 1.1 mrg || !integer_zerop (DR_INIT (dr_info->dr)) 6827 1.1 mrg || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr))) 6828 1.1 mrg || !alias_sets_conflict_p (get_alias_set (vectype), 6829 1.1 mrg get_alias_set (TREE_TYPE (ref_type)))) 6830 1.1 mrg { 6831 1.1 mrg if (dump_enabled_p ()) 6832 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6833 1.1 mrg "unsupported OpenMP scan store.\n"); 6834 1.1 mrg return false; 6835 1.1 mrg } 6836 1.1 mrg 6837 1.1 mrg /* We need to pattern match code built by OpenMP lowering and simplified 6838 1.1 mrg by following optimizations into something we can handle. 6839 1.1 mrg #pragma omp simd reduction(inscan,+:r) 6840 1.1 mrg for (...) 
6841 1.1 mrg { 6842 1.1 mrg r += something (); 6843 1.1 mrg #pragma omp scan inclusive (r) 6844 1.1 mrg use (r); 6845 1.1 mrg } 6846 1.1 mrg shall have body with: 6847 1.1 mrg // Initialization for input phase, store the reduction initializer: 6848 1.1 mrg _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0); 6849 1.1 mrg _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1); 6850 1.1 mrg D.2042[_21] = 0; 6851 1.1 mrg // Actual input phase: 6852 1.1 mrg ... 6853 1.1 mrg r.0_5 = D.2042[_20]; 6854 1.1 mrg _6 = _4 + r.0_5; 6855 1.1 mrg D.2042[_20] = _6; 6856 1.1 mrg // Initialization for scan phase: 6857 1.1 mrg _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2); 6858 1.1 mrg _26 = D.2043[_25]; 6859 1.1 mrg _27 = D.2042[_25]; 6860 1.1 mrg _28 = _26 + _27; 6861 1.1 mrg D.2043[_25] = _28; 6862 1.1 mrg D.2042[_25] = _28; 6863 1.1 mrg // Actual scan phase: 6864 1.1 mrg ... 6865 1.1 mrg r.1_8 = D.2042[_20]; 6866 1.1 mrg ... 6867 1.1 mrg The "omp simd array" variable D.2042 holds the privatized copy used 6868 1.1 mrg inside of the loop and D.2043 is another one that holds copies of 6869 1.1 mrg the current original list item. The separate GOMP_SIMD_LANE ifn 6870 1.1 mrg kinds are there in order to allow optimizing the initializer store 6871 1.1 mrg and combiner sequence, e.g. if it is originally some C++ish user 6872 1.1 mrg defined reduction, but allow the vectorizer to pattern recognize it 6873 1.1 mrg and turn into the appropriate vectorized scan. 6874 1.1 mrg 6875 1.1 mrg For exclusive scan, this is slightly different: 6876 1.1 mrg #pragma omp simd reduction(inscan,+:r) 6877 1.1 mrg for (...) 6878 1.1 mrg { 6879 1.1 mrg use (r); 6880 1.1 mrg #pragma omp scan exclusive (r) 6881 1.1 mrg r += something (); 6882 1.1 mrg } 6883 1.1 mrg shall have body with: 6884 1.1 mrg // Initialization for input phase, store the reduction initializer: 6885 1.1 mrg _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0); 6886 1.1 mrg _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1); 6887 1.1 mrg D.2042[_21] = 0; 6888 1.1 mrg // Actual input phase: 6889 1.1 mrg ... 6890 1.1 mrg r.0_5 = D.2042[_20]; 6891 1.1 mrg _6 = _4 + r.0_5; 6892 1.1 mrg D.2042[_20] = _6; 6893 1.1 mrg // Initialization for scan phase: 6894 1.1 mrg _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3); 6895 1.1 mrg _26 = D.2043[_25]; 6896 1.1 mrg D.2044[_25] = _26; 6897 1.1 mrg _27 = D.2042[_25]; 6898 1.1 mrg _28 = _26 + _27; 6899 1.1 mrg D.2043[_25] = _28; 6900 1.1 mrg // Actual scan phase: 6901 1.1 mrg ... 6902 1.1 mrg r.1_8 = D.2044[_20]; 6903 1.1 mrg ... */ 6904 1.1 mrg 6905 1.1 mrg if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2) 6906 1.1 mrg { 6907 1.1 mrg /* Match the D.2042[_21] = 0; store above. Just require that 6908 1.1 mrg it is a constant or external definition store. */ 6909 1.1 mrg if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def) 6910 1.1 mrg { 6911 1.1 mrg fail_init: 6912 1.1 mrg if (dump_enabled_p ()) 6913 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6914 1.1 mrg "unsupported OpenMP scan initializer store.\n"); 6915 1.1 mrg return false; 6916 1.1 mrg } 6917 1.1 mrg 6918 1.1 mrg if (! loop_vinfo->scan_map) 6919 1.1 mrg loop_vinfo->scan_map = new hash_map<tree, tree>; 6920 1.1 mrg tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0); 6921 1.1 mrg tree &cached = loop_vinfo->scan_map->get_or_insert (var); 6922 1.1 mrg if (cached) 6923 1.1 mrg goto fail_init; 6924 1.1 mrg cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info)); 6925 1.1 mrg 6926 1.1 mrg /* These stores can be vectorized normally. 
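   E.g. (sketch): the initializer store  D.2042[_21] = 0  above simply
   becomes a vector store of { 0, ..., 0 }; its scalar rhs was cached
   in loop_vinfo->scan_map so the scan phase can build _40 from it.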
*/ 6927 1.1 mrg return true; 6928 1.1 mrg } 6929 1.1 mrg 6930 1.1 mrg if (rhs_dt != vect_internal_def) 6931 1.1 mrg { 6932 1.1 mrg fail: 6933 1.1 mrg if (dump_enabled_p ()) 6934 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6935 1.1 mrg "unsupported OpenMP scan combiner pattern.\n"); 6936 1.1 mrg return false; 6937 1.1 mrg } 6938 1.1 mrg 6939 1.1 mrg gimple *stmt = STMT_VINFO_STMT (stmt_info); 6940 1.1 mrg tree rhs = gimple_assign_rhs1 (stmt); 6941 1.1 mrg if (TREE_CODE (rhs) != SSA_NAME) 6942 1.1 mrg goto fail; 6943 1.1 mrg 6944 1.1 mrg gimple *other_store_stmt = NULL; 6945 1.1 mrg tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0); 6946 1.1 mrg bool inscan_var_store 6947 1.1 mrg = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL; 6948 1.1 mrg 6949 1.1 mrg if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4) 6950 1.1 mrg { 6951 1.1 mrg if (!inscan_var_store) 6952 1.1 mrg { 6953 1.1 mrg use_operand_p use_p; 6954 1.1 mrg imm_use_iterator iter; 6955 1.1 mrg FOR_EACH_IMM_USE_FAST (use_p, iter, rhs) 6956 1.1 mrg { 6957 1.1 mrg gimple *use_stmt = USE_STMT (use_p); 6958 1.1 mrg if (use_stmt == stmt || is_gimple_debug (use_stmt)) 6959 1.1 mrg continue; 6960 1.1 mrg if (gimple_bb (use_stmt) != gimple_bb (stmt) 6961 1.1 mrg || !is_gimple_assign (use_stmt) 6962 1.1 mrg || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS 6963 1.1 mrg || other_store_stmt 6964 1.1 mrg || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME) 6965 1.1 mrg goto fail; 6966 1.1 mrg other_store_stmt = use_stmt; 6967 1.1 mrg } 6968 1.1 mrg if (other_store_stmt == NULL) 6969 1.1 mrg goto fail; 6970 1.1 mrg rhs = gimple_assign_lhs (other_store_stmt); 6971 1.1 mrg if (!single_imm_use (rhs, &use_p, &other_store_stmt)) 6972 1.1 mrg goto fail; 6973 1.1 mrg } 6974 1.1 mrg } 6975 1.1 mrg else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3) 6976 1.1 mrg { 6977 1.1 mrg use_operand_p use_p; 6978 1.1 mrg imm_use_iterator iter; 6979 1.1 mrg FOR_EACH_IMM_USE_FAST (use_p, iter, rhs) 6980 1.1 mrg { 6981 1.1 mrg gimple *use_stmt = USE_STMT (use_p); 6982 1.1 mrg if (use_stmt == stmt || is_gimple_debug (use_stmt)) 6983 1.1 mrg continue; 6984 1.1 mrg if (other_store_stmt) 6985 1.1 mrg goto fail; 6986 1.1 mrg other_store_stmt = use_stmt; 6987 1.1 mrg } 6988 1.1 mrg } 6989 1.1 mrg else 6990 1.1 mrg goto fail; 6991 1.1 mrg 6992 1.1 mrg gimple *def_stmt = SSA_NAME_DEF_STMT (rhs); 6993 1.1 mrg if (gimple_bb (def_stmt) != gimple_bb (stmt) 6994 1.1 mrg || !is_gimple_assign (def_stmt) 6995 1.1 mrg || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS) 6996 1.1 mrg goto fail; 6997 1.1 mrg 6998 1.1 mrg enum tree_code code = gimple_assign_rhs_code (def_stmt); 6999 1.1 mrg /* For pointer addition, we should use the normal plus for the vector 7000 1.1 mrg operation. 
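   E.g. (sketch): a combiner of the form  q_7 = p_5 + sz_6
   (POINTER_PLUS_EXPR) is treated as a commutative PLUS_EXPR by the
   switch below.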
*/ 7001 1.1 mrg switch (code) 7002 1.1 mrg { 7003 1.1 mrg case POINTER_PLUS_EXPR: 7004 1.1 mrg code = PLUS_EXPR; 7005 1.1 mrg break; 7006 1.1 mrg case MULT_HIGHPART_EXPR: 7007 1.1 mrg goto fail; 7008 1.1 mrg default: 7009 1.1 mrg break; 7010 1.1 mrg } 7011 1.1 mrg if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code)) 7012 1.1 mrg goto fail; 7013 1.1 mrg 7014 1.1 mrg tree rhs1 = gimple_assign_rhs1 (def_stmt); 7015 1.1 mrg tree rhs2 = gimple_assign_rhs2 (def_stmt); 7016 1.1 mrg if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME) 7017 1.1 mrg goto fail; 7018 1.1 mrg 7019 1.1 mrg gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1); 7020 1.1 mrg gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2); 7021 1.1 mrg if (gimple_bb (load1_stmt) != gimple_bb (stmt) 7022 1.1 mrg || !gimple_assign_load_p (load1_stmt) 7023 1.1 mrg || gimple_bb (load2_stmt) != gimple_bb (stmt) 7024 1.1 mrg || !gimple_assign_load_p (load2_stmt)) 7025 1.1 mrg goto fail; 7026 1.1 mrg 7027 1.1 mrg stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt); 7028 1.1 mrg stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt); 7029 1.1 mrg if (load1_stmt_info == NULL 7030 1.1 mrg || load2_stmt_info == NULL 7031 1.1 mrg || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info) 7032 1.1 mrg != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)) 7033 1.1 mrg || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info) 7034 1.1 mrg != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))) 7035 1.1 mrg goto fail; 7036 1.1 mrg 7037 1.1 mrg if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store) 7038 1.1 mrg { 7039 1.1 mrg dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info); 7040 1.1 mrg if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR 7041 1.1 mrg || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0))) 7042 1.1 mrg goto fail; 7043 1.1 mrg tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0); 7044 1.1 mrg tree lrhs; 7045 1.1 mrg if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1))) 7046 1.1 mrg lrhs = rhs1; 7047 1.1 mrg else 7048 1.1 mrg lrhs = rhs2; 7049 1.1 mrg use_operand_p use_p; 7050 1.1 mrg imm_use_iterator iter; 7051 1.1 mrg FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs) 7052 1.1 mrg { 7053 1.1 mrg gimple *use_stmt = USE_STMT (use_p); 7054 1.1 mrg if (use_stmt == def_stmt || is_gimple_debug (use_stmt)) 7055 1.1 mrg continue; 7056 1.1 mrg if (other_store_stmt) 7057 1.1 mrg goto fail; 7058 1.1 mrg other_store_stmt = use_stmt; 7059 1.1 mrg } 7060 1.1 mrg } 7061 1.1 mrg 7062 1.1 mrg if (other_store_stmt == NULL) 7063 1.1 mrg goto fail; 7064 1.1 mrg if (gimple_bb (other_store_stmt) != gimple_bb (stmt) 7065 1.1 mrg || !gimple_store_p (other_store_stmt)) 7066 1.1 mrg goto fail; 7067 1.1 mrg 7068 1.1 mrg stmt_vec_info other_store_stmt_info 7069 1.1 mrg = loop_vinfo->lookup_stmt (other_store_stmt); 7070 1.1 mrg if (other_store_stmt_info == NULL 7071 1.1 mrg || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info) 7072 1.1 mrg != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))) 7073 1.1 mrg goto fail; 7074 1.1 mrg 7075 1.1 mrg gimple *stmt1 = stmt; 7076 1.1 mrg gimple *stmt2 = other_store_stmt; 7077 1.1 mrg if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store) 7078 1.1 mrg std::swap (stmt1, stmt2); 7079 1.1 mrg if (scan_operand_equal_p (gimple_assign_lhs (stmt1), 7080 1.1 mrg gimple_assign_rhs1 (load2_stmt))) 7081 1.1 mrg { 7082 1.1 mrg std::swap (rhs1, rhs2); 7083 1.1 mrg std::swap (load1_stmt, load2_stmt); 7084 1.1 mrg std::swap 
(load1_stmt_info, load2_stmt_info); 7085 1.1 mrg } 7086 1.1 mrg if (!scan_operand_equal_p (gimple_assign_lhs (stmt1), 7087 1.1 mrg gimple_assign_rhs1 (load1_stmt))) 7088 1.1 mrg goto fail; 7089 1.1 mrg 7090 1.1 mrg tree var3 = NULL_TREE; 7091 1.1 mrg if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3 7092 1.1 mrg && !scan_operand_equal_p (gimple_assign_lhs (stmt2), 7093 1.1 mrg gimple_assign_rhs1 (load2_stmt))) 7094 1.1 mrg goto fail; 7095 1.1 mrg else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4) 7096 1.1 mrg { 7097 1.1 mrg dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info); 7098 1.1 mrg if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR 7099 1.1 mrg || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0))) 7100 1.1 mrg goto fail; 7101 1.1 mrg var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0); 7102 1.1 mrg if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3)) 7103 1.1 mrg || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3)) 7104 1.1 mrg || lookup_attribute ("omp simd inscan exclusive", 7105 1.1 mrg DECL_ATTRIBUTES (var3))) 7106 1.1 mrg goto fail; 7107 1.1 mrg } 7108 1.1 mrg 7109 1.1 mrg dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info); 7110 1.1 mrg if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR 7111 1.1 mrg || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0))) 7112 1.1 mrg goto fail; 7113 1.1 mrg 7114 1.1 mrg tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0); 7115 1.1 mrg tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0); 7116 1.1 mrg if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1)) 7117 1.1 mrg || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2)) 7118 1.1 mrg || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1))) 7119 1.1 mrg == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2)))) 7120 1.1 mrg goto fail; 7121 1.1 mrg 7122 1.1 mrg if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1))) 7123 1.1 mrg std::swap (var1, var2); 7124 1.1 mrg 7125 1.1 mrg if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4) 7126 1.1 mrg { 7127 1.1 mrg if (!lookup_attribute ("omp simd inscan exclusive", 7128 1.1 mrg DECL_ATTRIBUTES (var1))) 7129 1.1 mrg goto fail; 7130 1.1 mrg var1 = var3; 7131 1.1 mrg } 7132 1.1 mrg 7133 1.1 mrg if (loop_vinfo->scan_map == NULL) 7134 1.1 mrg goto fail; 7135 1.1 mrg tree *init = loop_vinfo->scan_map->get (var1); 7136 1.1 mrg if (init == NULL) 7137 1.1 mrg goto fail; 7138 1.1 mrg 7139 1.1 mrg /* The IL is as expected, now check if we can actually vectorize it. 7140 1.1 mrg Inclusive scan: 7141 1.1 mrg _26 = D.2043[_25]; 7142 1.1 mrg _27 = D.2042[_25]; 7143 1.1 mrg _28 = _26 + _27; 7144 1.1 mrg D.2043[_25] = _28; 7145 1.1 mrg D.2042[_25] = _28; 7146 1.1 mrg should be vectorized as (where _40 is the vectorized rhs 7147 1.1 mrg from the D.2042[_21] = 0; store): 7148 1.1 mrg _30 = MEM <vector(8) int> [(int *)&D.2043]; 7149 1.1 mrg _31 = MEM <vector(8) int> [(int *)&D.2042]; 7150 1.1 mrg _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>; 7151 1.1 mrg _33 = _31 + _32; 7152 1.1 mrg // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] }; 7153 1.1 mrg _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>; 7154 1.1 mrg _35 = _33 + _34; 7155 1.1 mrg // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3], 7156 1.1 mrg // _31[1]+.._31[4], ... 
_31[4]+.._31[7] };
7157 1.1 mrg      _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7158 1.1 mrg      _37 = _35 + _36;
7159 1.1 mrg      // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7160 1.1 mrg      //         _31[0]+.._31[4], ... _31[0]+.._31[7] };
7161 1.1 mrg      _38 = _30 + _37;
7162 1.1 mrg      _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7163 1.1 mrg      MEM <vector(8) int> [(int *)&D.2043] = _39;
7164 1.1 mrg      MEM <vector(8) int> [(int *)&D.2042] = _38;
7165 1.1 mrg    Exclusive scan:
7166 1.1 mrg      _26 = D.2043[_25];
7167 1.1 mrg      D.2044[_25] = _26;
7168 1.1 mrg      _27 = D.2042[_25];
7169 1.1 mrg      _28 = _26 + _27;
7170 1.1 mrg      D.2043[_25] = _28;
7171 1.1 mrg    should be vectorized as (where _40 is the vectorized rhs
7172 1.1 mrg    from the D.2042[_21] = 0; store):
7173 1.1 mrg      _30 = MEM <vector(8) int> [(int *)&D.2043];
7174 1.1 mrg      _31 = MEM <vector(8) int> [(int *)&D.2042];
7175 1.1 mrg      _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7176 1.1 mrg      _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7177 1.1 mrg      _34 = _32 + _33;
7178 1.1 mrg      // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7179 1.1 mrg      //         _31[3]+_31[4], ... _31[5]+.._31[6] };
7180 1.1 mrg      _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7181 1.1 mrg      _36 = _34 + _35;
7182 1.1 mrg      // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7183 1.1 mrg      //         _31[1]+.._31[4], ... _31[3]+.._31[6] };
7184 1.1 mrg      _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7185 1.1 mrg      _38 = _36 + _37;
7186 1.1 mrg      // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7187 1.1 mrg      //         _31[0]+.._31[4], ... _31[0]+.._31[6] };
7188 1.1 mrg      _39 = _30 + _38;
7189 1.1 mrg      _50 = _31 + _39;
7190 1.1 mrg      _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7191 1.1 mrg      MEM <vector(8) int> [(int *)&D.2044] = _39;
7192 1.1 mrg      MEM <vector(8) int> [(int *)&D.2042] = _51;  */
7193 1.1 mrg   enum machine_mode vec_mode = TYPE_MODE (vectype);
7194 1.1 mrg   optab optab = optab_for_tree_code (code, vectype, optab_default);
7195 1.1 mrg   if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7196 1.1 mrg     goto fail;
7197 1.1 mrg 
7198 1.1 mrg   int units_log2 = scan_store_can_perm_p (vectype, *init);
7199 1.1 mrg   if (units_log2 == -1)
7200 1.1 mrg     goto fail;
7201 1.1 mrg 
7202 1.1 mrg   return true;
7203 1.1 mrg }
7204 1.1 mrg 
7205 1.1 mrg 
7206 1.1 mrg /* Function vectorizable_scan_store.
7207 1.1 mrg 
7208 1.1 mrg    Helper of vectorizable_store; arguments are as for vectorizable_store.
7209 1.1 mrg    Handle only the transformation; checking is done in check_scan_store.  */
7210 1.1 mrg 
7211 1.1 mrg static bool
7212 1.1 mrg vectorizable_scan_store (vec_info *vinfo,
7213 1.1 mrg                          stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7214 1.1 mrg                          gimple **vec_stmt, int ncopies)
7215 1.1 mrg {
7216 1.1 mrg   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7217 1.1 mrg   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7218 1.1 mrg   tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7219 1.1 mrg   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7220 1.1 mrg 
7221 1.1 mrg   if (dump_enabled_p ())
7222 1.1 mrg     dump_printf_loc (MSG_NOTE, vect_location,
7223 1.1 mrg                      "transform scan store. 
ncopies = %d\n", ncopies); 7224 1.1 mrg 7225 1.1 mrg gimple *stmt = STMT_VINFO_STMT (stmt_info); 7226 1.1 mrg tree rhs = gimple_assign_rhs1 (stmt); 7227 1.1 mrg gcc_assert (TREE_CODE (rhs) == SSA_NAME); 7228 1.1 mrg 7229 1.1 mrg tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0); 7230 1.1 mrg bool inscan_var_store 7231 1.1 mrg = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL; 7232 1.1 mrg 7233 1.1 mrg if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store) 7234 1.1 mrg { 7235 1.1 mrg use_operand_p use_p; 7236 1.1 mrg imm_use_iterator iter; 7237 1.1 mrg FOR_EACH_IMM_USE_FAST (use_p, iter, rhs) 7238 1.1 mrg { 7239 1.1 mrg gimple *use_stmt = USE_STMT (use_p); 7240 1.1 mrg if (use_stmt == stmt || is_gimple_debug (use_stmt)) 7241 1.1 mrg continue; 7242 1.1 mrg rhs = gimple_assign_lhs (use_stmt); 7243 1.1 mrg break; 7244 1.1 mrg } 7245 1.1 mrg } 7246 1.1 mrg 7247 1.1 mrg gimple *def_stmt = SSA_NAME_DEF_STMT (rhs); 7248 1.1 mrg enum tree_code code = gimple_assign_rhs_code (def_stmt); 7249 1.1 mrg if (code == POINTER_PLUS_EXPR) 7250 1.1 mrg code = PLUS_EXPR; 7251 1.1 mrg gcc_assert (TREE_CODE_LENGTH (code) == binary_op 7252 1.1 mrg && commutative_tree_code (code)); 7253 1.1 mrg tree rhs1 = gimple_assign_rhs1 (def_stmt); 7254 1.1 mrg tree rhs2 = gimple_assign_rhs2 (def_stmt); 7255 1.1 mrg gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME); 7256 1.1 mrg gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1); 7257 1.1 mrg gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2); 7258 1.1 mrg stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt); 7259 1.1 mrg stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt); 7260 1.1 mrg dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info); 7261 1.1 mrg dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info); 7262 1.1 mrg tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0); 7263 1.1 mrg tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0); 7264 1.1 mrg 7265 1.1 mrg if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1))) 7266 1.1 mrg { 7267 1.1 mrg std::swap (rhs1, rhs2); 7268 1.1 mrg std::swap (var1, var2); 7269 1.1 mrg std::swap (load1_dr_info, load2_dr_info); 7270 1.1 mrg } 7271 1.1 mrg 7272 1.1 mrg tree *init = loop_vinfo->scan_map->get (var1); 7273 1.1 mrg gcc_assert (init); 7274 1.1 mrg 7275 1.1 mrg unsigned HOST_WIDE_INT nunits; 7276 1.1 mrg if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)) 7277 1.1 mrg gcc_unreachable (); 7278 1.1 mrg auto_vec<enum scan_store_kind, 16> use_whole_vector; 7279 1.1 mrg int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector); 7280 1.1 mrg gcc_assert (units_log2 > 0); 7281 1.1 mrg auto_vec<tree, 16> perms; 7282 1.1 mrg perms.quick_grow (units_log2 + 1); 7283 1.1 mrg tree zero_vec = NULL_TREE, masktype = NULL_TREE; 7284 1.1 mrg for (int i = 0; i <= units_log2; ++i) 7285 1.1 mrg { 7286 1.1 mrg unsigned HOST_WIDE_INT j, k; 7287 1.1 mrg vec_perm_builder sel (nunits, nunits, 1); 7288 1.1 mrg sel.quick_grow (nunits); 7289 1.1 mrg if (i == units_log2) 7290 1.1 mrg for (j = 0; j < nunits; ++j) 7291 1.1 mrg sel[j] = nunits - 1; 7292 1.1 mrg else 7293 1.1 mrg { 7294 1.1 mrg for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j) 7295 1.1 mrg sel[j] = j; 7296 1.1 mrg for (k = 0; j < nunits; ++j, ++k) 7297 1.1 mrg sel[j] = nunits + k; 7298 1.1 mrg } 7299 1.1 mrg vec_perm_indices indices (sel, i == units_log2 ? 
1 : 2, nunits); 7300 1.1 mrg if (!use_whole_vector.is_empty () 7301 1.1 mrg && use_whole_vector[i] != scan_store_kind_perm) 7302 1.1 mrg { 7303 1.1 mrg if (zero_vec == NULL_TREE) 7304 1.1 mrg zero_vec = build_zero_cst (vectype); 7305 1.1 mrg if (masktype == NULL_TREE 7306 1.1 mrg && use_whole_vector[i] == scan_store_kind_lshift_cond) 7307 1.1 mrg masktype = truth_type_for (vectype); 7308 1.1 mrg perms[i] = vect_gen_perm_mask_any (vectype, indices); 7309 1.1 mrg } 7310 1.1 mrg else 7311 1.1 mrg perms[i] = vect_gen_perm_mask_checked (vectype, indices); 7312 1.1 mrg } 7313 1.1 mrg 7314 1.1 mrg tree vec_oprnd1 = NULL_TREE; 7315 1.1 mrg tree vec_oprnd2 = NULL_TREE; 7316 1.1 mrg tree vec_oprnd3 = NULL_TREE; 7317 1.1 mrg tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr); 7318 1.1 mrg tree dataref_offset = build_int_cst (ref_type, 0); 7319 1.1 mrg tree bump = vect_get_data_ptr_increment (vinfo, dr_info, 7320 1.1 mrg vectype, VMAT_CONTIGUOUS); 7321 1.1 mrg tree ldataref_ptr = NULL_TREE; 7322 1.1 mrg tree orig = NULL_TREE; 7323 1.1 mrg if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store) 7324 1.1 mrg ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr); 7325 1.1 mrg auto_vec<tree> vec_oprnds1; 7326 1.1 mrg auto_vec<tree> vec_oprnds2; 7327 1.1 mrg auto_vec<tree> vec_oprnds3; 7328 1.1 mrg vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies, 7329 1.1 mrg *init, &vec_oprnds1, 7330 1.1 mrg ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2, 7331 1.1 mrg rhs2, &vec_oprnds3); 7332 1.1 mrg for (int j = 0; j < ncopies; j++) 7333 1.1 mrg { 7334 1.1 mrg vec_oprnd1 = vec_oprnds1[j]; 7335 1.1 mrg if (ldataref_ptr == NULL) 7336 1.1 mrg vec_oprnd2 = vec_oprnds2[j]; 7337 1.1 mrg vec_oprnd3 = vec_oprnds3[j]; 7338 1.1 mrg if (j == 0) 7339 1.1 mrg orig = vec_oprnd3; 7340 1.1 mrg else if (!inscan_var_store) 7341 1.1 mrg dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump); 7342 1.1 mrg 7343 1.1 mrg if (ldataref_ptr) 7344 1.1 mrg { 7345 1.1 mrg vec_oprnd2 = make_ssa_name (vectype); 7346 1.1 mrg tree data_ref = fold_build2 (MEM_REF, vectype, 7347 1.1 mrg unshare_expr (ldataref_ptr), 7348 1.1 mrg dataref_offset); 7349 1.1 mrg vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr)); 7350 1.1 mrg gimple *g = gimple_build_assign (vec_oprnd2, data_ref); 7351 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, g, gsi); 7352 1.1 mrg STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g); 7353 1.1 mrg *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; 7354 1.1 mrg } 7355 1.1 mrg 7356 1.1 mrg tree v = vec_oprnd2; 7357 1.1 mrg for (int i = 0; i < units_log2; ++i) 7358 1.1 mrg { 7359 1.1 mrg tree new_temp = make_ssa_name (vectype); 7360 1.1 mrg gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR, 7361 1.1 mrg (zero_vec 7362 1.1 mrg && (use_whole_vector[i] 7363 1.1 mrg != scan_store_kind_perm)) 7364 1.1 mrg ? zero_vec : vec_oprnd1, v, 7365 1.1 mrg perms[i]); 7366 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, g, gsi); 7367 1.1 mrg STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g); 7368 1.1 mrg *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; 7369 1.1 mrg 7370 1.1 mrg if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond) 7371 1.1 mrg { 7372 1.1 mrg /* Whole vector shift shifted in zero bits, but if *init 7373 1.1 mrg is not initializer_zerop, we need to replace those elements 7374 1.1 mrg with elements from vec_oprnd1. 
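                 (E.g. for nunits == 8 and i == 1 the mask built below is
                 { false, false, true, true, true, true, true, true }, so
                 the VEC_COND_EXPR keeps lanes 0 and 1 from vec_oprnd1, the
                 vectorized initializer, and takes the remaining lanes from
                 the shifted vector new_temp.)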
*/ 7375 1.1 mrg tree_vector_builder vb (masktype, nunits, 1); 7376 1.1 mrg for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k) 7377 1.1 mrg vb.quick_push (k < (HOST_WIDE_INT_1U << i) 7378 1.1 mrg ? boolean_false_node : boolean_true_node); 7379 1.1 mrg 7380 1.1 mrg tree new_temp2 = make_ssa_name (vectype); 7381 1.1 mrg g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (), 7382 1.1 mrg new_temp, vec_oprnd1); 7383 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, 7384 1.1 mrg g, gsi); 7385 1.1 mrg STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g); 7386 1.1 mrg new_temp = new_temp2; 7387 1.1 mrg } 7388 1.1 mrg 7389 1.1 mrg /* For exclusive scan, perform the perms[i] permutation once 7390 1.1 mrg more. */ 7391 1.1 mrg if (i == 0 7392 1.1 mrg && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 7393 1.1 mrg && v == vec_oprnd2) 7394 1.1 mrg { 7395 1.1 mrg v = new_temp; 7396 1.1 mrg --i; 7397 1.1 mrg continue; 7398 1.1 mrg } 7399 1.1 mrg 7400 1.1 mrg tree new_temp2 = make_ssa_name (vectype); 7401 1.1 mrg g = gimple_build_assign (new_temp2, code, v, new_temp); 7402 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, g, gsi); 7403 1.1 mrg STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g); 7404 1.1 mrg 7405 1.1 mrg v = new_temp2; 7406 1.1 mrg } 7407 1.1 mrg 7408 1.1 mrg tree new_temp = make_ssa_name (vectype); 7409 1.1 mrg gimple *g = gimple_build_assign (new_temp, code, orig, v); 7410 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, g, gsi); 7411 1.1 mrg STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g); 7412 1.1 mrg 7413 1.1 mrg tree last_perm_arg = new_temp; 7414 1.1 mrg /* For exclusive scan, new_temp computed above is the exclusive scan 7415 1.1 mrg prefix sum. Turn it into inclusive prefix sum for the broadcast 7416 1.1 mrg of the last element into orig. 
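         (E.g. if the input lanes are { a0, ..., a7 }, the exclusive result
         new_temp is { init, init+a0, ..., init+a0+..+a6 }; adding
         vec_oprnd2 lane-wise below yields the inclusive
         { init+a0, ..., init+a0+..+a7 }, whose last lane is the running
         total that gets broadcast into orig for the next copy.)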
*/ 7417 1.1 mrg if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4) 7418 1.1 mrg { 7419 1.1 mrg last_perm_arg = make_ssa_name (vectype); 7420 1.1 mrg g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2); 7421 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, g, gsi); 7422 1.1 mrg STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g); 7423 1.1 mrg } 7424 1.1 mrg 7425 1.1 mrg orig = make_ssa_name (vectype); 7426 1.1 mrg g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg, 7427 1.1 mrg last_perm_arg, perms[units_log2]); 7428 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, g, gsi); 7429 1.1 mrg STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g); 7430 1.1 mrg 7431 1.1 mrg if (!inscan_var_store) 7432 1.1 mrg { 7433 1.1 mrg tree data_ref = fold_build2 (MEM_REF, vectype, 7434 1.1 mrg unshare_expr (dataref_ptr), 7435 1.1 mrg dataref_offset); 7436 1.1 mrg vect_copy_ref_info (data_ref, DR_REF (dr_info->dr)); 7437 1.1 mrg g = gimple_build_assign (data_ref, new_temp); 7438 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, g, gsi); 7439 1.1 mrg STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g); 7440 1.1 mrg } 7441 1.1 mrg } 7442 1.1 mrg 7443 1.1 mrg if (inscan_var_store) 7444 1.1 mrg for (int j = 0; j < ncopies; j++) 7445 1.1 mrg { 7446 1.1 mrg if (j != 0) 7447 1.1 mrg dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump); 7448 1.1 mrg 7449 1.1 mrg tree data_ref = fold_build2 (MEM_REF, vectype, 7450 1.1 mrg unshare_expr (dataref_ptr), 7451 1.1 mrg dataref_offset); 7452 1.1 mrg vect_copy_ref_info (data_ref, DR_REF (dr_info->dr)); 7453 1.1 mrg gimple *g = gimple_build_assign (data_ref, orig); 7454 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, g, gsi); 7455 1.1 mrg STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g); 7456 1.1 mrg } 7457 1.1 mrg return true; 7458 1.1 mrg } 7459 1.1 mrg 7460 1.1 mrg 7461 1.1 mrg /* Function vectorizable_store. 7462 1.1 mrg 7463 1.1 mrg Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure) 7464 1.1 mrg that can be vectorized. 7465 1.1 mrg If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized 7466 1.1 mrg stmt to replace it, put it in VEC_STMT, and insert it at GSI. 7467 1.1 mrg Return true if STMT_INFO is vectorizable in this way. 
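   Like the other vectorizable_* routines in this file, the function is
   used in two phases: when VEC_STMT is null only the analysis is done,
   recording the memory access type and the costs, and the vector stores
   themselves are emitted in the later transformation call.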
*/ 7468 1.1 mrg 7469 1.1 mrg static bool 7470 1.1 mrg vectorizable_store (vec_info *vinfo, 7471 1.1 mrg stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, 7472 1.1 mrg gimple **vec_stmt, slp_tree slp_node, 7473 1.1 mrg stmt_vector_for_cost *cost_vec) 7474 1.1 mrg { 7475 1.1 mrg tree data_ref; 7476 1.1 mrg tree op; 7477 1.1 mrg tree vec_oprnd = NULL_TREE; 7478 1.1 mrg tree elem_type; 7479 1.1 mrg loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); 7480 1.1 mrg class loop *loop = NULL; 7481 1.1 mrg machine_mode vec_mode; 7482 1.1 mrg tree dummy; 7483 1.1 mrg enum vect_def_type rhs_dt = vect_unknown_def_type; 7484 1.1 mrg enum vect_def_type mask_dt = vect_unknown_def_type; 7485 1.1 mrg tree dataref_ptr = NULL_TREE; 7486 1.1 mrg tree dataref_offset = NULL_TREE; 7487 1.1 mrg gimple *ptr_incr = NULL; 7488 1.1 mrg int ncopies; 7489 1.1 mrg int j; 7490 1.1 mrg stmt_vec_info first_stmt_info; 7491 1.1 mrg bool grouped_store; 7492 1.1 mrg unsigned int group_size, i; 7493 1.1 mrg vec<tree> oprnds = vNULL; 7494 1.1 mrg vec<tree> result_chain = vNULL; 7495 1.1 mrg vec<tree> vec_oprnds = vNULL; 7496 1.1 mrg bool slp = (slp_node != NULL); 7497 1.1 mrg unsigned int vec_num; 7498 1.1 mrg bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo); 7499 1.1 mrg tree aggr_type; 7500 1.1 mrg gather_scatter_info gs_info; 7501 1.1 mrg poly_uint64 vf; 7502 1.1 mrg vec_load_store_type vls_type; 7503 1.1 mrg tree ref_type; 7504 1.1 mrg 7505 1.1 mrg if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 7506 1.1 mrg return false; 7507 1.1 mrg 7508 1.1 mrg if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def 7509 1.1 mrg && ! vec_stmt) 7510 1.1 mrg return false; 7511 1.1 mrg 7512 1.1 mrg /* Is vectorizable store? */ 7513 1.1 mrg 7514 1.1 mrg tree mask = NULL_TREE, mask_vectype = NULL_TREE; 7515 1.1 mrg if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt)) 7516 1.1 mrg { 7517 1.1 mrg tree scalar_dest = gimple_assign_lhs (assign); 7518 1.1 mrg if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR 7519 1.1 mrg && is_pattern_stmt_p (stmt_info)) 7520 1.1 mrg scalar_dest = TREE_OPERAND (scalar_dest, 0); 7521 1.1 mrg if (TREE_CODE (scalar_dest) != ARRAY_REF 7522 1.1 mrg && TREE_CODE (scalar_dest) != BIT_FIELD_REF 7523 1.1 mrg && TREE_CODE (scalar_dest) != INDIRECT_REF 7524 1.1 mrg && TREE_CODE (scalar_dest) != COMPONENT_REF 7525 1.1 mrg && TREE_CODE (scalar_dest) != IMAGPART_EXPR 7526 1.1 mrg && TREE_CODE (scalar_dest) != REALPART_EXPR 7527 1.1 mrg && TREE_CODE (scalar_dest) != MEM_REF) 7528 1.1 mrg return false; 7529 1.1 mrg } 7530 1.1 mrg else 7531 1.1 mrg { 7532 1.1 mrg gcall *call = dyn_cast <gcall *> (stmt_info->stmt); 7533 1.1 mrg if (!call || !gimple_call_internal_p (call)) 7534 1.1 mrg return false; 7535 1.1 mrg 7536 1.1 mrg internal_fn ifn = gimple_call_internal_fn (call); 7537 1.1 mrg if (!internal_store_fn_p (ifn)) 7538 1.1 mrg return false; 7539 1.1 mrg 7540 1.1 mrg if (slp_node != NULL) 7541 1.1 mrg { 7542 1.1 mrg if (dump_enabled_p ()) 7543 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 7544 1.1 mrg "SLP of masked stores not supported.\n"); 7545 1.1 mrg return false; 7546 1.1 mrg } 7547 1.1 mrg 7548 1.1 mrg int mask_index = internal_fn_mask_index (ifn); 7549 1.1 mrg if (mask_index >= 0 7550 1.1 mrg && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index, 7551 1.1 mrg &mask, NULL, &mask_dt, &mask_vectype)) 7552 1.1 mrg return false; 7553 1.1 mrg } 7554 1.1 mrg 7555 1.1 mrg op = vect_get_store_rhs (stmt_info); 7556 1.1 mrg 7557 1.1 mrg /* Cannot have hybrid store SLP -- that 
would mean storing to the 7558 1.1 mrg same location twice. */ 7559 1.1 mrg gcc_assert (slp == PURE_SLP_STMT (stmt_info)); 7560 1.1 mrg 7561 1.1 mrg tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE; 7562 1.1 mrg poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); 7563 1.1 mrg 7564 1.1 mrg if (loop_vinfo) 7565 1.1 mrg { 7566 1.1 mrg loop = LOOP_VINFO_LOOP (loop_vinfo); 7567 1.1 mrg vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 7568 1.1 mrg } 7569 1.1 mrg else 7570 1.1 mrg vf = 1; 7571 1.1 mrg 7572 1.1 mrg /* Multiple types in SLP are handled by creating the appropriate number of 7573 1.1 mrg vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 7574 1.1 mrg case of SLP. */ 7575 1.1 mrg if (slp) 7576 1.1 mrg ncopies = 1; 7577 1.1 mrg else 7578 1.1 mrg ncopies = vect_get_num_copies (loop_vinfo, vectype); 7579 1.1 mrg 7580 1.1 mrg gcc_assert (ncopies >= 1); 7581 1.1 mrg 7582 1.1 mrg /* FORNOW. This restriction should be relaxed. */ 7583 1.1 mrg if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1) 7584 1.1 mrg { 7585 1.1 mrg if (dump_enabled_p ()) 7586 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 7587 1.1 mrg "multiple types in nested loop.\n"); 7588 1.1 mrg return false; 7589 1.1 mrg } 7590 1.1 mrg 7591 1.1 mrg if (!vect_check_store_rhs (vinfo, stmt_info, slp_node, 7592 1.1 mrg op, &rhs_dt, &rhs_vectype, &vls_type)) 7593 1.1 mrg return false; 7594 1.1 mrg 7595 1.1 mrg elem_type = TREE_TYPE (vectype); 7596 1.1 mrg vec_mode = TYPE_MODE (vectype); 7597 1.1 mrg 7598 1.1 mrg if (!STMT_VINFO_DATA_REF (stmt_info)) 7599 1.1 mrg return false; 7600 1.1 mrg 7601 1.1 mrg vect_memory_access_type memory_access_type; 7602 1.1 mrg enum dr_alignment_support alignment_support_scheme; 7603 1.1 mrg int misalignment; 7604 1.1 mrg poly_int64 poffset; 7605 1.1 mrg if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type, 7606 1.1 mrg ncopies, &memory_access_type, &poffset, 7607 1.1 mrg &alignment_support_scheme, &misalignment, &gs_info)) 7608 1.1 mrg return false; 7609 1.1 mrg 7610 1.1 mrg if (mask) 7611 1.1 mrg { 7612 1.1 mrg if (memory_access_type == VMAT_CONTIGUOUS) 7613 1.1 mrg { 7614 1.1 mrg if (!VECTOR_MODE_P (vec_mode) 7615 1.1 mrg || !can_vec_mask_load_store_p (vec_mode, 7616 1.1 mrg TYPE_MODE (mask_vectype), false)) 7617 1.1 mrg return false; 7618 1.1 mrg } 7619 1.1 mrg else if (memory_access_type != VMAT_LOAD_STORE_LANES 7620 1.1 mrg && (memory_access_type != VMAT_GATHER_SCATTER 7621 1.1 mrg || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype)))) 7622 1.1 mrg { 7623 1.1 mrg if (dump_enabled_p ()) 7624 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 7625 1.1 mrg "unsupported access type for masked store.\n"); 7626 1.1 mrg return false; 7627 1.1 mrg } 7628 1.1 mrg } 7629 1.1 mrg else 7630 1.1 mrg { 7631 1.1 mrg /* FORNOW. In some cases can vectorize even if data-type not supported 7632 1.1 mrg (e.g. - array initialization with 0). 
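         (All that is needed in that case is a plain vector move, which is
         why only the mov_optab handler for vec_mode is checked below
         rather than support for the stored expression itself.)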
*/ 7633 1.1 mrg if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing) 7634 1.1 mrg return false; 7635 1.1 mrg } 7636 1.1 mrg 7637 1.1 mrg dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL; 7638 1.1 mrg grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info) 7639 1.1 mrg && memory_access_type != VMAT_GATHER_SCATTER 7640 1.1 mrg && (slp || memory_access_type != VMAT_CONTIGUOUS)); 7641 1.1 mrg if (grouped_store) 7642 1.1 mrg { 7643 1.1 mrg first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); 7644 1.1 mrg first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info); 7645 1.1 mrg group_size = DR_GROUP_SIZE (first_stmt_info); 7646 1.1 mrg } 7647 1.1 mrg else 7648 1.1 mrg { 7649 1.1 mrg first_stmt_info = stmt_info; 7650 1.1 mrg first_dr_info = dr_info; 7651 1.1 mrg group_size = vec_num = 1; 7652 1.1 mrg } 7653 1.1 mrg 7654 1.1 mrg if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt) 7655 1.1 mrg { 7656 1.1 mrg if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask, 7657 1.1 mrg memory_access_type)) 7658 1.1 mrg return false; 7659 1.1 mrg } 7660 1.1 mrg 7661 1.1 mrg if (!vec_stmt) /* transformation not required. */ 7662 1.1 mrg { 7663 1.1 mrg STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type; 7664 1.1 mrg 7665 1.1 mrg if (loop_vinfo 7666 1.1 mrg && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) 7667 1.1 mrg check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node, 7668 1.1 mrg vls_type, group_size, 7669 1.1 mrg memory_access_type, &gs_info, 7670 1.1 mrg mask); 7671 1.1 mrg 7672 1.1 mrg if (slp_node 7673 1.1 mrg && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0], 7674 1.1 mrg vectype)) 7675 1.1 mrg { 7676 1.1 mrg if (dump_enabled_p ()) 7677 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 7678 1.1 mrg "incompatible vector types for invariants\n"); 7679 1.1 mrg return false; 7680 1.1 mrg } 7681 1.1 mrg 7682 1.1 mrg if (dump_enabled_p () 7683 1.1 mrg && memory_access_type != VMAT_ELEMENTWISE 7684 1.1 mrg && memory_access_type != VMAT_GATHER_SCATTER 7685 1.1 mrg && alignment_support_scheme != dr_aligned) 7686 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, 7687 1.1 mrg "Vectorizing an unaligned access.\n"); 7688 1.1 mrg 7689 1.1 mrg STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; 7690 1.1 mrg vect_model_store_cost (vinfo, stmt_info, ncopies, 7691 1.1 mrg memory_access_type, alignment_support_scheme, 7692 1.1 mrg misalignment, vls_type, slp_node, cost_vec); 7693 1.1 mrg return true; 7694 1.1 mrg } 7695 1.1 mrg gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info)); 7696 1.1 mrg 7697 1.1 mrg /* Transform. 
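   The first case handled below is a gather/scatter store that goes
   through a target builtin (gs_info.decl, e.g. the x86 scatter
   builtins): the builtin's argument list is decomposed into its
   (ptr, mask, index, src, scale) parameter types, the offset vector
   (WIDEN) or the data vector (NARROW) is fed through VEC_PERM_EXPRs on
   odd copies when the two differ in lane count, and a direct call to
   the builtin is emitted for each copy.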
*/ 7698 1.1 mrg 7699 1.1 mrg ensure_base_align (dr_info); 7700 1.1 mrg 7701 1.1 mrg if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl) 7702 1.1 mrg { 7703 1.1 mrg tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src; 7704 1.1 mrg tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl)); 7705 1.1 mrg tree rettype, srctype, ptrtype, idxtype, masktype, scaletype; 7706 1.1 mrg tree ptr, var, scale, vec_mask; 7707 1.1 mrg tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE; 7708 1.1 mrg tree mask_halfvectype = mask_vectype; 7709 1.1 mrg edge pe = loop_preheader_edge (loop); 7710 1.1 mrg gimple_seq seq; 7711 1.1 mrg basic_block new_bb; 7712 1.1 mrg enum { NARROW, NONE, WIDEN } modifier; 7713 1.1 mrg poly_uint64 scatter_off_nunits 7714 1.1 mrg = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype); 7715 1.1 mrg 7716 1.1 mrg if (known_eq (nunits, scatter_off_nunits)) 7717 1.1 mrg modifier = NONE; 7718 1.1 mrg else if (known_eq (nunits * 2, scatter_off_nunits)) 7719 1.1 mrg { 7720 1.1 mrg modifier = WIDEN; 7721 1.1 mrg 7722 1.1 mrg /* Currently gathers and scatters are only supported for 7723 1.1 mrg fixed-length vectors. */ 7724 1.1 mrg unsigned int count = scatter_off_nunits.to_constant (); 7725 1.1 mrg vec_perm_builder sel (count, count, 1); 7726 1.1 mrg for (i = 0; i < (unsigned int) count; ++i) 7727 1.1 mrg sel.quick_push (i | (count / 2)); 7728 1.1 mrg 7729 1.1 mrg vec_perm_indices indices (sel, 1, count); 7730 1.1 mrg perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, 7731 1.1 mrg indices); 7732 1.1 mrg gcc_assert (perm_mask != NULL_TREE); 7733 1.1 mrg } 7734 1.1 mrg else if (known_eq (nunits, scatter_off_nunits * 2)) 7735 1.1 mrg { 7736 1.1 mrg modifier = NARROW; 7737 1.1 mrg 7738 1.1 mrg /* Currently gathers and scatters are only supported for 7739 1.1 mrg fixed-length vectors. 
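            (E.g. for count == 8 the selector built below is
            { 4, 5, 6, 7, 4, 5, 6, 7 }, so the odd-numbered copies operate
            on a duplicate of the high half of the data vector.)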
*/ 7740 1.1 mrg unsigned int count = nunits.to_constant (); 7741 1.1 mrg vec_perm_builder sel (count, count, 1); 7742 1.1 mrg for (i = 0; i < (unsigned int) count; ++i) 7743 1.1 mrg sel.quick_push (i | (count / 2)); 7744 1.1 mrg 7745 1.1 mrg vec_perm_indices indices (sel, 2, count); 7746 1.1 mrg perm_mask = vect_gen_perm_mask_checked (vectype, indices); 7747 1.1 mrg gcc_assert (perm_mask != NULL_TREE); 7748 1.1 mrg ncopies *= 2; 7749 1.1 mrg 7750 1.1 mrg if (mask) 7751 1.1 mrg mask_halfvectype = truth_type_for (gs_info.offset_vectype); 7752 1.1 mrg } 7753 1.1 mrg else 7754 1.1 mrg gcc_unreachable (); 7755 1.1 mrg 7756 1.1 mrg rettype = TREE_TYPE (TREE_TYPE (gs_info.decl)); 7757 1.1 mrg ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 7758 1.1 mrg masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 7759 1.1 mrg idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 7760 1.1 mrg srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 7761 1.1 mrg scaletype = TREE_VALUE (arglist); 7762 1.1 mrg 7763 1.1 mrg gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE 7764 1.1 mrg && TREE_CODE (rettype) == VOID_TYPE); 7765 1.1 mrg 7766 1.1 mrg ptr = fold_convert (ptrtype, gs_info.base); 7767 1.1 mrg if (!is_gimple_min_invariant (ptr)) 7768 1.1 mrg { 7769 1.1 mrg ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE); 7770 1.1 mrg new_bb = gsi_insert_seq_on_edge_immediate (pe, seq); 7771 1.1 mrg gcc_assert (!new_bb); 7772 1.1 mrg } 7773 1.1 mrg 7774 1.1 mrg if (mask == NULL_TREE) 7775 1.1 mrg { 7776 1.1 mrg mask_arg = build_int_cst (masktype, -1); 7777 1.1 mrg mask_arg = vect_init_vector (vinfo, stmt_info, 7778 1.1 mrg mask_arg, masktype, NULL); 7779 1.1 mrg } 7780 1.1 mrg 7781 1.1 mrg scale = build_int_cst (scaletype, gs_info.scale); 7782 1.1 mrg 7783 1.1 mrg auto_vec<tree> vec_oprnds0; 7784 1.1 mrg auto_vec<tree> vec_oprnds1; 7785 1.1 mrg auto_vec<tree> vec_masks; 7786 1.1 mrg if (mask) 7787 1.1 mrg { 7788 1.1 mrg tree mask_vectype = truth_type_for (vectype); 7789 1.1 mrg vect_get_vec_defs_for_operand (vinfo, stmt_info, 7790 1.1 mrg modifier == NARROW 7791 1.1 mrg ? ncopies / 2 : ncopies, 7792 1.1 mrg mask, &vec_masks, mask_vectype); 7793 1.1 mrg } 7794 1.1 mrg vect_get_vec_defs_for_operand (vinfo, stmt_info, 7795 1.1 mrg modifier == WIDEN 7796 1.1 mrg ? ncopies / 2 : ncopies, 7797 1.1 mrg gs_info.offset, &vec_oprnds0); 7798 1.1 mrg vect_get_vec_defs_for_operand (vinfo, stmt_info, 7799 1.1 mrg modifier == NARROW 7800 1.1 mrg ? 
ncopies / 2 : ncopies, 7801 1.1 mrg op, &vec_oprnds1); 7802 1.1 mrg for (j = 0; j < ncopies; ++j) 7803 1.1 mrg { 7804 1.1 mrg if (modifier == WIDEN) 7805 1.1 mrg { 7806 1.1 mrg if (j & 1) 7807 1.1 mrg op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0, 7808 1.1 mrg perm_mask, stmt_info, gsi); 7809 1.1 mrg else 7810 1.1 mrg op = vec_oprnd0 = vec_oprnds0[j / 2]; 7811 1.1 mrg src = vec_oprnd1 = vec_oprnds1[j]; 7812 1.1 mrg if (mask) 7813 1.1 mrg mask_op = vec_mask = vec_masks[j]; 7814 1.1 mrg } 7815 1.1 mrg else if (modifier == NARROW) 7816 1.1 mrg { 7817 1.1 mrg if (j & 1) 7818 1.1 mrg src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1, 7819 1.1 mrg perm_mask, stmt_info, gsi); 7820 1.1 mrg else 7821 1.1 mrg src = vec_oprnd1 = vec_oprnds1[j / 2]; 7822 1.1 mrg op = vec_oprnd0 = vec_oprnds0[j]; 7823 1.1 mrg if (mask) 7824 1.1 mrg mask_op = vec_mask = vec_masks[j / 2]; 7825 1.1 mrg } 7826 1.1 mrg else 7827 1.1 mrg { 7828 1.1 mrg op = vec_oprnd0 = vec_oprnds0[j]; 7829 1.1 mrg src = vec_oprnd1 = vec_oprnds1[j]; 7830 1.1 mrg if (mask) 7831 1.1 mrg mask_op = vec_mask = vec_masks[j]; 7832 1.1 mrg } 7833 1.1 mrg 7834 1.1 mrg if (!useless_type_conversion_p (srctype, TREE_TYPE (src))) 7835 1.1 mrg { 7836 1.1 mrg gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)), 7837 1.1 mrg TYPE_VECTOR_SUBPARTS (srctype))); 7838 1.1 mrg var = vect_get_new_ssa_name (srctype, vect_simple_var); 7839 1.1 mrg src = build1 (VIEW_CONVERT_EXPR, srctype, src); 7840 1.1 mrg gassign *new_stmt 7841 1.1 mrg = gimple_build_assign (var, VIEW_CONVERT_EXPR, src); 7842 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 7843 1.1 mrg src = var; 7844 1.1 mrg } 7845 1.1 mrg 7846 1.1 mrg if (!useless_type_conversion_p (idxtype, TREE_TYPE (op))) 7847 1.1 mrg { 7848 1.1 mrg gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)), 7849 1.1 mrg TYPE_VECTOR_SUBPARTS (idxtype))); 7850 1.1 mrg var = vect_get_new_ssa_name (idxtype, vect_simple_var); 7851 1.1 mrg op = build1 (VIEW_CONVERT_EXPR, idxtype, op); 7852 1.1 mrg gassign *new_stmt 7853 1.1 mrg = gimple_build_assign (var, VIEW_CONVERT_EXPR, op); 7854 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 7855 1.1 mrg op = var; 7856 1.1 mrg } 7857 1.1 mrg 7858 1.1 mrg if (mask) 7859 1.1 mrg { 7860 1.1 mrg tree utype; 7861 1.1 mrg mask_arg = mask_op; 7862 1.1 mrg if (modifier == NARROW) 7863 1.1 mrg { 7864 1.1 mrg var = vect_get_new_ssa_name (mask_halfvectype, 7865 1.1 mrg vect_simple_var); 7866 1.1 mrg gassign *new_stmt 7867 1.1 mrg = gimple_build_assign (var, (j & 1) ? 
VEC_UNPACK_HI_EXPR 7868 1.1 mrg : VEC_UNPACK_LO_EXPR, 7869 1.1 mrg mask_op); 7870 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 7871 1.1 mrg mask_arg = var; 7872 1.1 mrg } 7873 1.1 mrg tree optype = TREE_TYPE (mask_arg); 7874 1.1 mrg if (TYPE_MODE (masktype) == TYPE_MODE (optype)) 7875 1.1 mrg utype = masktype; 7876 1.1 mrg else 7877 1.1 mrg utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1); 7878 1.1 mrg var = vect_get_new_ssa_name (utype, vect_scalar_var); 7879 1.1 mrg mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg); 7880 1.1 mrg gassign *new_stmt 7881 1.1 mrg = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg); 7882 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 7883 1.1 mrg mask_arg = var; 7884 1.1 mrg if (!useless_type_conversion_p (masktype, utype)) 7885 1.1 mrg { 7886 1.1 mrg gcc_assert (TYPE_PRECISION (utype) 7887 1.1 mrg <= TYPE_PRECISION (masktype)); 7888 1.1 mrg var = vect_get_new_ssa_name (masktype, vect_scalar_var); 7889 1.1 mrg new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg); 7890 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 7891 1.1 mrg mask_arg = var; 7892 1.1 mrg } 7893 1.1 mrg } 7894 1.1 mrg 7895 1.1 mrg gcall *new_stmt 7896 1.1 mrg = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale); 7897 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 7898 1.1 mrg 7899 1.1 mrg STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); 7900 1.1 mrg } 7901 1.1 mrg *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; 7902 1.1 mrg return true; 7903 1.1 mrg } 7904 1.1 mrg else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3) 7905 1.1 mrg return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies); 7906 1.1 mrg 7907 1.1 mrg if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) 7908 1.1 mrg DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++; 7909 1.1 mrg 7910 1.1 mrg if (grouped_store) 7911 1.1 mrg { 7912 1.1 mrg /* FORNOW */ 7913 1.1 mrg gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info)); 7914 1.1 mrg 7915 1.1 mrg /* We vectorize all the stmts of the interleaving group when we 7916 1.1 mrg reach the last stmt in the group. */ 7917 1.1 mrg if (DR_GROUP_STORE_COUNT (first_stmt_info) 7918 1.1 mrg < DR_GROUP_SIZE (first_stmt_info) 7919 1.1 mrg && !slp) 7920 1.1 mrg { 7921 1.1 mrg *vec_stmt = NULL; 7922 1.1 mrg return true; 7923 1.1 mrg } 7924 1.1 mrg 7925 1.1 mrg if (slp) 7926 1.1 mrg { 7927 1.1 mrg grouped_store = false; 7928 1.1 mrg /* VEC_NUM is the number of vect stmts to be created for this 7929 1.1 mrg group. */ 7930 1.1 mrg vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); 7931 1.1 mrg first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0]; 7932 1.1 mrg gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info) 7933 1.1 mrg == first_stmt_info); 7934 1.1 mrg first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info); 7935 1.1 mrg op = vect_get_store_rhs (first_stmt_info); 7936 1.1 mrg } 7937 1.1 mrg else 7938 1.1 mrg /* VEC_NUM is the number of vect stmts to be created for this 7939 1.1 mrg group. */ 7940 1.1 mrg vec_num = group_size; 7941 1.1 mrg 7942 1.1 mrg ref_type = get_group_alias_ptr_type (first_stmt_info); 7943 1.1 mrg } 7944 1.1 mrg else 7945 1.1 mrg ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr)); 7946 1.1 mrg 7947 1.1 mrg if (dump_enabled_p ()) 7948 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, 7949 1.1 mrg "transform store. 
ncopies = %d\n", ncopies); 7950 1.1 mrg 7951 1.1 mrg if (memory_access_type == VMAT_ELEMENTWISE 7952 1.1 mrg || memory_access_type == VMAT_STRIDED_SLP) 7953 1.1 mrg { 7954 1.1 mrg gimple_stmt_iterator incr_gsi; 7955 1.1 mrg bool insert_after; 7956 1.1 mrg gimple *incr; 7957 1.1 mrg tree offvar; 7958 1.1 mrg tree ivstep; 7959 1.1 mrg tree running_off; 7960 1.1 mrg tree stride_base, stride_step, alias_off; 7961 1.1 mrg tree vec_oprnd; 7962 1.1 mrg tree dr_offset; 7963 1.1 mrg unsigned int g; 7964 1.1 mrg /* Checked by get_load_store_type. */ 7965 1.1 mrg unsigned int const_nunits = nunits.to_constant (); 7966 1.1 mrg 7967 1.1 mrg gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)); 7968 1.1 mrg gcc_assert (!nested_in_vect_loop_p (loop, stmt_info)); 7969 1.1 mrg 7970 1.1 mrg dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info); 7971 1.1 mrg stride_base 7972 1.1 mrg = fold_build_pointer_plus 7973 1.1 mrg (DR_BASE_ADDRESS (first_dr_info->dr), 7974 1.1 mrg size_binop (PLUS_EXPR, 7975 1.1 mrg convert_to_ptrofftype (dr_offset), 7976 1.1 mrg convert_to_ptrofftype (DR_INIT (first_dr_info->dr)))); 7977 1.1 mrg stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr)); 7978 1.1 mrg 7979 1.1 mrg /* For a store with loop-invariant (but other than power-of-2) 7980 1.1 mrg stride (i.e. not a grouped access) like so: 7981 1.1 mrg 7982 1.1 mrg for (i = 0; i < n; i += stride) 7983 1.1 mrg array[i] = ...; 7984 1.1 mrg 7985 1.1 mrg we generate a new induction variable and new stores from 7986 1.1 mrg the components of the (vectorized) rhs: 7987 1.1 mrg 7988 1.1 mrg for (j = 0; ; j += VF*stride) 7989 1.1 mrg vectemp = ...; 7990 1.1 mrg tmp1 = vectemp[0]; 7991 1.1 mrg array[j] = tmp1; 7992 1.1 mrg tmp2 = vectemp[1]; 7993 1.1 mrg array[j + stride] = tmp2; 7994 1.1 mrg ... 7995 1.1 mrg */ 7996 1.1 mrg 7997 1.1 mrg unsigned nstores = const_nunits; 7998 1.1 mrg unsigned lnel = 1; 7999 1.1 mrg tree ltype = elem_type; 8000 1.1 mrg tree lvectype = vectype; 8001 1.1 mrg if (slp) 8002 1.1 mrg { 8003 1.1 mrg if (group_size < const_nunits 8004 1.1 mrg && const_nunits % group_size == 0) 8005 1.1 mrg { 8006 1.1 mrg nstores = const_nunits / group_size; 8007 1.1 mrg lnel = group_size; 8008 1.1 mrg ltype = build_vector_type (elem_type, group_size); 8009 1.1 mrg lvectype = vectype; 8010 1.1 mrg 8011 1.1 mrg /* First check if vec_extract optab doesn't support extraction 8012 1.1 mrg of vector elts directly. */ 8013 1.1 mrg scalar_mode elmode = SCALAR_TYPE_MODE (elem_type); 8014 1.1 mrg machine_mode vmode; 8015 1.1 mrg if (!VECTOR_MODE_P (TYPE_MODE (vectype)) 8016 1.1 mrg || !related_vector_mode (TYPE_MODE (vectype), elmode, 8017 1.1 mrg group_size).exists (&vmode) 8018 1.1 mrg || (convert_optab_handler (vec_extract_optab, 8019 1.1 mrg TYPE_MODE (vectype), vmode) 8020 1.1 mrg == CODE_FOR_nothing)) 8021 1.1 mrg { 8022 1.1 mrg /* Try to avoid emitting an extract of vector elements 8023 1.1 mrg by performing the extracts using an integer type of the 8024 1.1 mrg same size, extracting from a vector of those and then 8025 1.1 mrg re-interpreting it as the original vector type if 8026 1.1 mrg supported. */ 8027 1.1 mrg unsigned lsize 8028 1.1 mrg = group_size * GET_MODE_BITSIZE (elmode); 8029 1.1 mrg unsigned int lnunits = const_nunits / group_size; 8030 1.1 mrg /* If we can't construct such a vector fall back to 8031 1.1 mrg element extracts from the original vector type and 8032 1.1 mrg element size stores. 
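                     (E.g. for a group of two HImode elements in a V8HI
                     vector this tries lsize == 32: if the target has a
                     V4SI mode and a vec_extract handler from V4SI to
                     SImode, each pair of shorts is stored as one 32-bit
                     integer, giving nstores == 4 instead of eight
                     element-sized stores.)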
*/ 8033 1.1 mrg if (int_mode_for_size (lsize, 0).exists (&elmode) 8034 1.1 mrg && VECTOR_MODE_P (TYPE_MODE (vectype)) 8035 1.1 mrg && related_vector_mode (TYPE_MODE (vectype), elmode, 8036 1.1 mrg lnunits).exists (&vmode) 8037 1.1 mrg && (convert_optab_handler (vec_extract_optab, 8038 1.1 mrg vmode, elmode) 8039 1.1 mrg != CODE_FOR_nothing)) 8040 1.1 mrg { 8041 1.1 mrg nstores = lnunits; 8042 1.1 mrg lnel = group_size; 8043 1.1 mrg ltype = build_nonstandard_integer_type (lsize, 1); 8044 1.1 mrg lvectype = build_vector_type (ltype, nstores); 8045 1.1 mrg } 8046 1.1 mrg /* Else fall back to vector extraction anyway. 8047 1.1 mrg Fewer stores are more important than avoiding spilling 8048 1.1 mrg of the vector we extract from. Compared to the 8049 1.1 mrg construction case in vectorizable_load no store-forwarding 8050 1.1 mrg issue exists here for reasonable archs. */ 8051 1.1 mrg } 8052 1.1 mrg } 8053 1.1 mrg else if (group_size >= const_nunits 8054 1.1 mrg && group_size % const_nunits == 0) 8055 1.1 mrg { 8056 1.1 mrg nstores = 1; 8057 1.1 mrg lnel = const_nunits; 8058 1.1 mrg ltype = vectype; 8059 1.1 mrg lvectype = vectype; 8060 1.1 mrg } 8061 1.1 mrg ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type)); 8062 1.1 mrg ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); 8063 1.1 mrg } 8064 1.1 mrg 8065 1.1 mrg ivstep = stride_step; 8066 1.1 mrg ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep, 8067 1.1 mrg build_int_cst (TREE_TYPE (ivstep), vf)); 8068 1.1 mrg 8069 1.1 mrg standard_iv_increment_position (loop, &incr_gsi, &insert_after); 8070 1.1 mrg 8071 1.1 mrg stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base); 8072 1.1 mrg ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep); 8073 1.1 mrg create_iv (stride_base, ivstep, NULL, 8074 1.1 mrg loop, &incr_gsi, insert_after, 8075 1.1 mrg &offvar, NULL); 8076 1.1 mrg incr = gsi_stmt (incr_gsi); 8077 1.1 mrg 8078 1.1 mrg stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step); 8079 1.1 mrg 8080 1.1 mrg alias_off = build_int_cst (ref_type, 0); 8081 1.1 mrg stmt_vec_info next_stmt_info = first_stmt_info; 8082 1.1 mrg for (g = 0; g < group_size; g++) 8083 1.1 mrg { 8084 1.1 mrg running_off = offvar; 8085 1.1 mrg if (g) 8086 1.1 mrg { 8087 1.1 mrg tree size = TYPE_SIZE_UNIT (ltype); 8088 1.1 mrg tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g), 8089 1.1 mrg size); 8090 1.1 mrg tree newoff = copy_ssa_name (running_off, NULL); 8091 1.1 mrg incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR, 8092 1.1 mrg running_off, pos); 8093 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi); 8094 1.1 mrg running_off = newoff; 8095 1.1 mrg } 8096 1.1 mrg if (!slp) 8097 1.1 mrg op = vect_get_store_rhs (next_stmt_info); 8098 1.1 mrg vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies, 8099 1.1 mrg op, &vec_oprnds); 8100 1.1 mrg unsigned int group_el = 0; 8101 1.1 mrg unsigned HOST_WIDE_INT 8102 1.1 mrg elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype))); 8103 1.1 mrg for (j = 0; j < ncopies; j++) 8104 1.1 mrg { 8105 1.1 mrg vec_oprnd = vec_oprnds[j]; 8106 1.1 mrg /* Pun the vector to extract from if necessary. 
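             (Continuing the integer-punning example above: a V8HI operand
             would be VIEW_CONVERTed here to the vector(4) int LVECTYPE so
             that the BIT_FIELD_REFs below can extract 32-bit chunks
             directly.)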
*/ 8107 1.1 mrg if (lvectype != vectype) 8108 1.1 mrg { 8109 1.1 mrg tree tem = make_ssa_name (lvectype); 8110 1.1 mrg gimple *pun 8111 1.1 mrg = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR, 8112 1.1 mrg lvectype, vec_oprnd)); 8113 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi); 8114 1.1 mrg vec_oprnd = tem; 8115 1.1 mrg } 8116 1.1 mrg for (i = 0; i < nstores; i++) 8117 1.1 mrg { 8118 1.1 mrg tree newref, newoff; 8119 1.1 mrg gimple *incr, *assign; 8120 1.1 mrg tree size = TYPE_SIZE (ltype); 8121 1.1 mrg /* Extract the i'th component. */ 8122 1.1 mrg tree pos = fold_build2 (MULT_EXPR, bitsizetype, 8123 1.1 mrg bitsize_int (i), size); 8124 1.1 mrg tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd, 8125 1.1 mrg size, pos); 8126 1.1 mrg 8127 1.1 mrg elem = force_gimple_operand_gsi (gsi, elem, true, 8128 1.1 mrg NULL_TREE, true, 8129 1.1 mrg GSI_SAME_STMT); 8130 1.1 mrg 8131 1.1 mrg tree this_off = build_int_cst (TREE_TYPE (alias_off), 8132 1.1 mrg group_el * elsz); 8133 1.1 mrg newref = build2 (MEM_REF, ltype, 8134 1.1 mrg running_off, this_off); 8135 1.1 mrg vect_copy_ref_info (newref, DR_REF (first_dr_info->dr)); 8136 1.1 mrg 8137 1.1 mrg /* And store it to *running_off. */ 8138 1.1 mrg assign = gimple_build_assign (newref, elem); 8139 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi); 8140 1.1 mrg 8141 1.1 mrg group_el += lnel; 8142 1.1 mrg if (! slp 8143 1.1 mrg || group_el == group_size) 8144 1.1 mrg { 8145 1.1 mrg newoff = copy_ssa_name (running_off, NULL); 8146 1.1 mrg incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR, 8147 1.1 mrg running_off, stride_step); 8148 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi); 8149 1.1 mrg 8150 1.1 mrg running_off = newoff; 8151 1.1 mrg group_el = 0; 8152 1.1 mrg } 8153 1.1 mrg if (g == group_size - 1 8154 1.1 mrg && !slp) 8155 1.1 mrg { 8156 1.1 mrg if (j == 0 && i == 0) 8157 1.1 mrg *vec_stmt = assign; 8158 1.1 mrg STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign); 8159 1.1 mrg } 8160 1.1 mrg } 8161 1.1 mrg } 8162 1.1 mrg next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info); 8163 1.1 mrg vec_oprnds.release (); 8164 1.1 mrg if (slp) 8165 1.1 mrg break; 8166 1.1 mrg } 8167 1.1 mrg 8168 1.1 mrg return true; 8169 1.1 mrg } 8170 1.1 mrg 8171 1.1 mrg auto_vec<tree> dr_chain (group_size); 8172 1.1 mrg oprnds.create (group_size); 8173 1.1 mrg 8174 1.1 mrg gcc_assert (alignment_support_scheme); 8175 1.1 mrg vec_loop_masks *loop_masks 8176 1.1 mrg = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) 8177 1.1 mrg ? &LOOP_VINFO_MASKS (loop_vinfo) 8178 1.1 mrg : NULL); 8179 1.1 mrg vec_loop_lens *loop_lens 8180 1.1 mrg = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo) 8181 1.1 mrg ? &LOOP_VINFO_LENS (loop_vinfo) 8182 1.1 mrg : NULL); 8183 1.1 mrg 8184 1.1 mrg /* Shouldn't go with length-based approach if fully masked. */ 8185 1.1 mrg gcc_assert (!loop_lens || !loop_masks); 8186 1.1 mrg 8187 1.1 mrg /* Targets with store-lane instructions must not require explicit 8188 1.1 mrg realignment. vect_supportable_dr_alignment always returns either 8189 1.1 mrg dr_aligned or dr_unaligned_supported for masked operations. 
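     (Note that for VMAT_LOAD_STORE_LANES the increment computed below
     uses an array type of vec_num * nunits elements as AGGR_TYPE, so a
     single pointer bump steps over the whole interleaved group rather
     than over one vector.)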
*/
8190 1.1 mrg   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8191 1.1 mrg                && !mask
8192 1.1 mrg                && !loop_masks)
8193 1.1 mrg               || alignment_support_scheme == dr_aligned
8194 1.1 mrg               || alignment_support_scheme == dr_unaligned_supported);
8195 1.1 mrg 
8196 1.1 mrg   tree offset = NULL_TREE;
8197 1.1 mrg   if (!known_eq (poffset, 0))
8198 1.1 mrg     offset = size_int (poffset);
8199 1.1 mrg 
8200 1.1 mrg   tree bump;
8201 1.1 mrg   tree vec_offset = NULL_TREE;
8202 1.1 mrg   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8203 1.1 mrg     {
8204 1.1 mrg       aggr_type = NULL_TREE;
8205 1.1 mrg       bump = NULL_TREE;
8206 1.1 mrg     }
8207 1.1 mrg   else if (memory_access_type == VMAT_GATHER_SCATTER)
8208 1.1 mrg     {
8209 1.1 mrg       aggr_type = elem_type;
8210 1.1 mrg       vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8211 1.1 mrg                                        &bump, &vec_offset);
8212 1.1 mrg     }
8213 1.1 mrg   else
8214 1.1 mrg     {
8215 1.1 mrg       if (memory_access_type == VMAT_LOAD_STORE_LANES)
8216 1.1 mrg         aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8217 1.1 mrg       else
8218 1.1 mrg         aggr_type = vectype;
8219 1.1 mrg       bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
8220 1.1 mrg                                           memory_access_type);
8221 1.1 mrg     }
8222 1.1 mrg 
8223 1.1 mrg   if (mask)
8224 1.1 mrg     LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8225 1.1 mrg 
8226 1.1 mrg   /* In case the vectorization factor (VF) is bigger than the number
8227 1.1 mrg      of elements that we can fit in a vectype (nunits), we have to generate
8228 1.1 mrg      more than one vector stmt - i.e - we need to "unroll" the
8229 1.1 mrg      vector stmt by a factor VF/nunits.  */
8230 1.1 mrg 
8231 1.1 mrg   /* In case of interleaving (non-unit grouped access):
8232 1.1 mrg 
8233 1.1 mrg         S1:  &base + 2 = x2
8234 1.1 mrg         S2:  &base = x0
8235 1.1 mrg         S3:  &base + 1 = x1
8236 1.1 mrg         S4:  &base + 3 = x3
8237 1.1 mrg 
8238 1.1 mrg      We create vectorized stores starting from the base address (the access
8239 1.1 mrg      of the first stmt in the chain, S2 in the above example) when the last
8240 1.1 mrg      store stmt of the chain (S4) is reached:
8241 1.1 mrg 
8242 1.1 mrg         VS1: &base = vx2
8243 1.1 mrg         VS2: &base + vec_size*1 = vx0
8244 1.1 mrg         VS3: &base + vec_size*2 = vx1
8245 1.1 mrg         VS4: &base + vec_size*3 = vx3
8246 1.1 mrg 
8247 1.1 mrg      Then permutation statements are generated:
8248 1.1 mrg 
8249 1.1 mrg         VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8250 1.1 mrg         VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8251 1.1 mrg         ...
8252 1.1 mrg 
8253 1.1 mrg      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8254 1.1 mrg      (the order of the data-refs in the output of vect_permute_store_chain
8255 1.1 mrg      corresponds to the order of scalar stmts in the interleaving chain - see
8256 1.1 mrg      the documentation of vect_permute_store_chain()).
8257 1.1 mrg 
8258 1.1 mrg      In case of both multiple types and interleaving, the above vector stores
8259 1.1 mrg      and permutation stmts are created for every copy.  The result vector
8260 1.1 mrg      stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
8261 1.1 mrg      corresponding STMT_VINFO_RELATED_STMT for the next copies.
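     (In the VS5/VS6 example above the selectors interleave the lanes of
     the two 8-element input vectors: { 0, 8, 1, 9, ... } alternates
     lane 0 of the first input with lane 0 of the second input and so on,
     which is how vect_permute_store_chain restores the scalar
     interleaving order in memory.)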
8262 1.1 mrg   */
8263 1.1 mrg 
8264 1.1 mrg   auto_vec<tree> vec_masks;
8265 1.1 mrg   tree vec_mask = NULL;
8266 1.1 mrg   auto_vec<tree> vec_offsets;
8267 1.1 mrg   auto_vec<vec<tree> > gvec_oprnds;
8268 1.1 mrg   gvec_oprnds.safe_grow_cleared (group_size, true);
8269 1.1 mrg   for (j = 0; j < ncopies; j++)
8270 1.1 mrg     {
8271 1.1 mrg       gimple *new_stmt;
8272 1.1 mrg       if (j == 0)
8273 1.1 mrg         {
8274 1.1 mrg           if (slp)
8275 1.1 mrg             {
8276 1.1 mrg               /* Get vectorized arguments for SLP_NODE.  */
8277 1.1 mrg               vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
8278 1.1 mrg                                  op, &vec_oprnds);
8279 1.1 mrg               vec_oprnd = vec_oprnds[0];
8280 1.1 mrg             }
8281 1.1 mrg           else
8282 1.1 mrg             {
8283 1.1 mrg               /* For interleaved stores we collect vectorized defs for all the
8284 1.1 mrg                  stores in the group in DR_CHAIN and OPRNDS.  DR_CHAIN is then
8285 1.1 mrg                  used as an input to vect_permute_store_chain().
8286 1.1 mrg 
8287 1.1 mrg                  If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
8288 1.1 mrg                  and OPRNDS are of size 1.  */
8289 1.1 mrg               stmt_vec_info next_stmt_info = first_stmt_info;
8290 1.1 mrg               for (i = 0; i < group_size; i++)
8291 1.1 mrg                 {
8292 1.1 mrg                   /* Since gaps are not supported for interleaved stores,
8293 1.1 mrg                      DR_GROUP_SIZE is the exact number of stmts in the chain.
8294 1.1 mrg                      Therefore, NEXT_STMT_INFO can't be NULL_TREE.  In case
8295 1.1 mrg                      that there is no interleaving, DR_GROUP_SIZE is 1,
8296 1.1 mrg                      and only one iteration of the loop will be executed.  */
8297 1.1 mrg                   op = vect_get_store_rhs (next_stmt_info);
8298 1.1 mrg                   vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8299 1.1 mrg                                                  ncopies, op, &gvec_oprnds[i]);
8300 1.1 mrg                   vec_oprnd = gvec_oprnds[i][0];
8301 1.1 mrg                   dr_chain.quick_push (gvec_oprnds[i][0]);
8302 1.1 mrg                   oprnds.quick_push (gvec_oprnds[i][0]);
8303 1.1 mrg                   next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8304 1.1 mrg                 }
8305 1.1 mrg               if (mask)
8306 1.1 mrg                 {
8307 1.1 mrg                   vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8308 1.1 mrg                                                  mask, &vec_masks, mask_vectype);
8309 1.1 mrg                   vec_mask = vec_masks[0];
8310 1.1 mrg                 }
8311 1.1 mrg             }
8312 1.1 mrg 
8313 1.1 mrg           /* We should have caught mismatched types earlier.  */
8314 1.1 mrg           gcc_assert (useless_type_conversion_p (vectype,
8315 1.1 mrg                                                  TREE_TYPE (vec_oprnd)));
8316 1.1 mrg           bool simd_lane_access_p
8317 1.1 mrg             = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8318 1.1 mrg           if (simd_lane_access_p
8319 1.1 mrg               && !loop_masks
8320 1.1 mrg               && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8321 1.1 mrg               && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8322 1.1 mrg               && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
8323 1.1 mrg               && integer_zerop (DR_INIT (first_dr_info->dr))
8324 1.1 mrg               && alias_sets_conflict_p (get_alias_set (aggr_type),
8325 1.1 mrg                                         get_alias_set (TREE_TYPE (ref_type))))
8326 1.1 mrg             {
8327 1.1 mrg               dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8328 1.1 mrg               dataref_offset = build_int_cst (ref_type, 0);
8329 1.1 mrg             }
8330 1.1 mrg           else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8331 1.1 mrg             {
8332 1.1 mrg               vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
8333 1.1 mrg                                            slp_node, &gs_info, &dataref_ptr,
8334 1.1 mrg                                            &vec_offsets);
8335 1.1 mrg               vec_offset = vec_offsets[0];
8336 1.1 mrg             }
8337 1.1 mrg           else
8338 1.1 mrg             dataref_ptr
8339 1.1 mrg               = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8340 1.1 mrg                                           simd_lane_access_p ? 
loop : NULL, 8341 1.1 mrg offset, &dummy, gsi, &ptr_incr, 8342 1.1 mrg simd_lane_access_p, bump); 8343 1.1 mrg } 8344 1.1 mrg else 8345 1.1 mrg { 8346 1.1 mrg /* For interleaved stores we created vectorized defs for all the 8347 1.1 mrg defs stored in OPRNDS in the previous iteration (previous copy). 8348 1.1 mrg DR_CHAIN is then used as an input to vect_permute_store_chain(). 8349 1.1 mrg If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and 8350 1.1 mrg OPRNDS are of size 1. */ 8351 1.1 mrg for (i = 0; i < group_size; i++) 8352 1.1 mrg { 8353 1.1 mrg vec_oprnd = gvec_oprnds[i][j]; 8354 1.1 mrg dr_chain[i] = gvec_oprnds[i][j]; 8355 1.1 mrg oprnds[i] = gvec_oprnds[i][j]; 8356 1.1 mrg } 8357 1.1 mrg if (mask) 8358 1.1 mrg vec_mask = vec_masks[j]; 8359 1.1 mrg if (dataref_offset) 8360 1.1 mrg dataref_offset 8361 1.1 mrg = int_const_binop (PLUS_EXPR, dataref_offset, bump); 8362 1.1 mrg else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) 8363 1.1 mrg vec_offset = vec_offsets[j]; 8364 1.1 mrg else 8365 1.1 mrg dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi, 8366 1.1 mrg stmt_info, bump); 8367 1.1 mrg } 8368 1.1 mrg 8369 1.1 mrg if (memory_access_type == VMAT_LOAD_STORE_LANES) 8370 1.1 mrg { 8371 1.1 mrg tree vec_array; 8372 1.1 mrg 8373 1.1 mrg /* Get an array into which we can store the individual vectors. */ 8374 1.1 mrg vec_array = create_vector_array (vectype, vec_num); 8375 1.1 mrg 8376 1.1 mrg /* Invalidate the current contents of VEC_ARRAY. This should 8377 1.1 mrg become an RTL clobber too, which prevents the vector registers 8378 1.1 mrg from being upward-exposed. */ 8379 1.1 mrg vect_clobber_variable (vinfo, stmt_info, gsi, vec_array); 8380 1.1 mrg 8381 1.1 mrg /* Store the individual vectors into the array. */ 8382 1.1 mrg for (i = 0; i < vec_num; i++) 8383 1.1 mrg { 8384 1.1 mrg vec_oprnd = dr_chain[i]; 8385 1.1 mrg write_vector_array (vinfo, stmt_info, 8386 1.1 mrg gsi, vec_oprnd, vec_array, i); 8387 1.1 mrg } 8388 1.1 mrg 8389 1.1 mrg tree final_mask = NULL; 8390 1.1 mrg if (loop_masks) 8391 1.1 mrg final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies, 8392 1.1 mrg vectype, j); 8393 1.1 mrg if (vec_mask) 8394 1.1 mrg final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, 8395 1.1 mrg final_mask, vec_mask, gsi); 8396 1.1 mrg 8397 1.1 mrg gcall *call; 8398 1.1 mrg if (final_mask) 8399 1.1 mrg { 8400 1.1 mrg /* Emit: 8401 1.1 mrg MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK, 8402 1.1 mrg VEC_ARRAY). */ 8403 1.1 mrg unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype)); 8404 1.1 mrg tree alias_ptr = build_int_cst (ref_type, align); 8405 1.1 mrg call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4, 8406 1.1 mrg dataref_ptr, alias_ptr, 8407 1.1 mrg final_mask, vec_array); 8408 1.1 mrg } 8409 1.1 mrg else 8410 1.1 mrg { 8411 1.1 mrg /* Emit: 8412 1.1 mrg MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */ 8413 1.1 mrg data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type); 8414 1.1 mrg call = gimple_build_call_internal (IFN_STORE_LANES, 1, 8415 1.1 mrg vec_array); 8416 1.1 mrg gimple_call_set_lhs (call, data_ref); 8417 1.1 mrg } 8418 1.1 mrg gimple_call_set_nothrow (call, true); 8419 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, call, gsi); 8420 1.1 mrg new_stmt = call; 8421 1.1 mrg 8422 1.1 mrg /* Record that VEC_ARRAY is now dead. 
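In the GIMPLE
   dumps the clobber emitted by vect_clobber_variable shows up as
   (a sketch of the dump form, editorial):

     vect_array ={v} {CLOBBER};

   which lets later passes treat the array's storage as dead so the
   lane registers are not kept live past this point.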
*/ 8423 1.1 mrg vect_clobber_variable (vinfo, stmt_info, gsi, vec_array); 8424 1.1 mrg } 8425 1.1 mrg else 8426 1.1 mrg { 8427 1.1 mrg new_stmt = NULL; 8428 1.1 mrg if (grouped_store) 8429 1.1 mrg { 8430 1.1 mrg if (j == 0) 8431 1.1 mrg result_chain.create (group_size); 8432 1.1 mrg /* Permute. */ 8433 1.1 mrg vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info, 8434 1.1 mrg gsi, &result_chain); 8435 1.1 mrg } 8436 1.1 mrg 8437 1.1 mrg stmt_vec_info next_stmt_info = first_stmt_info; 8438 1.1 mrg for (i = 0; i < vec_num; i++) 8439 1.1 mrg { 8440 1.1 mrg unsigned misalign; 8441 1.1 mrg unsigned HOST_WIDE_INT align; 8442 1.1 mrg 8443 1.1 mrg tree final_mask = NULL_TREE; 8444 1.1 mrg if (loop_masks) 8445 1.1 mrg final_mask = vect_get_loop_mask (gsi, loop_masks, 8446 1.1 mrg vec_num * ncopies, 8447 1.1 mrg vectype, vec_num * j + i); 8448 1.1 mrg if (vec_mask) 8449 1.1 mrg final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, 8450 1.1 mrg final_mask, vec_mask, gsi); 8451 1.1 mrg 8452 1.1 mrg if (memory_access_type == VMAT_GATHER_SCATTER) 8453 1.1 mrg { 8454 1.1 mrg tree scale = size_int (gs_info.scale); 8455 1.1 mrg gcall *call; 8456 1.1 mrg if (final_mask) 8457 1.1 mrg call = gimple_build_call_internal 8458 1.1 mrg (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset, 8459 1.1 mrg scale, vec_oprnd, final_mask); 8460 1.1 mrg else 8461 1.1 mrg call = gimple_build_call_internal 8462 1.1 mrg (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset, 8463 1.1 mrg scale, vec_oprnd); 8464 1.1 mrg gimple_call_set_nothrow (call, true); 8465 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, call, gsi); 8466 1.1 mrg new_stmt = call; 8467 1.1 mrg break; 8468 1.1 mrg } 8469 1.1 mrg 8470 1.1 mrg if (i > 0) 8471 1.1 mrg /* Bump the vector pointer. */ 8472 1.1 mrg dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, 8473 1.1 mrg gsi, stmt_info, bump); 8474 1.1 mrg 8475 1.1 mrg if (slp) 8476 1.1 mrg vec_oprnd = vec_oprnds[i]; 8477 1.1 mrg else if (grouped_store) 8478 1.1 mrg /* For grouped stores vectorized defs are interleaved in 8479 1.1 mrg vect_permute_store_chain(). */ 8480 1.1 mrg vec_oprnd = result_chain[i]; 8481 1.1 mrg 8482 1.1 mrg align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info)); 8483 1.1 mrg if (alignment_support_scheme == dr_aligned) 8484 1.1 mrg misalign = 0; 8485 1.1 mrg else if (misalignment == DR_MISALIGNMENT_UNKNOWN) 8486 1.1 mrg { 8487 1.1 mrg align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info)); 8488 1.1 mrg misalign = 0; 8489 1.1 mrg } 8490 1.1 mrg else 8491 1.1 mrg misalign = misalignment; 8492 1.1 mrg if (dataref_offset == NULL_TREE 8493 1.1 mrg && TREE_CODE (dataref_ptr) == SSA_NAME) 8494 1.1 mrg set_ptr_info_alignment (get_ptr_info (dataref_ptr), align, 8495 1.1 mrg misalign); 8496 1.1 mrg align = least_bit_hwi (misalign | align); 8497 1.1 mrg 8498 1.1 mrg if (memory_access_type == VMAT_CONTIGUOUS_REVERSE) 8499 1.1 mrg { 8500 1.1 mrg tree perm_mask = perm_mask_for_reverse (vectype); 8501 1.1 mrg tree perm_dest = vect_create_destination_var 8502 1.1 mrg (vect_get_store_rhs (stmt_info), vectype); 8503 1.1 mrg tree new_temp = make_ssa_name (perm_dest); 8504 1.1 mrg 8505 1.1 mrg /* Generate the permute statement. 
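For
   VMAT_CONTIGUOUS_REVERSE the mask from perm_mask_for_reverse simply
   reverses element order, so the statement is equivalent to this
   standalone sketch (editorial, using GCC's generic vector
   extensions):

     typedef int v4si __attribute__ ((vector_size (16)));

     v4si
     reverse_v4si (v4si a)
     {
       v4si sel = { 3, 2, 1, 0 };
       return __builtin_shuffle (a, a, sel);
     }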
*/ 8506 1.1 mrg gimple *perm_stmt 8507 1.1 mrg = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd, 8508 1.1 mrg vec_oprnd, perm_mask); 8509 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); 8510 1.1 mrg 8511 1.1 mrg perm_stmt = SSA_NAME_DEF_STMT (new_temp); 8512 1.1 mrg vec_oprnd = new_temp; 8513 1.1 mrg } 8514 1.1 mrg 8515 1.1 mrg /* Arguments are ready. Create the new vector stmt. */ 8516 1.1 mrg if (final_mask) 8517 1.1 mrg { 8518 1.1 mrg tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT); 8519 1.1 mrg gcall *call 8520 1.1 mrg = gimple_build_call_internal (IFN_MASK_STORE, 4, 8521 1.1 mrg dataref_ptr, ptr, 8522 1.1 mrg final_mask, vec_oprnd); 8523 1.1 mrg gimple_call_set_nothrow (call, true); 8524 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, call, gsi); 8525 1.1 mrg new_stmt = call; 8526 1.1 mrg } 8527 1.1 mrg else if (loop_lens) 8528 1.1 mrg { 8529 1.1 mrg tree final_len 8530 1.1 mrg = vect_get_loop_len (loop_vinfo, loop_lens, 8531 1.1 mrg vec_num * ncopies, vec_num * j + i); 8532 1.1 mrg tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT); 8533 1.1 mrg machine_mode vmode = TYPE_MODE (vectype); 8534 1.1 mrg opt_machine_mode new_ovmode 8535 1.1 mrg = get_len_load_store_mode (vmode, false); 8536 1.1 mrg machine_mode new_vmode = new_ovmode.require (); 8537 1.1 mrg /* Need conversion if it's wrapped with VnQI. */ 8538 1.1 mrg if (vmode != new_vmode) 8539 1.1 mrg { 8540 1.1 mrg tree new_vtype 8541 1.1 mrg = build_vector_type_for_mode (unsigned_intQI_type_node, 8542 1.1 mrg new_vmode); 8543 1.1 mrg tree var 8544 1.1 mrg = vect_get_new_ssa_name (new_vtype, vect_simple_var); 8545 1.1 mrg vec_oprnd 8546 1.1 mrg = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd); 8547 1.1 mrg gassign *new_stmt 8548 1.1 mrg = gimple_build_assign (var, VIEW_CONVERT_EXPR, 8549 1.1 mrg vec_oprnd); 8550 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, 8551 1.1 mrg gsi); 8552 1.1 mrg vec_oprnd = var; 8553 1.1 mrg } 8554 1.1 mrg 8555 1.1 mrg signed char biasval = 8556 1.1 mrg LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); 8557 1.1 mrg 8558 1.1 mrg tree bias = build_int_cst (intQI_type_node, biasval); 8559 1.1 mrg gcall *call 8560 1.1 mrg = gimple_build_call_internal (IFN_LEN_STORE, 5, dataref_ptr, 8561 1.1 mrg ptr, final_len, vec_oprnd, 8562 1.1 mrg bias); 8563 1.1 mrg gimple_call_set_nothrow (call, true); 8564 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, call, gsi); 8565 1.1 mrg new_stmt = call; 8566 1.1 mrg } 8567 1.1 mrg else 8568 1.1 mrg { 8569 1.1 mrg data_ref = fold_build2 (MEM_REF, vectype, 8570 1.1 mrg dataref_ptr, 8571 1.1 mrg dataref_offset 8572 1.1 mrg ? 
dataref_offset
8573 1.1 mrg : build_int_cst (ref_type, 0));
8574 1.1 mrg if (alignment_support_scheme == dr_aligned)
8575 1.1 mrg ;
8576 1.1 mrg else
8577 1.1 mrg TREE_TYPE (data_ref)
8578 1.1 mrg = build_aligned_type (TREE_TYPE (data_ref),
8579 1.1 mrg align * BITS_PER_UNIT);
8580 1.1 mrg vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8581 1.1 mrg new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8582 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8583 1.1 mrg }
8584 1.1 mrg
8585 1.1 mrg if (slp)
8586 1.1 mrg continue;
8587 1.1 mrg
8588 1.1 mrg next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8589 1.1 mrg if (!next_stmt_info)
8590 1.1 mrg break;
8591 1.1 mrg }
8592 1.1 mrg }
8593 1.1 mrg if (!slp)
8594 1.1 mrg {
8595 1.1 mrg if (j == 0)
8596 1.1 mrg *vec_stmt = new_stmt;
8597 1.1 mrg STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8598 1.1 mrg }
8599 1.1 mrg }
8600 1.1 mrg
8601 1.1 mrg for (i = 0; i < group_size; ++i)
8602 1.1 mrg {
8603 1.1 mrg vec<tree> oprndsi = gvec_oprnds[i];
8604 1.1 mrg oprndsi.release ();
8605 1.1 mrg }
8606 1.1 mrg oprnds.release ();
8607 1.1 mrg result_chain.release ();
8608 1.1 mrg vec_oprnds.release ();
8609 1.1 mrg
8610 1.1 mrg return true;
8611 1.1 mrg }
8612 1.1 mrg
8613 1.1 mrg /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8614 1.1 mrg VECTOR_CST mask. No checks are made that the target platform supports the
8615 1.1 mrg mask, so callers may wish to test can_vec_perm_const_p separately, or use
8616 1.1 mrg vect_gen_perm_mask_checked. */
8617 1.1 mrg
8618 1.1 mrg tree
8619 1.1 mrg vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8620 1.1 mrg {
8621 1.1 mrg tree mask_type;
8622 1.1 mrg
8623 1.1 mrg poly_uint64 nunits = sel.length ();
8624 1.1 mrg gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8625 1.1 mrg
8626 1.1 mrg mask_type = build_vector_type (ssizetype, nunits);
8627 1.1 mrg return vec_perm_indices_to_tree (mask_type, sel);
8628 1.1 mrg }
8629 1.1 mrg
8630 1.1 mrg /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8631 1.1 mrg i.e. that the target supports the pattern _for arbitrary input vectors_. */
8632 1.1 mrg
8633 1.1 mrg tree
8634 1.1 mrg vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8635 1.1 mrg {
8636 1.1 mrg gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8637 1.1 mrg return vect_gen_perm_mask_any (vectype, sel);
8638 1.1 mrg }
8639 1.1 mrg
8640 1.1 mrg /* Given vector variables X and Y, generated for the scalar STMT_INFO,
8641 1.1 mrg generate instructions to permute the vector elements of X and Y
8642 1.1 mrg using permutation mask MASK_VEC, insert them at *GSI and return the
8643 1.1 mrg permuted vector variable.
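
   Element-wise, the emitted VEC_PERM_EXPR behaves like this sketch
   (editorial; NUNITS stands for the number of elements per vector,
   and each selector element is taken modulo 2 * NUNITS):

     for (i = 0; i < NUNITS; ++i)
       res[i] = mask_vec[i] < NUNITS
                ? x[mask_vec[i]]
                : y[mask_vec[i] - NUNITS];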
*/
8644 1.1 mrg
8645 1.1 mrg static tree
8646 1.1 mrg permute_vec_elements (vec_info *vinfo,
8647 1.1 mrg tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8648 1.1 mrg gimple_stmt_iterator *gsi)
8649 1.1 mrg {
8650 1.1 mrg tree vectype = TREE_TYPE (x);
8651 1.1 mrg tree perm_dest, data_ref;
8652 1.1 mrg gimple *perm_stmt;
8653 1.1 mrg
8654 1.1 mrg tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8655 1.1 mrg if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8656 1.1 mrg perm_dest = vect_create_destination_var (scalar_dest, vectype);
8657 1.1 mrg else
8658 1.1 mrg perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8659 1.1 mrg data_ref = make_ssa_name (perm_dest);
8660 1.1 mrg
8661 1.1 mrg /* Generate the permute statement. */
8662 1.1 mrg perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8663 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8664 1.1 mrg
8665 1.1 mrg return data_ref;
8666 1.1 mrg }
8667 1.1 mrg
8668 1.1 mrg /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8669 1.1 mrg inserting them on the loop's preheader edge. Returns true if we
8670 1.1 mrg were successful in doing so (and thus STMT_INFO can then be moved),
8671 1.1 mrg otherwise returns false. */
8672 1.1 mrg
8673 1.1 mrg static bool
8674 1.1 mrg hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8675 1.1 mrg {
8676 1.1 mrg ssa_op_iter i;
8677 1.1 mrg tree op;
8678 1.1 mrg bool any = false;
8679 1.1 mrg
8680 1.1 mrg FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8681 1.1 mrg {
8682 1.1 mrg gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8683 1.1 mrg if (!gimple_nop_p (def_stmt)
8684 1.1 mrg && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8685 1.1 mrg {
8686 1.1 mrg /* Make sure we don't need to recurse. While we could do
8687 1.1 mrg so in simple cases, when there are more complex use webs
8688 1.1 mrg we don't have an easy way to preserve stmt order to fulfil
8689 1.1 mrg dependencies within them. */
8690 1.1 mrg tree op2;
8691 1.1 mrg ssa_op_iter i2;
8692 1.1 mrg if (gimple_code (def_stmt) == GIMPLE_PHI)
8693 1.1 mrg return false;
8694 1.1 mrg FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8695 1.1 mrg {
8696 1.1 mrg gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8697 1.1 mrg if (!gimple_nop_p (def_stmt2)
8698 1.1 mrg && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8699 1.1 mrg return false;
8700 1.1 mrg }
8701 1.1 mrg any = true;
8702 1.1 mrg }
8703 1.1 mrg }
8704 1.1 mrg
8705 1.1 mrg if (!any)
8706 1.1 mrg return true;
8707 1.1 mrg
8708 1.1 mrg FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8709 1.1 mrg {
8710 1.1 mrg gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8711 1.1 mrg if (!gimple_nop_p (def_stmt)
8712 1.1 mrg && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8713 1.1 mrg {
8714 1.1 mrg gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8715 1.1 mrg gsi_remove (&gsi, false);
8716 1.1 mrg gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8717 1.1 mrg }
8718 1.1 mrg }
8719 1.1 mrg
8720 1.1 mrg return true;
8721 1.1 mrg }
8722 1.1 mrg
8723 1.1 mrg /* vectorizable_load.
8724 1.1 mrg
8725 1.1 mrg Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8726 1.1 mrg that can be vectorized.
8727 1.1 mrg If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8728 1.1 mrg stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8729 1.1 mrg Return true if STMT_INFO is vectorizable in this way.
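
   Like the other vectorizable_* entry points, this function serves
   two phases: when VEC_STMT is null only the analysis is done and
   costs are recorded in COST_VEC; otherwise the vector stmts are also
   emitted (see the "if (!vec_stmt)" test in the body).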
*/ 8730 1.1 mrg 8731 1.1 mrg static bool 8732 1.1 mrg vectorizable_load (vec_info *vinfo, 8733 1.1 mrg stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, 8734 1.1 mrg gimple **vec_stmt, slp_tree slp_node, 8735 1.1 mrg stmt_vector_for_cost *cost_vec) 8736 1.1 mrg { 8737 1.1 mrg tree scalar_dest; 8738 1.1 mrg tree vec_dest = NULL; 8739 1.1 mrg tree data_ref = NULL; 8740 1.1 mrg loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); 8741 1.1 mrg class loop *loop = NULL; 8742 1.1 mrg class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father; 8743 1.1 mrg bool nested_in_vect_loop = false; 8744 1.1 mrg tree elem_type; 8745 1.1 mrg tree new_temp; 8746 1.1 mrg machine_mode mode; 8747 1.1 mrg tree dummy; 8748 1.1 mrg tree dataref_ptr = NULL_TREE; 8749 1.1 mrg tree dataref_offset = NULL_TREE; 8750 1.1 mrg gimple *ptr_incr = NULL; 8751 1.1 mrg int ncopies; 8752 1.1 mrg int i, j; 8753 1.1 mrg unsigned int group_size; 8754 1.1 mrg poly_uint64 group_gap_adj; 8755 1.1 mrg tree msq = NULL_TREE, lsq; 8756 1.1 mrg tree realignment_token = NULL_TREE; 8757 1.1 mrg gphi *phi = NULL; 8758 1.1 mrg vec<tree> dr_chain = vNULL; 8759 1.1 mrg bool grouped_load = false; 8760 1.1 mrg stmt_vec_info first_stmt_info; 8761 1.1 mrg stmt_vec_info first_stmt_info_for_drptr = NULL; 8762 1.1 mrg bool compute_in_loop = false; 8763 1.1 mrg class loop *at_loop; 8764 1.1 mrg int vec_num; 8765 1.1 mrg bool slp = (slp_node != NULL); 8766 1.1 mrg bool slp_perm = false; 8767 1.1 mrg bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo); 8768 1.1 mrg poly_uint64 vf; 8769 1.1 mrg tree aggr_type; 8770 1.1 mrg gather_scatter_info gs_info; 8771 1.1 mrg tree ref_type; 8772 1.1 mrg enum vect_def_type mask_dt = vect_unknown_def_type; 8773 1.1 mrg 8774 1.1 mrg if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 8775 1.1 mrg return false; 8776 1.1 mrg 8777 1.1 mrg if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def 8778 1.1 mrg && ! vec_stmt) 8779 1.1 mrg return false; 8780 1.1 mrg 8781 1.1 mrg if (!STMT_VINFO_DATA_REF (stmt_info)) 8782 1.1 mrg return false; 8783 1.1 mrg 8784 1.1 mrg tree mask = NULL_TREE, mask_vectype = NULL_TREE; 8785 1.1 mrg int mask_index = -1; 8786 1.1 mrg if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt)) 8787 1.1 mrg { 8788 1.1 mrg scalar_dest = gimple_assign_lhs (assign); 8789 1.1 mrg if (TREE_CODE (scalar_dest) != SSA_NAME) 8790 1.1 mrg return false; 8791 1.1 mrg 8792 1.1 mrg tree_code code = gimple_assign_rhs_code (assign); 8793 1.1 mrg if (code != ARRAY_REF 8794 1.1 mrg && code != BIT_FIELD_REF 8795 1.1 mrg && code != INDIRECT_REF 8796 1.1 mrg && code != COMPONENT_REF 8797 1.1 mrg && code != IMAGPART_EXPR 8798 1.1 mrg && code != REALPART_EXPR 8799 1.1 mrg && code != MEM_REF 8800 1.1 mrg && TREE_CODE_CLASS (code) != tcc_declaration) 8801 1.1 mrg return false; 8802 1.1 mrg } 8803 1.1 mrg else 8804 1.1 mrg { 8805 1.1 mrg gcall *call = dyn_cast <gcall *> (stmt_info->stmt); 8806 1.1 mrg if (!call || !gimple_call_internal_p (call)) 8807 1.1 mrg return false; 8808 1.1 mrg 8809 1.1 mrg internal_fn ifn = gimple_call_internal_fn (call); 8810 1.1 mrg if (!internal_load_fn_p (ifn)) 8811 1.1 mrg return false; 8812 1.1 mrg 8813 1.1 mrg scalar_dest = gimple_call_lhs (call); 8814 1.1 mrg if (!scalar_dest) 8815 1.1 mrg return false; 8816 1.1 mrg 8817 1.1 mrg mask_index = internal_fn_mask_index (ifn); 8818 1.1 mrg /* ??? For SLP the mask operand is always last. 
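For instance
   IFN_MASK_LOAD is MASK_LOAD (ptr, align, mask), so
   internal_fn_mask_index returns 2, but the address operands get no
   SLP child node, which leaves the mask as the last child.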
*/ 8819 1.1 mrg if (mask_index >= 0 && slp_node) 8820 1.1 mrg mask_index = SLP_TREE_CHILDREN (slp_node).length () - 1; 8821 1.1 mrg if (mask_index >= 0 8822 1.1 mrg && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index, 8823 1.1 mrg &mask, NULL, &mask_dt, &mask_vectype)) 8824 1.1 mrg return false; 8825 1.1 mrg } 8826 1.1 mrg 8827 1.1 mrg tree vectype = STMT_VINFO_VECTYPE (stmt_info); 8828 1.1 mrg poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); 8829 1.1 mrg 8830 1.1 mrg if (loop_vinfo) 8831 1.1 mrg { 8832 1.1 mrg loop = LOOP_VINFO_LOOP (loop_vinfo); 8833 1.1 mrg nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info); 8834 1.1 mrg vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 8835 1.1 mrg } 8836 1.1 mrg else 8837 1.1 mrg vf = 1; 8838 1.1 mrg 8839 1.1 mrg /* Multiple types in SLP are handled by creating the appropriate number of 8840 1.1 mrg vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 8841 1.1 mrg case of SLP. */ 8842 1.1 mrg if (slp) 8843 1.1 mrg ncopies = 1; 8844 1.1 mrg else 8845 1.1 mrg ncopies = vect_get_num_copies (loop_vinfo, vectype); 8846 1.1 mrg 8847 1.1 mrg gcc_assert (ncopies >= 1); 8848 1.1 mrg 8849 1.1 mrg /* FORNOW. This restriction should be relaxed. */ 8850 1.1 mrg if (nested_in_vect_loop && ncopies > 1) 8851 1.1 mrg { 8852 1.1 mrg if (dump_enabled_p ()) 8853 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 8854 1.1 mrg "multiple types in nested loop.\n"); 8855 1.1 mrg return false; 8856 1.1 mrg } 8857 1.1 mrg 8858 1.1 mrg /* Invalidate assumptions made by dependence analysis when vectorization 8859 1.1 mrg on the unrolled body effectively re-orders stmts. */ 8860 1.1 mrg if (ncopies > 1 8861 1.1 mrg && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0 8862 1.1 mrg && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 8863 1.1 mrg STMT_VINFO_MIN_NEG_DIST (stmt_info))) 8864 1.1 mrg { 8865 1.1 mrg if (dump_enabled_p ()) 8866 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 8867 1.1 mrg "cannot perform implicit CSE when unrolling " 8868 1.1 mrg "with negative dependence distance\n"); 8869 1.1 mrg return false; 8870 1.1 mrg } 8871 1.1 mrg 8872 1.1 mrg elem_type = TREE_TYPE (vectype); 8873 1.1 mrg mode = TYPE_MODE (vectype); 8874 1.1 mrg 8875 1.1 mrg /* FORNOW. In some cases can vectorize even if data-type not supported 8876 1.1 mrg (e.g. - data copies). */ 8877 1.1 mrg if (optab_handler (mov_optab, mode) == CODE_FOR_nothing) 8878 1.1 mrg { 8879 1.1 mrg if (dump_enabled_p ()) 8880 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 8881 1.1 mrg "Aligned load, but unsupported type.\n"); 8882 1.1 mrg return false; 8883 1.1 mrg } 8884 1.1 mrg 8885 1.1 mrg /* Check if the load is a part of an interleaving chain. */ 8886 1.1 mrg if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) 8887 1.1 mrg { 8888 1.1 mrg grouped_load = true; 8889 1.1 mrg /* FORNOW */ 8890 1.1 mrg gcc_assert (!nested_in_vect_loop); 8891 1.1 mrg gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info)); 8892 1.1 mrg 8893 1.1 mrg first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); 8894 1.1 mrg group_size = DR_GROUP_SIZE (first_stmt_info); 8895 1.1 mrg 8896 1.1 mrg /* Refuse non-SLP vectorization of SLP-only groups. 
*/ 8897 1.1 mrg if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info)) 8898 1.1 mrg { 8899 1.1 mrg if (dump_enabled_p ()) 8900 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 8901 1.1 mrg "cannot vectorize load in non-SLP mode.\n"); 8902 1.1 mrg return false; 8903 1.1 mrg } 8904 1.1 mrg 8905 1.1 mrg if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()) 8906 1.1 mrg { 8907 1.1 mrg slp_perm = true; 8908 1.1 mrg 8909 1.1 mrg if (!loop_vinfo) 8910 1.1 mrg { 8911 1.1 mrg /* In BB vectorization we may not actually use a loaded vector 8912 1.1 mrg accessing elements in excess of DR_GROUP_SIZE. */ 8913 1.1 mrg stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0]; 8914 1.1 mrg group_info = DR_GROUP_FIRST_ELEMENT (group_info); 8915 1.1 mrg unsigned HOST_WIDE_INT nunits; 8916 1.1 mrg unsigned j, k, maxk = 0; 8917 1.1 mrg FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k) 8918 1.1 mrg if (k > maxk) 8919 1.1 mrg maxk = k; 8920 1.1 mrg tree vectype = SLP_TREE_VECTYPE (slp_node); 8921 1.1 mrg if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits) 8922 1.1 mrg || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1))) 8923 1.1 mrg { 8924 1.1 mrg if (dump_enabled_p ()) 8925 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 8926 1.1 mrg "BB vectorization with gaps at the end of " 8927 1.1 mrg "a load is not supported\n"); 8928 1.1 mrg return false; 8929 1.1 mrg } 8930 1.1 mrg } 8931 1.1 mrg 8932 1.1 mrg auto_vec<tree> tem; 8933 1.1 mrg unsigned n_perms; 8934 1.1 mrg if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf, 8935 1.1 mrg true, &n_perms)) 8936 1.1 mrg { 8937 1.1 mrg if (dump_enabled_p ()) 8938 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, 8939 1.1 mrg vect_location, 8940 1.1 mrg "unsupported load permutation\n"); 8941 1.1 mrg return false; 8942 1.1 mrg } 8943 1.1 mrg } 8944 1.1 mrg 8945 1.1 mrg /* Invalidate assumptions made by dependence analysis when vectorization 8946 1.1 mrg on the unrolled body effectively re-orders stmts. 
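
   As a concrete sketch, with a loop body of

     a[i] = x[i];
     y[i] = a[i + 3];

   each scalar load of a[i + 3] executes before the store to that
   location three iterations later. Unrolling by VF = 8 emits the
   vector store a[i .. i+7] ahead of the vector load a[i+3 .. i+10],
   so the load would observe the new values; with MIN_NEG_DIST = 3
   only VF <= 3 preserves the scalar semantics, which is what the
   check below enforces.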
*/ 8947 1.1 mrg if (!PURE_SLP_STMT (stmt_info) 8948 1.1 mrg && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0 8949 1.1 mrg && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 8950 1.1 mrg STMT_VINFO_MIN_NEG_DIST (stmt_info))) 8951 1.1 mrg { 8952 1.1 mrg if (dump_enabled_p ()) 8953 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 8954 1.1 mrg "cannot perform implicit CSE when performing " 8955 1.1 mrg "group loads with negative dependence distance\n"); 8956 1.1 mrg return false; 8957 1.1 mrg } 8958 1.1 mrg } 8959 1.1 mrg else 8960 1.1 mrg group_size = 1; 8961 1.1 mrg 8962 1.1 mrg vect_memory_access_type memory_access_type; 8963 1.1 mrg enum dr_alignment_support alignment_support_scheme; 8964 1.1 mrg int misalignment; 8965 1.1 mrg poly_int64 poffset; 8966 1.1 mrg if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD, 8967 1.1 mrg ncopies, &memory_access_type, &poffset, 8968 1.1 mrg &alignment_support_scheme, &misalignment, &gs_info)) 8969 1.1 mrg return false; 8970 1.1 mrg 8971 1.1 mrg if (mask) 8972 1.1 mrg { 8973 1.1 mrg if (memory_access_type == VMAT_CONTIGUOUS) 8974 1.1 mrg { 8975 1.1 mrg machine_mode vec_mode = TYPE_MODE (vectype); 8976 1.1 mrg if (!VECTOR_MODE_P (vec_mode) 8977 1.1 mrg || !can_vec_mask_load_store_p (vec_mode, 8978 1.1 mrg TYPE_MODE (mask_vectype), true)) 8979 1.1 mrg return false; 8980 1.1 mrg } 8981 1.1 mrg else if (memory_access_type != VMAT_LOAD_STORE_LANES 8982 1.1 mrg && memory_access_type != VMAT_GATHER_SCATTER) 8983 1.1 mrg { 8984 1.1 mrg if (dump_enabled_p ()) 8985 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 8986 1.1 mrg "unsupported access type for masked load.\n"); 8987 1.1 mrg return false; 8988 1.1 mrg } 8989 1.1 mrg else if (memory_access_type == VMAT_GATHER_SCATTER 8990 1.1 mrg && gs_info.ifn == IFN_LAST 8991 1.1 mrg && !gs_info.decl) 8992 1.1 mrg { 8993 1.1 mrg if (dump_enabled_p ()) 8994 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 8995 1.1 mrg "unsupported masked emulated gather.\n"); 8996 1.1 mrg return false; 8997 1.1 mrg } 8998 1.1 mrg else if (memory_access_type == VMAT_ELEMENTWISE 8999 1.1 mrg || memory_access_type == VMAT_STRIDED_SLP) 9000 1.1 mrg { 9001 1.1 mrg if (dump_enabled_p ()) 9002 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 9003 1.1 mrg "unsupported masked strided access.\n"); 9004 1.1 mrg return false; 9005 1.1 mrg } 9006 1.1 mrg } 9007 1.1 mrg 9008 1.1 mrg if (!vec_stmt) /* transformation not required. 
*/ 9009 1.1 mrg { 9010 1.1 mrg if (slp_node 9011 1.1 mrg && mask 9012 1.1 mrg && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0], 9013 1.1 mrg mask_vectype)) 9014 1.1 mrg { 9015 1.1 mrg if (dump_enabled_p ()) 9016 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 9017 1.1 mrg "incompatible vector types for invariants\n"); 9018 1.1 mrg return false; 9019 1.1 mrg } 9020 1.1 mrg 9021 1.1 mrg if (!slp) 9022 1.1 mrg STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type; 9023 1.1 mrg 9024 1.1 mrg if (loop_vinfo 9025 1.1 mrg && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) 9026 1.1 mrg check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node, 9027 1.1 mrg VLS_LOAD, group_size, 9028 1.1 mrg memory_access_type, &gs_info, 9029 1.1 mrg mask); 9030 1.1 mrg 9031 1.1 mrg if (dump_enabled_p () 9032 1.1 mrg && memory_access_type != VMAT_ELEMENTWISE 9033 1.1 mrg && memory_access_type != VMAT_GATHER_SCATTER 9034 1.1 mrg && alignment_support_scheme != dr_aligned) 9035 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, 9036 1.1 mrg "Vectorizing an unaligned access.\n"); 9037 1.1 mrg 9038 1.1 mrg STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; 9039 1.1 mrg vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type, 9040 1.1 mrg alignment_support_scheme, misalignment, 9041 1.1 mrg &gs_info, slp_node, cost_vec); 9042 1.1 mrg return true; 9043 1.1 mrg } 9044 1.1 mrg 9045 1.1 mrg if (!slp) 9046 1.1 mrg gcc_assert (memory_access_type 9047 1.1 mrg == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info)); 9048 1.1 mrg 9049 1.1 mrg if (dump_enabled_p ()) 9050 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, 9051 1.1 mrg "transform load. ncopies = %d\n", ncopies); 9052 1.1 mrg 9053 1.1 mrg /* Transform. */ 9054 1.1 mrg 9055 1.1 mrg dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL; 9056 1.1 mrg ensure_base_align (dr_info); 9057 1.1 mrg 9058 1.1 mrg if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl) 9059 1.1 mrg { 9060 1.1 mrg vect_build_gather_load_calls (vinfo, 9061 1.1 mrg stmt_info, gsi, vec_stmt, &gs_info, mask); 9062 1.1 mrg return true; 9063 1.1 mrg } 9064 1.1 mrg 9065 1.1 mrg if (memory_access_type == VMAT_INVARIANT) 9066 1.1 mrg { 9067 1.1 mrg gcc_assert (!grouped_load && !mask && !bb_vinfo); 9068 1.1 mrg /* If we have versioned for aliasing or the loop doesn't 9069 1.1 mrg have any data dependencies that would preclude this, 9070 1.1 mrg then we are sure this is a loop invariant load and 9071 1.1 mrg thus we can insert it on the preheader edge. */ 9072 1.1 mrg bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) 9073 1.1 mrg && !nested_in_vect_loop 9074 1.1 mrg && hoist_defs_of_uses (stmt_info, loop)); 9075 1.1 mrg if (hoist_p) 9076 1.1 mrg { 9077 1.1 mrg gassign *stmt = as_a <gassign *> (stmt_info->stmt); 9078 1.1 mrg if (dump_enabled_p ()) 9079 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, 9080 1.1 mrg "hoisting out of the vectorized loop: %G", stmt); 9081 1.1 mrg scalar_dest = copy_ssa_name (scalar_dest); 9082 1.1 mrg tree rhs = unshare_expr (gimple_assign_rhs1 (stmt)); 9083 1.1 mrg gsi_insert_on_edge_immediate 9084 1.1 mrg (loop_preheader_edge (loop), 9085 1.1 mrg gimple_build_assign (scalar_dest, rhs)); 9086 1.1 mrg } 9087 1.1 mrg /* These copies are all equivalent, but currently the representation 9088 1.1 mrg requires a separate STMT_VINFO_VEC_STMT for each one. 
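
   E.g. (sketch) for an invariant load in

     for (i = 0; i < n; ++i)
       b[i] = *p;

   the scalar load of *p is emitted once (hoisted onto the preheader
   edge when hoist_p), and every copy j just materialises the same
   splat vectemp = { x, x, x, x } of its result x, so the ncopies
   vector stmts recorded below are interchangeable.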
*/ 9089 1.1 mrg gimple_stmt_iterator gsi2 = *gsi; 9090 1.1 mrg gsi_next (&gsi2); 9091 1.1 mrg for (j = 0; j < ncopies; j++) 9092 1.1 mrg { 9093 1.1 mrg if (hoist_p) 9094 1.1 mrg new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest, 9095 1.1 mrg vectype, NULL); 9096 1.1 mrg else 9097 1.1 mrg new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest, 9098 1.1 mrg vectype, &gsi2); 9099 1.1 mrg gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp); 9100 1.1 mrg if (slp) 9101 1.1 mrg SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 9102 1.1 mrg else 9103 1.1 mrg { 9104 1.1 mrg if (j == 0) 9105 1.1 mrg *vec_stmt = new_stmt; 9106 1.1 mrg STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); 9107 1.1 mrg } 9108 1.1 mrg } 9109 1.1 mrg return true; 9110 1.1 mrg } 9111 1.1 mrg 9112 1.1 mrg if (memory_access_type == VMAT_ELEMENTWISE 9113 1.1 mrg || memory_access_type == VMAT_STRIDED_SLP) 9114 1.1 mrg { 9115 1.1 mrg gimple_stmt_iterator incr_gsi; 9116 1.1 mrg bool insert_after; 9117 1.1 mrg tree offvar; 9118 1.1 mrg tree ivstep; 9119 1.1 mrg tree running_off; 9120 1.1 mrg vec<constructor_elt, va_gc> *v = NULL; 9121 1.1 mrg tree stride_base, stride_step, alias_off; 9122 1.1 mrg /* Checked by get_load_store_type. */ 9123 1.1 mrg unsigned int const_nunits = nunits.to_constant (); 9124 1.1 mrg unsigned HOST_WIDE_INT cst_offset = 0; 9125 1.1 mrg tree dr_offset; 9126 1.1 mrg 9127 1.1 mrg gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)); 9128 1.1 mrg gcc_assert (!nested_in_vect_loop); 9129 1.1 mrg 9130 1.1 mrg if (grouped_load) 9131 1.1 mrg { 9132 1.1 mrg first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); 9133 1.1 mrg first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info); 9134 1.1 mrg } 9135 1.1 mrg else 9136 1.1 mrg { 9137 1.1 mrg first_stmt_info = stmt_info; 9138 1.1 mrg first_dr_info = dr_info; 9139 1.1 mrg } 9140 1.1 mrg if (slp && grouped_load) 9141 1.1 mrg { 9142 1.1 mrg group_size = DR_GROUP_SIZE (first_stmt_info); 9143 1.1 mrg ref_type = get_group_alias_ptr_type (first_stmt_info); 9144 1.1 mrg } 9145 1.1 mrg else 9146 1.1 mrg { 9147 1.1 mrg if (grouped_load) 9148 1.1 mrg cst_offset 9149 1.1 mrg = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype))) 9150 1.1 mrg * vect_get_place_in_interleaving_chain (stmt_info, 9151 1.1 mrg first_stmt_info)); 9152 1.1 mrg group_size = 1; 9153 1.1 mrg ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)); 9154 1.1 mrg } 9155 1.1 mrg 9156 1.1 mrg dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info); 9157 1.1 mrg stride_base 9158 1.1 mrg = fold_build_pointer_plus 9159 1.1 mrg (DR_BASE_ADDRESS (first_dr_info->dr), 9160 1.1 mrg size_binop (PLUS_EXPR, 9161 1.1 mrg convert_to_ptrofftype (dr_offset), 9162 1.1 mrg convert_to_ptrofftype (DR_INIT (first_dr_info->dr)))); 9163 1.1 mrg stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr)); 9164 1.1 mrg 9165 1.1 mrg /* For a load with loop-invariant (but other than power-of-2) 9166 1.1 mrg stride (i.e. not a grouped access) like so: 9167 1.1 mrg 9168 1.1 mrg for (i = 0; i < n; i += stride) 9169 1.1 mrg ... = array[i]; 9170 1.1 mrg 9171 1.1 mrg we generate a new induction variable and new accesses to 9172 1.1 mrg form a new vector (or vectors, depending on ncopies): 9173 1.1 mrg 9174 1.1 mrg for (j = 0; ; j += VF*stride) 9175 1.1 mrg tmp1 = array[j]; 9176 1.1 mrg tmp2 = array[j + stride]; 9177 1.1 mrg ... 
9178 1.1 mrg vectemp = {tmp1, tmp2, ...} 9179 1.1 mrg */ 9180 1.1 mrg 9181 1.1 mrg ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step, 9182 1.1 mrg build_int_cst (TREE_TYPE (stride_step), vf)); 9183 1.1 mrg 9184 1.1 mrg standard_iv_increment_position (loop, &incr_gsi, &insert_after); 9185 1.1 mrg 9186 1.1 mrg stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base); 9187 1.1 mrg ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep); 9188 1.1 mrg create_iv (stride_base, ivstep, NULL, 9189 1.1 mrg loop, &incr_gsi, insert_after, 9190 1.1 mrg &offvar, NULL); 9191 1.1 mrg 9192 1.1 mrg stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step); 9193 1.1 mrg 9194 1.1 mrg running_off = offvar; 9195 1.1 mrg alias_off = build_int_cst (ref_type, 0); 9196 1.1 mrg int nloads = const_nunits; 9197 1.1 mrg int lnel = 1; 9198 1.1 mrg tree ltype = TREE_TYPE (vectype); 9199 1.1 mrg tree lvectype = vectype; 9200 1.1 mrg auto_vec<tree> dr_chain; 9201 1.1 mrg if (memory_access_type == VMAT_STRIDED_SLP) 9202 1.1 mrg { 9203 1.1 mrg if (group_size < const_nunits) 9204 1.1 mrg { 9205 1.1 mrg /* First check if vec_init optab supports construction from vector 9206 1.1 mrg elts directly. Otherwise avoid emitting a constructor of 9207 1.1 mrg vector elements by performing the loads using an integer type 9208 1.1 mrg of the same size, constructing a vector of those and then 9209 1.1 mrg re-interpreting it as the original vector type. This avoids a 9210 1.1 mrg huge runtime penalty due to the general inability to perform 9211 1.1 mrg store forwarding from smaller stores to a larger load. */ 9212 1.1 mrg tree ptype; 9213 1.1 mrg tree vtype 9214 1.1 mrg = vector_vector_composition_type (vectype, 9215 1.1 mrg const_nunits / group_size, 9216 1.1 mrg &ptype); 9217 1.1 mrg if (vtype != NULL_TREE) 9218 1.1 mrg { 9219 1.1 mrg nloads = const_nunits / group_size; 9220 1.1 mrg lnel = group_size; 9221 1.1 mrg lvectype = vtype; 9222 1.1 mrg ltype = ptype; 9223 1.1 mrg } 9224 1.1 mrg } 9225 1.1 mrg else 9226 1.1 mrg { 9227 1.1 mrg nloads = 1; 9228 1.1 mrg lnel = const_nunits; 9229 1.1 mrg ltype = vectype; 9230 1.1 mrg } 9231 1.1 mrg ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype))); 9232 1.1 mrg } 9233 1.1 mrg /* Load vector(1) scalar_type if it's 1 element-wise vectype. */ 9234 1.1 mrg else if (nloads == 1) 9235 1.1 mrg ltype = vectype; 9236 1.1 mrg 9237 1.1 mrg if (slp) 9238 1.1 mrg { 9239 1.1 mrg /* For SLP permutation support we need to load the whole group, 9240 1.1 mrg not only the number of vector stmts the permutation result 9241 1.1 mrg fits in. */ 9242 1.1 mrg if (slp_perm) 9243 1.1 mrg { 9244 1.1 mrg /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for 9245 1.1 mrg variable VF. 
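E.g. (sketch)
   for group_size = 3, constant VF = 4 and const_nunits = 4 this
   computes ncopies = CEIL (3 * 4, 4) = 3, i.e. enough scalar loads to
   cover the whole group times VF, even if the permuted result needs
   fewer vector stmts.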
*/ 9246 1.1 mrg unsigned int const_vf = vf.to_constant (); 9247 1.1 mrg ncopies = CEIL (group_size * const_vf, const_nunits); 9248 1.1 mrg dr_chain.create (ncopies); 9249 1.1 mrg } 9250 1.1 mrg else 9251 1.1 mrg ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); 9252 1.1 mrg } 9253 1.1 mrg unsigned int group_el = 0; 9254 1.1 mrg unsigned HOST_WIDE_INT 9255 1.1 mrg elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype))); 9256 1.1 mrg unsigned int n_groups = 0; 9257 1.1 mrg for (j = 0; j < ncopies; j++) 9258 1.1 mrg { 9259 1.1 mrg if (nloads > 1) 9260 1.1 mrg vec_alloc (v, nloads); 9261 1.1 mrg gimple *new_stmt = NULL; 9262 1.1 mrg for (i = 0; i < nloads; i++) 9263 1.1 mrg { 9264 1.1 mrg tree this_off = build_int_cst (TREE_TYPE (alias_off), 9265 1.1 mrg group_el * elsz + cst_offset); 9266 1.1 mrg tree data_ref = build2 (MEM_REF, ltype, running_off, this_off); 9267 1.1 mrg vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr)); 9268 1.1 mrg new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref); 9269 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 9270 1.1 mrg if (nloads > 1) 9271 1.1 mrg CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, 9272 1.1 mrg gimple_assign_lhs (new_stmt)); 9273 1.1 mrg 9274 1.1 mrg group_el += lnel; 9275 1.1 mrg if (! slp 9276 1.1 mrg || group_el == group_size) 9277 1.1 mrg { 9278 1.1 mrg n_groups++; 9279 1.1 mrg /* When doing SLP make sure to not load elements from 9280 1.1 mrg the next vector iteration, those will not be accessed 9281 1.1 mrg so just use the last element again. See PR107451. */ 9282 1.1 mrg if (!slp || known_lt (n_groups, vf)) 9283 1.1 mrg { 9284 1.1 mrg tree newoff = copy_ssa_name (running_off); 9285 1.1 mrg gimple *incr 9286 1.1 mrg = gimple_build_assign (newoff, POINTER_PLUS_EXPR, 9287 1.1 mrg running_off, stride_step); 9288 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi); 9289 1.1 mrg running_off = newoff; 9290 1.1 mrg } 9291 1.1 mrg group_el = 0; 9292 1.1 mrg } 9293 1.1 mrg } 9294 1.1 mrg if (nloads > 1) 9295 1.1 mrg { 9296 1.1 mrg tree vec_inv = build_constructor (lvectype, v); 9297 1.1 mrg new_temp = vect_init_vector (vinfo, stmt_info, 9298 1.1 mrg vec_inv, lvectype, gsi); 9299 1.1 mrg new_stmt = SSA_NAME_DEF_STMT (new_temp); 9300 1.1 mrg if (lvectype != vectype) 9301 1.1 mrg { 9302 1.1 mrg new_stmt = gimple_build_assign (make_ssa_name (vectype), 9303 1.1 mrg VIEW_CONVERT_EXPR, 9304 1.1 mrg build1 (VIEW_CONVERT_EXPR, 9305 1.1 mrg vectype, new_temp)); 9306 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 9307 1.1 mrg } 9308 1.1 mrg } 9309 1.1 mrg 9310 1.1 mrg if (slp) 9311 1.1 mrg { 9312 1.1 mrg if (slp_perm) 9313 1.1 mrg dr_chain.quick_push (gimple_assign_lhs (new_stmt)); 9314 1.1 mrg else 9315 1.1 mrg SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 9316 1.1 mrg } 9317 1.1 mrg else 9318 1.1 mrg { 9319 1.1 mrg if (j == 0) 9320 1.1 mrg *vec_stmt = new_stmt; 9321 1.1 mrg STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); 9322 1.1 mrg } 9323 1.1 mrg } 9324 1.1 mrg if (slp_perm) 9325 1.1 mrg { 9326 1.1 mrg unsigned n_perms; 9327 1.1 mrg vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf, 9328 1.1 mrg false, &n_perms); 9329 1.1 mrg } 9330 1.1 mrg return true; 9331 1.1 mrg } 9332 1.1 mrg 9333 1.1 mrg if (memory_access_type == VMAT_GATHER_SCATTER 9334 1.1 mrg || (!slp && memory_access_type == VMAT_CONTIGUOUS)) 9335 1.1 mrg grouped_load = false; 9336 1.1 mrg 9337 1.1 mrg if (grouped_load) 9338 1.1 mrg { 9339 1.1 mrg first_stmt_info = DR_GROUP_FIRST_ELEMENT 
(stmt_info); 9340 1.1 mrg group_size = DR_GROUP_SIZE (first_stmt_info); 9341 1.1 mrg /* For SLP vectorization we directly vectorize a subchain 9342 1.1 mrg without permutation. */ 9343 1.1 mrg if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()) 9344 1.1 mrg first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0]; 9345 1.1 mrg /* For BB vectorization always use the first stmt to base 9346 1.1 mrg the data ref pointer on. */ 9347 1.1 mrg if (bb_vinfo) 9348 1.1 mrg first_stmt_info_for_drptr 9349 1.1 mrg = vect_find_first_scalar_stmt_in_slp (slp_node); 9350 1.1 mrg 9351 1.1 mrg /* Check if the chain of loads is already vectorized. */ 9352 1.1 mrg if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists () 9353 1.1 mrg /* For SLP we would need to copy over SLP_TREE_VEC_STMTS. 9354 1.1 mrg ??? But we can only do so if there is exactly one 9355 1.1 mrg as we have no way to get at the rest. Leave the CSE 9356 1.1 mrg opportunity alone. 9357 1.1 mrg ??? With the group load eventually participating 9358 1.1 mrg in multiple different permutations (having multiple 9359 1.1 mrg slp nodes which refer to the same group) the CSE 9360 1.1 mrg is even wrong code. See PR56270. */ 9361 1.1 mrg && !slp) 9362 1.1 mrg { 9363 1.1 mrg *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; 9364 1.1 mrg return true; 9365 1.1 mrg } 9366 1.1 mrg first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info); 9367 1.1 mrg group_gap_adj = 0; 9368 1.1 mrg 9369 1.1 mrg /* VEC_NUM is the number of vect stmts to be created for this group. */ 9370 1.1 mrg if (slp) 9371 1.1 mrg { 9372 1.1 mrg grouped_load = false; 9373 1.1 mrg /* If an SLP permutation is from N elements to N elements, 9374 1.1 mrg and if one vector holds a whole number of N, we can load 9375 1.1 mrg the inputs to the permutation in the same way as an 9376 1.1 mrg unpermuted sequence. In other cases we need to load the 9377 1.1 mrg whole group, not only the number of vector stmts the 9378 1.1 mrg permutation result fits in. */ 9379 1.1 mrg unsigned scalar_lanes = SLP_TREE_LANES (slp_node); 9380 1.1 mrg if (slp_perm 9381 1.1 mrg && (group_size != scalar_lanes 9382 1.1 mrg || !multiple_p (nunits, group_size))) 9383 1.1 mrg { 9384 1.1 mrg /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for 9385 1.1 mrg variable VF; see vect_transform_slp_perm_load. */ 9386 1.1 mrg unsigned int const_vf = vf.to_constant (); 9387 1.1 mrg unsigned int const_nunits = nunits.to_constant (); 9388 1.1 mrg vec_num = CEIL (group_size * const_vf, const_nunits); 9389 1.1 mrg group_gap_adj = vf * group_size - nunits * vec_num; 9390 1.1 mrg } 9391 1.1 mrg else 9392 1.1 mrg { 9393 1.1 mrg vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); 9394 1.1 mrg group_gap_adj 9395 1.1 mrg = group_size - scalar_lanes; 9396 1.1 mrg } 9397 1.1 mrg } 9398 1.1 mrg else 9399 1.1 mrg vec_num = group_size; 9400 1.1 mrg 9401 1.1 mrg ref_type = get_group_alias_ptr_type (first_stmt_info); 9402 1.1 mrg } 9403 1.1 mrg else 9404 1.1 mrg { 9405 1.1 mrg first_stmt_info = stmt_info; 9406 1.1 mrg first_dr_info = dr_info; 9407 1.1 mrg group_size = vec_num = 1; 9408 1.1 mrg group_gap_adj = 0; 9409 1.1 mrg ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr)); 9410 1.1 mrg if (slp) 9411 1.1 mrg vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); 9412 1.1 mrg } 9413 1.1 mrg 9414 1.1 mrg gcc_assert (alignment_support_scheme); 9415 1.1 mrg vec_loop_masks *loop_masks 9416 1.1 mrg = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) 9417 1.1 mrg ? 
&LOOP_VINFO_MASKS (loop_vinfo) 9418 1.1 mrg : NULL); 9419 1.1 mrg vec_loop_lens *loop_lens 9420 1.1 mrg = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo) 9421 1.1 mrg ? &LOOP_VINFO_LENS (loop_vinfo) 9422 1.1 mrg : NULL); 9423 1.1 mrg 9424 1.1 mrg /* Shouldn't go with length-based approach if fully masked. */ 9425 1.1 mrg gcc_assert (!loop_lens || !loop_masks); 9426 1.1 mrg 9427 1.1 mrg /* Targets with store-lane instructions must not require explicit 9428 1.1 mrg realignment. vect_supportable_dr_alignment always returns either 9429 1.1 mrg dr_aligned or dr_unaligned_supported for masked operations. */ 9430 1.1 mrg gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES 9431 1.1 mrg && !mask 9432 1.1 mrg && !loop_masks) 9433 1.1 mrg || alignment_support_scheme == dr_aligned 9434 1.1 mrg || alignment_support_scheme == dr_unaligned_supported); 9435 1.1 mrg 9436 1.1 mrg /* In case the vectorization factor (VF) is bigger than the number 9437 1.1 mrg of elements that we can fit in a vectype (nunits), we have to generate 9438 1.1 mrg more than one vector stmt - i.e - we need to "unroll" the 9439 1.1 mrg vector stmt by a factor VF/nunits. In doing so, we record a pointer 9440 1.1 mrg from one copy of the vector stmt to the next, in the field 9441 1.1 mrg STMT_VINFO_RELATED_STMT. This is necessary in order to allow following 9442 1.1 mrg stages to find the correct vector defs to be used when vectorizing 9443 1.1 mrg stmts that use the defs of the current stmt. The example below 9444 1.1 mrg illustrates the vectorization process when VF=16 and nunits=4 (i.e., we 9445 1.1 mrg need to create 4 vectorized stmts): 9446 1.1 mrg 9447 1.1 mrg before vectorization: 9448 1.1 mrg RELATED_STMT VEC_STMT 9449 1.1 mrg S1: x = memref - - 9450 1.1 mrg S2: z = x + 1 - - 9451 1.1 mrg 9452 1.1 mrg step 1: vectorize stmt S1: 9453 1.1 mrg We first create the vector stmt VS1_0, and, as usual, record a 9454 1.1 mrg pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1. 9455 1.1 mrg Next, we create the vector stmt VS1_1, and record a pointer to 9456 1.1 mrg it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0. 9457 1.1 mrg Similarly, for VS1_2 and VS1_3. This is the resulting chain of 9458 1.1 mrg stmts and pointers: 9459 1.1 mrg RELATED_STMT VEC_STMT 9460 1.1 mrg VS1_0: vx0 = memref0 VS1_1 - 9461 1.1 mrg VS1_1: vx1 = memref1 VS1_2 - 9462 1.1 mrg VS1_2: vx2 = memref2 VS1_3 - 9463 1.1 mrg VS1_3: vx3 = memref3 - - 9464 1.1 mrg S1: x = load - VS1_0 9465 1.1 mrg S2: z = x + 1 - - 9466 1.1 mrg */ 9467 1.1 mrg 9468 1.1 mrg /* In case of interleaving (non-unit grouped access): 9469 1.1 mrg 9470 1.1 mrg S1: x2 = &base + 2 9471 1.1 mrg S2: x0 = &base 9472 1.1 mrg S3: x1 = &base + 1 9473 1.1 mrg S4: x3 = &base + 3 9474 1.1 mrg 9475 1.1 mrg Vectorized loads are created in the order of memory accesses 9476 1.1 mrg starting from the access of the first stmt of the chain: 9477 1.1 mrg 9478 1.1 mrg VS1: vx0 = &base 9479 1.1 mrg VS2: vx1 = &base + vec_size*1 9480 1.1 mrg VS3: vx3 = &base + vec_size*2 9481 1.1 mrg VS4: vx4 = &base + vec_size*3 9482 1.1 mrg 9483 1.1 mrg Then permutation statements are generated: 9484 1.1 mrg 9485 1.1 mrg VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } > 9486 1.1 mrg VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } > 9487 1.1 mrg ... 
9488 1.1 mrg 9489 1.1 mrg And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts 9490 1.1 mrg (the order of the data-refs in the output of vect_permute_load_chain 9491 1.1 mrg corresponds to the order of scalar stmts in the interleaving chain - see 9492 1.1 mrg the documentation of vect_permute_load_chain()). 9493 1.1 mrg The generation of permutation stmts and recording them in 9494 1.1 mrg STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load(). 9495 1.1 mrg 9496 1.1 mrg In case of both multiple types and interleaving, the vector loads and 9497 1.1 mrg permutation stmts above are created for every copy. The result vector 9498 1.1 mrg stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the 9499 1.1 mrg corresponding STMT_VINFO_RELATED_STMT for the next copies. */ 9500 1.1 mrg 9501 1.1 mrg /* If the data reference is aligned (dr_aligned) or potentially unaligned 9502 1.1 mrg on a target that supports unaligned accesses (dr_unaligned_supported) 9503 1.1 mrg we generate the following code: 9504 1.1 mrg p = initial_addr; 9505 1.1 mrg indx = 0; 9506 1.1 mrg loop { 9507 1.1 mrg p = p + indx * vectype_size; 9508 1.1 mrg vec_dest = *(p); 9509 1.1 mrg indx = indx + 1; 9510 1.1 mrg } 9511 1.1 mrg 9512 1.1 mrg Otherwise, the data reference is potentially unaligned on a target that 9513 1.1 mrg does not support unaligned accesses (dr_explicit_realign_optimized) - 9514 1.1 mrg then generate the following code, in which the data in each iteration is 9515 1.1 mrg obtained by two vector loads, one from the previous iteration, and one 9516 1.1 mrg from the current iteration: 9517 1.1 mrg p1 = initial_addr; 9518 1.1 mrg msq_init = *(floor(p1)) 9519 1.1 mrg p2 = initial_addr + VS - 1; 9520 1.1 mrg realignment_token = call target_builtin; 9521 1.1 mrg indx = 0; 9522 1.1 mrg loop { 9523 1.1 mrg p2 = p2 + indx * vectype_size 9524 1.1 mrg lsq = *(floor(p2)) 9525 1.1 mrg vec_dest = realign_load (msq, lsq, realignment_token) 9526 1.1 mrg indx = indx + 1; 9527 1.1 mrg msq = lsq; 9528 1.1 mrg } */ 9529 1.1 mrg 9530 1.1 mrg /* If the misalignment remains the same throughout the execution of the 9531 1.1 mrg loop, we can create the init_addr and permutation mask at the loop 9532 1.1 mrg preheader. Otherwise, it needs to be created inside the loop. 9533 1.1 mrg This can only occur when vectorizing memory accesses in the inner-loop 9534 1.1 mrg nested within an outer-loop that is being vectorized. */ 9535 1.1 mrg 9536 1.1 mrg if (nested_in_vect_loop 9537 1.1 mrg && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr), 9538 1.1 mrg GET_MODE_SIZE (TYPE_MODE (vectype)))) 9539 1.1 mrg { 9540 1.1 mrg gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized); 9541 1.1 mrg compute_in_loop = true; 9542 1.1 mrg } 9543 1.1 mrg 9544 1.1 mrg bool diff_first_stmt_info 9545 1.1 mrg = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr; 9546 1.1 mrg 9547 1.1 mrg tree offset = NULL_TREE; 9548 1.1 mrg if ((alignment_support_scheme == dr_explicit_realign_optimized 9549 1.1 mrg || alignment_support_scheme == dr_explicit_realign) 9550 1.1 mrg && !compute_in_loop) 9551 1.1 mrg { 9552 1.1 mrg /* If we have different first_stmt_info, we can't set up realignment 9553 1.1 mrg here, since we can't guarantee first_stmt_info DR has been 9554 1.1 mrg initialized yet, use first_stmt_info_for_drptr DR by bumping the 9555 1.1 mrg distance from first_stmt_info DR instead as below. 
*/ 9556 1.1 mrg if (!diff_first_stmt_info) 9557 1.1 mrg msq = vect_setup_realignment (vinfo, 9558 1.1 mrg first_stmt_info, gsi, &realignment_token, 9559 1.1 mrg alignment_support_scheme, NULL_TREE, 9560 1.1 mrg &at_loop); 9561 1.1 mrg if (alignment_support_scheme == dr_explicit_realign_optimized) 9562 1.1 mrg { 9563 1.1 mrg phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq)); 9564 1.1 mrg offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype), 9565 1.1 mrg size_one_node); 9566 1.1 mrg gcc_assert (!first_stmt_info_for_drptr); 9567 1.1 mrg } 9568 1.1 mrg } 9569 1.1 mrg else 9570 1.1 mrg at_loop = loop; 9571 1.1 mrg 9572 1.1 mrg if (!known_eq (poffset, 0)) 9573 1.1 mrg offset = (offset 9574 1.1 mrg ? size_binop (PLUS_EXPR, offset, size_int (poffset)) 9575 1.1 mrg : size_int (poffset)); 9576 1.1 mrg 9577 1.1 mrg tree bump; 9578 1.1 mrg tree vec_offset = NULL_TREE; 9579 1.1 mrg if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) 9580 1.1 mrg { 9581 1.1 mrg aggr_type = NULL_TREE; 9582 1.1 mrg bump = NULL_TREE; 9583 1.1 mrg } 9584 1.1 mrg else if (memory_access_type == VMAT_GATHER_SCATTER) 9585 1.1 mrg { 9586 1.1 mrg aggr_type = elem_type; 9587 1.1 mrg vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info, 9588 1.1 mrg &bump, &vec_offset); 9589 1.1 mrg } 9590 1.1 mrg else 9591 1.1 mrg { 9592 1.1 mrg if (memory_access_type == VMAT_LOAD_STORE_LANES) 9593 1.1 mrg aggr_type = build_array_type_nelts (elem_type, vec_num * nunits); 9594 1.1 mrg else 9595 1.1 mrg aggr_type = vectype; 9596 1.1 mrg bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type, 9597 1.1 mrg memory_access_type); 9598 1.1 mrg } 9599 1.1 mrg 9600 1.1 mrg auto_vec<tree> vec_offsets; 9601 1.1 mrg auto_vec<tree> vec_masks; 9602 1.1 mrg if (mask) 9603 1.1 mrg { 9604 1.1 mrg if (slp_node) 9605 1.1 mrg vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index], 9606 1.1 mrg &vec_masks); 9607 1.1 mrg else 9608 1.1 mrg vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask, 9609 1.1 mrg &vec_masks, mask_vectype); 9610 1.1 mrg } 9611 1.1 mrg tree vec_mask = NULL_TREE; 9612 1.1 mrg poly_uint64 group_elt = 0; 9613 1.1 mrg for (j = 0; j < ncopies; j++) 9614 1.1 mrg { 9615 1.1 mrg /* 1. Create the vector or array pointer update chain. */ 9616 1.1 mrg if (j == 0) 9617 1.1 mrg { 9618 1.1 mrg bool simd_lane_access_p 9619 1.1 mrg = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0; 9620 1.1 mrg if (simd_lane_access_p 9621 1.1 mrg && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR 9622 1.1 mrg && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0)) 9623 1.1 mrg && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info)) 9624 1.1 mrg && integer_zerop (DR_INIT (first_dr_info->dr)) 9625 1.1 mrg && alias_sets_conflict_p (get_alias_set (aggr_type), 9626 1.1 mrg get_alias_set (TREE_TYPE (ref_type))) 9627 1.1 mrg && (alignment_support_scheme == dr_aligned 9628 1.1 mrg || alignment_support_scheme == dr_unaligned_supported)) 9629 1.1 mrg { 9630 1.1 mrg dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr)); 9631 1.1 mrg dataref_offset = build_int_cst (ref_type, 0); 9632 1.1 mrg } 9633 1.1 mrg else if (diff_first_stmt_info) 9634 1.1 mrg { 9635 1.1 mrg dataref_ptr 9636 1.1 mrg = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr, 9637 1.1 mrg aggr_type, at_loop, offset, &dummy, 9638 1.1 mrg gsi, &ptr_incr, simd_lane_access_p, 9639 1.1 mrg bump); 9640 1.1 mrg /* Adjust the pointer by the difference to first_stmt. 
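E.g. (sketch)
   if DR_INIT of the group leader's DR is 8 while DR_INIT of the DR
   the pointer was built from is 0, the pointer gets bumped by
   8 - 0 = 8 bytes so that it addresses the leader's first element.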
*/ 9641 1.1 mrg data_reference_p ptrdr 9642 1.1 mrg = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr); 9643 1.1 mrg tree diff 9644 1.1 mrg = fold_convert (sizetype, 9645 1.1 mrg size_binop (MINUS_EXPR, 9646 1.1 mrg DR_INIT (first_dr_info->dr), 9647 1.1 mrg DR_INIT (ptrdr))); 9648 1.1 mrg dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi, 9649 1.1 mrg stmt_info, diff); 9650 1.1 mrg if (alignment_support_scheme == dr_explicit_realign) 9651 1.1 mrg { 9652 1.1 mrg msq = vect_setup_realignment (vinfo, 9653 1.1 mrg first_stmt_info_for_drptr, gsi, 9654 1.1 mrg &realignment_token, 9655 1.1 mrg alignment_support_scheme, 9656 1.1 mrg dataref_ptr, &at_loop); 9657 1.1 mrg gcc_assert (!compute_in_loop); 9658 1.1 mrg } 9659 1.1 mrg } 9660 1.1 mrg else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) 9661 1.1 mrg { 9662 1.1 mrg vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info, 9663 1.1 mrg slp_node, &gs_info, &dataref_ptr, 9664 1.1 mrg &vec_offsets); 9665 1.1 mrg } 9666 1.1 mrg else 9667 1.1 mrg dataref_ptr 9668 1.1 mrg = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type, 9669 1.1 mrg at_loop, 9670 1.1 mrg offset, &dummy, gsi, &ptr_incr, 9671 1.1 mrg simd_lane_access_p, bump); 9672 1.1 mrg if (mask) 9673 1.1 mrg vec_mask = vec_masks[0]; 9674 1.1 mrg } 9675 1.1 mrg else 9676 1.1 mrg { 9677 1.1 mrg if (dataref_offset) 9678 1.1 mrg dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, 9679 1.1 mrg bump); 9680 1.1 mrg else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info)) 9681 1.1 mrg dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi, 9682 1.1 mrg stmt_info, bump); 9683 1.1 mrg if (mask) 9684 1.1 mrg vec_mask = vec_masks[j]; 9685 1.1 mrg } 9686 1.1 mrg 9687 1.1 mrg if (grouped_load || slp_perm) 9688 1.1 mrg dr_chain.create (vec_num); 9689 1.1 mrg 9690 1.1 mrg gimple *new_stmt = NULL; 9691 1.1 mrg if (memory_access_type == VMAT_LOAD_STORE_LANES) 9692 1.1 mrg { 9693 1.1 mrg tree vec_array; 9694 1.1 mrg 9695 1.1 mrg vec_array = create_vector_array (vectype, vec_num); 9696 1.1 mrg 9697 1.1 mrg tree final_mask = NULL_TREE; 9698 1.1 mrg if (loop_masks) 9699 1.1 mrg final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies, 9700 1.1 mrg vectype, j); 9701 1.1 mrg if (vec_mask) 9702 1.1 mrg final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, 9703 1.1 mrg final_mask, vec_mask, gsi); 9704 1.1 mrg 9705 1.1 mrg gcall *call; 9706 1.1 mrg if (final_mask) 9707 1.1 mrg { 9708 1.1 mrg /* Emit: 9709 1.1 mrg VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR, 9710 1.1 mrg VEC_MASK). */ 9711 1.1 mrg unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype)); 9712 1.1 mrg tree alias_ptr = build_int_cst (ref_type, align); 9713 1.1 mrg call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3, 9714 1.1 mrg dataref_ptr, alias_ptr, 9715 1.1 mrg final_mask); 9716 1.1 mrg } 9717 1.1 mrg else 9718 1.1 mrg { 9719 1.1 mrg /* Emit: 9720 1.1 mrg VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */ 9721 1.1 mrg data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type); 9722 1.1 mrg call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref); 9723 1.1 mrg } 9724 1.1 mrg gimple_call_set_lhs (call, vec_array); 9725 1.1 mrg gimple_call_set_nothrow (call, true); 9726 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, call, gsi); 9727 1.1 mrg new_stmt = call; 9728 1.1 mrg 9729 1.1 mrg /* Extract each vector into an SSA_NAME. 
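The loop below
   emits one array read per lane vector, roughly (a sketch of the
   generated GIMPLE):

     vectN_0 = vect_array[0];
     vectN_1 = vect_array[1];
     ...

   and pushes the new SSA names onto DR_CHAIN for the grouped-load
   bookkeeping that follows.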
*/ 9730 1.1 mrg for (i = 0; i < vec_num; i++) 9731 1.1 mrg { 9732 1.1 mrg new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest, 9733 1.1 mrg vec_array, i); 9734 1.1 mrg dr_chain.quick_push (new_temp); 9735 1.1 mrg } 9736 1.1 mrg 9737 1.1 mrg /* Record the mapping between SSA_NAMEs and statements. */ 9738 1.1 mrg vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain); 9739 1.1 mrg 9740 1.1 mrg /* Record that VEC_ARRAY is now dead. */ 9741 1.1 mrg vect_clobber_variable (vinfo, stmt_info, gsi, vec_array); 9742 1.1 mrg } 9743 1.1 mrg else 9744 1.1 mrg { 9745 1.1 mrg for (i = 0; i < vec_num; i++) 9746 1.1 mrg { 9747 1.1 mrg tree final_mask = NULL_TREE; 9748 1.1 mrg if (loop_masks 9749 1.1 mrg && memory_access_type != VMAT_INVARIANT) 9750 1.1 mrg final_mask = vect_get_loop_mask (gsi, loop_masks, 9751 1.1 mrg vec_num * ncopies, 9752 1.1 mrg vectype, vec_num * j + i); 9753 1.1 mrg if (vec_mask) 9754 1.1 mrg final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, 9755 1.1 mrg final_mask, vec_mask, gsi); 9756 1.1 mrg 9757 1.1 mrg if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info)) 9758 1.1 mrg dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, 9759 1.1 mrg gsi, stmt_info, bump); 9760 1.1 mrg 9761 1.1 mrg /* 2. Create the vector-load in the loop. */ 9762 1.1 mrg switch (alignment_support_scheme) 9763 1.1 mrg { 9764 1.1 mrg case dr_aligned: 9765 1.1 mrg case dr_unaligned_supported: 9766 1.1 mrg { 9767 1.1 mrg unsigned int misalign; 9768 1.1 mrg unsigned HOST_WIDE_INT align; 9769 1.1 mrg 9770 1.1 mrg if (memory_access_type == VMAT_GATHER_SCATTER 9771 1.1 mrg && gs_info.ifn != IFN_LAST) 9772 1.1 mrg { 9773 1.1 mrg if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) 9774 1.1 mrg vec_offset = vec_offsets[vec_num * j + i]; 9775 1.1 mrg tree zero = build_zero_cst (vectype); 9776 1.1 mrg tree scale = size_int (gs_info.scale); 9777 1.1 mrg gcall *call; 9778 1.1 mrg if (final_mask) 9779 1.1 mrg call = gimple_build_call_internal 9780 1.1 mrg (IFN_MASK_GATHER_LOAD, 5, dataref_ptr, 9781 1.1 mrg vec_offset, scale, zero, final_mask); 9782 1.1 mrg else 9783 1.1 mrg call = gimple_build_call_internal 9784 1.1 mrg (IFN_GATHER_LOAD, 4, dataref_ptr, 9785 1.1 mrg vec_offset, scale, zero); 9786 1.1 mrg gimple_call_set_nothrow (call, true); 9787 1.1 mrg new_stmt = call; 9788 1.1 mrg data_ref = NULL_TREE; 9789 1.1 mrg break; 9790 1.1 mrg } 9791 1.1 mrg else if (memory_access_type == VMAT_GATHER_SCATTER) 9792 1.1 mrg { 9793 1.1 mrg /* Emulated gather-scatter. */ 9794 1.1 mrg gcc_assert (!final_mask); 9795 1.1 mrg unsigned HOST_WIDE_INT const_nunits 9796 1.1 mrg = nunits.to_constant (); 9797 1.1 mrg unsigned HOST_WIDE_INT const_offset_nunits 9798 1.1 mrg = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype) 9799 1.1 mrg .to_constant (); 9800 1.1 mrg vec<constructor_elt, va_gc> *ctor_elts; 9801 1.1 mrg vec_alloc (ctor_elts, const_nunits); 9802 1.1 mrg gimple_seq stmts = NULL; 9803 1.1 mrg /* We support offset vectors with more elements 9804 1.1 mrg than the data vector for now. 
*/ 9805 1.1 mrg unsigned HOST_WIDE_INT factor 9806 1.1 mrg = const_offset_nunits / const_nunits; 9807 1.1 mrg vec_offset = vec_offsets[j / factor]; 9808 1.1 mrg unsigned elt_offset = (j % factor) * const_nunits; 9809 1.1 mrg tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset)); 9810 1.1 mrg tree scale = size_int (gs_info.scale); 9811 1.1 mrg align 9812 1.1 mrg = get_object_alignment (DR_REF (first_dr_info->dr)); 9813 1.1 mrg tree ltype = build_aligned_type (TREE_TYPE (vectype), 9814 1.1 mrg align); 9815 1.1 mrg for (unsigned k = 0; k < const_nunits; ++k) 9816 1.1 mrg { 9817 1.1 mrg tree boff = size_binop (MULT_EXPR, 9818 1.1 mrg TYPE_SIZE (idx_type), 9819 1.1 mrg bitsize_int 9820 1.1 mrg (k + elt_offset)); 9821 1.1 mrg tree idx = gimple_build (&stmts, BIT_FIELD_REF, 9822 1.1 mrg idx_type, vec_offset, 9823 1.1 mrg TYPE_SIZE (idx_type), 9824 1.1 mrg boff); 9825 1.1 mrg idx = gimple_convert (&stmts, sizetype, idx); 9826 1.1 mrg idx = gimple_build (&stmts, MULT_EXPR, 9827 1.1 mrg sizetype, idx, scale); 9828 1.1 mrg tree ptr = gimple_build (&stmts, PLUS_EXPR, 9829 1.1 mrg TREE_TYPE (dataref_ptr), 9830 1.1 mrg dataref_ptr, idx); 9831 1.1 mrg ptr = gimple_convert (&stmts, ptr_type_node, ptr); 9832 1.1 mrg tree elt = make_ssa_name (TREE_TYPE (vectype)); 9833 1.1 mrg tree ref = build2 (MEM_REF, ltype, ptr, 9834 1.1 mrg build_int_cst (ref_type, 0)); 9835 1.1 mrg new_stmt = gimple_build_assign (elt, ref); 9836 1.1 mrg gimple_seq_add_stmt (&stmts, new_stmt); 9837 1.1 mrg CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt); 9838 1.1 mrg } 9839 1.1 mrg gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); 9840 1.1 mrg new_stmt = gimple_build_assign (NULL_TREE, 9841 1.1 mrg build_constructor 9842 1.1 mrg (vectype, ctor_elts)); 9843 1.1 mrg data_ref = NULL_TREE; 9844 1.1 mrg break; 9845 1.1 mrg } 9846 1.1 mrg 9847 1.1 mrg align = 9848 1.1 mrg known_alignment (DR_TARGET_ALIGNMENT (first_dr_info)); 9849 1.1 mrg if (alignment_support_scheme == dr_aligned) 9850 1.1 mrg misalign = 0; 9851 1.1 mrg else if (misalignment == DR_MISALIGNMENT_UNKNOWN) 9852 1.1 mrg { 9853 1.1 mrg align = dr_alignment 9854 1.1 mrg (vect_dr_behavior (vinfo, first_dr_info)); 9855 1.1 mrg misalign = 0; 9856 1.1 mrg } 9857 1.1 mrg else 9858 1.1 mrg misalign = misalignment; 9859 1.1 mrg if (dataref_offset == NULL_TREE 9860 1.1 mrg && TREE_CODE (dataref_ptr) == SSA_NAME) 9861 1.1 mrg set_ptr_info_alignment (get_ptr_info (dataref_ptr), 9862 1.1 mrg align, misalign); 9863 1.1 mrg align = least_bit_hwi (misalign | align); 9864 1.1 mrg 9865 1.1 mrg if (final_mask) 9866 1.1 mrg { 9867 1.1 mrg tree ptr = build_int_cst (ref_type, 9868 1.1 mrg align * BITS_PER_UNIT); 9869 1.1 mrg gcall *call 9870 1.1 mrg = gimple_build_call_internal (IFN_MASK_LOAD, 3, 9871 1.1 mrg dataref_ptr, ptr, 9872 1.1 mrg final_mask); 9873 1.1 mrg gimple_call_set_nothrow (call, true); 9874 1.1 mrg new_stmt = call; 9875 1.1 mrg data_ref = NULL_TREE; 9876 1.1 mrg } 9877 1.1 mrg else if (loop_lens && memory_access_type != VMAT_INVARIANT) 9878 1.1 mrg { 9879 1.1 mrg tree final_len 9880 1.1 mrg = vect_get_loop_len (loop_vinfo, loop_lens, 9881 1.1 mrg vec_num * ncopies, 9882 1.1 mrg vec_num * j + i); 9883 1.1 mrg tree ptr = build_int_cst (ref_type, 9884 1.1 mrg align * BITS_PER_UNIT); 9885 1.1 mrg 9886 1.1 mrg machine_mode vmode = TYPE_MODE (vectype); 9887 1.1 mrg opt_machine_mode new_ovmode 9888 1.1 mrg = get_len_load_store_mode (vmode, true); 9889 1.1 mrg machine_mode new_vmode = new_ovmode.require (); 9890 1.1 mrg tree qi_type = unsigned_intQI_type_node; 9891 1.1 mrg 9892 1.1 mrg 
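			  /* The partial load/store bias is 0 or -1, as
			     reported by the target; passing it through to
			     IFN_LEN_LOAD is meant to let targets whose
			     load-with-length instruction expects a biased
			     length ("length - 1") be handled as well.  */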
signed char biasval = 9893 1.1 mrg LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); 9894 1.1 mrg 9895 1.1 mrg tree bias = build_int_cst (intQI_type_node, biasval); 9896 1.1 mrg 9897 1.1 mrg gcall *call 9898 1.1 mrg = gimple_build_call_internal (IFN_LEN_LOAD, 4, 9899 1.1 mrg dataref_ptr, ptr, 9900 1.1 mrg final_len, bias); 9901 1.1 mrg gimple_call_set_nothrow (call, true); 9902 1.1 mrg new_stmt = call; 9903 1.1 mrg data_ref = NULL_TREE; 9904 1.1 mrg 9905 1.1 mrg /* Need conversion if it's wrapped with VnQI. */ 9906 1.1 mrg if (vmode != new_vmode) 9907 1.1 mrg { 9908 1.1 mrg tree new_vtype 9909 1.1 mrg = build_vector_type_for_mode (qi_type, new_vmode); 9910 1.1 mrg tree var = vect_get_new_ssa_name (new_vtype, 9911 1.1 mrg vect_simple_var); 9912 1.1 mrg gimple_set_lhs (call, var); 9913 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, call, 9914 1.1 mrg gsi); 9915 1.1 mrg tree op = build1 (VIEW_CONVERT_EXPR, vectype, var); 9916 1.1 mrg new_stmt 9917 1.1 mrg = gimple_build_assign (vec_dest, 9918 1.1 mrg VIEW_CONVERT_EXPR, op); 9919 1.1 mrg } 9920 1.1 mrg } 9921 1.1 mrg else 9922 1.1 mrg { 9923 1.1 mrg tree ltype = vectype; 9924 1.1 mrg tree new_vtype = NULL_TREE; 9925 1.1 mrg unsigned HOST_WIDE_INT gap 9926 1.1 mrg = DR_GROUP_GAP (first_stmt_info); 9927 1.1 mrg unsigned int vect_align 9928 1.1 mrg = vect_known_alignment_in_bytes (first_dr_info, 9929 1.1 mrg vectype); 9930 1.1 mrg unsigned int scalar_dr_size 9931 1.1 mrg = vect_get_scalar_dr_size (first_dr_info); 9932 1.1 mrg /* If there's no peeling for gaps but we have a gap 9933 1.1 mrg with slp loads then load the lower half of the 9934 1.1 mrg vector only. See get_group_load_store_type for 9935 1.1 mrg when we apply this optimization. */ 9936 1.1 mrg if (slp 9937 1.1 mrg && loop_vinfo 9938 1.1 mrg && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) 9939 1.1 mrg && gap != 0 9940 1.1 mrg && known_eq (nunits, (group_size - gap) * 2) 9941 1.1 mrg && known_eq (nunits, group_size) 9942 1.1 mrg && gap >= (vect_align / scalar_dr_size)) 9943 1.1 mrg { 9944 1.1 mrg tree half_vtype; 9945 1.1 mrg new_vtype 9946 1.1 mrg = vector_vector_composition_type (vectype, 2, 9947 1.1 mrg &half_vtype); 9948 1.1 mrg if (new_vtype != NULL_TREE) 9949 1.1 mrg ltype = half_vtype; 9950 1.1 mrg } 9951 1.1 mrg tree offset 9952 1.1 mrg = (dataref_offset ? 
dataref_offset 9953 1.1 mrg : build_int_cst (ref_type, 0)); 9954 1.1 mrg if (ltype != vectype 9955 1.1 mrg && memory_access_type == VMAT_CONTIGUOUS_REVERSE) 9956 1.1 mrg { 9957 1.1 mrg unsigned HOST_WIDE_INT gap_offset 9958 1.1 mrg = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type)); 9959 1.1 mrg tree gapcst = build_int_cst (ref_type, gap_offset); 9960 1.1 mrg offset = size_binop (PLUS_EXPR, offset, gapcst); 9961 1.1 mrg } 9962 1.1 mrg data_ref 9963 1.1 mrg = fold_build2 (MEM_REF, ltype, dataref_ptr, offset); 9964 1.1 mrg if (alignment_support_scheme == dr_aligned) 9965 1.1 mrg ; 9966 1.1 mrg else 9967 1.1 mrg TREE_TYPE (data_ref) 9968 1.1 mrg = build_aligned_type (TREE_TYPE (data_ref), 9969 1.1 mrg align * BITS_PER_UNIT); 9970 1.1 mrg if (ltype != vectype) 9971 1.1 mrg { 9972 1.1 mrg vect_copy_ref_info (data_ref, 9973 1.1 mrg DR_REF (first_dr_info->dr)); 9974 1.1 mrg tree tem = make_ssa_name (ltype); 9975 1.1 mrg new_stmt = gimple_build_assign (tem, data_ref); 9976 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, 9977 1.1 mrg new_stmt, gsi); 9978 1.1 mrg data_ref = NULL; 9979 1.1 mrg vec<constructor_elt, va_gc> *v; 9980 1.1 mrg vec_alloc (v, 2); 9981 1.1 mrg if (memory_access_type == VMAT_CONTIGUOUS_REVERSE) 9982 1.1 mrg { 9983 1.1 mrg CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, 9984 1.1 mrg build_zero_cst (ltype)); 9985 1.1 mrg CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem); 9986 1.1 mrg } 9987 1.1 mrg else 9988 1.1 mrg { 9989 1.1 mrg CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem); 9990 1.1 mrg CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, 9991 1.1 mrg build_zero_cst (ltype)); 9992 1.1 mrg } 9993 1.1 mrg gcc_assert (new_vtype != NULL_TREE); 9994 1.1 mrg if (new_vtype == vectype) 9995 1.1 mrg new_stmt = gimple_build_assign ( 9996 1.1 mrg vec_dest, build_constructor (vectype, v)); 9997 1.1 mrg else 9998 1.1 mrg { 9999 1.1 mrg tree new_vname = make_ssa_name (new_vtype); 10000 1.1 mrg new_stmt = gimple_build_assign ( 10001 1.1 mrg new_vname, build_constructor (new_vtype, v)); 10002 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, 10003 1.1 mrg new_stmt, gsi); 10004 1.1 mrg new_stmt = gimple_build_assign ( 10005 1.1 mrg vec_dest, build1 (VIEW_CONVERT_EXPR, vectype, 10006 1.1 mrg new_vname)); 10007 1.1 mrg } 10008 1.1 mrg } 10009 1.1 mrg } 10010 1.1 mrg break; 10011 1.1 mrg } 10012 1.1 mrg case dr_explicit_realign: 10013 1.1 mrg { 10014 1.1 mrg tree ptr, bump; 10015 1.1 mrg 10016 1.1 mrg tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype)); 10017 1.1 mrg 10018 1.1 mrg if (compute_in_loop) 10019 1.1 mrg msq = vect_setup_realignment (vinfo, first_stmt_info, gsi, 10020 1.1 mrg &realignment_token, 10021 1.1 mrg dr_explicit_realign, 10022 1.1 mrg dataref_ptr, NULL); 10023 1.1 mrg 10024 1.1 mrg if (TREE_CODE (dataref_ptr) == SSA_NAME) 10025 1.1 mrg ptr = copy_ssa_name (dataref_ptr); 10026 1.1 mrg else 10027 1.1 mrg ptr = make_ssa_name (TREE_TYPE (dataref_ptr)); 10028 1.1 mrg // For explicit realign the target alignment should be 10029 1.1 mrg // known at compile time. 
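			    // An illustrative sketch of the code generated
			    // for this scheme (SSA names made up):
			    //   ptr_1 = dataref_ptr & -align;
			    //   msq_2 = *ptr_1;                  // first load
			    //   ptr_3 = (dataref_ptr
			    //            + vs * elem_size - 1) & -align;
			    //   lsq_4 = *ptr_3;                  // second load
			    // REALIGN_LOAD later combines msq/lsq using the
			    // realignment token.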
10030 1.1 mrg unsigned HOST_WIDE_INT align = 10031 1.1 mrg DR_TARGET_ALIGNMENT (first_dr_info).to_constant (); 10032 1.1 mrg new_stmt = gimple_build_assign 10033 1.1 mrg (ptr, BIT_AND_EXPR, dataref_ptr, 10034 1.1 mrg build_int_cst 10035 1.1 mrg (TREE_TYPE (dataref_ptr), 10036 1.1 mrg -(HOST_WIDE_INT) align)); 10037 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, 10038 1.1 mrg new_stmt, gsi); 10039 1.1 mrg data_ref 10040 1.1 mrg = build2 (MEM_REF, vectype, ptr, 10041 1.1 mrg build_int_cst (ref_type, 0)); 10042 1.1 mrg vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr)); 10043 1.1 mrg vec_dest = vect_create_destination_var (scalar_dest, 10044 1.1 mrg vectype); 10045 1.1 mrg new_stmt = gimple_build_assign (vec_dest, data_ref); 10046 1.1 mrg new_temp = make_ssa_name (vec_dest, new_stmt); 10047 1.1 mrg gimple_assign_set_lhs (new_stmt, new_temp); 10048 1.1 mrg gimple_move_vops (new_stmt, stmt_info->stmt); 10049 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, 10050 1.1 mrg new_stmt, gsi); 10051 1.1 mrg msq = new_temp; 10052 1.1 mrg 10053 1.1 mrg bump = size_binop (MULT_EXPR, vs, 10054 1.1 mrg TYPE_SIZE_UNIT (elem_type)); 10055 1.1 mrg bump = size_binop (MINUS_EXPR, bump, size_one_node); 10056 1.1 mrg ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi, 10057 1.1 mrg stmt_info, bump); 10058 1.1 mrg new_stmt = gimple_build_assign 10059 1.1 mrg (NULL_TREE, BIT_AND_EXPR, ptr, 10060 1.1 mrg build_int_cst 10061 1.1 mrg (TREE_TYPE (ptr), -(HOST_WIDE_INT) align)); 10062 1.1 mrg ptr = copy_ssa_name (ptr, new_stmt); 10063 1.1 mrg gimple_assign_set_lhs (new_stmt, ptr); 10064 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, 10065 1.1 mrg new_stmt, gsi); 10066 1.1 mrg data_ref 10067 1.1 mrg = build2 (MEM_REF, vectype, ptr, 10068 1.1 mrg build_int_cst (ref_type, 0)); 10069 1.1 mrg break; 10070 1.1 mrg } 10071 1.1 mrg case dr_explicit_realign_optimized: 10072 1.1 mrg { 10073 1.1 mrg if (TREE_CODE (dataref_ptr) == SSA_NAME) 10074 1.1 mrg new_temp = copy_ssa_name (dataref_ptr); 10075 1.1 mrg else 10076 1.1 mrg new_temp = make_ssa_name (TREE_TYPE (dataref_ptr)); 10077 1.1 mrg // We should only be doing this if we know the target 10078 1.1 mrg // alignment at compile time. 10079 1.1 mrg unsigned HOST_WIDE_INT align = 10080 1.1 mrg DR_TARGET_ALIGNMENT (first_dr_info).to_constant (); 10081 1.1 mrg new_stmt = gimple_build_assign 10082 1.1 mrg (new_temp, BIT_AND_EXPR, dataref_ptr, 10083 1.1 mrg build_int_cst (TREE_TYPE (dataref_ptr), 10084 1.1 mrg -(HOST_WIDE_INT) align)); 10085 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, 10086 1.1 mrg new_stmt, gsi); 10087 1.1 mrg data_ref 10088 1.1 mrg = build2 (MEM_REF, vectype, new_temp, 10089 1.1 mrg build_int_cst (ref_type, 0)); 10090 1.1 mrg break; 10091 1.1 mrg } 10092 1.1 mrg default: 10093 1.1 mrg gcc_unreachable (); 10094 1.1 mrg } 10095 1.1 mrg vec_dest = vect_create_destination_var (scalar_dest, vectype); 10096 1.1 mrg /* DATA_REF is null if we've already built the statement. */ 10097 1.1 mrg if (data_ref) 10098 1.1 mrg { 10099 1.1 mrg vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr)); 10100 1.1 mrg new_stmt = gimple_build_assign (vec_dest, data_ref); 10101 1.1 mrg } 10102 1.1 mrg new_temp = make_ssa_name (vec_dest, new_stmt); 10103 1.1 mrg gimple_set_lhs (new_stmt, new_temp); 10104 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 10105 1.1 mrg 10106 1.1 mrg /* 3. Handle explicit realignment if necessary/supported. 
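	 MSQ is the vector for the first aligned address (for the
	 optimized scheme it is carried around by a PHI), LSQ the one
	 just loaded above; the realignment token (or the data pointer,
	 if no token was computed) tells the target how to combine the
	 two.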
10107 1.1 mrg 	 Create in loop:
10108 1.1 mrg 	   vec_dest = realign_load (msq, lsq, realignment_token)  */
10109 1.1 mrg 	      if (alignment_support_scheme == dr_explicit_realign_optimized
10110 1.1 mrg 		  || alignment_support_scheme == dr_explicit_realign)
10111 1.1 mrg 		{
10112 1.1 mrg 		  lsq = gimple_assign_lhs (new_stmt);
10113 1.1 mrg 		  if (!realignment_token)
10114 1.1 mrg 		    realignment_token = dataref_ptr;
10115 1.1 mrg 		  vec_dest = vect_create_destination_var (scalar_dest, vectype);
10116 1.1 mrg 		  new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
10117 1.1 mrg 						  msq, lsq, realignment_token);
10118 1.1 mrg 		  new_temp = make_ssa_name (vec_dest, new_stmt);
10119 1.1 mrg 		  gimple_assign_set_lhs (new_stmt, new_temp);
10120 1.1 mrg 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10121 1.1 mrg 
10122 1.1 mrg 		  if (alignment_support_scheme == dr_explicit_realign_optimized)
10123 1.1 mrg 		    {
10124 1.1 mrg 		      gcc_assert (phi);
10125 1.1 mrg 		      if (i == vec_num - 1 && j == ncopies - 1)
10126 1.1 mrg 			add_phi_arg (phi, lsq,
10127 1.1 mrg 				     loop_latch_edge (containing_loop),
10128 1.1 mrg 				     UNKNOWN_LOCATION);
10129 1.1 mrg 		      msq = lsq;
10130 1.1 mrg 		    }
10131 1.1 mrg 		}
10132 1.1 mrg 
10133 1.1 mrg 	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
10134 1.1 mrg 		{
10135 1.1 mrg 		  tree perm_mask = perm_mask_for_reverse (vectype);
10136 1.1 mrg 		  new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
10137 1.1 mrg 						   perm_mask, stmt_info, gsi);
10138 1.1 mrg 		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
10139 1.1 mrg 		}
10140 1.1 mrg 
10141 1.1 mrg 	      /* Collect vector loads and later create their permutation in
10142 1.1 mrg 		 vect_transform_grouped_load ().  */
10143 1.1 mrg 	      if (grouped_load || slp_perm)
10144 1.1 mrg 		dr_chain.quick_push (new_temp);
10145 1.1 mrg 
10146 1.1 mrg 	      /* Store vector loads in the corresponding SLP_NODE.  */
10147 1.1 mrg 	      if (slp && !slp_perm)
10148 1.1 mrg 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10149 1.1 mrg 
10150 1.1 mrg 	      /* With SLP permutation we load the gaps as well; without it
10151 1.1 mrg 		 we need to skip the gaps once we have fully loaded all
10152 1.1 mrg 		 elements.  group_gap_adj is DR_GROUP_SIZE here.  */
10153 1.1 mrg 	      group_elt += nunits;
10154 1.1 mrg 	      if (maybe_ne (group_gap_adj, 0U)
10155 1.1 mrg 		  && !slp_perm
10156 1.1 mrg 		  && known_eq (group_elt, group_size - group_gap_adj))
10157 1.1 mrg 		{
10158 1.1 mrg 		  poly_wide_int bump_val
10159 1.1 mrg 		    = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
10160 1.1 mrg 		       * group_gap_adj);
10161 1.1 mrg 		  if (tree_int_cst_sgn
10162 1.1 mrg 			(vect_dr_behavior (vinfo, dr_info)->step) == -1)
10163 1.1 mrg 		    bump_val = -bump_val;
10164 1.1 mrg 		  tree bump = wide_int_to_tree (sizetype, bump_val);
10165 1.1 mrg 		  dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10166 1.1 mrg 						 gsi, stmt_info, bump);
10167 1.1 mrg 		  group_elt = 0;
10168 1.1 mrg 		}
10169 1.1 mrg 	    }
10170 1.1 mrg 	  /* Bump the vector pointer to account for a gap or for excess
10171 1.1 mrg 	     elements loaded for a permuted SLP load.  */
10172 1.1 mrg 	  if (maybe_ne (group_gap_adj, 0U) && slp_perm)
10173 1.1 mrg 	    {
10174 1.1 mrg 	      poly_wide_int bump_val
10175 1.1 mrg 		= (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
10176 1.1 mrg 		   * group_gap_adj);
10177 1.1 mrg 	      if (tree_int_cst_sgn
10178 1.1 mrg 		    (vect_dr_behavior (vinfo, dr_info)->step) == -1)
10179 1.1 mrg 		bump_val = -bump_val;
10180 1.1 mrg 	      tree bump = wide_int_to_tree (sizetype, bump_val);
10181 1.1 mrg 	      dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10182 1.1 mrg 					     stmt_info, bump);
10183 1.1 mrg 	    }
10184 1.1 mrg 	}
10185 1.1 mrg 
10186 1.1 mrg       if (slp && !slp_perm)
10187 1.1 mrg 	continue;
10188 1.1 mrg 
10189 1.1 mrg       if (slp_perm)
10190 1.1 mrg 	{
10191 1.1 mrg 	  unsigned n_perms;
10192 1.1 mrg 	  /* For SLP we know we've seen all possible uses of dr_chain so
10193 1.1 mrg 	     direct vect_transform_slp_perm_load to DCE the unused parts.
10194 1.1 mrg 	     ??? This is a hack to prevent compile-time issues as seen
10195 1.1 mrg 	     in PR101120 and friends.  */
10196 1.1 mrg 	  bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
10197 1.1 mrg 						  gsi, vf, false, &n_perms,
10198 1.1 mrg 						  nullptr, true);
10199 1.1 mrg 	  gcc_assert (ok);
10200 1.1 mrg 	}
10201 1.1 mrg       else
10202 1.1 mrg 	{
10203 1.1 mrg 	  if (grouped_load)
10204 1.1 mrg 	    {
10205 1.1 mrg 	      if (memory_access_type != VMAT_LOAD_STORE_LANES)
10206 1.1 mrg 		vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
10207 1.1 mrg 					     group_size, gsi);
10208 1.1 mrg 	      *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10209 1.1 mrg 	    }
10210 1.1 mrg 	  else
10211 1.1 mrg 	    {
10212 1.1 mrg 	      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10213 1.1 mrg 	    }
10214 1.1 mrg 	}
10215 1.1 mrg       dr_chain.release ();
10216 1.1 mrg     }
10217 1.1 mrg   if (!slp)
10218 1.1 mrg     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10219 1.1 mrg 
10220 1.1 mrg   return true;
10221 1.1 mrg }
10222 1.1 mrg 
10223 1.1 mrg /* Function vect_is_simple_cond.
10224 1.1 mrg 
10225 1.1 mrg    Input:
10226 1.1 mrg    VINFO - the vec_info of the statement that is being checked.
10227 1.1 mrg    COND - the condition that is checked for simple use.
10228 1.1 mrg 
10229 1.1 mrg    Output:
10230 1.1 mrg    *COMP_VECTYPE - the vector type for the comparison.
10231 1.1 mrg    *DTS - the def types for the arguments of the comparison.
10232 1.1 mrg 
10233 1.1 mrg    Returns whether a COND can be vectorized.  Checks whether
10234 1.1 mrg    condition operands are supportable using vect_is_simple_use.  */
10235 1.1 mrg 
10236 1.1 mrg static bool
10237 1.1 mrg vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
10238 1.1 mrg 		     slp_tree slp_node, tree *comp_vectype,
10239 1.1 mrg 		     enum vect_def_type *dts, tree vectype)
10240 1.1 mrg {
10241 1.1 mrg   tree lhs, rhs;
10242 1.1 mrg   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10243 1.1 mrg   slp_tree slp_op;
10244 1.1 mrg 
10245 1.1 mrg   /* Mask case.
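     That is, COND is already a boolean SSA_NAME (for instance one
     produced by a pattern comparison) rather than a comparison tree;
     it can be used as a mask directly, provided its definition has a
     vector boolean type.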
*/ 10246 1.1 mrg if (TREE_CODE (cond) == SSA_NAME 10247 1.1 mrg && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond))) 10248 1.1 mrg { 10249 1.1 mrg if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond, 10250 1.1 mrg &slp_op, &dts[0], comp_vectype) 10251 1.1 mrg || !*comp_vectype 10252 1.1 mrg || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype)) 10253 1.1 mrg return false; 10254 1.1 mrg return true; 10255 1.1 mrg } 10256 1.1 mrg 10257 1.1 mrg if (!COMPARISON_CLASS_P (cond)) 10258 1.1 mrg return false; 10259 1.1 mrg 10260 1.1 mrg lhs = TREE_OPERAND (cond, 0); 10261 1.1 mrg rhs = TREE_OPERAND (cond, 1); 10262 1.1 mrg 10263 1.1 mrg if (TREE_CODE (lhs) == SSA_NAME) 10264 1.1 mrg { 10265 1.1 mrg if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, 10266 1.1 mrg &lhs, &slp_op, &dts[0], &vectype1)) 10267 1.1 mrg return false; 10268 1.1 mrg } 10269 1.1 mrg else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST 10270 1.1 mrg || TREE_CODE (lhs) == FIXED_CST) 10271 1.1 mrg dts[0] = vect_constant_def; 10272 1.1 mrg else 10273 1.1 mrg return false; 10274 1.1 mrg 10275 1.1 mrg if (TREE_CODE (rhs) == SSA_NAME) 10276 1.1 mrg { 10277 1.1 mrg if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, 10278 1.1 mrg &rhs, &slp_op, &dts[1], &vectype2)) 10279 1.1 mrg return false; 10280 1.1 mrg } 10281 1.1 mrg else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST 10282 1.1 mrg || TREE_CODE (rhs) == FIXED_CST) 10283 1.1 mrg dts[1] = vect_constant_def; 10284 1.1 mrg else 10285 1.1 mrg return false; 10286 1.1 mrg 10287 1.1 mrg if (vectype1 && vectype2 10288 1.1 mrg && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1), 10289 1.1 mrg TYPE_VECTOR_SUBPARTS (vectype2))) 10290 1.1 mrg return false; 10291 1.1 mrg 10292 1.1 mrg *comp_vectype = vectype1 ? vectype1 : vectype2; 10293 1.1 mrg /* Invariant comparison. */ 10294 1.1 mrg if (! *comp_vectype) 10295 1.1 mrg { 10296 1.1 mrg tree scalar_type = TREE_TYPE (lhs); 10297 1.1 mrg if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)) 10298 1.1 mrg *comp_vectype = truth_type_for (vectype); 10299 1.1 mrg else 10300 1.1 mrg { 10301 1.1 mrg /* If we can widen the comparison to match vectype do so. */ 10302 1.1 mrg if (INTEGRAL_TYPE_P (scalar_type) 10303 1.1 mrg && !slp_node 10304 1.1 mrg && tree_int_cst_lt (TYPE_SIZE (scalar_type), 10305 1.1 mrg TYPE_SIZE (TREE_TYPE (vectype)))) 10306 1.1 mrg scalar_type = build_nonstandard_integer_type 10307 1.1 mrg (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type)); 10308 1.1 mrg *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type, 10309 1.1 mrg slp_node); 10310 1.1 mrg } 10311 1.1 mrg } 10312 1.1 mrg 10313 1.1 mrg return true; 10314 1.1 mrg } 10315 1.1 mrg 10316 1.1 mrg /* vectorizable_condition. 10317 1.1 mrg 10318 1.1 mrg Check if STMT_INFO is conditional modify expression that can be vectorized. 10319 1.1 mrg If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized 10320 1.1 mrg stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it 10321 1.1 mrg at GSI. 10322 1.1 mrg 10323 1.1 mrg When STMT_INFO is vectorized as a nested cycle, for_reduction is true. 10324 1.1 mrg 10325 1.1 mrg Return true if STMT_INFO is vectorizable in this way. 
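   As an illustrative sketch (made-up names), a statement
     x = a < b ? c : d;
   is vectorized as
     mask' = a' < b';
     x' = VEC_COND_EXPR <mask', c', d'>;
   where the primed names denote the corresponding vector defs.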
*/ 10326 1.1 mrg 10327 1.1 mrg static bool 10328 1.1 mrg vectorizable_condition (vec_info *vinfo, 10329 1.1 mrg stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, 10330 1.1 mrg gimple **vec_stmt, 10331 1.1 mrg slp_tree slp_node, stmt_vector_for_cost *cost_vec) 10332 1.1 mrg { 10333 1.1 mrg tree scalar_dest = NULL_TREE; 10334 1.1 mrg tree vec_dest = NULL_TREE; 10335 1.1 mrg tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE; 10336 1.1 mrg tree then_clause, else_clause; 10337 1.1 mrg tree comp_vectype = NULL_TREE; 10338 1.1 mrg tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE; 10339 1.1 mrg tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE; 10340 1.1 mrg tree vec_compare; 10341 1.1 mrg tree new_temp; 10342 1.1 mrg loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); 10343 1.1 mrg enum vect_def_type dts[4] 10344 1.1 mrg = {vect_unknown_def_type, vect_unknown_def_type, 10345 1.1 mrg vect_unknown_def_type, vect_unknown_def_type}; 10346 1.1 mrg int ndts = 4; 10347 1.1 mrg int ncopies; 10348 1.1 mrg int vec_num; 10349 1.1 mrg enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR; 10350 1.1 mrg int i; 10351 1.1 mrg bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo); 10352 1.1 mrg vec<tree> vec_oprnds0 = vNULL; 10353 1.1 mrg vec<tree> vec_oprnds1 = vNULL; 10354 1.1 mrg vec<tree> vec_oprnds2 = vNULL; 10355 1.1 mrg vec<tree> vec_oprnds3 = vNULL; 10356 1.1 mrg tree vec_cmp_type; 10357 1.1 mrg bool masked = false; 10358 1.1 mrg 10359 1.1 mrg if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 10360 1.1 mrg return false; 10361 1.1 mrg 10362 1.1 mrg /* Is vectorizable conditional operation? */ 10363 1.1 mrg gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt); 10364 1.1 mrg if (!stmt) 10365 1.1 mrg return false; 10366 1.1 mrg 10367 1.1 mrg code = gimple_assign_rhs_code (stmt); 10368 1.1 mrg if (code != COND_EXPR) 10369 1.1 mrg return false; 10370 1.1 mrg 10371 1.1 mrg stmt_vec_info reduc_info = NULL; 10372 1.1 mrg int reduc_index = -1; 10373 1.1 mrg vect_reduction_type reduction_type = TREE_CODE_REDUCTION; 10374 1.1 mrg bool for_reduction 10375 1.1 mrg = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL; 10376 1.1 mrg if (for_reduction) 10377 1.1 mrg { 10378 1.1 mrg if (slp_node) 10379 1.1 mrg return false; 10380 1.1 mrg reduc_info = info_for_reduction (vinfo, stmt_info); 10381 1.1 mrg reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info); 10382 1.1 mrg reduc_index = STMT_VINFO_REDUC_IDX (stmt_info); 10383 1.1 mrg gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION 10384 1.1 mrg || reduc_index != -1); 10385 1.1 mrg } 10386 1.1 mrg else 10387 1.1 mrg { 10388 1.1 mrg if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) 10389 1.1 mrg return false; 10390 1.1 mrg } 10391 1.1 mrg 10392 1.1 mrg tree vectype = STMT_VINFO_VECTYPE (stmt_info); 10393 1.1 mrg tree vectype1 = NULL_TREE, vectype2 = NULL_TREE; 10394 1.1 mrg 10395 1.1 mrg if (slp_node) 10396 1.1 mrg { 10397 1.1 mrg ncopies = 1; 10398 1.1 mrg vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); 10399 1.1 mrg } 10400 1.1 mrg else 10401 1.1 mrg { 10402 1.1 mrg ncopies = vect_get_num_copies (loop_vinfo, vectype); 10403 1.1 mrg vec_num = 1; 10404 1.1 mrg } 10405 1.1 mrg 10406 1.1 mrg gcc_assert (ncopies >= 1); 10407 1.1 mrg if (for_reduction && ncopies > 1) 10408 1.1 mrg return false; /* FORNOW */ 10409 1.1 mrg 10410 1.1 mrg cond_expr = gimple_assign_rhs1 (stmt); 10411 1.1 mrg 10412 1.1 mrg if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node, 10413 1.1 mrg &comp_vectype, 
&dts[0], vectype) 10414 1.1 mrg || !comp_vectype) 10415 1.1 mrg return false; 10416 1.1 mrg 10417 1.1 mrg unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0; 10418 1.1 mrg slp_tree then_slp_node, else_slp_node; 10419 1.1 mrg if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust, 10420 1.1 mrg &then_clause, &then_slp_node, &dts[2], &vectype1)) 10421 1.1 mrg return false; 10422 1.1 mrg if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust, 10423 1.1 mrg &else_clause, &else_slp_node, &dts[3], &vectype2)) 10424 1.1 mrg return false; 10425 1.1 mrg 10426 1.1 mrg if (vectype1 && !useless_type_conversion_p (vectype, vectype1)) 10427 1.1 mrg return false; 10428 1.1 mrg 10429 1.1 mrg if (vectype2 && !useless_type_conversion_p (vectype, vectype2)) 10430 1.1 mrg return false; 10431 1.1 mrg 10432 1.1 mrg masked = !COMPARISON_CLASS_P (cond_expr); 10433 1.1 mrg vec_cmp_type = truth_type_for (comp_vectype); 10434 1.1 mrg 10435 1.1 mrg if (vec_cmp_type == NULL_TREE) 10436 1.1 mrg return false; 10437 1.1 mrg 10438 1.1 mrg cond_code = TREE_CODE (cond_expr); 10439 1.1 mrg if (!masked) 10440 1.1 mrg { 10441 1.1 mrg cond_expr0 = TREE_OPERAND (cond_expr, 0); 10442 1.1 mrg cond_expr1 = TREE_OPERAND (cond_expr, 1); 10443 1.1 mrg } 10444 1.1 mrg 10445 1.1 mrg /* For conditional reductions, the "then" value needs to be the candidate 10446 1.1 mrg value calculated by this iteration while the "else" value needs to be 10447 1.1 mrg the result carried over from previous iterations. If the COND_EXPR 10448 1.1 mrg is the other way around, we need to swap it. */ 10449 1.1 mrg bool must_invert_cmp_result = false; 10450 1.1 mrg if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1) 10451 1.1 mrg { 10452 1.1 mrg if (masked) 10453 1.1 mrg must_invert_cmp_result = true; 10454 1.1 mrg else 10455 1.1 mrg { 10456 1.1 mrg bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0)); 10457 1.1 mrg tree_code new_code = invert_tree_comparison (cond_code, honor_nans); 10458 1.1 mrg if (new_code == ERROR_MARK) 10459 1.1 mrg must_invert_cmp_result = true; 10460 1.1 mrg else 10461 1.1 mrg { 10462 1.1 mrg cond_code = new_code; 10463 1.1 mrg /* Make sure we don't accidentally use the old condition. */ 10464 1.1 mrg cond_expr = NULL_TREE; 10465 1.1 mrg } 10466 1.1 mrg } 10467 1.1 mrg std::swap (then_clause, else_clause); 10468 1.1 mrg } 10469 1.1 mrg 10470 1.1 mrg if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype)) 10471 1.1 mrg { 10472 1.1 mrg /* Boolean values may have another representation in vectors 10473 1.1 mrg and therefore we prefer bit operations over comparison for 10474 1.1 mrg them (which also works for scalar masks). We store opcodes 10475 1.1 mrg to use in bitop1 and bitop2. Statement is vectorized as 10476 1.1 mrg BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2) 10477 1.1 mrg depending on bitop1 and bitop2 arity. 
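     For example (mask operands assumed): GT becomes rhs1 & ~rhs2 and
     EQ becomes ~(rhs1 ^ rhs2), while LT and LE first swap the operands
     and then reuse the GT and GE forms.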
*/ 10478 1.1 mrg switch (cond_code) 10479 1.1 mrg { 10480 1.1 mrg case GT_EXPR: 10481 1.1 mrg bitop1 = BIT_NOT_EXPR; 10482 1.1 mrg bitop2 = BIT_AND_EXPR; 10483 1.1 mrg break; 10484 1.1 mrg case GE_EXPR: 10485 1.1 mrg bitop1 = BIT_NOT_EXPR; 10486 1.1 mrg bitop2 = BIT_IOR_EXPR; 10487 1.1 mrg break; 10488 1.1 mrg case LT_EXPR: 10489 1.1 mrg bitop1 = BIT_NOT_EXPR; 10490 1.1 mrg bitop2 = BIT_AND_EXPR; 10491 1.1 mrg std::swap (cond_expr0, cond_expr1); 10492 1.1 mrg break; 10493 1.1 mrg case LE_EXPR: 10494 1.1 mrg bitop1 = BIT_NOT_EXPR; 10495 1.1 mrg bitop2 = BIT_IOR_EXPR; 10496 1.1 mrg std::swap (cond_expr0, cond_expr1); 10497 1.1 mrg break; 10498 1.1 mrg case NE_EXPR: 10499 1.1 mrg bitop1 = BIT_XOR_EXPR; 10500 1.1 mrg break; 10501 1.1 mrg case EQ_EXPR: 10502 1.1 mrg bitop1 = BIT_XOR_EXPR; 10503 1.1 mrg bitop2 = BIT_NOT_EXPR; 10504 1.1 mrg break; 10505 1.1 mrg default: 10506 1.1 mrg return false; 10507 1.1 mrg } 10508 1.1 mrg cond_code = SSA_NAME; 10509 1.1 mrg } 10510 1.1 mrg 10511 1.1 mrg if (TREE_CODE_CLASS (cond_code) == tcc_comparison 10512 1.1 mrg && reduction_type == EXTRACT_LAST_REDUCTION 10513 1.1 mrg && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code)) 10514 1.1 mrg { 10515 1.1 mrg if (dump_enabled_p ()) 10516 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 10517 1.1 mrg "reduction comparison operation not supported.\n"); 10518 1.1 mrg return false; 10519 1.1 mrg } 10520 1.1 mrg 10521 1.1 mrg if (!vec_stmt) 10522 1.1 mrg { 10523 1.1 mrg if (bitop1 != NOP_EXPR) 10524 1.1 mrg { 10525 1.1 mrg machine_mode mode = TYPE_MODE (comp_vectype); 10526 1.1 mrg optab optab; 10527 1.1 mrg 10528 1.1 mrg optab = optab_for_tree_code (bitop1, comp_vectype, optab_default); 10529 1.1 mrg if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing) 10530 1.1 mrg return false; 10531 1.1 mrg 10532 1.1 mrg if (bitop2 != NOP_EXPR) 10533 1.1 mrg { 10534 1.1 mrg optab = optab_for_tree_code (bitop2, comp_vectype, 10535 1.1 mrg optab_default); 10536 1.1 mrg if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing) 10537 1.1 mrg return false; 10538 1.1 mrg } 10539 1.1 mrg } 10540 1.1 mrg 10541 1.1 mrg vect_cost_for_stmt kind = vector_stmt; 10542 1.1 mrg if (reduction_type == EXTRACT_LAST_REDUCTION) 10543 1.1 mrg /* Count one reduction-like operation per vector. 
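	   (EXTRACT_LAST reductions are emitted as one .FOLD_EXTRACT_LAST
	   internal call per vector statement, so vec_to_scalar is the
	   closest available cost kind.)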
*/ 10544 1.1 mrg kind = vec_to_scalar; 10545 1.1 mrg else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code)) 10546 1.1 mrg return false; 10547 1.1 mrg 10548 1.1 mrg if (slp_node 10549 1.1 mrg && (!vect_maybe_update_slp_op_vectype 10550 1.1 mrg (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype) 10551 1.1 mrg || (op_adjust == 1 10552 1.1 mrg && !vect_maybe_update_slp_op_vectype 10553 1.1 mrg (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype)) 10554 1.1 mrg || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype) 10555 1.1 mrg || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype))) 10556 1.1 mrg { 10557 1.1 mrg if (dump_enabled_p ()) 10558 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 10559 1.1 mrg "incompatible vector types for invariants\n"); 10560 1.1 mrg return false; 10561 1.1 mrg } 10562 1.1 mrg 10563 1.1 mrg if (loop_vinfo && for_reduction 10564 1.1 mrg && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) 10565 1.1 mrg { 10566 1.1 mrg if (reduction_type == EXTRACT_LAST_REDUCTION) 10567 1.1 mrg vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo), 10568 1.1 mrg ncopies * vec_num, vectype, NULL); 10569 1.1 mrg /* Extra inactive lanes should be safe for vect_nested_cycle. */ 10570 1.1 mrg else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle) 10571 1.1 mrg { 10572 1.1 mrg if (dump_enabled_p ()) 10573 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 10574 1.1 mrg "conditional reduction prevents the use" 10575 1.1 mrg " of partial vectors.\n"); 10576 1.1 mrg LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; 10577 1.1 mrg } 10578 1.1 mrg } 10579 1.1 mrg 10580 1.1 mrg STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type; 10581 1.1 mrg vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node, 10582 1.1 mrg cost_vec, kind); 10583 1.1 mrg return true; 10584 1.1 mrg } 10585 1.1 mrg 10586 1.1 mrg /* Transform. */ 10587 1.1 mrg 10588 1.1 mrg /* Handle def. */ 10589 1.1 mrg scalar_dest = gimple_assign_lhs (stmt); 10590 1.1 mrg if (reduction_type != EXTRACT_LAST_REDUCTION) 10591 1.1 mrg vec_dest = vect_create_destination_var (scalar_dest, vectype); 10592 1.1 mrg 10593 1.1 mrg bool swap_cond_operands = false; 10594 1.1 mrg 10595 1.1 mrg /* See whether another part of the vectorized code applies a loop 10596 1.1 mrg mask to the condition, or to its inverse. */ 10597 1.1 mrg 10598 1.1 mrg vec_loop_masks *masks = NULL; 10599 1.1 mrg if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) 10600 1.1 mrg { 10601 1.1 mrg if (reduction_type == EXTRACT_LAST_REDUCTION) 10602 1.1 mrg masks = &LOOP_VINFO_MASKS (loop_vinfo); 10603 1.1 mrg else 10604 1.1 mrg { 10605 1.1 mrg scalar_cond_masked_key cond (cond_expr, ncopies); 10606 1.1 mrg if (loop_vinfo->scalar_cond_masked_set.contains (cond)) 10607 1.1 mrg masks = &LOOP_VINFO_MASKS (loop_vinfo); 10608 1.1 mrg else 10609 1.1 mrg { 10610 1.1 mrg bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0)); 10611 1.1 mrg tree_code orig_code = cond.code; 10612 1.1 mrg cond.code = invert_tree_comparison (cond.code, honor_nans); 10613 1.1 mrg if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond)) 10614 1.1 mrg { 10615 1.1 mrg masks = &LOOP_VINFO_MASKS (loop_vinfo); 10616 1.1 mrg cond_code = cond.code; 10617 1.1 mrg swap_cond_operands = true; 10618 1.1 mrg } 10619 1.1 mrg else 10620 1.1 mrg { 10621 1.1 mrg /* Try the inverse of the current mask. 
We check if the 10622 1.1 mrg inverse mask is live and if so we generate a negate of 10623 1.1 mrg the current mask such that we still honor NaNs. */ 10624 1.1 mrg cond.inverted_p = true; 10625 1.1 mrg cond.code = orig_code; 10626 1.1 mrg if (loop_vinfo->scalar_cond_masked_set.contains (cond)) 10627 1.1 mrg { 10628 1.1 mrg masks = &LOOP_VINFO_MASKS (loop_vinfo); 10629 1.1 mrg cond_code = cond.code; 10630 1.1 mrg swap_cond_operands = true; 10631 1.1 mrg must_invert_cmp_result = true; 10632 1.1 mrg } 10633 1.1 mrg } 10634 1.1 mrg } 10635 1.1 mrg } 10636 1.1 mrg } 10637 1.1 mrg 10638 1.1 mrg /* Handle cond expr. */ 10639 1.1 mrg if (masked) 10640 1.1 mrg vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, 10641 1.1 mrg cond_expr, &vec_oprnds0, comp_vectype, 10642 1.1 mrg then_clause, &vec_oprnds2, vectype, 10643 1.1 mrg reduction_type != EXTRACT_LAST_REDUCTION 10644 1.1 mrg ? else_clause : NULL, &vec_oprnds3, vectype); 10645 1.1 mrg else 10646 1.1 mrg vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, 10647 1.1 mrg cond_expr0, &vec_oprnds0, comp_vectype, 10648 1.1 mrg cond_expr1, &vec_oprnds1, comp_vectype, 10649 1.1 mrg then_clause, &vec_oprnds2, vectype, 10650 1.1 mrg reduction_type != EXTRACT_LAST_REDUCTION 10651 1.1 mrg ? else_clause : NULL, &vec_oprnds3, vectype); 10652 1.1 mrg 10653 1.1 mrg /* Arguments are ready. Create the new vector stmt. */ 10654 1.1 mrg FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs) 10655 1.1 mrg { 10656 1.1 mrg vec_then_clause = vec_oprnds2[i]; 10657 1.1 mrg if (reduction_type != EXTRACT_LAST_REDUCTION) 10658 1.1 mrg vec_else_clause = vec_oprnds3[i]; 10659 1.1 mrg 10660 1.1 mrg if (swap_cond_operands) 10661 1.1 mrg std::swap (vec_then_clause, vec_else_clause); 10662 1.1 mrg 10663 1.1 mrg if (masked) 10664 1.1 mrg vec_compare = vec_cond_lhs; 10665 1.1 mrg else 10666 1.1 mrg { 10667 1.1 mrg vec_cond_rhs = vec_oprnds1[i]; 10668 1.1 mrg if (bitop1 == NOP_EXPR) 10669 1.1 mrg { 10670 1.1 mrg gimple_seq stmts = NULL; 10671 1.1 mrg vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type, 10672 1.1 mrg vec_cond_lhs, vec_cond_rhs); 10673 1.1 mrg gsi_insert_before (gsi, stmts, GSI_SAME_STMT); 10674 1.1 mrg } 10675 1.1 mrg else 10676 1.1 mrg { 10677 1.1 mrg new_temp = make_ssa_name (vec_cmp_type); 10678 1.1 mrg gassign *new_stmt; 10679 1.1 mrg if (bitop1 == BIT_NOT_EXPR) 10680 1.1 mrg new_stmt = gimple_build_assign (new_temp, bitop1, 10681 1.1 mrg vec_cond_rhs); 10682 1.1 mrg else 10683 1.1 mrg new_stmt 10684 1.1 mrg = gimple_build_assign (new_temp, bitop1, vec_cond_lhs, 10685 1.1 mrg vec_cond_rhs); 10686 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 10687 1.1 mrg if (bitop2 == NOP_EXPR) 10688 1.1 mrg vec_compare = new_temp; 10689 1.1 mrg else if (bitop2 == BIT_NOT_EXPR) 10690 1.1 mrg { 10691 1.1 mrg /* Instead of doing ~x ? y : z do x ? z : y. */ 10692 1.1 mrg vec_compare = new_temp; 10693 1.1 mrg std::swap (vec_then_clause, vec_else_clause); 10694 1.1 mrg } 10695 1.1 mrg else 10696 1.1 mrg { 10697 1.1 mrg vec_compare = make_ssa_name (vec_cmp_type); 10698 1.1 mrg new_stmt 10699 1.1 mrg = gimple_build_assign (vec_compare, bitop2, 10700 1.1 mrg vec_cond_lhs, new_temp); 10701 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, 10702 1.1 mrg new_stmt, gsi); 10703 1.1 mrg } 10704 1.1 mrg } 10705 1.1 mrg } 10706 1.1 mrg 10707 1.1 mrg /* If we decided to apply a loop mask to the result of the vector 10708 1.1 mrg comparison, AND the comparison with the mask now. 
Later passes
10709 1.1 mrg 	 should then be able to reuse the AND results between multiple
10710 1.1 mrg 	 vector statements.
10711 1.1 mrg 
10712 1.1 mrg 	 For example:
10713 1.1 mrg 	   for (int i = 0; i < 100; ++i)
10714 1.1 mrg 	     x[i] = y[i] ? z[i] : 10;
10715 1.1 mrg 
10716 1.1 mrg 	 results in the following optimized GIMPLE:
10717 1.1 mrg 
10718 1.1 mrg 	   mask__35.8_43 = vect__4.7_41 != { 0, ... };
10719 1.1 mrg 	   vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10720 1.1 mrg 	   _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10721 1.1 mrg 	   vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10722 1.1 mrg 	   vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10723 1.1 mrg 					     vect_iftmp.11_47, { 10, ... }>;
10724 1.1 mrg 
10725 1.1 mrg 	 instead of using masked and unmasked forms of
10726 1.1 mrg 	 vec != { 0, ... } (masked in the MASK_LOAD,
10727 1.1 mrg 	 unmasked in the VEC_COND_EXPR).  */
10728 1.1 mrg 
10729 1.1 mrg 	  /* Force vec_compare to be an SSA_NAME rather than a comparison,
10730 1.1 mrg 	     in cases where that's necessary.  */
10731 1.1 mrg 
10732 1.1 mrg 	  if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10733 1.1 mrg 	    {
10734 1.1 mrg 	      if (!is_gimple_val (vec_compare))
10735 1.1 mrg 		{
10736 1.1 mrg 		  tree vec_compare_name = make_ssa_name (vec_cmp_type);
10737 1.1 mrg 		  gassign *new_stmt = gimple_build_assign (vec_compare_name,
10738 1.1 mrg 							   vec_compare);
10739 1.1 mrg 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10740 1.1 mrg 		  vec_compare = vec_compare_name;
10741 1.1 mrg 		}
10742 1.1 mrg 
10743 1.1 mrg 	      if (must_invert_cmp_result)
10744 1.1 mrg 		{
10745 1.1 mrg 		  tree vec_compare_name = make_ssa_name (vec_cmp_type);
10746 1.1 mrg 		  gassign *new_stmt = gimple_build_assign (vec_compare_name,
10747 1.1 mrg 							   BIT_NOT_EXPR,
10748 1.1 mrg 							   vec_compare);
10749 1.1 mrg 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10750 1.1 mrg 		  vec_compare = vec_compare_name;
10751 1.1 mrg 		}
10752 1.1 mrg 
10753 1.1 mrg 	      if (masks)
10754 1.1 mrg 		{
10755 1.1 mrg 		  tree loop_mask
10756 1.1 mrg 		    = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10757 1.1 mrg 					  vectype, i);
10758 1.1 mrg 		  tree tmp2 = make_ssa_name (vec_cmp_type);
10759 1.1 mrg 		  gassign *g
10760 1.1 mrg 		    = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10761 1.1 mrg 					   loop_mask);
10762 1.1 mrg 		  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10763 1.1 mrg 		  vec_compare = tmp2;
10764 1.1 mrg 		}
10765 1.1 mrg 	    }
10766 1.1 mrg 
10767 1.1 mrg 	  gimple *new_stmt;
10768 1.1 mrg 	  if (reduction_type == EXTRACT_LAST_REDUCTION)
10769 1.1 mrg 	    {
10770 1.1 mrg 	      gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10771 1.1 mrg 	      tree lhs = gimple_get_lhs (old_stmt);
10772 1.1 mrg 	      new_stmt = gimple_build_call_internal
10773 1.1 mrg 		  (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10774 1.1 mrg 		   vec_then_clause);
10775 1.1 mrg 	      gimple_call_set_lhs (new_stmt, lhs);
10776 1.1 mrg 	      SSA_NAME_DEF_STMT (lhs) = new_stmt;
10777 1.1 mrg 	      if (old_stmt == gsi_stmt (*gsi))
10778 1.1 mrg 		vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
10779 1.1 mrg 	      else
10780 1.1 mrg 		{
10781 1.1 mrg 		  /* In this case we're moving the definition to later in the
10782 1.1 mrg 		     block.  That doesn't matter because the only uses of the
10783 1.1 mrg 		     lhs are in phi statements.
*/ 10784 1.1 mrg gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt); 10785 1.1 mrg gsi_remove (&old_gsi, true); 10786 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 10787 1.1 mrg } 10788 1.1 mrg } 10789 1.1 mrg else 10790 1.1 mrg { 10791 1.1 mrg new_temp = make_ssa_name (vec_dest); 10792 1.1 mrg new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare, 10793 1.1 mrg vec_then_clause, vec_else_clause); 10794 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 10795 1.1 mrg } 10796 1.1 mrg if (slp_node) 10797 1.1 mrg SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 10798 1.1 mrg else 10799 1.1 mrg STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); 10800 1.1 mrg } 10801 1.1 mrg 10802 1.1 mrg if (!slp_node) 10803 1.1 mrg *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; 10804 1.1 mrg 10805 1.1 mrg vec_oprnds0.release (); 10806 1.1 mrg vec_oprnds1.release (); 10807 1.1 mrg vec_oprnds2.release (); 10808 1.1 mrg vec_oprnds3.release (); 10809 1.1 mrg 10810 1.1 mrg return true; 10811 1.1 mrg } 10812 1.1 mrg 10813 1.1 mrg /* vectorizable_comparison. 10814 1.1 mrg 10815 1.1 mrg Check if STMT_INFO is comparison expression that can be vectorized. 10816 1.1 mrg If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized 10817 1.1 mrg comparison, put it in VEC_STMT, and insert it at GSI. 10818 1.1 mrg 10819 1.1 mrg Return true if STMT_INFO is vectorizable in this way. */ 10820 1.1 mrg 10821 1.1 mrg static bool 10822 1.1 mrg vectorizable_comparison (vec_info *vinfo, 10823 1.1 mrg stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, 10824 1.1 mrg gimple **vec_stmt, 10825 1.1 mrg slp_tree slp_node, stmt_vector_for_cost *cost_vec) 10826 1.1 mrg { 10827 1.1 mrg tree lhs, rhs1, rhs2; 10828 1.1 mrg tree vectype1 = NULL_TREE, vectype2 = NULL_TREE; 10829 1.1 mrg tree vectype = STMT_VINFO_VECTYPE (stmt_info); 10830 1.1 mrg tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE; 10831 1.1 mrg tree new_temp; 10832 1.1 mrg loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); 10833 1.1 mrg enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type}; 10834 1.1 mrg int ndts = 2; 10835 1.1 mrg poly_uint64 nunits; 10836 1.1 mrg int ncopies; 10837 1.1 mrg enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR; 10838 1.1 mrg int i; 10839 1.1 mrg bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo); 10840 1.1 mrg vec<tree> vec_oprnds0 = vNULL; 10841 1.1 mrg vec<tree> vec_oprnds1 = vNULL; 10842 1.1 mrg tree mask_type; 10843 1.1 mrg tree mask; 10844 1.1 mrg 10845 1.1 mrg if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 10846 1.1 mrg return false; 10847 1.1 mrg 10848 1.1 mrg if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)) 10849 1.1 mrg return false; 10850 1.1 mrg 10851 1.1 mrg mask_type = vectype; 10852 1.1 mrg nunits = TYPE_VECTOR_SUBPARTS (vectype); 10853 1.1 mrg 10854 1.1 mrg if (slp_node) 10855 1.1 mrg ncopies = 1; 10856 1.1 mrg else 10857 1.1 mrg ncopies = vect_get_num_copies (loop_vinfo, vectype); 10858 1.1 mrg 10859 1.1 mrg gcc_assert (ncopies >= 1); 10860 1.1 mrg if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) 10861 1.1 mrg return false; 10862 1.1 mrg 10863 1.1 mrg gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt); 10864 1.1 mrg if (!stmt) 10865 1.1 mrg return false; 10866 1.1 mrg 10867 1.1 mrg code = gimple_assign_rhs_code (stmt); 10868 1.1 mrg 10869 1.1 mrg if (TREE_CODE_CLASS (code) != tcc_comparison) 10870 1.1 mrg return false; 10871 1.1 mrg 10872 1.1 mrg slp_tree slp_rhs1, slp_rhs2; 10873 1.1 mrg if 
(!vect_is_simple_use (vinfo, stmt_info, slp_node, 10874 1.1 mrg 0, &rhs1, &slp_rhs1, &dts[0], &vectype1)) 10875 1.1 mrg return false; 10876 1.1 mrg 10877 1.1 mrg if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 10878 1.1 mrg 1, &rhs2, &slp_rhs2, &dts[1], &vectype2)) 10879 1.1 mrg return false; 10880 1.1 mrg 10881 1.1 mrg if (vectype1 && vectype2 10882 1.1 mrg && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1), 10883 1.1 mrg TYPE_VECTOR_SUBPARTS (vectype2))) 10884 1.1 mrg return false; 10885 1.1 mrg 10886 1.1 mrg vectype = vectype1 ? vectype1 : vectype2; 10887 1.1 mrg 10888 1.1 mrg /* Invariant comparison. */ 10889 1.1 mrg if (!vectype) 10890 1.1 mrg { 10891 1.1 mrg if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))) 10892 1.1 mrg vectype = mask_type; 10893 1.1 mrg else 10894 1.1 mrg vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1), 10895 1.1 mrg slp_node); 10896 1.1 mrg if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits)) 10897 1.1 mrg return false; 10898 1.1 mrg } 10899 1.1 mrg else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype))) 10900 1.1 mrg return false; 10901 1.1 mrg 10902 1.1 mrg /* Can't compare mask and non-mask types. */ 10903 1.1 mrg if (vectype1 && vectype2 10904 1.1 mrg && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2))) 10905 1.1 mrg return false; 10906 1.1 mrg 10907 1.1 mrg /* Boolean values may have another representation in vectors 10908 1.1 mrg and therefore we prefer bit operations over comparison for 10909 1.1 mrg them (which also works for scalar masks). We store opcodes 10910 1.1 mrg to use in bitop1 and bitop2. Statement is vectorized as 10911 1.1 mrg BITOP2 (rhs1 BITOP1 rhs2) or 10912 1.1 mrg rhs1 BITOP2 (BITOP1 rhs2) 10913 1.1 mrg depending on bitop1 and bitop2 arity. */ 10914 1.1 mrg bool swap_p = false; 10915 1.1 mrg if (VECTOR_BOOLEAN_TYPE_P (vectype)) 10916 1.1 mrg { 10917 1.1 mrg if (code == GT_EXPR) 10918 1.1 mrg { 10919 1.1 mrg bitop1 = BIT_NOT_EXPR; 10920 1.1 mrg bitop2 = BIT_AND_EXPR; 10921 1.1 mrg } 10922 1.1 mrg else if (code == GE_EXPR) 10923 1.1 mrg { 10924 1.1 mrg bitop1 = BIT_NOT_EXPR; 10925 1.1 mrg bitop2 = BIT_IOR_EXPR; 10926 1.1 mrg } 10927 1.1 mrg else if (code == LT_EXPR) 10928 1.1 mrg { 10929 1.1 mrg bitop1 = BIT_NOT_EXPR; 10930 1.1 mrg bitop2 = BIT_AND_EXPR; 10931 1.1 mrg swap_p = true; 10932 1.1 mrg } 10933 1.1 mrg else if (code == LE_EXPR) 10934 1.1 mrg { 10935 1.1 mrg bitop1 = BIT_NOT_EXPR; 10936 1.1 mrg bitop2 = BIT_IOR_EXPR; 10937 1.1 mrg swap_p = true; 10938 1.1 mrg } 10939 1.1 mrg else 10940 1.1 mrg { 10941 1.1 mrg bitop1 = BIT_XOR_EXPR; 10942 1.1 mrg if (code == EQ_EXPR) 10943 1.1 mrg bitop2 = BIT_NOT_EXPR; 10944 1.1 mrg } 10945 1.1 mrg } 10946 1.1 mrg 10947 1.1 mrg if (!vec_stmt) 10948 1.1 mrg { 10949 1.1 mrg if (bitop1 == NOP_EXPR) 10950 1.1 mrg { 10951 1.1 mrg if (!expand_vec_cmp_expr_p (vectype, mask_type, code)) 10952 1.1 mrg return false; 10953 1.1 mrg } 10954 1.1 mrg else 10955 1.1 mrg { 10956 1.1 mrg machine_mode mode = TYPE_MODE (vectype); 10957 1.1 mrg optab optab; 10958 1.1 mrg 10959 1.1 mrg optab = optab_for_tree_code (bitop1, vectype, optab_default); 10960 1.1 mrg if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing) 10961 1.1 mrg return false; 10962 1.1 mrg 10963 1.1 mrg if (bitop2 != NOP_EXPR) 10964 1.1 mrg { 10965 1.1 mrg optab = optab_for_tree_code (bitop2, vectype, optab_default); 10966 1.1 mrg if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing) 10967 1.1 mrg return false; 10968 1.1 mrg } 10969 1.1 mrg } 10970 1.1 mrg 10971 1.1 mrg /* Put types 
on constant and invariant SLP children. */ 10972 1.1 mrg if (slp_node 10973 1.1 mrg && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype) 10974 1.1 mrg || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype))) 10975 1.1 mrg { 10976 1.1 mrg if (dump_enabled_p ()) 10977 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 10978 1.1 mrg "incompatible vector types for invariants\n"); 10979 1.1 mrg return false; 10980 1.1 mrg } 10981 1.1 mrg 10982 1.1 mrg STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type; 10983 1.1 mrg vect_model_simple_cost (vinfo, stmt_info, 10984 1.1 mrg ncopies * (1 + (bitop2 != NOP_EXPR)), 10985 1.1 mrg dts, ndts, slp_node, cost_vec); 10986 1.1 mrg return true; 10987 1.1 mrg } 10988 1.1 mrg 10989 1.1 mrg /* Transform. */ 10990 1.1 mrg 10991 1.1 mrg /* Handle def. */ 10992 1.1 mrg lhs = gimple_assign_lhs (stmt); 10993 1.1 mrg mask = vect_create_destination_var (lhs, mask_type); 10994 1.1 mrg 10995 1.1 mrg vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, 10996 1.1 mrg rhs1, &vec_oprnds0, vectype, 10997 1.1 mrg rhs2, &vec_oprnds1, vectype); 10998 1.1 mrg if (swap_p) 10999 1.1 mrg std::swap (vec_oprnds0, vec_oprnds1); 11000 1.1 mrg 11001 1.1 mrg /* Arguments are ready. Create the new vector stmt. */ 11002 1.1 mrg FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1) 11003 1.1 mrg { 11004 1.1 mrg gimple *new_stmt; 11005 1.1 mrg vec_rhs2 = vec_oprnds1[i]; 11006 1.1 mrg 11007 1.1 mrg new_temp = make_ssa_name (mask); 11008 1.1 mrg if (bitop1 == NOP_EXPR) 11009 1.1 mrg { 11010 1.1 mrg new_stmt = gimple_build_assign (new_temp, code, 11011 1.1 mrg vec_rhs1, vec_rhs2); 11012 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 11013 1.1 mrg } 11014 1.1 mrg else 11015 1.1 mrg { 11016 1.1 mrg if (bitop1 == BIT_NOT_EXPR) 11017 1.1 mrg new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2); 11018 1.1 mrg else 11019 1.1 mrg new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1, 11020 1.1 mrg vec_rhs2); 11021 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 11022 1.1 mrg if (bitop2 != NOP_EXPR) 11023 1.1 mrg { 11024 1.1 mrg tree res = make_ssa_name (mask); 11025 1.1 mrg if (bitop2 == BIT_NOT_EXPR) 11026 1.1 mrg new_stmt = gimple_build_assign (res, bitop2, new_temp); 11027 1.1 mrg else 11028 1.1 mrg new_stmt = gimple_build_assign (res, bitop2, vec_rhs1, 11029 1.1 mrg new_temp); 11030 1.1 mrg vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); 11031 1.1 mrg } 11032 1.1 mrg } 11033 1.1 mrg if (slp_node) 11034 1.1 mrg SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 11035 1.1 mrg else 11036 1.1 mrg STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); 11037 1.1 mrg } 11038 1.1 mrg 11039 1.1 mrg if (!slp_node) 11040 1.1 mrg *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; 11041 1.1 mrg 11042 1.1 mrg vec_oprnds0.release (); 11043 1.1 mrg vec_oprnds1.release (); 11044 1.1 mrg 11045 1.1 mrg return true; 11046 1.1 mrg } 11047 1.1 mrg 11048 1.1 mrg /* If SLP_NODE is nonnull, return true if vectorizable_live_operation 11049 1.1 mrg can handle all live statements in the node. Otherwise return true 11050 1.1 mrg if STMT_INFO is not live or if vectorizable_live_operation can handle it. 11051 1.1 mrg GSI and VEC_STMT_P are as for vectorizable_live_operation. 
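   For SLP every scalar statement of the node has to be checked, since
   any of them may be live outside the loop.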
*/ 11052 1.1 mrg 11053 1.1 mrg static bool 11054 1.1 mrg can_vectorize_live_stmts (vec_info *vinfo, 11055 1.1 mrg stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, 11056 1.1 mrg slp_tree slp_node, slp_instance slp_node_instance, 11057 1.1 mrg bool vec_stmt_p, 11058 1.1 mrg stmt_vector_for_cost *cost_vec) 11059 1.1 mrg { 11060 1.1 mrg if (slp_node) 11061 1.1 mrg { 11062 1.1 mrg stmt_vec_info slp_stmt_info; 11063 1.1 mrg unsigned int i; 11064 1.1 mrg FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info) 11065 1.1 mrg { 11066 1.1 mrg if (STMT_VINFO_LIVE_P (slp_stmt_info) 11067 1.1 mrg && !vectorizable_live_operation (vinfo, 11068 1.1 mrg slp_stmt_info, gsi, slp_node, 11069 1.1 mrg slp_node_instance, i, 11070 1.1 mrg vec_stmt_p, cost_vec)) 11071 1.1 mrg return false; 11072 1.1 mrg } 11073 1.1 mrg } 11074 1.1 mrg else if (STMT_VINFO_LIVE_P (stmt_info) 11075 1.1 mrg && !vectorizable_live_operation (vinfo, stmt_info, gsi, 11076 1.1 mrg slp_node, slp_node_instance, -1, 11077 1.1 mrg vec_stmt_p, cost_vec)) 11078 1.1 mrg return false; 11079 1.1 mrg 11080 1.1 mrg return true; 11081 1.1 mrg } 11082 1.1 mrg 11083 1.1 mrg /* Make sure the statement is vectorizable. */ 11084 1.1 mrg 11085 1.1 mrg opt_result 11086 1.1 mrg vect_analyze_stmt (vec_info *vinfo, 11087 1.1 mrg stmt_vec_info stmt_info, bool *need_to_vectorize, 11088 1.1 mrg slp_tree node, slp_instance node_instance, 11089 1.1 mrg stmt_vector_for_cost *cost_vec) 11090 1.1 mrg { 11091 1.1 mrg bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo); 11092 1.1 mrg enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info); 11093 1.1 mrg bool ok; 11094 1.1 mrg gimple_seq pattern_def_seq; 11095 1.1 mrg 11096 1.1 mrg if (dump_enabled_p ()) 11097 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G", 11098 1.1 mrg stmt_info->stmt); 11099 1.1 mrg 11100 1.1 mrg if (gimple_has_volatile_ops (stmt_info->stmt)) 11101 1.1 mrg return opt_result::failure_at (stmt_info->stmt, 11102 1.1 mrg "not vectorized:" 11103 1.1 mrg " stmt has volatile operands: %G\n", 11104 1.1 mrg stmt_info->stmt); 11105 1.1 mrg 11106 1.1 mrg if (STMT_VINFO_IN_PATTERN_P (stmt_info) 11107 1.1 mrg && node == NULL 11108 1.1 mrg && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info))) 11109 1.1 mrg { 11110 1.1 mrg gimple_stmt_iterator si; 11111 1.1 mrg 11112 1.1 mrg for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si)) 11113 1.1 mrg { 11114 1.1 mrg stmt_vec_info pattern_def_stmt_info 11115 1.1 mrg = vinfo->lookup_stmt (gsi_stmt (si)); 11116 1.1 mrg if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info) 11117 1.1 mrg || STMT_VINFO_LIVE_P (pattern_def_stmt_info)) 11118 1.1 mrg { 11119 1.1 mrg /* Analyze def stmt of STMT if it's a pattern stmt. */ 11120 1.1 mrg if (dump_enabled_p ()) 11121 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, 11122 1.1 mrg "==> examining pattern def statement: %G", 11123 1.1 mrg pattern_def_stmt_info->stmt); 11124 1.1 mrg 11125 1.1 mrg opt_result res 11126 1.1 mrg = vect_analyze_stmt (vinfo, pattern_def_stmt_info, 11127 1.1 mrg need_to_vectorize, node, node_instance, 11128 1.1 mrg cost_vec); 11129 1.1 mrg if (!res) 11130 1.1 mrg return res; 11131 1.1 mrg } 11132 1.1 mrg } 11133 1.1 mrg } 11134 1.1 mrg 11135 1.1 mrg /* Skip stmts that do not need to be vectorized. In loops this is expected 11136 1.1 mrg to include: 11137 1.1 mrg - the COND_EXPR which is the loop exit condition 11138 1.1 mrg - any LABEL_EXPRs in the loop 11139 1.1 mrg - computations that are used only for array indexing or loop control. 
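   For instance (illustrative), in
     for (i = 0; i < n; i++)
       a[i] = b[i] + 1;
   the induction increment i' = i + 1 and the exit test i' < n only
   control the loop and get no vector statements of their own.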
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     A pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal, so we do not analyze pattern stmts here; the pattern stmts
     are already part of some SLP instance.  */

  stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
    {
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
          && pattern_stmt_info
          && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
              || STMT_VINFO_LIVE_P (pattern_stmt_info)))
        {
          /* Analyze PATTERN_STMT instead of the original stmt.  */
          stmt_info = pattern_stmt_info;
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "==> examining pattern statement: %G",
                             stmt_info->stmt);
        }
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");

          return opt_result::success ();
        }
    }
  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
           && node == NULL
           && pattern_stmt_info
           && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
               || STMT_VINFO_LIVE_P (pattern_stmt_info)))
    {
      /* Analyze PATTERN_STMT too.  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "==> examining pattern statement: %G",
                         pattern_stmt_info->stmt);

      opt_result res
        = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
                             node_instance, cost_vec);
      if (!res)
        return res;
    }

  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    {
    case vect_internal_def:
      break;

    case vect_reduction_def:
    case vect_nested_cycle:
      gcc_assert (!bb_vinfo
                  && (relevance == vect_used_in_outer
                      || relevance == vect_used_in_outer_by_reduction
                      || relevance == vect_used_by_reduction
                      || relevance == vect_unused_in_scope
                      || relevance == vect_used_only_live));
      break;

    case vect_induction_def:
      gcc_assert (!bb_vinfo);
      break;

    case vect_constant_def:
    case vect_external_def:
    case vect_unknown_def_type:
    default:
      gcc_unreachable ();
    }

  tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (node)
    STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);

  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
                  || (call && gimple_call_lhs (call) == NULL_TREE));
      *need_to_vectorize = true;
    }

  if (PURE_SLP_STMT (stmt_info) && !node)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "handled only by SLP analysis\n");
      return opt_result::success ();
    }

  ok = true;
  if (!bb_vinfo
      && (STMT_VINFO_RELEVANT_P (stmt_info)
          || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    /* Prefer vectorizable_call over vectorizable_simd_clone_call so
       -mveclibabi= takes preference over library functions with
       the simd attribute.  */
    ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
          || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
                                           cost_vec)
          || vectorizable_conversion (vinfo, stmt_info,
                                      NULL, NULL, node, cost_vec)
          || vectorizable_operation (vinfo, stmt_info,
                                     NULL, NULL, node, cost_vec)
          || vectorizable_assignment (vinfo, stmt_info,
                                      NULL, NULL, node, cost_vec)
          || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
          || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
          || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
                                     node, node_instance, cost_vec)
          || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
                                     NULL, node, cost_vec)
          || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
          || vectorizable_condition (vinfo, stmt_info,
                                     NULL, NULL, node, cost_vec)
          || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
                                      cost_vec)
          || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
                                  stmt_info, NULL, node));
  else
    {
      if (bb_vinfo)
        ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
              || vectorizable_simd_clone_call (vinfo, stmt_info,
                                               NULL, NULL, node, cost_vec)
              || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
                                          cost_vec)
              || vectorizable_shift (vinfo, stmt_info,
                                     NULL, NULL, node, cost_vec)
              || vectorizable_operation (vinfo, stmt_info,
                                         NULL, NULL, node, cost_vec)
              || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
                                          cost_vec)
              || vectorizable_load (vinfo, stmt_info,
                                    NULL, NULL, node, cost_vec)
              || vectorizable_store (vinfo, stmt_info,
                                     NULL, NULL, node, cost_vec)
              || vectorizable_condition (vinfo, stmt_info,
                                         NULL, NULL, node, cost_vec)
              || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
                                          cost_vec)
              || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
    }

  if (node)
    STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;

  if (!ok)
    return opt_result::failure_at (stmt_info->stmt,
                                   "not vectorized:"
                                   " relevant stmt not supported: %G",
                                   stmt_info->stmt);

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
     need extra handling, except for vectorizable reductions.  */
  if (!bb_vinfo
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
      && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
                                    stmt_info, NULL, node, node_instance,
                                    false, cost_vec))
    return opt_result::failure_at (stmt_info->stmt,
                                   "not vectorized:"
                                   " live stmt not supported: %G",
                                   stmt_info->stmt);

  return opt_result::success ();
}


/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT_INFO, and insert it at GSI.  */

bool
vect_transform_stmt (vec_info *vinfo,
                     stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                     slp_tree slp_node, slp_instance slp_node_instance)
{
  bool is_store = false;
  gimple *vec_stmt = NULL;
  bool done;

  gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));

  tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (slp_node)
    STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case type_demotion_vec_info_type:
    case type_promotion_vec_info_type:
    case type_conversion_vec_info_type:
      done = vectorizable_conversion (vinfo, stmt_info,
                                      gsi, &vec_stmt, slp_node, NULL);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
                                     stmt_info, &vec_stmt, slp_node,
                                     NULL);
      gcc_assert (done);
      break;

    case shift_vec_info_type:
      done = vectorizable_shift (vinfo, stmt_info,
                                 gsi, &vec_stmt, slp_node, NULL);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
                                     NULL);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (vinfo, stmt_info,
                                      gsi, &vec_stmt, slp_node, NULL);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
                                NULL);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (vinfo, stmt_info,
                                 gsi, &vec_stmt, slp_node, NULL);
      gcc_assert (done);
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
        {
          /* In case of interleaving, the whole chain is vectorized when the
             last store in the chain is reached.  Store stmts before the last
             one are skipped, and their stmt_vec_info shouldn't be freed
             meanwhile.  */
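          /* For example (illustrative only): for an interleaved group of
             four stores S1..S4 with S1 == DR_GROUP_FIRST_ELEMENT, the calls
             for S1, S2 and S3 each just bump DR_GROUP_STORE_COUNT, and only
             the call that brings DR_GROUP_STORE_COUNT up to DR_GROUP_SIZE
             (here 4) reports IS_STORE, at which point the caller can remove
             the whole scalar chain at once.  */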
          stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
          if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
            is_store = true;
        }
      else
        is_store = true;
      break;

    case condition_vec_info_type:
      done = vectorizable_condition (vinfo, stmt_info,
                                     gsi, &vec_stmt, slp_node, NULL);
      gcc_assert (done);
      break;

    case comparison_vec_info_type:
      done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
                                      slp_node, NULL);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      done = vectorizable_call (vinfo, stmt_info,
                                gsi, &vec_stmt, slp_node, NULL);
      break;

    case call_simd_clone_vec_info_type:
      done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
                                           slp_node, NULL);
      break;

    case reduc_vec_info_type:
      done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
                                       gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case cycle_phi_info_type:
      done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
                                       &vec_stmt, slp_node, slp_node_instance);
      gcc_assert (done);
      break;

    case lc_phi_info_type:
      done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
                                  stmt_info, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case phi_info_type:
      done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "stmt not supported.\n");
          gcc_unreachable ();
        }
      done = true;
    }

  if (!slp_node && vec_stmt)
    gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());

  if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
    {
      /* Handle stmts whose DEF is used outside the loop-nest that is
         being vectorized.  */
      done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
                                       slp_node_instance, true, NULL);
      gcc_assert (done);
    }

  if (slp_node)
    STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;

  return is_store;
}


/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
{
  stmt_vec_info next_stmt_info = first_stmt_info;

  while (next_stmt_info)
    {
      stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
      next_stmt_info = vect_orig_stmt (next_stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      vinfo->remove_stmt (next_stmt_info);
      next_stmt_info = tmp;
    }
}

/* If NUNITS is nonzero, return a vector type that contains NUNITS
   elements of type SCALAR_TYPE, or null if the target doesn't support
   such a type.

   If NUNITS is zero, return a vector type that contains elements of
   type SCALAR_TYPE, choosing whichever vector size the target prefers.

   If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
   for this vectorization region and want to "autodetect" the best choice.
   Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
   and we want the new type to be interoperable with it.  PREVAILING_MODE
   in this case can be a scalar integer mode or a vector mode; when it
   is a vector mode, the function acts like a tree-level version of
   related_vector_mode.  */

tree
get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
                                     tree scalar_type, poly_uint64 nunits)
{
  tree orig_scalar_type = scalar_type;
  scalar_mode inner_mode;
  machine_mode simd_mode;
  tree vectype;

  if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
      && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
    return NULL_TREE;

  unsigned int nbytes = GET_MODE_SIZE (inner_mode);

  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
          || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
                                                  TYPE_UNSIGNED (scalar_type));

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
           && !INTEGRAL_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
                                                  TYPE_UNSIGNED (scalar_type));

  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)
    return NULL_TREE;

  /* If no prevailing mode was supplied, use the mode the target prefers.
     Otherwise lookup a vector mode based on the prevailing mode.  */
  if (prevailing_mode == VOIDmode)
    {
      gcc_assert (known_eq (nunits, 0U));
      simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
      if (SCALAR_INT_MODE_P (simd_mode))
        {
          /* Traditional behavior is not to take the integer mode
             literally, but simply to use it as a way of determining
             the vector size.  It is up to mode_for_vector to decide
             what the TYPE_MODE should be.

             Note that nunits == 1 is allowed in order to support single
             element vector types.  */
          if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
              || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
            return NULL_TREE;
        }
    }
  else if (SCALAR_INT_MODE_P (prevailing_mode)
           || !related_vector_mode (prevailing_mode,
                                    inner_mode, nunits).exists (&simd_mode))
    {
      /* Fall back to using mode_for_vector, mostly in the hope of being
         able to use an integer mode.  */
      if (known_eq (nunits, 0U)
          && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
        return NULL_TREE;

      if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
        return NULL_TREE;
    }

  vectype = build_vector_type_for_mode (scalar_type, simd_mode);

  /* In cases where the mode was chosen by mode_for_vector, check that
     the target actually supports the chosen mode, or that it at least
     allows the vector mode to be replaced by a like-sized integer.  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    return NULL_TREE;

  /* Re-attach the address-space qualifier if we canonicalized the scalar
     type.  */
  if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
    return build_qualified_type
             (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));

  return vectype;
}
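/* For example (target-dependent, illustrative only): with a prevailing
   mode of V16QImode and SCALAR_TYPE "int", NUNITS == 0 asks for however
   many units fill the prevailing vector size, so on a target with
   16-byte vectors the result would be "vector(4) int" with TYPE_MODE
   V4SImode; an explicit NUNITS of 2 would instead yield "vector(2) int"
   (V2SImode) if the target supports that mode, or NULL_TREE if not.  */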

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  If GROUP_SIZE is nonzero and we're performing BB
   vectorization, make sure that the number of elements in the vector
   is no bigger than GROUP_SIZE.  */

tree
get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
                             unsigned int group_size)
{
  /* For BB vectorization, we should always have a group size once we've
     constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
     are tentative requests during things like early data reference
     analysis and pattern recognition.  */
  if (is_a <bb_vec_info> (vinfo))
    gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
  else
    group_size = 0;

  tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
                                                      scalar_type);
  if (vectype && vinfo->vector_mode == VOIDmode)
    vinfo->vector_mode = TYPE_MODE (vectype);

  /* Register the natural choice of vector type, before the group size
     has been applied.  */
  if (vectype)
    vinfo->used_vector_modes.add (TYPE_MODE (vectype));

  /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
     try again with an explicit number of elements.  */
  if (vectype
      && group_size
      && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
    {
      /* Start with the biggest number of units that fits within
         GROUP_SIZE and halve it until we find a valid vector type.
         Usually either the first attempt will succeed or all will
         fail (in the latter case because GROUP_SIZE is too small
         for the target), but it's possible that a target could have
         a hole between supported vector types.

         If GROUP_SIZE is not a power of 2, this has the effect of
         trying the largest power of 2 that fits within the group,
         even though the group is not a multiple of that vector size.
         The BB vectorizer will then try to carve up the group into
         smaller pieces.  */
      unsigned int nunits = 1 << floor_log2 (group_size);
      do
        {
          vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
                                                         scalar_type, nunits);
          nunits /= 2;
        }
      while (nunits > 1 && !vectype);
    }

  return vectype;
}

/* Return the vector type corresponding to SCALAR_TYPE as supported
   by the target.  NODE, if nonnull, is the SLP tree node that will
   use the returned vector type.  */

tree
get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
{
  unsigned int group_size = 0;
  if (node)
    group_size = SLP_TREE_LANES (node);
  return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
}
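/* Worked example of the halving loop above (illustrative only): if the
   natural choice for "int" is an 8-lane vector but GROUP_SIZE is 6, the
   search starts at nunits == 4 (the largest power of 2 not exceeding
   the group) and asks for "vector(4) int"; if 4 lanes are unsupported
   it retries with 2 before giving up.  The BB vectorizer then carves
   the 6-lane group into pieces that fit the chosen vector.  */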

/* Function get_mask_type_for_scalar_type.

   Returns the mask type corresponding to a result of comparison
   of vectors of specified SCALAR_TYPE as supported by target.
   If GROUP_SIZE is nonzero and we're performing BB vectorization,
   make sure that the number of elements in the vector is no bigger
   than GROUP_SIZE.  */

tree
get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
                               unsigned int group_size)
{
  tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);

  if (!vectype)
    return NULL;

  return truth_type_for (vectype);
}

/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
    return truth_type_for (vector_type);

  poly_uint64 nunits;
  if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
                   GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
    return NULL_TREE;

  return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
                                              scalar_type, nunits);
}

/* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
   would not change the chosen vector modes.  */

bool
vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
{
  for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
       i != vinfo->used_vector_modes.end (); ++i)
    if (!VECTOR_MODE_P (*i)
        || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
      return false;
  return true;
}

/* Function vect_is_simple_use.

   Input:
   VINFO - the vect info of the loop or basic block that is being vectorized.
   OPERAND - operand in the loop or bb.
   Output:
   DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
     case OPERAND is an SSA_NAME that is defined in the vectorizable region
   DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
     the definition could be anywhere in the function
   DT - the type of definition

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */
bool
vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
                    stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
{
  if (def_stmt_info_out)
    *def_stmt_info_out = NULL;
  if (def_stmt_out)
    *def_stmt_out = NULL;
  *dt = vect_unknown_def_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_is_simple_use: operand ");
      if (TREE_CODE (operand) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (operand))
        dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
      else
        dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
    }

  if (CONSTANT_CLASS_P (operand))
    *dt = vect_constant_def;
  else if (is_gimple_min_invariant (operand))
    *dt = vect_external_def;
  else if (TREE_CODE (operand) != SSA_NAME)
    *dt = vect_unknown_def_type;
  else if (SSA_NAME_IS_DEFAULT_DEF (operand))
    *dt = vect_external_def;
  else
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
      stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
      if (!stmt_vinfo)
        *dt = vect_external_def;
      else
        {
          stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
          def_stmt = stmt_vinfo->stmt;
          *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
          if (def_stmt_info_out)
            *def_stmt_info_out = stmt_vinfo;
        }
      if (def_stmt_out)
        *def_stmt_out = def_stmt;
    }

  if (dump_enabled_p ())
    {
      dump_printf (MSG_NOTE, ", type of def: ");
      switch (*dt)
        {
        case vect_uninitialized_def:
          dump_printf (MSG_NOTE, "uninitialized\n");
          break;
        case vect_constant_def:
          dump_printf (MSG_NOTE, "constant\n");
          break;
        case vect_external_def:
          dump_printf (MSG_NOTE, "external\n");
          break;
        case vect_internal_def:
          dump_printf (MSG_NOTE, "internal\n");
          break;
        case vect_induction_def:
          dump_printf (MSG_NOTE, "induction\n");
          break;
        case vect_reduction_def:
          dump_printf (MSG_NOTE, "reduction\n");
          break;
        case vect_double_reduction_def:
          dump_printf (MSG_NOTE, "double reduction\n");
          break;
        case vect_nested_cycle:
          dump_printf (MSG_NOTE, "nested cycle\n");
          break;
        case vect_unknown_def_type:
          dump_printf (MSG_NOTE, "unknown\n");
          break;
        }
    }

  if (*dt == vect_unknown_def_type)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Unsupported pattern.\n");
      return false;
    }

  return true;
}

/* Function vect_is_simple_use.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
                    tree *vectype, stmt_vec_info *def_stmt_info_out,
                    gimple **def_stmt_out)
{
  stmt_vec_info def_stmt_info;
  gimple *def_stmt;
  if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
    return false;

  if (def_stmt_out)
    *def_stmt_out = def_stmt;
  if (def_stmt_info_out)
    *def_stmt_info_out = def_stmt_info;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
      gcc_assert (*vectype != NULL_TREE);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_is_simple_use: vectype %T\n", *vectype);
    }
  else if (*dt == vect_uninitialized_def
           || *dt == vect_constant_def
           || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}

/* Function vect_is_simple_use.

   Same as vect_is_simple_use but determines the operand by operand
   position OPERAND from either STMT or SLP_NODE, filling in *OP
   and *SLP_DEF (when SLP_NODE is not NULL).  */
bool
vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
                    unsigned operand, tree *op, slp_tree *slp_def,
                    enum vect_def_type *dt,
                    tree *vectype, stmt_vec_info *def_stmt_info_out)
{
  if (slp_node)
    {
      slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
      *slp_def = child;
      *vectype = SLP_TREE_VECTYPE (child);
      if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
        {
          *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
          return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
        }
      else
        {
          if (def_stmt_info_out)
            *def_stmt_info_out = NULL;
          *op = SLP_TREE_SCALAR_OPS (child)[0];
          *dt = SLP_TREE_DEF_TYPE (child);
          return true;
        }
    }
  else
    {
      *slp_def = NULL;
      if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
        {
          if (gimple_assign_rhs_code (ass) == COND_EXPR
              && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
            {
              if (operand < 2)
                *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
              else
                *op = gimple_op (ass, operand);
            }
          else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
            *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
          else
            *op = gimple_op (ass, operand + 1);
        }
      else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
        *op = gimple_call_arg (call, operand);
      else
        gcc_unreachable ();
      return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
    }
}

/* If OP is not NULL and is external or constant update its vector
   type with VECTYPE.  Returns true if successful or false if not,
   for example when conflicting vector types are present.  */

bool
vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
{
  if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
    return true;
  if (SLP_TREE_VECTYPE (op))
    return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
  SLP_TREE_VECTYPE (op) = vectype;
  return true;
}

/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT,
   FIX_TRUNC and WIDEN_MULT.  This function checks if these operations
   are supported by the target platform either directly (via vector
   tree-codes), or via target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like char->short->int - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     widening operation (short in the above example).  */

bool
supportable_widening_operation (vec_info *vinfo,
                                enum tree_code code, stmt_vec_info stmt_info,
                                tree vectype_out, tree vectype_in,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                vec<tree> *interm_types)
{
  loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
  class loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
         two vectors (because the widened results do not fit into one vector).
         The generated vector results would normally be expected to be
         generated in the same order as in the original scalar computation,
         i.e. if 8 results are generated in each vector iteration, they are
         to be organized as follows:
           vect1: [res1,res2,res3,res4],
           vect2: [res5,res6,res7,res8].

         However, in the special case that the result of the widening
         operation is used in a reduction computation only, the order doesn't
         matter (because when vectorizing a reduction we change the order of
         the computation).  Some targets can take advantage of this and
         generate more efficient code.  For example, targets like Altivec,
         that support widen_mult using a sequence of {mult_even,mult_odd}
         generate the following vectors:
           vect1: [res1,res3,res5,res7],
           vect2: [res2,res4,res6,res8].

         When vectorizing outer-loops, we execute the inner-loop sequentially
         (each vectorized inner-loop iteration contributes to VF outer-loop
         iterations in parallel).  We therefore don't allow changing the
         order of the computation in the inner-loop during outer-loop
         vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
         widen and then contract again, e.g. (short)((int)x * y >> 8).
         Normally, pack_trunc performs an even/odd permute, whereas the
         repack from an even/odd expansion would be an interleave, which
         would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
         on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
         are properly set up for the caller.  If we fail, we'll continue with
         a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
          && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
          && !nested_in_vect_loop_p (vect_loop, stmt_info)
          && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
                                             stmt_info, vectype_out,
                                             vectype_in, code1, code2,
                                             multi_step_cvt, interm_types))
        {
          /* Elements in a vector with vect_used_by_reduction property cannot
             be reordered if the use chain with this property does not have
             the same operation.  One such example is s += a * b, where
             elements in a and b cannot be reordered.  Here we check if the
             vector defined by STMT is only directly used in the reduction
             statement.  */
          tree lhs = gimple_assign_lhs (stmt_info->stmt);
          stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
          if (use_stmt_info
              && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
            return true;
        }
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case DOT_PROD_EXPR:
      c1 = DOT_PROD_EXPR;
      c2 = DOT_PROD_EXPR;
      break;

    case SAD_EXPR:
      c1 = SAD_EXPR;
      c2 = SAD_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    case WIDEN_PLUS_EXPR:
      c1 = VEC_WIDEN_PLUS_LO_EXPR;
      c2 = VEC_WIDEN_PLUS_HI_EXPR;
      break;

    case WIDEN_MINUS_EXPR:
      c1 = VEC_WIDEN_MINUS_LO_EXPR;
      c2 = VEC_WIDEN_MINUS_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
      c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
      break;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    std::swap (c1, c2);

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from the output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else if (CONVERT_EXPR_CODE_P (code)
           && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
           && VECTOR_BOOLEAN_TYPE_P (vectype)
           && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
           && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
    {
      /* If the input and result modes are the same, a different optab
         is needed where we pass in the number of units in vectype.  */
      optab1 = vec_unpacks_sbool_lo_optab;
      optab2 = vec_unpacks_sbool_hi_optab;
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    {
      if (!VECTOR_BOOLEAN_TYPE_P (vectype))
        return true;
      /* For scalar masks we may have different boolean
         vector types having the same QImode.  Thus we
         add additional check for elements number.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
                    TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
        return true;
    }

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
        intermediate_type
          = vect_halve_mask_nunits (prev_type, intermediate_mode);
      else
        intermediate_type
          = lang_hooks.types.type_for_mode (intermediate_mode,
                                            TYPE_UNSIGNED (prev_type));

      if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
          && VECTOR_BOOLEAN_TYPE_P (prev_type)
          && intermediate_mode == prev_mode
          && SCALAR_INT_MODE_P (prev_mode))
        {
          /* If the input and result modes are the same, a different optab
             is needed where we pass in the number of units in vectype.  */
          optab3 = vec_unpacks_sbool_lo_optab;
          optab4 = vec_unpacks_sbool_hi_optab;
        }
      else
        {
          optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
          optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
        }

      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode2].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (optab3, intermediate_mode))
              == CODE_FOR_nothing)
          || ((icode2 = optab_handler (optab4, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
        {
          if (!VECTOR_BOOLEAN_TYPE_P (vectype))
            return true;
          if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
                        TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
            return true;
        }

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}

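/* Worked example (illustrative only): widening a char vector to int,
   e.g. V16QI -> V4SI on a 128-bit target, is a two-step promotion:
   a first VEC_UNPACK_LO/HI_EXPR pair produces two V8HI vectors and a
   second pair produces four V4SI vectors.  On success the function
   would return with *MULTI_STEP_CVT == 1 and INTERM_TYPES holding the
   single intermediate type "vector(8) short int".  */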

/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
   and FLOAT.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like int->short->char - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  unsigned HOST_WIDE_INT n_elts;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
          && VECTOR_BOOLEAN_TYPE_P (vectype)
          && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
          && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
          && n_elts < BITS_PER_UNIT)
        optab1 = vec_pack_sbool_trunc_optab;
      else
        optab1 = optab_for_tree_code (c1, vectype, optab_default);
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      /* The signedness is determined from the output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      break;

    case FLOAT_EXPR:
      c1 = VEC_PACK_FLOAT_EXPR;
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      break;

    default:
      gcc_unreachable ();
    }

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    {
      if (!VECTOR_BOOLEAN_TYPE_P (vectype))
        return true;
      /* For scalar masks we may have different boolean
         vector types having the same QImode.  Thus we
         add additional check for elements number.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
                    TYPE_VECTOR_SUBPARTS (narrow_vectype)))
        return true;
    }

  if (code == FLOAT_EXPR)
    return false;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
        = optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
          && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
          && insn_data[icode1].operand[0].mode
             == insn_data[icode2].operand[0].mode)
        {
          uns = false;
          optab1 = interm_optab;
          icode1 = icode2;
        }
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
        intermediate_type
          = vect_double_mask_nunits (prev_type, intermediate_mode);
      else
        intermediate_type
          = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
          && VECTOR_BOOLEAN_TYPE_P (prev_type)
          && SCALAR_INT_MODE_P (prev_mode)
          && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
          && n_elts < BITS_PER_UNIT)
        interm_optab = vec_pack_sbool_trunc_optab;
      else
        interm_optab
          = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
                                 optab_default);
      if (!interm_optab
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
        {
          if (!VECTOR_BOOLEAN_TYPE_P (vectype))
            return true;
          if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
                        TYPE_VECTOR_SUBPARTS (narrow_vectype)))
            return true;
        }

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}

/* Generate and return a vector mask of MASK_TYPE such that
   mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
   Add the statements to SEQ.  */
tree
vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
                tree end_index, const char *name)
{
  tree cmp_type = TREE_TYPE (start_index);
  gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
                                                       cmp_type, mask_type,
                                                       OPTIMIZE_FOR_SPEED));
  gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
                                            start_index, end_index,
                                            build_zero_cst (mask_type));
  tree tmp;
  if (name)
    tmp = make_temp_ssa_name (mask_type, NULL, name);
  else
    tmp = make_ssa_name (mask_type);
  gimple_call_set_lhs (call, tmp);
  gimple_seq_add_stmt (seq, call);
  return tmp;
}

/* Generate a vector mask of type MASK_TYPE for which index I is false iff
   J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */

tree
vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
                    tree end_index)
{
  tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
  return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
}
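/* For example (illustrative only): calling vect_gen_while with an
   8-lane MASK_TYPE, START_INDEX 3 and END_INDEX 6 yields a mask whose
   lane I is set iff I + 3 < 6, i.e. { 1, 1, 1, 0, 0, 0, 0, 0 }, and
   vect_gen_while_not yields the complement { 0, 0, 0, 1, 1, 1, 1, 1 }.  */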

/* Generate a vector mask of type MASK_TYPE for which index I is false iff
   J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */

tree
vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
                    tree end_index)
{
  tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
  return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
}

/* Try to compute the vector types required to vectorize STMT_INFO,
   returning true on success and false if vectorization isn't possible.
   If GROUP_SIZE is nonzero and we're performing BB vectorization,
   make sure that the number of elements in the vectors is no bigger
   than GROUP_SIZE.

   On success:

   - Set *STMT_VECTYPE_OUT to:
     - NULL_TREE if the statement doesn't need to be vectorized;
     - the equivalent of STMT_VINFO_VECTYPE otherwise.

   - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
     number of units needed to vectorize STMT_INFO, or NULL_TREE if the
     statement does not help to determine the overall number of units.  */

opt_result
vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
                                tree *stmt_vectype_out,
                                tree *nunits_vectype_out,
                                unsigned int group_size)
{
  gimple *stmt = stmt_info->stmt;

  /* For BB vectorization, we should always have a group size once we've
     constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
     are tentative requests during things like early data reference
     analysis and pattern recognition.  */
  if (is_a <bb_vec_info> (vinfo))
    gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
  else
    group_size = 0;

  *stmt_vectype_out = NULL_TREE;
  *nunits_vectype_out = NULL_TREE;

  if (gimple_get_lhs (stmt) == NULL_TREE
      /* MASK_STORE has no lhs, but is ok.  */
      && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
    {
      if (is_a <gcall *> (stmt))
        {
          /* Ignore calls with no lhs.  These must be calls to
             #pragma omp simd functions, and what vectorization factor
             it really needs can't be determined until
             vectorizable_simd_clone_call.  */
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "defer to SIMD clone analysis.\n");
          return opt_result::success ();
        }

      return opt_result::failure_at (stmt,
                                     "not vectorized: irregular stmt.%G", stmt);
    }

  tree vectype;
  tree scalar_type = NULL_TREE;
  if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
    {
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "precomputed vectype: %T\n", vectype);
    }
  else if (vect_use_mask_type_p (stmt_info))
    {
      unsigned int precision = stmt_info->mask_precision;
      scalar_type = build_nonstandard_integer_type (precision, 1);
      vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
      if (!vectype)
        return opt_result::failure_at (stmt, "not vectorized: unsupported"
                                       " data-type %T\n", scalar_type);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
    }
  else
    {
      if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
        scalar_type = TREE_TYPE (DR_REF (dr));
      else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
        scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
      else
        scalar_type = TREE_TYPE (gimple_get_lhs (stmt));

      if (dump_enabled_p ())
        {
          if (group_size)
            dump_printf_loc (MSG_NOTE, vect_location,
                             "get vectype for scalar type (group size %d):"
                             " %T\n", group_size, scalar_type);
          else
            dump_printf_loc (MSG_NOTE, vect_location,
                             "get vectype for scalar type: %T\n", scalar_type);
        }
      vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
      if (!vectype)
        return opt_result::failure_at (stmt,
                                       "not vectorized:"
                                       " unsupported data-type %T\n",
                                       scalar_type);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
    }

  if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
    return opt_result::failure_at (stmt,
                                   "not vectorized: vector stmt in loop:%G",
                                   stmt);

  *stmt_vectype_out = vectype;

  /* Don't try to compute scalar types if the stmt produces a boolean
     vector; use the existing vector type instead.  */
  tree nunits_vectype = vectype;
  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
    {
      /* The number of units is set according to the smallest scalar
         type (or the largest vector size, but we only support one
         vector size per vectorization).  */
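      /* For illustration: on a hypothetical target with 128-bit vectors,
         a widening statement such as
           int_x = (int) short_y;
         gets vectype V4SI from its int lhs, but its smallest scalar type
         is short, so the code below computes nunits_vectype V8HI and the
         vectorization factor is driven by 8 units rather than 4.  */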
      scalar_type = vect_get_smallest_scalar_type (stmt_info,
                                                   TREE_TYPE (vectype));
      if (scalar_type != TREE_TYPE (vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "get vectype for smallest scalar type: %T\n",
                             scalar_type);
          nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
                                                        group_size);
          if (!nunits_vectype)
            return opt_result::failure_at
              (stmt, "not vectorized: unsupported data-type %T\n",
               scalar_type);
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
                             nunits_vectype);
        }
    }

  if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
                   TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
    return opt_result::failure_at (stmt,
                                   "Not vectorized: Incompatible number "
                                   "of vector subparts between %T and %T\n",
                                   nunits_vectype, *stmt_vectype_out);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
      dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
      dump_printf (MSG_NOTE, "\n");
    }

  *nunits_vectype_out = nunits_vectype;
  return opt_result::success ();
}

/* Generate and return statement sequence that sets vector length LEN that is:

     min_of_start_and_end = min (START_INDEX, END_INDEX);
     left_len = END_INDEX - min_of_start_and_end;
     rhs = min (left_len, LEN_LIMIT);
     LEN = rhs;

   Note: the cost of the code generated by this function is modeled
   by vect_estimate_min_profitable_iters, so changes here may need
   corresponding changes there.  */

gimple_seq
vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
{
  gimple_seq stmts = NULL;
  tree len_type = TREE_TYPE (len);
  gcc_assert (TREE_TYPE (start_index) == len_type);

  tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
  tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
  tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
  gimple *stmt = gimple_build_assign (len, rhs);
  gimple_seq_add_stmt (&stmts, stmt);

  return stmts;
}
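
/* For illustration: with START_INDEX 28, END_INDEX 32 and LEN_LIMIT 16
   (say, the number of lanes per vector), the sequence built above
   computes min (28, 32) = 28, left_len = 32 - 28 = 4, and finally
   LEN = min (4, 16) = 4, so only 4 lanes remain active for the last
   iteration.  */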