1 1.1 mrg /* Analysis Utilities for Loop Vectorization. 2 1.1 mrg Copyright (C) 2006-2022 Free Software Foundation, Inc. 3 1.1 mrg Contributed by Dorit Nuzman <dorit (at) il.ibm.com> 4 1.1 mrg 5 1.1 mrg This file is part of GCC. 6 1.1 mrg 7 1.1 mrg GCC is free software; you can redistribute it and/or modify it under 8 1.1 mrg the terms of the GNU General Public License as published by the Free 9 1.1 mrg Software Foundation; either version 3, or (at your option) any later 10 1.1 mrg version. 11 1.1 mrg 12 1.1 mrg GCC is distributed in the hope that it will be useful, but WITHOUT ANY 13 1.1 mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 1.1 mrg FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15 1.1 mrg for more details. 16 1.1 mrg 17 1.1 mrg You should have received a copy of the GNU General Public License 18 1.1 mrg along with GCC; see the file COPYING3. If not see 19 1.1 mrg <http://www.gnu.org/licenses/>. */ 20 1.1 mrg 21 1.1 mrg #include "config.h" 22 1.1 mrg #include "system.h" 23 1.1 mrg #include "coretypes.h" 24 1.1 mrg #include "backend.h" 25 1.1 mrg #include "rtl.h" 26 1.1 mrg #include "tree.h" 27 1.1 mrg #include "gimple.h" 28 1.1 mrg #include "ssa.h" 29 1.1 mrg #include "expmed.h" 30 1.1 mrg #include "optabs-tree.h" 31 1.1 mrg #include "insn-config.h" 32 1.1 mrg #include "recog.h" /* FIXME: for insn_data */ 33 1.1 mrg #include "fold-const.h" 34 1.1 mrg #include "stor-layout.h" 35 1.1 mrg #include "tree-eh.h" 36 1.1 mrg #include "gimplify.h" 37 1.1 mrg #include "gimple-iterator.h" 38 1.1 mrg #include "cfgloop.h" 39 1.1 mrg #include "tree-vectorizer.h" 40 1.1 mrg #include "dumpfile.h" 41 1.1 mrg #include "builtins.h" 42 1.1 mrg #include "internal-fn.h" 43 1.1 mrg #include "case-cfn-macros.h" 44 1.1 mrg #include "fold-const-call.h" 45 1.1 mrg #include "attribs.h" 46 1.1 mrg #include "cgraph.h" 47 1.1 mrg #include "omp-simd-clone.h" 48 1.1 mrg #include "predict.h" 49 1.1 mrg #include "tree-vector-builder.h" 50 1.1 
#include "vec-perm-indices.h"
#include "gimple-range.h"

/* Return true if we have a useful VR_RANGE range for VAR, storing it
   in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.  */

static bool
vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
{
  value_range vr;
  get_range_query (cfun)->range_of_expr (vr, var);
  /* An undefined range has no endpoints to read below; treat it as
     varying over VAR's whole type instead.  */
  if (vr.undefined_p ())
    vr.set_varying (TREE_TYPE (var));
  *min_value = wi::to_wide (vr.min ());
  *max_value = wi::to_wide (vr.max ());
  value_range_kind vr_type = vr.kind ();
  /* Try to tighten [*MIN_VALUE, *MAX_VALUE] using the known-zero bits
     of VAR; only a plain VR_RANGE result is useful to callers.  */
  wide_int nonzero = get_nonzero_bits (var);
  signop sgn = TYPE_SIGN (TREE_TYPE (var));
  if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
					 nonzero, sgn) == VR_RANGE)
    {
      if (dump_enabled_p ())
	{
	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
	  dump_printf (MSG_NOTE, " has range [");
	  dump_hex (MSG_NOTE, *min_value);
	  dump_printf (MSG_NOTE, ", ");
	  dump_hex (MSG_NOTE, *max_value);
	  dump_printf (MSG_NOTE, "]\n");
	}
      return true;
    }
  else
    {
      /* No usable range; *MIN_VALUE/*MAX_VALUE may have been clobbered
	 above, so callers must not use them when we return false.  */
      if (dump_enabled_p ())
	{
	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
	  dump_printf (MSG_NOTE, " has no range info\n");
	}
      return false;
    }
}

/* Report that we've found an instance of pattern PATTERN in
   statement STMT.
*/ 95 1.1 mrg 96 1.1 mrg static void 97 1.1 mrg vect_pattern_detected (const char *name, gimple *stmt) 98 1.1 mrg { 99 1.1 mrg if (dump_enabled_p ()) 100 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt); 101 1.1 mrg } 102 1.1 mrg 103 1.1 mrg /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and 104 1.1 mrg return the pattern statement's stmt_vec_info. Set its vector type to 105 1.1 mrg VECTYPE if it doesn't have one already. */ 106 1.1 mrg 107 1.1 mrg static stmt_vec_info 108 1.1 mrg vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt, 109 1.1 mrg stmt_vec_info orig_stmt_info, tree vectype) 110 1.1 mrg { 111 1.1 mrg stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt); 112 1.1 mrg if (pattern_stmt_info == NULL) 113 1.1 mrg pattern_stmt_info = vinfo->add_stmt (pattern_stmt); 114 1.1 mrg gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt)); 115 1.1 mrg 116 1.1 mrg pattern_stmt_info->pattern_stmt_p = true; 117 1.1 mrg STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info; 118 1.1 mrg STMT_VINFO_DEF_TYPE (pattern_stmt_info) 119 1.1 mrg = STMT_VINFO_DEF_TYPE (orig_stmt_info); 120 1.1 mrg if (!STMT_VINFO_VECTYPE (pattern_stmt_info)) 121 1.1 mrg { 122 1.1 mrg gcc_assert (!vectype 123 1.1 mrg || (VECTOR_BOOLEAN_TYPE_P (vectype) 124 1.1 mrg == vect_use_mask_type_p (orig_stmt_info))); 125 1.1 mrg STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype; 126 1.1 mrg pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision; 127 1.1 mrg } 128 1.1 mrg return pattern_stmt_info; 129 1.1 mrg } 130 1.1 mrg 131 1.1 mrg /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT. 132 1.1 mrg Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't 133 1.1 mrg have one already. 
*/ 134 1.1 mrg 135 1.1 mrg static void 136 1.1 mrg vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt, 137 1.1 mrg stmt_vec_info orig_stmt_info, tree vectype) 138 1.1 mrg { 139 1.1 mrg STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true; 140 1.1 mrg STMT_VINFO_RELATED_STMT (orig_stmt_info) 141 1.1 mrg = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype); 142 1.1 mrg } 143 1.1 mrg 144 1.1 mrg /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE 145 1.1 mrg is nonnull, record that NEW_STMT's vector type is VECTYPE, which might 146 1.1 mrg be different from the vector type of the final pattern statement. 147 1.1 mrg If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type 148 1.1 mrg from which it was derived. */ 149 1.1 mrg 150 1.1 mrg static inline void 151 1.1 mrg append_pattern_def_seq (vec_info *vinfo, 152 1.1 mrg stmt_vec_info stmt_info, gimple *new_stmt, 153 1.1 mrg tree vectype = NULL_TREE, 154 1.1 mrg tree scalar_type_for_mask = NULL_TREE) 155 1.1 mrg { 156 1.1 mrg gcc_assert (!scalar_type_for_mask 157 1.1 mrg == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))); 158 1.1 mrg if (vectype) 159 1.1 mrg { 160 1.1 mrg stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt); 161 1.1 mrg STMT_VINFO_VECTYPE (new_stmt_info) = vectype; 162 1.1 mrg if (scalar_type_for_mask) 163 1.1 mrg new_stmt_info->mask_precision 164 1.1 mrg = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask)); 165 1.1 mrg } 166 1.1 mrg gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info), 167 1.1 mrg new_stmt); 168 1.1 mrg } 169 1.1 mrg 170 1.1 mrg /* The caller wants to perform new operations on vect_external variable 171 1.1 mrg VAR, so that the result of the operations would also be vect_external. 172 1.1 mrg Return the edge on which the operations can be performed, if one exists. 173 1.1 mrg Return null if the operations should instead be treated as part of 174 1.1 mrg the pattern that needs them. 
*/ 175 1.1 mrg 176 1.1 mrg static edge 177 1.1 mrg vect_get_external_def_edge (vec_info *vinfo, tree var) 178 1.1 mrg { 179 1.1 mrg edge e = NULL; 180 1.1 mrg if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo)) 181 1.1 mrg { 182 1.1 mrg e = loop_preheader_edge (loop_vinfo->loop); 183 1.1 mrg if (!SSA_NAME_IS_DEFAULT_DEF (var)) 184 1.1 mrg { 185 1.1 mrg basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var)); 186 1.1 mrg if (bb == NULL 187 1.1 mrg || !dominated_by_p (CDI_DOMINATORS, e->dest, bb)) 188 1.1 mrg e = NULL; 189 1.1 mrg } 190 1.1 mrg } 191 1.1 mrg return e; 192 1.1 mrg } 193 1.1 mrg 194 1.1 mrg /* Return true if the target supports a vector version of CODE, 195 1.1 mrg where CODE is known to map to a direct optab with the given SUBTYPE. 196 1.1 mrg ITYPE specifies the type of (some of) the scalar inputs and OTYPE 197 1.1 mrg specifies the type of the scalar result. 198 1.1 mrg 199 1.1 mrg If CODE allows the inputs and outputs to have different type 200 1.1 mrg (such as for WIDEN_SUM_EXPR), it is the input mode rather 201 1.1 mrg than the output mode that determines the appropriate target pattern. 202 1.1 mrg Operand 0 of the target pattern then specifies the mode that the output 203 1.1 mrg must have. 204 1.1 mrg 205 1.1 mrg When returning true, set *VECOTYPE_OUT to the vector version of OTYPE. 206 1.1 mrg Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT 207 1.1 mrg is nonnull. 
*/ 208 1.1 mrg 209 1.1 mrg static bool 210 1.1 mrg vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code, 211 1.1 mrg tree itype, tree *vecotype_out, 212 1.1 mrg tree *vecitype_out = NULL, 213 1.1 mrg enum optab_subtype subtype = optab_default) 214 1.1 mrg { 215 1.1 mrg tree vecitype = get_vectype_for_scalar_type (vinfo, itype); 216 1.1 mrg if (!vecitype) 217 1.1 mrg return false; 218 1.1 mrg 219 1.1 mrg tree vecotype = get_vectype_for_scalar_type (vinfo, otype); 220 1.1 mrg if (!vecotype) 221 1.1 mrg return false; 222 1.1 mrg 223 1.1 mrg optab optab = optab_for_tree_code (code, vecitype, subtype); 224 1.1 mrg if (!optab) 225 1.1 mrg return false; 226 1.1 mrg 227 1.1 mrg insn_code icode = optab_handler (optab, TYPE_MODE (vecitype)); 228 1.1 mrg if (icode == CODE_FOR_nothing 229 1.1 mrg || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype)) 230 1.1 mrg return false; 231 1.1 mrg 232 1.1 mrg *vecotype_out = vecotype; 233 1.1 mrg if (vecitype_out) 234 1.1 mrg *vecitype_out = vecitype; 235 1.1 mrg return true; 236 1.1 mrg } 237 1.1 mrg 238 1.1 mrg /* Round bit precision PRECISION up to a full element. */ 239 1.1 mrg 240 1.1 mrg static unsigned int 241 1.1 mrg vect_element_precision (unsigned int precision) 242 1.1 mrg { 243 1.1 mrg precision = 1 << ceil_log2 (precision); 244 1.1 mrg return MAX (precision, BITS_PER_UNIT); 245 1.1 mrg } 246 1.1 mrg 247 1.1 mrg /* If OP is defined by a statement that's being considered for vectorization, 248 1.1 mrg return information about that statement, otherwise return NULL. 
 */

static stmt_vec_info
vect_get_internal_def (vec_info *vinfo, tree op)
{
  stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
  /* Only definitions inside the region being vectorized count.  */
  if (def_stmt_info
      && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
    return def_stmt_info;
  return NULL;
}

/* Check whether NAME, an ssa-name used in STMT_VINFO,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.

   On success set *ORIG_TYPE to the type of name0, *DEF_STMT to the
   conversion statement and *PROMOTION to whether the conversion at
   least doubles the precision.  *ORIG_TYPE and *PROMOTION may also be
   written on some failing paths, so they are only meaningful when the
   function returns true.  */

static bool
type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
		   tree *orig_type, gimple **def_stmt, bool *promotion)
{
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;

  stmt_vec_info def_stmt_info;
  if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
    return false;

  /* Only defs we can generate code for are of interest.  */
  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  /* External defs and constants have no defining statement.  */
  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  /* The definition must be a NOP_EXPR/CONVERT_EXPR.  */
  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  /* A "promotion" here means at least a doubling of the precision.  */
  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  /* The conversion's input must itself be vectorizable.  */
  if (!vect_is_simple_use (oprnd0, vinfo, &dt))
    return false;

  return true;
}

/* Holds information about an input operand after some sign changes
   and type promotions have been peeled away.  */
class vect_unpromoted_value {
public:
  vect_unpromoted_value ();

  void set_op (tree, vect_def_type, stmt_vec_info = NULL);

  /* The value obtained after peeling away zero or more casts.  */
  tree op;

  /* The type of OP.  */
  tree type;

  /* The definition type of OP.  */
  vect_def_type dt;

  /* If OP is the result of peeling at least one cast, and if the cast
     of OP itself is a vectorizable statement, CASTER identifies that
     statement, otherwise it is null.  */
  stmt_vec_info caster;
};

inline vect_unpromoted_value::vect_unpromoted_value ()
  : op (NULL_TREE),
    type (NULL_TREE),
    dt (vect_uninitialized_def),
    caster (NULL)
{
}

/* Set the operand to OP_IN, its definition type to DT_IN, and the
   statement that casts it to CASTER_IN.  */

inline void
vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
			       stmt_vec_info caster_in)
{
  op = op_in;
  /* TYPE is always kept in sync with OP.  */
  type = TREE_TYPE (op);
  dt = dt_in;
  caster = caster_in;
}

/* If OP is a vectorizable SSA name, strip a sequence of integer conversions
   to reach some vectorizable inner operand OP', continuing as long as it
   is possible to convert OP' back to OP using a possible sign change
   followed by a possible promotion P.
Return this OP', or null if OP is
   not a vectorizable SSA name.  If there is a promotion P, describe its
   input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
   is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
   have more than one user.

   A successful return means that it is possible to go from OP' to OP
   via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
   whereas the cast from UNPROM to OP might be a promotion, a sign
   change, or a nop.

   E.g. say we have:

       signed short *ptr = ...;
       signed short C = *ptr;
       unsigned short B = (unsigned short) C;    // sign change
       signed int A = (signed int) B;            // unsigned promotion
       ...possible other uses of A...
       unsigned int OP = (unsigned int) A;       // sign change

   In this case it's possible to go directly from C to OP using:

       OP = (unsigned int) (unsigned short) C;
	    +------------+ +--------------+
	       promotion      sign change

   so OP' would be C.  The input to the promotion is B, so UNPROM
   would describe B.  */

static tree
vect_look_through_possible_promotion (vec_info *vinfo, tree op,
				      vect_unpromoted_value *unprom,
				      bool *single_use_p = NULL)
{
  tree res = NULL_TREE;
  tree op_type = TREE_TYPE (op);
  unsigned int orig_precision = TYPE_PRECISION (op_type);
  /* MIN_PRECISION tracks the narrowest precision seen so far on the
     chain of casts; only values no wider than that can extend the
     strippable sequence.  */
  unsigned int min_precision = orig_precision;
  stmt_vec_info caster = NULL;
  while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
    {
      /* See whether OP is simple enough to vectorize.  */
      stmt_vec_info def_stmt_info;
      gimple *def_stmt;
      vect_def_type dt;
      if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
	break;

      /* If OP is the input of a demotion, skip over it to see whether
	 OP is itself the result of a promotion.  If so, the combined
	 effect of the promotion and the demotion might fit the required
	 pattern, otherwise neither operation fits.

	 This copes with cases such as the result of an arithmetic
	 operation being truncated before being stored, and where that
	 arithmetic operation has been recognized as an over-widened one.  */
      if (TYPE_PRECISION (op_type) <= min_precision)
	{
	  /* Use OP as the UNPROM described above if we haven't yet
	     found a promotion, or if using the new input preserves the
	     sign of the previous promotion.  */
	  if (!res
	      || TYPE_PRECISION (unprom->type) == orig_precision
	      || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
	    {
	      unprom->set_op (op, dt, caster);
	      min_precision = TYPE_PRECISION (op_type);
	    }
	  /* Stop if we've already seen a promotion and if this
	     conversion does more than change the sign.  */
	  else if (TYPE_PRECISION (op_type)
		   != TYPE_PRECISION (unprom->type))
	    break;

	  /* The sequence now extends to OP.  */
	  res = op;
	}

      /* See whether OP is defined by a cast.  Record it as CASTER if
	 the cast is potentially vectorizable.  */
      if (!def_stmt)
	break;
      caster = def_stmt_info;

      /* Ignore pattern statements, since we don't link uses for them.  */
      if (caster
	  && single_use_p
	  && !STMT_VINFO_RELATED_STMT (caster)
	  && !has_single_use (res))
	*single_use_p = false;

      gassign *assign = dyn_cast <gassign *> (def_stmt);
      if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
	break;

      /* Continue with the input to the cast.  */
      op = gimple_assign_rhs1 (def_stmt);
      op_type = TREE_TYPE (op);
    }
  return res;
}

/* OP is an integer operand to an operation that returns TYPE, and we
   want to treat the operation as a widening one.  So far we can treat
   it as widening from *COMMON_TYPE.

   Return true if OP is suitable for such a widening operation,
   either widening from *COMMON_TYPE or from some supertype of it.
   Update *COMMON_TYPE to the supertype in the latter case.

   SHIFT_P is true if OP is a shift amount.  */

static bool
vect_joust_widened_integer (tree type, bool shift_p, tree op,
			    tree *common_type)
{
  /* Calculate the minimum precision required by OP, without changing
     the sign of either operand.  */
  unsigned int precision;
  if (shift_p)
    {
      /* The shift amount must leave at least half of TYPE's bits as
	 the "narrow" input width.  */
      if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
	return false;
      precision = TREE_INT_CST_LOW (op);
    }
  else
    {
      precision = wi::min_precision (wi::to_widest (op),
				     TYPE_SIGN (*common_type));
      if (precision * 2 > TYPE_PRECISION (type))
	return false;
    }

  /* If OP requires a wider type, switch to that type.  The checks
     above ensure that this is still narrower than the result.
 */
  precision = vect_element_precision (precision);
  if (TYPE_PRECISION (*common_type) < precision)
    *common_type = build_nonstandard_integer_type
      (precision, TYPE_UNSIGNED (*common_type));
  return true;
}

/* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
   is narrower than type, storing the supertype in *COMMON_TYPE if so.  */

static bool
vect_joust_widened_type (tree type, tree new_type, tree *common_type)
{
  if (types_compatible_p (*common_type, new_type))
    return true;

  /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
  if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
      && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
    return true;

  /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
  if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
      && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
    {
      *common_type = new_type;
      return true;
    }

  /* We have mismatched signs, with the signed type being
     no wider than the unsigned type.  In this case we need
     a wider signed type.  */
  unsigned int precision = MAX (TYPE_PRECISION (*common_type),
				TYPE_PRECISION (new_type));
  precision *= 2;

  /* The doubled supertype must itself still be at most half the width
     of TYPE for the operation to count as widening.  */
  if (precision * 2 > TYPE_PRECISION (type))
    return false;

  *common_type = build_nonstandard_integer_type (precision, false);
  return true;
}

/* Check whether STMT_INFO can be viewed as a tree of integer operations
   in which each node either performs CODE or WIDENED_CODE, and where
   each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
   specifies the maximum number of leaf operands.  SHIFT_P says whether
   CODE and WIDENED_CODE are some sort of shift.

   If STMT_INFO is such a tree, return the number of leaf operands
   and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
   to a type that (a) is narrower than the result of STMT_INFO and
   (b) can hold all leaf operand values.

   If SUBTYPE then allow that the signs of the operands
   may differ in signs but not in precision.  SUBTYPE is updated to reflect
   this.

   Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
   exists.  */

static unsigned int
vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
		      tree_code widened_code, bool shift_p,
		      unsigned int max_nops,
		      vect_unpromoted_value *unprom, tree *common_type,
		      enum optab_subtype *subtype = NULL)
{
  /* Check for an integer operation with the right code.
 */
  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign)
    return 0;

  tree_code rhs_code = gimple_assign_rhs_code (assign);
  if (rhs_code != code && rhs_code != widened_code)
    return 0;

  tree type = TREE_TYPE (gimple_assign_lhs (assign));
  if (!INTEGRAL_TYPE_P (type))
    return 0;

  /* Assume that both operands will be leaf operands.  */
  max_nops -= 2;

  /* Check the operands.  */
  unsigned int next_op = 0;
  for (unsigned int i = 0; i < 2; ++i)
    {
      vect_unpromoted_value *this_unprom = &unprom[next_op];
      unsigned int nops = 1;
      tree op = gimple_op (assign, i + 1);
      if (i == 1 && TREE_CODE (op) == INTEGER_CST)
	{
	  /* We already have a common type from earlier operands.
	     Update it to account for OP.  */
	  this_unprom->set_op (op, vect_constant_def);
	  if (!vect_joust_widened_integer (type, shift_p, op, common_type))
	    return 0;
	}
      else
	{
	  /* Only allow shifts by constants.  */
	  if (shift_p && i == 1)
	    return 0;

	  if (rhs_code != code)
	    {
	      /* If rhs_code is widened_code, don't look through further
		 possible promotions, there is a promotion already embedded
		 in the WIDEN_*_EXPR.  */
	      if (TREE_CODE (op) != SSA_NAME
		  || !INTEGRAL_TYPE_P (TREE_TYPE (op)))
		return 0;

	      stmt_vec_info def_stmt_info;
	      gimple *def_stmt;
	      vect_def_type dt;
	      if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info,
				       &def_stmt))
		return 0;
	      this_unprom->set_op (op, dt, NULL);
	    }
	  else if (!vect_look_through_possible_promotion (vinfo, op,
							  this_unprom))
	    return 0;

	  if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
	    {
	      /* The operand isn't widened.  If STMT_INFO has the code
		 for an unwidened operation, recursively check whether
		 this operand is a node of the tree.  */
	      if (rhs_code != code
		  || max_nops == 0
		  || this_unprom->dt != vect_internal_def)
		return 0;

	      /* Give back the leaf slot allocated above now that we're
		 not treating this as a leaf operand.  */
	      max_nops += 1;

	      /* Recursively process the definition of the operand.  */
	      stmt_vec_info def_stmt_info
		= vinfo->lookup_def (this_unprom->op);
	      nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
					   widened_code, shift_p, max_nops,
					   this_unprom, common_type,
					   subtype);
	      if (nops == 0)
		return 0;

	      max_nops -= nops;
	    }
	  else
	    {
	      /* Make sure that the operand is narrower than the result.  */
	      if (TYPE_PRECISION (this_unprom->type) * 2
		  > TYPE_PRECISION (type))
		return 0;

	      /* Update COMMON_TYPE for the new operand.  */
	      if (i == 0)
		*common_type = this_unprom->type;
	      else if (!vect_joust_widened_type (type, this_unprom->type,
						 common_type))
		{
		  if (subtype)
		    {
		      /* See if we can sign extend the smaller type.  */
		      if (TYPE_PRECISION (this_unprom->type)
			  > TYPE_PRECISION (*common_type))
			*common_type = this_unprom->type;
		      *subtype = optab_vector_mixed_sign;
		    }
		  else
		    return 0;
		}
	    }
	}
      next_op += nops;
    }
  return next_op;
}

/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple *stmt)
{
  /* All pattern temporaries share the "patt" name prefix.  */
  return make_temp_ssa_name (type, stmt, "patt");
}

/* STMT2_INFO describes a type conversion that could be split into STMT1
   followed by a version of STMT2_INFO that takes NEW_RHS as its first
   input.  Try to do this using pattern statements, returning true on
   success.  */

static bool
vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
		      gimple *stmt1, tree vectype)
{
  if (is_pattern_stmt_p (stmt2_info))
    {
      /* STMT2_INFO is part of a pattern.  Get the statement to which
	 the pattern is attached.
 */
      stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
      vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Splitting pattern statement: %G", stmt2_info->stmt);

      /* Since STMT2_INFO is a pattern statement, we can change it
	 in-situ without worrying about changing the code for the
	 containing block.  */
      gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
			   stmt2_info->stmt);
	}

      gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
      if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
	/* STMT2_INFO is the actual pattern statement.  Add STMT1
	   to the end of the definition sequence.  */
	gimple_seq_add_stmt_without_update (def_seq, stmt1);
      else
	{
	  /* STMT2_INFO belongs to the definition sequence.  Insert STMT1
	     before it.  */
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
	  gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
	}
      return true;
    }
  else
    {
      /* STMT2_INFO doesn't yet have a pattern.  Try to create a
	 two-statement pattern now.
 */
      gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
      tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
      tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
      /* Bail out if the conversion result has no vector type.  */
      if (!lhs_vectype)
	return false;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Splitting statement: %G", stmt2_info->stmt);

      /* Add STMT1 as a singleton pattern definition sequence.  */
      gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
      vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
      gimple_seq_add_stmt_without_update (def_seq, stmt1);

      /* Build the second of the two pattern statements.  */
      tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
      gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
      vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "into pattern statements: %G", stmt1);
	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
	}

      return true;
    }
}

/* Convert UNPROM to TYPE and return the result, adding new statements
   to STMT_INFO's pattern definition statements if no better way is
   available.  VECTYPE is the vector form of TYPE.

   If SUBTYPE then convert the type based on the subtype.  */

static tree
vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
		    vect_unpromoted_value *unprom, tree vectype,
		    enum optab_subtype subtype = optab_default)
{

  /* Update the type if the signs differ.
*/ 777 1.1 mrg if (subtype == optab_vector_mixed_sign 778 1.1 mrg && TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (unprom->op))) 779 1.1 mrg type = build_nonstandard_integer_type (TYPE_PRECISION (type), 780 1.1 mrg TYPE_SIGN (unprom->type)); 781 1.1 mrg 782 1.1 mrg /* Check for a no-op conversion. */ 783 1.1 mrg if (types_compatible_p (type, TREE_TYPE (unprom->op))) 784 1.1 mrg return unprom->op; 785 1.1 mrg 786 1.1 mrg /* Allow the caller to create constant vect_unpromoted_values. */ 787 1.1 mrg if (TREE_CODE (unprom->op) == INTEGER_CST) 788 1.1 mrg return wide_int_to_tree (type, wi::to_widest (unprom->op)); 789 1.1 mrg 790 1.1 mrg tree input = unprom->op; 791 1.1 mrg if (unprom->caster) 792 1.1 mrg { 793 1.1 mrg tree lhs = gimple_get_lhs (unprom->caster->stmt); 794 1.1 mrg tree lhs_type = TREE_TYPE (lhs); 795 1.1 mrg 796 1.1 mrg /* If the result of the existing cast is the right width, use it 797 1.1 mrg instead of the source of the cast. */ 798 1.1 mrg if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type)) 799 1.1 mrg input = lhs; 800 1.1 mrg /* If the precision we want is between the source and result 801 1.1 mrg precisions of the existing cast, try splitting the cast into 802 1.1 mrg two and tapping into a mid-way point. */ 803 1.1 mrg else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type) 804 1.1 mrg && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type)) 805 1.1 mrg { 806 1.1 mrg /* In order to preserve the semantics of the original cast, 807 1.1 mrg give the mid-way point the same signedness as the input value. 808 1.1 mrg 809 1.1 mrg It would be possible to use a signed type here instead if 810 1.1 mrg TYPE is signed and UNPROM->TYPE is unsigned, but that would 811 1.1 mrg make the sign of the midtype sensitive to the order in 812 1.1 mrg which we process the statements, since the signedness of 813 1.1 mrg TYPE is the signedness required by just one of possibly 814 1.1 mrg many users. 
Also, unsigned promotions are usually as cheap 815 1.1 mrg as or cheaper than signed ones, so it's better to keep an 816 1.1 mrg unsigned promotion. */ 817 1.1 mrg tree midtype = build_nonstandard_integer_type 818 1.1 mrg (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type)); 819 1.1 mrg tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype); 820 1.1 mrg if (vec_midtype) 821 1.1 mrg { 822 1.1 mrg input = vect_recog_temp_ssa_var (midtype, NULL); 823 1.1 mrg gassign *new_stmt = gimple_build_assign (input, NOP_EXPR, 824 1.1 mrg unprom->op); 825 1.1 mrg if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt, 826 1.1 mrg vec_midtype)) 827 1.1 mrg append_pattern_def_seq (vinfo, stmt_info, 828 1.1 mrg new_stmt, vec_midtype); 829 1.1 mrg } 830 1.1 mrg } 831 1.1 mrg 832 1.1 mrg /* See if we can reuse an existing result. */ 833 1.1 mrg if (types_compatible_p (type, TREE_TYPE (input))) 834 1.1 mrg return input; 835 1.1 mrg } 836 1.1 mrg 837 1.1 mrg /* We need a new conversion statement. */ 838 1.1 mrg tree new_op = vect_recog_temp_ssa_var (type, NULL); 839 1.1 mrg gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input); 840 1.1 mrg 841 1.1 mrg /* If OP is an external value, see if we can insert the new statement 842 1.1 mrg on an incoming edge. */ 843 1.1 mrg if (input == unprom->op && unprom->dt == vect_external_def) 844 1.1 mrg if (edge e = vect_get_external_def_edge (vinfo, input)) 845 1.1 mrg { 846 1.1 mrg basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt); 847 1.1 mrg gcc_assert (!new_bb); 848 1.1 mrg return new_op; 849 1.1 mrg } 850 1.1 mrg 851 1.1 mrg /* As a (common) last resort, add the statement to the pattern itself. */ 852 1.1 mrg append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype); 853 1.1 mrg return new_op; 854 1.1 mrg } 855 1.1 mrg 856 1.1 mrg /* Invoke vect_convert_input for N elements of UNPROM and store the 857 1.1 mrg result in the corresponding elements of RESULT. 
858 1.1 mrg 859 1.1 mrg If SUBTYPE then convert the type based on the subtype. */ 860 1.1 mrg 861 1.1 mrg static void 862 1.1 mrg vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n, 863 1.1 mrg tree *result, tree type, vect_unpromoted_value *unprom, 864 1.1 mrg tree vectype, enum optab_subtype subtype = optab_default) 865 1.1 mrg { 866 1.1 mrg for (unsigned int i = 0; i < n; ++i) 867 1.1 mrg { 868 1.1 mrg unsigned int j; 869 1.1 mrg for (j = 0; j < i; ++j) 870 1.1 mrg if (unprom[j].op == unprom[i].op) 871 1.1 mrg break; 872 1.1 mrg 873 1.1 mrg if (j < i) 874 1.1 mrg result[i] = result[j]; 875 1.1 mrg else 876 1.1 mrg result[i] = vect_convert_input (vinfo, stmt_info, 877 1.1 mrg type, &unprom[i], vectype, subtype); 878 1.1 mrg } 879 1.1 mrg } 880 1.1 mrg 881 1.1 mrg /* The caller has created a (possibly empty) sequence of pattern definition 882 1.1 mrg statements followed by a single statement PATTERN_STMT. Cast the result 883 1.1 mrg of this final statement to TYPE. If a new statement is needed, add 884 1.1 mrg PATTERN_STMT to the end of STMT_INFO's pattern definition statements 885 1.1 mrg and return the new statement, otherwise return PATTERN_STMT as-is. 886 1.1 mrg VECITYPE is the vector form of PATTERN_STMT's result type. */ 887 1.1 mrg 888 1.1 mrg static gimple * 889 1.1 mrg vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type, 890 1.1 mrg gimple *pattern_stmt, tree vecitype) 891 1.1 mrg { 892 1.1 mrg tree lhs = gimple_get_lhs (pattern_stmt); 893 1.1 mrg if (!types_compatible_p (type, TREE_TYPE (lhs))) 894 1.1 mrg { 895 1.1 mrg append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype); 896 1.1 mrg tree cast_var = vect_recog_temp_ssa_var (type, NULL); 897 1.1 mrg pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs); 898 1.1 mrg } 899 1.1 mrg return pattern_stmt; 900 1.1 mrg } 901 1.1 mrg 902 1.1 mrg /* Return true if STMT_VINFO describes a reduction for which reassociation 903 1.1 mrg is allowed. 
   If STMT_INFO is part of a group, assume that it's part of
   a reduction chain and optimistically assume that all statements
   except the last allow reassociation.
   Also require it to have code CODE and to be a reduction
   in the outermost loop.  When returning true, store the operands in
   *OP0_OUT and *OP1_OUT.  */

static bool
vect_reassociating_reduction_p (vec_info *vinfo,
				stmt_vec_info stmt_info, tree_code code,
				tree *op0_out, tree *op1_out)
{
  /* Reductions only exist in loop vectorization.  */
  loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
  if (!loop_info)
    return false;

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || gimple_assign_rhs_code (assign) != code)
    return false;

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  class loop *loop = LOOP_VINFO_LOOP (loop_info);
  if (loop && nested_in_vect_loop_p (loop, stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
    {
      /* In-order (fold-left) reductions must not be reassociated.  */
      if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
				       code))
	return false;
    }
  else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
    return false;

  *op0_out = gimple_assign_rhs1 (assign);
  *op1_out = gimple_assign_rhs2 (assign);
  /* Canonicalize so that the reduction operand comes second, which is
     what the pattern-building callers expect.  */
  if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
    std::swap (*op0_out, *op1_out);
  return true;
}

/* match.pd function to match
   (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
   with conditions:
   1) @1, @2, c, d, a, b are all integral type.
   2) There's single_use for both @1 and @2.
   3) a, c have same precision.
   4) c and @1 have different precision.
   5) c, d are the same type or they can differ in sign when convert is
   truncation.

   record a and c and d and @3.  */

extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));

/* Function vect_recog_cond_expr_convert

   Try to find the following pattern:

   TYPE_AB A,B;
   TYPE_CD C,D;
   TYPE_E E;
   TYPE_E op_true = (TYPE_E) A;
   TYPE_E op_false = (TYPE_E) B;

   E = C cmp D ? op_true : op_false;

   where
   TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
   TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
   single_use of op_true and op_false.
   TYPE_AB could differ in sign when (TYPE_E) A is a truncation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.
   here it starts with E = c cmp D ? op_true : op_false;

   Output:

   TYPE1 E' = C cmp D ? A : B;
   TYPE3 E = (TYPE3) E';

   There may be an extra nop_convert for A or B to handle different
   signedness.

   * TYPE_OUT: The vector type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
   E = (TYPE3)E';
   E' = C cmp D ? A : B; is recorded in pattern definition statements;  */

static gimple *
vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
				      stmt_vec_info stmt_vinfo, tree *type_out)
{
  gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
  tree lhs, match[4], temp, type, new_lhs, op2;
  gimple *cond_stmt;
  gimple *pattern_stmt;

  if (!last_stmt)
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);

  /* Find E = C cmp D ? (TYPE3) A ? (TYPE3) B;
     TYPE_PRECISION (A) == TYPE_PRECISION (C).  */
  if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
    return NULL;

  vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);

  op2 = match[2];
  type = TREE_TYPE (match[1]);
  /* If the two arms have different signedness, insert a sign-changing
     nop conversion so both COND_EXPR operands share TYPE.  */
  if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
    {
      op2 = vect_recog_temp_ssa_var (type, NULL);
      gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
      append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
			      get_vectype_for_scalar_type (vinfo, type));
    }

  /* Build E' = C cmp D ? A : B as a pattern definition statement, then
     E = (TYPE3) E' as the main pattern statement.  */
  temp = vect_recog_temp_ssa_var (type, NULL);
  cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
						 match[1], op2));
  append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
			  get_vectype_for_scalar_type (vinfo, type));
  new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
  pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
  *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created pattern stmt: %G", pattern_stmt);
  return pattern_stmt;
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

   type1a x_t
   type1b y_t;
   TYPE1 prod;
   TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
   the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of
   'type1a' and 'type1b' can differ.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).
 */

static gimple *
vect_recog_dot_prod_pattern (vec_info *vinfo,
			     stmt_vec_info stmt_vinfo, tree *type_out)
{
  tree oprnd0, oprnd1;
  gimple *last_stmt = stmt_vinfo->stmt;
  tree type, half_type;
  gimple *pattern_stmt;
  tree var;

  /* Look for the following pattern
	  DX = (TYPE1) X;
	  DY = (TYPE1) Y;
	  DPROD = DX * DY;
	  DDPROD = (TYPE2) DPROD;
	  sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type but the sign
       between X, Y and DPROD can differ.
     - sum is the same size of DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
				       &oprnd0, &oprnd1))
    return NULL;

  type = TREE_TYPE (gimple_get_lhs (last_stmt));

  /* Peel off the optional conversion S6 from the multiplication result.  */
  vect_unpromoted_value unprom_mult;
  oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (!oprnd0)
    return NULL;

  stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
  if (!mult_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom0[2];
  enum optab_subtype subtype = optab_vector;
  if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
			     false, 2, unprom0, &half_type, &subtype))
    return NULL;

  /* If there are two widening operations, make sure they agree on the sign
     of the extension.  The result of an optab_vector_mixed_sign operation
     is signed; otherwise, the result has the same sign as the operands.  */
  if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
      && (subtype == optab_vector_mixed_sign
	  ? TYPE_UNSIGNED (unprom_mult.type)
	  : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
    return NULL;

  vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);

  /* Check that the target supports DOT_PROD_EXPR for these types.  */
  tree half_vectype;
  if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
					type_out, &half_vectype, subtype))
    return NULL;

  /* Get the inputs in the appropriate types.  */
  tree mult_oprnd[2];
  vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
		       unprom0, half_vectype, subtype);

  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
				      mult_oprnd[0], mult_oprnd[1], oprnd1);

  return pattern_stmt;
}


/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
   same size of 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.
   In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>
  */

static gimple *
vect_recog_sad_pattern (vec_info *vinfo,
			stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree half_type;

  /* Look for the following pattern
	  DX = (TYPE1) X;
	  DY = (TYPE1) Y;
	  DDIFF = DX - DY;
	  DAD = ABS_EXPR <DDIFF>;
	  DAD = (TYPE2) DAD;   #optional widening, cf. S7 above
	  sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size of DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  tree plus_oprnd0, plus_oprnd1;
  if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
				       &plus_oprnd0, &plus_oprnd1))
    return NULL;

  tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));

  /* Any non-truncating sequence of conversions is OK here, since
     with a successful match, the result of the ABS(U) is known to fit
     within the nonnegative range of the result type.  (It cannot be the
     negative of the minimum signed value due to the range of the widening
     MINUS_EXPR.)  */
  vect_unpromoted_value unprom_abs;
  plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
						      &unprom_abs);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in the
     loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (!plus_oprnd0)
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
  if (!abs_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
  if (!abs_stmt
      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  tree abs_type = TREE_TYPE (abs_oprnd);
  /* ABS of an unsigned value would be a no-op; reject it.  */
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* Peel off conversions from the ABS input.  This can involve sign
     changes (e.g. from an unsigned subtraction to a signed ABS input)
     or signed promotion, but it can't include unsigned promotion.
     (Note that ABS of an unsigned promotion should have been folded
     away before now anyway.)  */
  vect_unpromoted_value unprom_diff;
  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
						    &unprom_diff);
  if (!abs_oprnd)
    return NULL;
  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
      && TYPE_UNSIGNED (unprom_diff.type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
  if (!diff_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom[2];
  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR,
			     WIDEN_MINUS_EXPR,
			     false, 2, unprom, &half_type))
    return NULL;

  vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);

  /* Check that the target supports SAD_EXPR for these types.  */
  tree half_vectype;
  if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
					type_out, &half_vectype))
    return NULL;

  /* Get the inputs to the SAD_EXPR in the appropriate types.  */
  tree sad_oprnd[2];
  vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
		       unprom, half_vectype);

  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
					      sad_oprnd[1], plus_oprnd1);

  return pattern_stmt;
}

/* Recognize an operation that performs ORIG_CODE on widened inputs,
   so that it can be treated as though it had the form:

      A_TYPE a;
      B_TYPE b;
      HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
      HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
    | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
    | RES_TYPE res = a_extend ORIG_CODE b_extend;

   Try to replace the pattern with:

      A_TYPE a;
      B_TYPE b;
      HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
      HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
    | RES_TYPE res = (EXT_TYPE) ext;  // possible no-op

   where EXT_TYPE is wider than HALF_TYPE but has the same signedness.

   SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
   name of the pattern being matched, for dump purposes.
 */

static gimple *
vect_recog_widen_op_pattern (vec_info *vinfo,
			     stmt_vec_info last_stmt_info, tree *type_out,
			     tree_code orig_code, tree_code wide_code,
			     bool shift_p, const char *name)
{
  gimple *last_stmt = last_stmt_info->stmt;

  vect_unpromoted_value unprom[2];
  tree half_type;
  if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
			     shift_p, 2, unprom, &half_type))
    return NULL;

  /* Pattern detected.  */
  vect_pattern_detected (name, last_stmt);

  /* ITYPE is the double-width type produced by the widening operation;
     it only matches TYPE when TYPE has exactly twice the precision of
     HALF_TYPE and the same signedness.  */
  tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
  tree itype = type;
  if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
      || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
    itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
					    TYPE_UNSIGNED (half_type));

  /* Check target support  */
  tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
  tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
  tree ctype = itype;
  tree vecctype = vecitype;
  if (orig_code == MINUS_EXPR
      && TYPE_UNSIGNED (itype)
      && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
    {
      /* Subtraction is special, even if half_type is unsigned and no matter
	 whether type is signed or unsigned, if type is wider than itype,
	 we need to sign-extend from the widening operation result to the
	 result type.
	 Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
	 itype unsigned short and type either int or unsigned int.
	 Widened (unsigned short) 0xfe - (unsigned short) 0xff is
	 (unsigned short) 0xffff, but for type int we want the result -1
	 and for type unsigned int 0xffffffff rather than 0xffff.  */
      ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
      vecctype = get_vectype_for_scalar_type (vinfo, ctype);
    }

  enum tree_code dummy_code;
  int dummy_int;
  auto_vec<tree> dummy_vec;
  if (!vectype
      || !vecitype
      || !vecctype
      || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_out = get_vectype_for_scalar_type (vinfo, type);
  if (!*type_out)
    return NULL;

  /* Convert both operands to HALF_TYPE, reusing existing casts where
     possible.  */
  tree oprnd[2];
  vect_convert_inputs (vinfo, last_stmt_info,
		       2, oprnd, half_type, unprom, vectype);

  tree var = vect_recog_temp_ssa_var (itype, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, wide_code,
					      oprnd[0], oprnd[1]);

  /* For the unsigned-subtraction case above, first sign-extend to CTYPE
     before converting to the final result type.  */
  if (vecctype != vecitype)
    pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
					pattern_stmt, vecitype);

  return vect_convert_output (vinfo, last_stmt_info,
			      type, pattern_stmt, vecctype);
}

/* Try to detect multiplication on widened inputs, converting MULT_EXPR
   to WIDEN_MULT_EXPR.  See vect_recog_widen_op_pattern for details.
 */

static gimple *
vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
			       tree *type_out)
{
  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
				      MULT_EXPR, WIDEN_MULT_EXPR, false,
				      "vect_recog_widen_mult_pattern");
}

/* Try to detect addition on widened inputs, converting PLUS_EXPR
   to WIDEN_PLUS_EXPR.  See vect_recog_widen_op_pattern for details.  */

static gimple *
vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
			       tree *type_out)
{
  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
				      PLUS_EXPR, WIDEN_PLUS_EXPR, false,
				      "vect_recog_widen_plus_pattern");
}

/* Try to detect subtraction on widened inputs, converting MINUS_EXPR
   to WIDEN_MINUS_EXPR.  See vect_recog_widen_op_pattern for details.  */

static gimple *
vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
				tree *type_out)
{
  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
				      MINUS_EXPR, WIDEN_MINUS_EXPR, false,
				      "vect_recog_widen_minus_pattern");
}

/* Function vect_recog_popcount_pattern

   Try to find the following pattern:

   UTYPE1 A;
   TYPE1 B;
   UTYPE2 temp_in;
   TYPE3 temp_out;
   temp_in = (UTYPE2)A;

   temp_out = __builtin_popcount{,l,ll} (temp_in);
   B = (TYPE1) temp_out;

   TYPE2 may or may not be equal to TYPE3.
   i.e. TYPE2 is equal to TYPE3 for __builtin_popcount
   i.e.
TYPE2 is not equal to TYPE3 for __builtin_popcountll 1489 1.1 mrg 1490 1.1 mrg Input: 1491 1.1 mrg 1492 1.1 mrg * STMT_VINFO: The stmt from which the pattern search begins. 1493 1.1 mrg here it starts with B = (TYPE1) temp_out; 1494 1.1 mrg 1495 1.1 mrg Output: 1496 1.1 mrg 1497 1.1 mrg * TYPE_OUT: The vector type of the output of this pattern. 1498 1.1 mrg 1499 1.1 mrg * Return value: A new stmt that will be used to replace the sequence of 1500 1.1 mrg stmts that constitute the pattern. In this case it will be: 1501 1.1 mrg B = .POPCOUNT (A); 1502 1.1 mrg */ 1503 1.1 mrg 1504 1.1 mrg static gimple * 1505 1.1 mrg vect_recog_popcount_pattern (vec_info *vinfo, 1506 1.1 mrg stmt_vec_info stmt_vinfo, tree *type_out) 1507 1.1 mrg { 1508 1.1 mrg gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt); 1509 1.1 mrg gimple *popcount_stmt, *pattern_stmt; 1510 1.1 mrg tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var; 1511 1.1 mrg auto_vec<tree> vargs; 1512 1.1 mrg 1513 1.1 mrg /* Find B = (TYPE1) temp_out. 
*/ 1514 1.1 mrg if (!last_stmt) 1515 1.1 mrg return NULL; 1516 1.1 mrg tree_code code = gimple_assign_rhs_code (last_stmt); 1517 1.1 mrg if (!CONVERT_EXPR_CODE_P (code)) 1518 1.1 mrg return NULL; 1519 1.1 mrg 1520 1.1 mrg lhs_oprnd = gimple_assign_lhs (last_stmt); 1521 1.1 mrg lhs_type = TREE_TYPE (lhs_oprnd); 1522 1.1 mrg if (!INTEGRAL_TYPE_P (lhs_type)) 1523 1.1 mrg return NULL; 1524 1.1 mrg 1525 1.1 mrg rhs_oprnd = gimple_assign_rhs1 (last_stmt); 1526 1.1 mrg if (TREE_CODE (rhs_oprnd) != SSA_NAME 1527 1.1 mrg || !has_single_use (rhs_oprnd)) 1528 1.1 mrg return NULL; 1529 1.1 mrg popcount_stmt = SSA_NAME_DEF_STMT (rhs_oprnd); 1530 1.1 mrg 1531 1.1 mrg /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */ 1532 1.1 mrg if (!is_gimple_call (popcount_stmt)) 1533 1.1 mrg return NULL; 1534 1.1 mrg switch (gimple_call_combined_fn (popcount_stmt)) 1535 1.1 mrg { 1536 1.1 mrg CASE_CFN_POPCOUNT: 1537 1.1 mrg break; 1538 1.1 mrg default: 1539 1.1 mrg return NULL; 1540 1.1 mrg } 1541 1.1 mrg 1542 1.1 mrg if (gimple_call_num_args (popcount_stmt) != 1) 1543 1.1 mrg return NULL; 1544 1.1 mrg 1545 1.1 mrg rhs_oprnd = gimple_call_arg (popcount_stmt, 0); 1546 1.1 mrg vect_unpromoted_value unprom_diff; 1547 1.1 mrg rhs_origin = vect_look_through_possible_promotion (vinfo, rhs_oprnd, 1548 1.1 mrg &unprom_diff); 1549 1.1 mrg 1550 1.1 mrg if (!rhs_origin) 1551 1.1 mrg return NULL; 1552 1.1 mrg 1553 1.1 mrg /* Input and output of .POPCOUNT should be same-precision integer. 1554 1.1 mrg Also A should be unsigned or same precision as temp_in, 1555 1.1 mrg otherwise there would be sign_extend from A to temp_in. 
*/ 1556 1.1 mrg if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type) 1557 1.1 mrg || (!TYPE_UNSIGNED (unprom_diff.type) 1558 1.1 mrg && (TYPE_PRECISION (unprom_diff.type) 1559 1.1 mrg != TYPE_PRECISION (TREE_TYPE (rhs_oprnd))))) 1560 1.1 mrg return NULL; 1561 1.1 mrg vargs.safe_push (unprom_diff.op); 1562 1.1 mrg 1563 1.1 mrg vect_pattern_detected ("vec_regcog_popcount_pattern", popcount_stmt); 1564 1.1 mrg vec_type = get_vectype_for_scalar_type (vinfo, lhs_type); 1565 1.1 mrg /* Do it only if the backend has popcount<vector_mode>2 pattern. */ 1566 1.1 mrg if (!vec_type 1567 1.1 mrg || !direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type, 1568 1.1 mrg OPTIMIZE_FOR_SPEED)) 1569 1.1 mrg return NULL; 1570 1.1 mrg 1571 1.1 mrg /* Create B = .POPCOUNT (A). */ 1572 1.1 mrg new_var = vect_recog_temp_ssa_var (lhs_type, NULL); 1573 1.1 mrg pattern_stmt = gimple_build_call_internal_vec (IFN_POPCOUNT, vargs); 1574 1.1 mrg gimple_call_set_lhs (pattern_stmt, new_var); 1575 1.1 mrg gimple_set_location (pattern_stmt, gimple_location (last_stmt)); 1576 1.1 mrg *type_out = vec_type; 1577 1.1 mrg 1578 1.1 mrg if (dump_enabled_p ()) 1579 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, 1580 1.1 mrg "created pattern stmt: %G", pattern_stmt); 1581 1.1 mrg return pattern_stmt; 1582 1.1 mrg } 1583 1.1 mrg 1584 1.1 mrg /* Function vect_recog_pow_pattern 1585 1.1 mrg 1586 1.1 mrg Try to find the following pattern: 1587 1.1 mrg 1588 1.1 mrg x = POW (y, N); 1589 1.1 mrg 1590 1.1 mrg with POW being one of pow, powf, powi, powif and N being 1591 1.1 mrg either 2 or 0.5. 1592 1.1 mrg 1593 1.1 mrg Input: 1594 1.1 mrg 1595 1.1 mrg * STMT_VINFO: The stmt from which the pattern search begins. 1596 1.1 mrg 1597 1.1 mrg Output: 1598 1.1 mrg 1599 1.1 mrg * TYPE_OUT: The type of the output of this pattern. 1600 1.1 mrg 1601 1.1 mrg * Return value: A new stmt that will be used to replace the sequence of 1602 1.1 mrg stmts that constitute the pattern. 
In this case it will be: 1603 1.1 mrg x = x * x 1604 1.1 mrg or 1605 1.1 mrg x = sqrt (x) 1606 1.1 mrg */ 1607 1.1 mrg 1608 1.1 mrg static gimple * 1609 1.1 mrg vect_recog_pow_pattern (vec_info *vinfo, 1610 1.1 mrg stmt_vec_info stmt_vinfo, tree *type_out) 1611 1.1 mrg { 1612 1.1 mrg gimple *last_stmt = stmt_vinfo->stmt; 1613 1.1 mrg tree base, exp; 1614 1.1 mrg gimple *stmt; 1615 1.1 mrg tree var; 1616 1.1 mrg 1617 1.1 mrg if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL) 1618 1.1 mrg return NULL; 1619 1.1 mrg 1620 1.1 mrg switch (gimple_call_combined_fn (last_stmt)) 1621 1.1 mrg { 1622 1.1 mrg CASE_CFN_POW: 1623 1.1 mrg CASE_CFN_POWI: 1624 1.1 mrg break; 1625 1.1 mrg 1626 1.1 mrg default: 1627 1.1 mrg return NULL; 1628 1.1 mrg } 1629 1.1 mrg 1630 1.1 mrg base = gimple_call_arg (last_stmt, 0); 1631 1.1 mrg exp = gimple_call_arg (last_stmt, 1); 1632 1.1 mrg if (TREE_CODE (exp) != REAL_CST 1633 1.1 mrg && TREE_CODE (exp) != INTEGER_CST) 1634 1.1 mrg { 1635 1.1 mrg if (flag_unsafe_math_optimizations 1636 1.1 mrg && TREE_CODE (base) == REAL_CST 1637 1.1 mrg && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL)) 1638 1.1 mrg { 1639 1.1 mrg combined_fn log_cfn; 1640 1.1 mrg built_in_function exp_bfn; 1641 1.1 mrg switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt))) 1642 1.1 mrg { 1643 1.1 mrg case BUILT_IN_POW: 1644 1.1 mrg log_cfn = CFN_BUILT_IN_LOG; 1645 1.1 mrg exp_bfn = BUILT_IN_EXP; 1646 1.1 mrg break; 1647 1.1 mrg case BUILT_IN_POWF: 1648 1.1 mrg log_cfn = CFN_BUILT_IN_LOGF; 1649 1.1 mrg exp_bfn = BUILT_IN_EXPF; 1650 1.1 mrg break; 1651 1.1 mrg case BUILT_IN_POWL: 1652 1.1 mrg log_cfn = CFN_BUILT_IN_LOGL; 1653 1.1 mrg exp_bfn = BUILT_IN_EXPL; 1654 1.1 mrg break; 1655 1.1 mrg default: 1656 1.1 mrg return NULL; 1657 1.1 mrg } 1658 1.1 mrg tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base); 1659 1.1 mrg tree exp_decl = builtin_decl_implicit (exp_bfn); 1660 1.1 mrg /* Optimize pow (C, x) as exp (log (C) * x). 
Normally match.pd 1661 1.1 mrg does that, but if C is a power of 2, we want to use 1662 1.1 mrg exp2 (log2 (C) * x) in the non-vectorized version, but for 1663 1.1 mrg vectorization we don't have vectorized exp2. */ 1664 1.1 mrg if (logc 1665 1.1 mrg && TREE_CODE (logc) == REAL_CST 1666 1.1 mrg && exp_decl 1667 1.1 mrg && lookup_attribute ("omp declare simd", 1668 1.1 mrg DECL_ATTRIBUTES (exp_decl))) 1669 1.1 mrg { 1670 1.1 mrg cgraph_node *node = cgraph_node::get_create (exp_decl); 1671 1.1 mrg if (node->simd_clones == NULL) 1672 1.1 mrg { 1673 1.1 mrg if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL 1674 1.1 mrg || node->definition) 1675 1.1 mrg return NULL; 1676 1.1 mrg expand_simd_clones (node); 1677 1.1 mrg if (node->simd_clones == NULL) 1678 1.1 mrg return NULL; 1679 1.1 mrg } 1680 1.1 mrg *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base)); 1681 1.1 mrg if (!*type_out) 1682 1.1 mrg return NULL; 1683 1.1 mrg tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL); 1684 1.1 mrg gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc); 1685 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, g); 1686 1.1 mrg tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL); 1687 1.1 mrg g = gimple_build_call (exp_decl, 1, def); 1688 1.1 mrg gimple_call_set_lhs (g, res); 1689 1.1 mrg return g; 1690 1.1 mrg } 1691 1.1 mrg } 1692 1.1 mrg 1693 1.1 mrg return NULL; 1694 1.1 mrg } 1695 1.1 mrg 1696 1.1 mrg /* We now have a pow or powi builtin function call with a constant 1697 1.1 mrg exponent. */ 1698 1.1 mrg 1699 1.1 mrg /* Catch squaring. 
*/ 1700 1.1 mrg if ((tree_fits_shwi_p (exp) 1701 1.1 mrg && tree_to_shwi (exp) == 2) 1702 1.1 mrg || (TREE_CODE (exp) == REAL_CST 1703 1.1 mrg && real_equal (&TREE_REAL_CST (exp), &dconst2))) 1704 1.1 mrg { 1705 1.1 mrg if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR, 1706 1.1 mrg TREE_TYPE (base), type_out)) 1707 1.1 mrg return NULL; 1708 1.1 mrg 1709 1.1 mrg var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL); 1710 1.1 mrg stmt = gimple_build_assign (var, MULT_EXPR, base, base); 1711 1.1 mrg return stmt; 1712 1.1 mrg } 1713 1.1 mrg 1714 1.1 mrg /* Catch square root. */ 1715 1.1 mrg if (TREE_CODE (exp) == REAL_CST 1716 1.1 mrg && real_equal (&TREE_REAL_CST (exp), &dconsthalf)) 1717 1.1 mrg { 1718 1.1 mrg *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base)); 1719 1.1 mrg if (*type_out 1720 1.1 mrg && direct_internal_fn_supported_p (IFN_SQRT, *type_out, 1721 1.1 mrg OPTIMIZE_FOR_SPEED)) 1722 1.1 mrg { 1723 1.1 mrg gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base); 1724 1.1 mrg var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt); 1725 1.1 mrg gimple_call_set_lhs (stmt, var); 1726 1.1 mrg gimple_call_set_nothrow (stmt, true); 1727 1.1 mrg return stmt; 1728 1.1 mrg } 1729 1.1 mrg } 1730 1.1 mrg 1731 1.1 mrg return NULL; 1732 1.1 mrg } 1733 1.1 mrg 1734 1.1 mrg 1735 1.1 mrg /* Function vect_recog_widen_sum_pattern 1736 1.1 mrg 1737 1.1 mrg Try to find the following pattern: 1738 1.1 mrg 1739 1.1 mrg type x_t; 1740 1.1 mrg TYPE x_T, sum = init; 1741 1.1 mrg loop: 1742 1.1 mrg sum_0 = phi <init, sum_1> 1743 1.1 mrg S1 x_t = *p; 1744 1.1 mrg S2 x_T = (TYPE) x_t; 1745 1.1 mrg S3 sum_1 = x_T + sum_0; 1746 1.1 mrg 1747 1.1 mrg where type 'TYPE' is at least double the size of type 'type', i.e - we're 1748 1.1 mrg summing elements of type 'type' into an accumulator of type 'TYPE'. This is 1749 1.1 mrg a special case of a reduction computation. 
1750 1.1 mrg 1751 1.1 mrg Input: 1752 1.1 mrg 1753 1.1 mrg * STMT_VINFO: The stmt from which the pattern search begins. In the example, 1754 1.1 mrg when this function is called with S3, the pattern {S2,S3} will be detected. 1755 1.1 mrg 1756 1.1 mrg Output: 1757 1.1 mrg 1758 1.1 mrg * TYPE_OUT: The type of the output of this pattern. 1759 1.1 mrg 1760 1.1 mrg * Return value: A new stmt that will be used to replace the sequence of 1761 1.1 mrg stmts that constitute the pattern. In this case it will be: 1762 1.1 mrg WIDEN_SUM <x_t, sum_0> 1763 1.1 mrg 1764 1.1 mrg Note: The widening-sum idiom is a widening reduction pattern that is 1765 1.1 mrg vectorized without preserving all the intermediate results. It 1766 1.1 mrg produces only N/2 (widened) results (by summing up pairs of 1767 1.1 mrg intermediate results) rather than all N results. Therefore, we 1768 1.1 mrg cannot allow this pattern when we want to get all the results and in 1769 1.1 mrg the correct order (as is the case when this computation is in an 1770 1.1 mrg inner-loop nested in an outer-loop that us being vectorized). */ 1771 1.1 mrg 1772 1.1 mrg static gimple * 1773 1.1 mrg vect_recog_widen_sum_pattern (vec_info *vinfo, 1774 1.1 mrg stmt_vec_info stmt_vinfo, tree *type_out) 1775 1.1 mrg { 1776 1.1 mrg gimple *last_stmt = stmt_vinfo->stmt; 1777 1.1 mrg tree oprnd0, oprnd1; 1778 1.1 mrg tree type; 1779 1.1 mrg gimple *pattern_stmt; 1780 1.1 mrg tree var; 1781 1.1 mrg 1782 1.1 mrg /* Look for the following pattern 1783 1.1 mrg DX = (TYPE) X; 1784 1.1 mrg sum_1 = DX + sum_0; 1785 1.1 mrg In which DX is at least double the size of X, and sum_1 has been 1786 1.1 mrg recognized as a reduction variable. 1787 1.1 mrg */ 1788 1.1 mrg 1789 1.1 mrg /* Starting from LAST_STMT, follow the defs of its uses in search 1790 1.1 mrg of the above pattern. 
*/ 1791 1.1 mrg 1792 1.1 mrg if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR, 1793 1.1 mrg &oprnd0, &oprnd1) 1794 1.1 mrg || TREE_CODE (oprnd0) != SSA_NAME 1795 1.1 mrg || !vinfo->lookup_def (oprnd0)) 1796 1.1 mrg return NULL; 1797 1.1 mrg 1798 1.1 mrg type = TREE_TYPE (gimple_get_lhs (last_stmt)); 1799 1.1 mrg 1800 1.1 mrg /* So far so good. Since last_stmt was detected as a (summation) reduction, 1801 1.1 mrg we know that oprnd1 is the reduction variable (defined by a loop-header 1802 1.1 mrg phi), and oprnd0 is an ssa-name defined by a stmt in the loop body. 1803 1.1 mrg Left to check that oprnd0 is defined by a cast from type 'type' to type 1804 1.1 mrg 'TYPE'. */ 1805 1.1 mrg 1806 1.1 mrg vect_unpromoted_value unprom0; 1807 1.1 mrg if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0) 1808 1.1 mrg || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type)) 1809 1.1 mrg return NULL; 1810 1.1 mrg 1811 1.1 mrg vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt); 1812 1.1 mrg 1813 1.1 mrg if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR, 1814 1.1 mrg unprom0.type, type_out)) 1815 1.1 mrg return NULL; 1816 1.1 mrg 1817 1.1 mrg var = vect_recog_temp_ssa_var (type, NULL); 1818 1.1 mrg pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1); 1819 1.1 mrg 1820 1.1 mrg return pattern_stmt; 1821 1.1 mrg } 1822 1.1 mrg 1823 1.1 mrg /* Recognize cases in which an operation is performed in one type WTYPE 1824 1.1 mrg but could be done more efficiently in a narrower type NTYPE. 
For example, 1825 1.1 mrg if we have: 1826 1.1 mrg 1827 1.1 mrg ATYPE a; // narrower than NTYPE 1828 1.1 mrg BTYPE b; // narrower than NTYPE 1829 1.1 mrg WTYPE aw = (WTYPE) a; 1830 1.1 mrg WTYPE bw = (WTYPE) b; 1831 1.1 mrg WTYPE res = aw + bw; // only uses of aw and bw 1832 1.1 mrg 1833 1.1 mrg then it would be more efficient to do: 1834 1.1 mrg 1835 1.1 mrg NTYPE an = (NTYPE) a; 1836 1.1 mrg NTYPE bn = (NTYPE) b; 1837 1.1 mrg NTYPE resn = an + bn; 1838 1.1 mrg WTYPE res = (WTYPE) resn; 1839 1.1 mrg 1840 1.1 mrg Other situations include things like: 1841 1.1 mrg 1842 1.1 mrg ATYPE a; // NTYPE or narrower 1843 1.1 mrg WTYPE aw = (WTYPE) a; 1844 1.1 mrg WTYPE res = aw + b; 1845 1.1 mrg 1846 1.1 mrg when only "(NTYPE) res" is significant. In that case it's more efficient 1847 1.1 mrg to truncate "b" and do the operation on NTYPE instead: 1848 1.1 mrg 1849 1.1 mrg NTYPE an = (NTYPE) a; 1850 1.1 mrg NTYPE bn = (NTYPE) b; // truncation 1851 1.1 mrg NTYPE resn = an + bn; 1852 1.1 mrg WTYPE res = (WTYPE) resn; 1853 1.1 mrg 1854 1.1 mrg All users of "res" should then use "resn" instead, making the final 1855 1.1 mrg statement dead (not marked as relevant). The final statement is still 1856 1.1 mrg needed to maintain the type correctness of the IR. 1857 1.1 mrg 1858 1.1 mrg vect_determine_precisions has already determined the minimum 1859 1.1 mrg precison of the operation and the minimum precision required 1860 1.1 mrg by users of the result. */ 1861 1.1 mrg 1862 1.1 mrg static gimple * 1863 1.1 mrg vect_recog_over_widening_pattern (vec_info *vinfo, 1864 1.1 mrg stmt_vec_info last_stmt_info, tree *type_out) 1865 1.1 mrg { 1866 1.1 mrg gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt); 1867 1.1 mrg if (!last_stmt) 1868 1.1 mrg return NULL; 1869 1.1 mrg 1870 1.1 mrg /* See whether we have found that this operation can be done on a 1871 1.1 mrg narrower type without changing its semantics. 
*/ 1872 1.1 mrg unsigned int new_precision = last_stmt_info->operation_precision; 1873 1.1 mrg if (!new_precision) 1874 1.1 mrg return NULL; 1875 1.1 mrg 1876 1.1 mrg tree lhs = gimple_assign_lhs (last_stmt); 1877 1.1 mrg tree type = TREE_TYPE (lhs); 1878 1.1 mrg tree_code code = gimple_assign_rhs_code (last_stmt); 1879 1.1 mrg 1880 1.1 mrg /* Punt for reductions where we don't handle the type conversions. */ 1881 1.1 mrg if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def) 1882 1.1 mrg return NULL; 1883 1.1 mrg 1884 1.1 mrg /* Keep the first operand of a COND_EXPR as-is: only the other two 1885 1.1 mrg operands are interesting. */ 1886 1.1 mrg unsigned int first_op = (code == COND_EXPR ? 2 : 1); 1887 1.1 mrg 1888 1.1 mrg /* Check the operands. */ 1889 1.1 mrg unsigned int nops = gimple_num_ops (last_stmt) - first_op; 1890 1.1 mrg auto_vec <vect_unpromoted_value, 3> unprom (nops); 1891 1.1 mrg unprom.quick_grow (nops); 1892 1.1 mrg unsigned int min_precision = 0; 1893 1.1 mrg bool single_use_p = false; 1894 1.1 mrg for (unsigned int i = 0; i < nops; ++i) 1895 1.1 mrg { 1896 1.1 mrg tree op = gimple_op (last_stmt, first_op + i); 1897 1.1 mrg if (TREE_CODE (op) == INTEGER_CST) 1898 1.1 mrg unprom[i].set_op (op, vect_constant_def); 1899 1.1 mrg else if (TREE_CODE (op) == SSA_NAME) 1900 1.1 mrg { 1901 1.1 mrg bool op_single_use_p = true; 1902 1.1 mrg if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i], 1903 1.1 mrg &op_single_use_p)) 1904 1.1 mrg return NULL; 1905 1.1 mrg /* If: 1906 1.1 mrg 1907 1.1 mrg (1) N bits of the result are needed; 1908 1.1 mrg (2) all inputs are widened from M<N bits; and 1909 1.1 mrg (3) one operand OP is a single-use SSA name 1910 1.1 mrg 1911 1.1 mrg we can shift the M->N widening from OP to the output 1912 1.1 mrg without changing the number or type of extensions involved. 1913 1.1 mrg This then reduces the number of copies of STMT_INFO. 
1914 1.1 mrg 1915 1.1 mrg If instead of (3) more than one operand is a single-use SSA name, 1916 1.1 mrg shifting the extension to the output is even more of a win. 1917 1.1 mrg 1918 1.1 mrg If instead: 1919 1.1 mrg 1920 1.1 mrg (1) N bits of the result are needed; 1921 1.1 mrg (2) one operand OP2 is widened from M2<N bits; 1922 1.1 mrg (3) another operand OP1 is widened from M1<M2 bits; and 1923 1.1 mrg (4) both OP1 and OP2 are single-use 1924 1.1 mrg 1925 1.1 mrg the choice is between: 1926 1.1 mrg 1927 1.1 mrg (a) truncating OP2 to M1, doing the operation on M1, 1928 1.1 mrg and then widening the result to N 1929 1.1 mrg 1930 1.1 mrg (b) widening OP1 to M2, doing the operation on M2, and then 1931 1.1 mrg widening the result to N 1932 1.1 mrg 1933 1.1 mrg Both shift the M2->N widening of the inputs to the output. 1934 1.1 mrg (a) additionally shifts the M1->M2 widening to the output; 1935 1.1 mrg it requires fewer copies of STMT_INFO but requires an extra 1936 1.1 mrg M2->M1 truncation. 1937 1.1 mrg 1938 1.1 mrg Which is better will depend on the complexity and cost of 1939 1.1 mrg STMT_INFO, which is hard to predict at this stage. However, 1940 1.1 mrg a clear tie-breaker in favor of (b) is the fact that the 1941 1.1 mrg truncation in (a) increases the length of the operation chain. 1942 1.1 mrg 1943 1.1 mrg If instead of (4) only one of OP1 or OP2 is single-use, 1944 1.1 mrg (b) is still a win over doing the operation in N bits: 1945 1.1 mrg it still shifts the M2->N widening on the single-use operand 1946 1.1 mrg to the output and reduces the number of STMT_INFO copies. 1947 1.1 mrg 1948 1.1 mrg If neither operand is single-use then operating on fewer than 1949 1.1 mrg N bits might lead to more extensions overall. 
Whether it does 1950 1.1 mrg or not depends on global information about the vectorization 1951 1.1 mrg region, and whether that's a good trade-off would again 1952 1.1 mrg depend on the complexity and cost of the statements involved, 1953 1.1 mrg as well as things like register pressure that are not normally 1954 1.1 mrg modelled at this stage. We therefore ignore these cases 1955 1.1 mrg and just optimize the clear single-use wins above. 1956 1.1 mrg 1957 1.1 mrg Thus we take the maximum precision of the unpromoted operands 1958 1.1 mrg and record whether any operand is single-use. */ 1959 1.1 mrg if (unprom[i].dt == vect_internal_def) 1960 1.1 mrg { 1961 1.1 mrg min_precision = MAX (min_precision, 1962 1.1 mrg TYPE_PRECISION (unprom[i].type)); 1963 1.1 mrg single_use_p |= op_single_use_p; 1964 1.1 mrg } 1965 1.1 mrg } 1966 1.1 mrg else 1967 1.1 mrg return NULL; 1968 1.1 mrg } 1969 1.1 mrg 1970 1.1 mrg /* Although the operation could be done in operation_precision, we have 1971 1.1 mrg to balance that against introducing extra truncations or extensions. 1972 1.1 mrg Calculate the minimum precision that can be handled efficiently. 1973 1.1 mrg 1974 1.1 mrg The loop above determined that the operation could be handled 1975 1.1 mrg efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an 1976 1.1 mrg extension from the inputs to the output without introducing more 1977 1.1 mrg instructions, and would reduce the number of instructions required 1978 1.1 mrg for STMT_INFO itself. 1979 1.1 mrg 1980 1.1 mrg vect_determine_precisions has also determined that the result only 1981 1.1 mrg needs min_output_precision bits. Truncating by a factor of N times 1982 1.1 mrg requires a tree of N - 1 instructions, so if TYPE is N times wider 1983 1.1 mrg than min_output_precision, doing the operation in TYPE and truncating 1984 1.1 mrg the result requires N + (N - 1) = 2N - 1 instructions per output vector. 
1985 1.1 mrg In contrast: 1986 1.1 mrg 1987 1.1 mrg - truncating the input to a unary operation and doing the operation 1988 1.1 mrg in the new type requires at most N - 1 + 1 = N instructions per 1989 1.1 mrg output vector 1990 1.1 mrg 1991 1.1 mrg - doing the same for a binary operation requires at most 1992 1.1 mrg (N - 1) * 2 + 1 = 2N - 1 instructions per output vector 1993 1.1 mrg 1994 1.1 mrg Both unary and binary operations require fewer instructions than 1995 1.1 mrg this if the operands were extended from a suitable truncated form. 1996 1.1 mrg Thus there is usually nothing to lose by doing operations in 1997 1.1 mrg min_output_precision bits, but there can be something to gain. */ 1998 1.1 mrg if (!single_use_p) 1999 1.1 mrg min_precision = last_stmt_info->min_output_precision; 2000 1.1 mrg else 2001 1.1 mrg min_precision = MIN (min_precision, last_stmt_info->min_output_precision); 2002 1.1 mrg 2003 1.1 mrg /* Apply the minimum efficient precision we just calculated. */ 2004 1.1 mrg if (new_precision < min_precision) 2005 1.1 mrg new_precision = min_precision; 2006 1.1 mrg new_precision = vect_element_precision (new_precision); 2007 1.1 mrg if (new_precision >= TYPE_PRECISION (type)) 2008 1.1 mrg return NULL; 2009 1.1 mrg 2010 1.1 mrg vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt); 2011 1.1 mrg 2012 1.1 mrg *type_out = get_vectype_for_scalar_type (vinfo, type); 2013 1.1 mrg if (!*type_out) 2014 1.1 mrg return NULL; 2015 1.1 mrg 2016 1.1 mrg /* We've found a viable pattern. Get the new type of the operation. */ 2017 1.1 mrg bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED); 2018 1.1 mrg tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p); 2019 1.1 mrg 2020 1.1 mrg /* If we're truncating an operation, we need to make sure that we 2021 1.1 mrg don't introduce new undefined overflow. The codes tested here are 2022 1.1 mrg a subset of those accepted by vect_truncatable_operation_p. 
*/ 2023 1.1 mrg tree op_type = new_type; 2024 1.1 mrg if (TYPE_OVERFLOW_UNDEFINED (new_type) 2025 1.1 mrg && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR)) 2026 1.1 mrg op_type = build_nonstandard_integer_type (new_precision, true); 2027 1.1 mrg 2028 1.1 mrg /* We specifically don't check here whether the target supports the 2029 1.1 mrg new operation, since it might be something that a later pattern 2030 1.1 mrg wants to rewrite anyway. If targets have a minimum element size 2031 1.1 mrg for some optabs, we should pattern-match smaller ops to larger ops 2032 1.1 mrg where beneficial. */ 2033 1.1 mrg tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type); 2034 1.1 mrg tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type); 2035 1.1 mrg if (!new_vectype || !op_vectype) 2036 1.1 mrg return NULL; 2037 1.1 mrg 2038 1.1 mrg if (dump_enabled_p ()) 2039 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n", 2040 1.1 mrg type, new_type); 2041 1.1 mrg 2042 1.1 mrg /* Calculate the rhs operands for an operation on OP_TYPE. */ 2043 1.1 mrg tree ops[3] = {}; 2044 1.1 mrg for (unsigned int i = 1; i < first_op; ++i) 2045 1.1 mrg ops[i - 1] = gimple_op (last_stmt, i); 2046 1.1 mrg vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1], 2047 1.1 mrg op_type, &unprom[0], op_vectype); 2048 1.1 mrg 2049 1.1 mrg /* Use the operation to produce a result of type OP_TYPE. */ 2050 1.1 mrg tree new_var = vect_recog_temp_ssa_var (op_type, NULL); 2051 1.1 mrg gimple *pattern_stmt = gimple_build_assign (new_var, code, 2052 1.1 mrg ops[0], ops[1], ops[2]); 2053 1.1 mrg gimple_set_location (pattern_stmt, gimple_location (last_stmt)); 2054 1.1 mrg 2055 1.1 mrg if (dump_enabled_p ()) 2056 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, 2057 1.1 mrg "created pattern stmt: %G", pattern_stmt); 2058 1.1 mrg 2059 1.1 mrg /* Convert back to the original signedness, if OP_TYPE is different 2060 1.1 mrg from NEW_TYPE. 
*/ 2061 1.1 mrg if (op_type != new_type) 2062 1.1 mrg pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type, 2063 1.1 mrg pattern_stmt, op_vectype); 2064 1.1 mrg 2065 1.1 mrg /* Promote the result to the original type. */ 2066 1.1 mrg pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type, 2067 1.1 mrg pattern_stmt, new_vectype); 2068 1.1 mrg 2069 1.1 mrg return pattern_stmt; 2070 1.1 mrg } 2071 1.1 mrg 2072 1.1 mrg /* Recognize the following patterns: 2073 1.1 mrg 2074 1.1 mrg ATYPE a; // narrower than TYPE 2075 1.1 mrg BTYPE b; // narrower than TYPE 2076 1.1 mrg 2077 1.1 mrg 1) Multiply high with scaling 2078 1.1 mrg TYPE res = ((TYPE) a * (TYPE) b) >> c; 2079 1.1 mrg Here, c is bitsize (TYPE) / 2 - 1. 2080 1.1 mrg 2081 1.1 mrg 2) ... or also with rounding 2082 1.1 mrg TYPE res = (((TYPE) a * (TYPE) b) >> d + 1) >> 1; 2083 1.1 mrg Here, d is bitsize (TYPE) / 2 - 2. 2084 1.1 mrg 2085 1.1 mrg 3) Normal multiply high 2086 1.1 mrg TYPE res = ((TYPE) a * (TYPE) b) >> e; 2087 1.1 mrg Here, e is bitsize (TYPE) / 2. 2088 1.1 mrg 2089 1.1 mrg where only the bottom half of res is used. */ 2090 1.1 mrg 2091 1.1 mrg static gimple * 2092 1.1 mrg vect_recog_mulhs_pattern (vec_info *vinfo, 2093 1.1 mrg stmt_vec_info last_stmt_info, tree *type_out) 2094 1.1 mrg { 2095 1.1 mrg /* Check for a right shift. */ 2096 1.1 mrg gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt); 2097 1.1 mrg if (!last_stmt 2098 1.1 mrg || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR) 2099 1.1 mrg return NULL; 2100 1.1 mrg 2101 1.1 mrg /* Check that the shift result is wider than the users of the 2102 1.1 mrg result need (i.e. that narrowing would be a natural choice). 
*/ 2103 1.1 mrg tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt)); 2104 1.1 mrg unsigned int target_precision 2105 1.1 mrg = vect_element_precision (last_stmt_info->min_output_precision); 2106 1.1 mrg if (!INTEGRAL_TYPE_P (lhs_type) 2107 1.1 mrg || target_precision >= TYPE_PRECISION (lhs_type)) 2108 1.1 mrg return NULL; 2109 1.1 mrg 2110 1.1 mrg /* Look through any change in sign on the outer shift input. */ 2111 1.1 mrg vect_unpromoted_value unprom_rshift_input; 2112 1.1 mrg tree rshift_input = vect_look_through_possible_promotion 2113 1.1 mrg (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input); 2114 1.1 mrg if (!rshift_input 2115 1.1 mrg || TYPE_PRECISION (TREE_TYPE (rshift_input)) 2116 1.1 mrg != TYPE_PRECISION (lhs_type)) 2117 1.1 mrg return NULL; 2118 1.1 mrg 2119 1.1 mrg /* Get the definition of the shift input. */ 2120 1.1 mrg stmt_vec_info rshift_input_stmt_info 2121 1.1 mrg = vect_get_internal_def (vinfo, rshift_input); 2122 1.1 mrg if (!rshift_input_stmt_info) 2123 1.1 mrg return NULL; 2124 1.1 mrg gassign *rshift_input_stmt 2125 1.1 mrg = dyn_cast <gassign *> (rshift_input_stmt_info->stmt); 2126 1.1 mrg if (!rshift_input_stmt) 2127 1.1 mrg return NULL; 2128 1.1 mrg 2129 1.1 mrg stmt_vec_info mulh_stmt_info; 2130 1.1 mrg tree scale_term; 2131 1.1 mrg bool rounding_p = false; 2132 1.1 mrg 2133 1.1 mrg /* Check for the presence of the rounding term. */ 2134 1.1 mrg if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR) 2135 1.1 mrg { 2136 1.1 mrg /* Check that the outer shift was by 1. */ 2137 1.1 mrg if (!integer_onep (gimple_assign_rhs2 (last_stmt))) 2138 1.1 mrg return NULL; 2139 1.1 mrg 2140 1.1 mrg /* Check that the second operand of the PLUS_EXPR is 1. */ 2141 1.1 mrg if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt))) 2142 1.1 mrg return NULL; 2143 1.1 mrg 2144 1.1 mrg /* Look through any change in sign on the addition input. 
*/ 2145 1.1 mrg vect_unpromoted_value unprom_plus_input; 2146 1.1 mrg tree plus_input = vect_look_through_possible_promotion 2147 1.1 mrg (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input); 2148 1.1 mrg if (!plus_input 2149 1.1 mrg || TYPE_PRECISION (TREE_TYPE (plus_input)) 2150 1.1 mrg != TYPE_PRECISION (TREE_TYPE (rshift_input))) 2151 1.1 mrg return NULL; 2152 1.1 mrg 2153 1.1 mrg /* Get the definition of the multiply-high-scale part. */ 2154 1.1 mrg stmt_vec_info plus_input_stmt_info 2155 1.1 mrg = vect_get_internal_def (vinfo, plus_input); 2156 1.1 mrg if (!plus_input_stmt_info) 2157 1.1 mrg return NULL; 2158 1.1 mrg gassign *plus_input_stmt 2159 1.1 mrg = dyn_cast <gassign *> (plus_input_stmt_info->stmt); 2160 1.1 mrg if (!plus_input_stmt 2161 1.1 mrg || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR) 2162 1.1 mrg return NULL; 2163 1.1 mrg 2164 1.1 mrg /* Look through any change in sign on the scaling input. */ 2165 1.1 mrg vect_unpromoted_value unprom_scale_input; 2166 1.1 mrg tree scale_input = vect_look_through_possible_promotion 2167 1.1 mrg (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input); 2168 1.1 mrg if (!scale_input 2169 1.1 mrg || TYPE_PRECISION (TREE_TYPE (scale_input)) 2170 1.1 mrg != TYPE_PRECISION (TREE_TYPE (plus_input))) 2171 1.1 mrg return NULL; 2172 1.1 mrg 2173 1.1 mrg /* Get the definition of the multiply-high part. */ 2174 1.1 mrg mulh_stmt_info = vect_get_internal_def (vinfo, scale_input); 2175 1.1 mrg if (!mulh_stmt_info) 2176 1.1 mrg return NULL; 2177 1.1 mrg 2178 1.1 mrg /* Get the scaling term. */ 2179 1.1 mrg scale_term = gimple_assign_rhs2 (plus_input_stmt); 2180 1.1 mrg rounding_p = true; 2181 1.1 mrg } 2182 1.1 mrg else 2183 1.1 mrg { 2184 1.1 mrg mulh_stmt_info = rshift_input_stmt_info; 2185 1.1 mrg scale_term = gimple_assign_rhs2 (last_stmt); 2186 1.1 mrg } 2187 1.1 mrg 2188 1.1 mrg /* Check that the scaling factor is constant. 
*/ 2189 1.1 mrg if (TREE_CODE (scale_term) != INTEGER_CST) 2190 1.1 mrg return NULL; 2191 1.1 mrg 2192 1.1 mrg /* Check whether the scaling input term can be seen as two widened 2193 1.1 mrg inputs multiplied together. */ 2194 1.1 mrg vect_unpromoted_value unprom_mult[2]; 2195 1.1 mrg tree new_type; 2196 1.1 mrg unsigned int nops 2197 1.1 mrg = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR, 2198 1.1 mrg false, 2, unprom_mult, &new_type); 2199 1.1 mrg if (nops != 2) 2200 1.1 mrg return NULL; 2201 1.1 mrg 2202 1.1 mrg /* Adjust output precision. */ 2203 1.1 mrg if (TYPE_PRECISION (new_type) < target_precision) 2204 1.1 mrg new_type = build_nonstandard_integer_type 2205 1.1 mrg (target_precision, TYPE_UNSIGNED (new_type)); 2206 1.1 mrg 2207 1.1 mrg unsigned mult_precision = TYPE_PRECISION (new_type); 2208 1.1 mrg internal_fn ifn; 2209 1.1 mrg /* Check that the scaling factor is expected. Instead of 2210 1.1 mrg target_precision, we should use the one that we actually 2211 1.1 mrg use for internal function. */ 2212 1.1 mrg if (rounding_p) 2213 1.1 mrg { 2214 1.1 mrg /* Check pattern 2). */ 2215 1.1 mrg if (wi::to_widest (scale_term) + mult_precision + 2 2216 1.1 mrg != TYPE_PRECISION (lhs_type)) 2217 1.1 mrg return NULL; 2218 1.1 mrg 2219 1.1 mrg ifn = IFN_MULHRS; 2220 1.1 mrg } 2221 1.1 mrg else 2222 1.1 mrg { 2223 1.1 mrg /* Check for pattern 1). */ 2224 1.1 mrg if (wi::to_widest (scale_term) + mult_precision + 1 2225 1.1 mrg == TYPE_PRECISION (lhs_type)) 2226 1.1 mrg ifn = IFN_MULHS; 2227 1.1 mrg /* Check for pattern 3). */ 2228 1.1 mrg else if (wi::to_widest (scale_term) + mult_precision 2229 1.1 mrg == TYPE_PRECISION (lhs_type)) 2230 1.1 mrg ifn = IFN_MULH; 2231 1.1 mrg else 2232 1.1 mrg return NULL; 2233 1.1 mrg } 2234 1.1 mrg 2235 1.1 mrg vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt); 2236 1.1 mrg 2237 1.1 mrg /* Check for target support. 
*/ 2238 1.1 mrg tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type); 2239 1.1 mrg if (!new_vectype 2240 1.1 mrg || !direct_internal_fn_supported_p 2241 1.1 mrg (ifn, new_vectype, OPTIMIZE_FOR_SPEED)) 2242 1.1 mrg return NULL; 2243 1.1 mrg 2244 1.1 mrg /* The IR requires a valid vector type for the cast result, even though 2245 1.1 mrg it's likely to be discarded. */ 2246 1.1 mrg *type_out = get_vectype_for_scalar_type (vinfo, lhs_type); 2247 1.1 mrg if (!*type_out) 2248 1.1 mrg return NULL; 2249 1.1 mrg 2250 1.1 mrg /* Generate the IFN_MULHRS call. */ 2251 1.1 mrg tree new_var = vect_recog_temp_ssa_var (new_type, NULL); 2252 1.1 mrg tree new_ops[2]; 2253 1.1 mrg vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type, 2254 1.1 mrg unprom_mult, new_vectype); 2255 1.1 mrg gcall *mulhrs_stmt 2256 1.1 mrg = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]); 2257 1.1 mrg gimple_call_set_lhs (mulhrs_stmt, new_var); 2258 1.1 mrg gimple_set_location (mulhrs_stmt, gimple_location (last_stmt)); 2259 1.1 mrg 2260 1.1 mrg if (dump_enabled_p ()) 2261 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, 2262 1.1 mrg "created pattern stmt: %G", mulhrs_stmt); 2263 1.1 mrg 2264 1.1 mrg return vect_convert_output (vinfo, last_stmt_info, lhs_type, 2265 1.1 mrg mulhrs_stmt, new_vectype); 2266 1.1 mrg } 2267 1.1 mrg 2268 1.1 mrg /* Recognize the patterns: 2269 1.1 mrg 2270 1.1 mrg ATYPE a; // narrower than TYPE 2271 1.1 mrg BTYPE b; // narrower than TYPE 2272 1.1 mrg (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1; 2273 1.1 mrg or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1; 2274 1.1 mrg 2275 1.1 mrg where only the bottom half of avg is used. 
Try to transform them into: 2276 1.1 mrg 2277 1.1 mrg (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b); 2278 1.1 mrg or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b); 2279 1.1 mrg 2280 1.1 mrg followed by: 2281 1.1 mrg 2282 1.1 mrg TYPE avg = (TYPE) avg'; 2283 1.1 mrg 2284 1.1 mrg where NTYPE is no wider than half of TYPE. Since only the bottom half 2285 1.1 mrg of avg is used, all or part of the cast of avg' should become redundant. 2286 1.1 mrg 2287 1.1 mrg If there is no target support available, generate code to distribute rshift 2288 1.1 mrg over plus and add a carry. */ 2289 1.1 mrg 2290 1.1 mrg static gimple * 2291 1.1 mrg vect_recog_average_pattern (vec_info *vinfo, 2292 1.1 mrg stmt_vec_info last_stmt_info, tree *type_out) 2293 1.1 mrg { 2294 1.1 mrg /* Check for a shift right by one bit. */ 2295 1.1 mrg gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt); 2296 1.1 mrg if (!last_stmt 2297 1.1 mrg || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR 2298 1.1 mrg || !integer_onep (gimple_assign_rhs2 (last_stmt))) 2299 1.1 mrg return NULL; 2300 1.1 mrg 2301 1.1 mrg /* Check that the shift result is wider than the users of the 2302 1.1 mrg result need (i.e. that narrowing would be a natural choice). */ 2303 1.1 mrg tree lhs = gimple_assign_lhs (last_stmt); 2304 1.1 mrg tree type = TREE_TYPE (lhs); 2305 1.1 mrg unsigned int target_precision 2306 1.1 mrg = vect_element_precision (last_stmt_info->min_output_precision); 2307 1.1 mrg if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type)) 2308 1.1 mrg return NULL; 2309 1.1 mrg 2310 1.1 mrg /* Look through any change in sign on the shift input. 
*/ 2311 1.1 mrg tree rshift_rhs = gimple_assign_rhs1 (last_stmt); 2312 1.1 mrg vect_unpromoted_value unprom_plus; 2313 1.1 mrg rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs, 2314 1.1 mrg &unprom_plus); 2315 1.1 mrg if (!rshift_rhs 2316 1.1 mrg || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type)) 2317 1.1 mrg return NULL; 2318 1.1 mrg 2319 1.1 mrg /* Get the definition of the shift input. */ 2320 1.1 mrg stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs); 2321 1.1 mrg if (!plus_stmt_info) 2322 1.1 mrg return NULL; 2323 1.1 mrg 2324 1.1 mrg /* Check whether the shift input can be seen as a tree of additions on 2325 1.1 mrg 2 or 3 widened inputs. 2326 1.1 mrg 2327 1.1 mrg Note that the pattern should be a win even if the result of one or 2328 1.1 mrg more additions is reused elsewhere: if the pattern matches, we'd be 2329 1.1 mrg replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */ 2330 1.1 mrg internal_fn ifn = IFN_AVG_FLOOR; 2331 1.1 mrg vect_unpromoted_value unprom[3]; 2332 1.1 mrg tree new_type; 2333 1.1 mrg unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR, 2334 1.1 mrg WIDEN_PLUS_EXPR, false, 3, 2335 1.1 mrg unprom, &new_type); 2336 1.1 mrg if (nops == 0) 2337 1.1 mrg return NULL; 2338 1.1 mrg if (nops == 3) 2339 1.1 mrg { 2340 1.1 mrg /* Check that one operand is 1. */ 2341 1.1 mrg unsigned int i; 2342 1.1 mrg for (i = 0; i < 3; ++i) 2343 1.1 mrg if (integer_onep (unprom[i].op)) 2344 1.1 mrg break; 2345 1.1 mrg if (i == 3) 2346 1.1 mrg return NULL; 2347 1.1 mrg /* Throw away the 1 operand and keep the other two. 
*/ 2348 1.1 mrg if (i < 2) 2349 1.1 mrg unprom[i] = unprom[2]; 2350 1.1 mrg ifn = IFN_AVG_CEIL; 2351 1.1 mrg } 2352 1.1 mrg 2353 1.1 mrg vect_pattern_detected ("vect_recog_average_pattern", last_stmt); 2354 1.1 mrg 2355 1.1 mrg /* We know that: 2356 1.1 mrg 2357 1.1 mrg (a) the operation can be viewed as: 2358 1.1 mrg 2359 1.1 mrg TYPE widened0 = (TYPE) UNPROM[0]; 2360 1.1 mrg TYPE widened1 = (TYPE) UNPROM[1]; 2361 1.1 mrg TYPE tmp1 = widened0 + widened1 {+ 1}; 2362 1.1 mrg TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO 2363 1.1 mrg 2364 1.1 mrg (b) the first two statements are equivalent to: 2365 1.1 mrg 2366 1.1 mrg TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0]; 2367 1.1 mrg TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1]; 2368 1.1 mrg 2369 1.1 mrg (c) vect_recog_over_widening_pattern has already tried to narrow TYPE 2370 1.1 mrg where sensible; 2371 1.1 mrg 2372 1.1 mrg (d) all the operations can be performed correctly at twice the width of 2373 1.1 mrg NEW_TYPE, due to the nature of the average operation; and 2374 1.1 mrg 2375 1.1 mrg (e) users of the result of the right shift need only TARGET_PRECISION 2376 1.1 mrg bits, where TARGET_PRECISION is no more than half of TYPE's 2377 1.1 mrg precision. 2378 1.1 mrg 2379 1.1 mrg Under these circumstances, the only situation in which NEW_TYPE 2380 1.1 mrg could be narrower than TARGET_PRECISION is if widened0, widened1 2381 1.1 mrg and an addition result are all used more than once. Thus we can 2382 1.1 mrg treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION 2383 1.1 mrg as "free", whereas widening the result of the average instruction 2384 1.1 mrg from NEW_TYPE to TARGET_PRECISION would be a new operation. It's 2385 1.1 mrg therefore better not to go narrower than TARGET_PRECISION. */ 2386 1.1 mrg if (TYPE_PRECISION (new_type) < target_precision) 2387 1.1 mrg new_type = build_nonstandard_integer_type (target_precision, 2388 1.1 mrg TYPE_UNSIGNED (new_type)); 2389 1.1 mrg 2390 1.1 mrg /* Check for target support. 
*/ 2391 1.1 mrg tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type); 2392 1.1 mrg if (!new_vectype) 2393 1.1 mrg return NULL; 2394 1.1 mrg 2395 1.1 mrg bool fallback_p = false; 2396 1.1 mrg 2397 1.1 mrg if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED)) 2398 1.1 mrg ; 2399 1.1 mrg else if (TYPE_UNSIGNED (new_type) 2400 1.1 mrg && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar) 2401 1.1 mrg && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default) 2402 1.1 mrg && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default) 2403 1.1 mrg && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default)) 2404 1.1 mrg fallback_p = true; 2405 1.1 mrg else 2406 1.1 mrg return NULL; 2407 1.1 mrg 2408 1.1 mrg /* The IR requires a valid vector type for the cast result, even though 2409 1.1 mrg it's likely to be discarded. */ 2410 1.1 mrg *type_out = get_vectype_for_scalar_type (vinfo, type); 2411 1.1 mrg if (!*type_out) 2412 1.1 mrg return NULL; 2413 1.1 mrg 2414 1.1 mrg tree new_var = vect_recog_temp_ssa_var (new_type, NULL); 2415 1.1 mrg tree new_ops[2]; 2416 1.1 mrg vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type, 2417 1.1 mrg unprom, new_vectype); 2418 1.1 mrg 2419 1.1 mrg if (fallback_p) 2420 1.1 mrg { 2421 1.1 mrg /* As a fallback, generate code for following sequence: 2422 1.1 mrg 2423 1.1 mrg shifted_op0 = new_ops[0] >> 1; 2424 1.1 mrg shifted_op1 = new_ops[1] >> 1; 2425 1.1 mrg sum_of_shifted = shifted_op0 + shifted_op1; 2426 1.1 mrg unmasked_carry = new_ops[0] and/or new_ops[1]; 2427 1.1 mrg carry = unmasked_carry & 1; 2428 1.1 mrg new_var = sum_of_shifted + carry; 2429 1.1 mrg */ 2430 1.1 mrg 2431 1.1 mrg tree one_cst = build_one_cst (new_type); 2432 1.1 mrg gassign *g; 2433 1.1 mrg 2434 1.1 mrg tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL); 2435 1.1 mrg g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst); 2436 1.1 mrg append_pattern_def_seq (vinfo, 
last_stmt_info, g, new_vectype); 2437 1.1 mrg 2438 1.1 mrg tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL); 2439 1.1 mrg g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst); 2440 1.1 mrg append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype); 2441 1.1 mrg 2442 1.1 mrg tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL); 2443 1.1 mrg g = gimple_build_assign (sum_of_shifted, PLUS_EXPR, 2444 1.1 mrg shifted_op0, shifted_op1); 2445 1.1 mrg append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype); 2446 1.1 mrg 2447 1.1 mrg tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL); 2448 1.1 mrg tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR; 2449 1.1 mrg g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]); 2450 1.1 mrg append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype); 2451 1.1 mrg 2452 1.1 mrg tree carry = vect_recog_temp_ssa_var (new_type, NULL); 2453 1.1 mrg g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst); 2454 1.1 mrg append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype); 2455 1.1 mrg 2456 1.1 mrg g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry); 2457 1.1 mrg return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype); 2458 1.1 mrg } 2459 1.1 mrg 2460 1.1 mrg /* Generate the IFN_AVG* call. 
*/ 2461 1.1 mrg gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0], 2462 1.1 mrg new_ops[1]); 2463 1.1 mrg gimple_call_set_lhs (average_stmt, new_var); 2464 1.1 mrg gimple_set_location (average_stmt, gimple_location (last_stmt)); 2465 1.1 mrg 2466 1.1 mrg if (dump_enabled_p ()) 2467 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, 2468 1.1 mrg "created pattern stmt: %G", average_stmt); 2469 1.1 mrg 2470 1.1 mrg return vect_convert_output (vinfo, last_stmt_info, 2471 1.1 mrg type, average_stmt, new_vectype); 2472 1.1 mrg } 2473 1.1 mrg 2474 1.1 mrg /* Recognize cases in which the input to a cast is wider than its 2475 1.1 mrg output, and the input is fed by a widening operation. Fold this 2476 1.1 mrg by removing the unnecessary intermediate widening. E.g.: 2477 1.1 mrg 2478 1.1 mrg unsigned char a; 2479 1.1 mrg unsigned int b = (unsigned int) a; 2480 1.1 mrg unsigned short c = (unsigned short) b; 2481 1.1 mrg 2482 1.1 mrg --> 2483 1.1 mrg 2484 1.1 mrg unsigned short c = (unsigned short) a; 2485 1.1 mrg 2486 1.1 mrg Although this is rare in input IR, it is an expected side-effect 2487 1.1 mrg of the over-widening pattern above. 2488 1.1 mrg 2489 1.1 mrg This is beneficial also for integer-to-float conversions, if the 2490 1.1 mrg widened integer has more bits than the float, and if the unwidened 2491 1.1 mrg input doesn't. */ 2492 1.1 mrg 2493 1.1 mrg static gimple * 2494 1.1 mrg vect_recog_cast_forwprop_pattern (vec_info *vinfo, 2495 1.1 mrg stmt_vec_info last_stmt_info, tree *type_out) 2496 1.1 mrg { 2497 1.1 mrg /* Check for a cast, including an integer-to-float conversion. 
*/ 2498 1.1 mrg gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt); 2499 1.1 mrg if (!last_stmt) 2500 1.1 mrg return NULL; 2501 1.1 mrg tree_code code = gimple_assign_rhs_code (last_stmt); 2502 1.1 mrg if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR) 2503 1.1 mrg return NULL; 2504 1.1 mrg 2505 1.1 mrg /* Make sure that the rhs is a scalar with a natural bitsize. */ 2506 1.1 mrg tree lhs = gimple_assign_lhs (last_stmt); 2507 1.1 mrg if (!lhs) 2508 1.1 mrg return NULL; 2509 1.1 mrg tree lhs_type = TREE_TYPE (lhs); 2510 1.1 mrg scalar_mode lhs_mode; 2511 1.1 mrg if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type) 2512 1.1 mrg || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode)) 2513 1.1 mrg return NULL; 2514 1.1 mrg 2515 1.1 mrg /* Check for a narrowing operation (from a vector point of view). */ 2516 1.1 mrg tree rhs = gimple_assign_rhs1 (last_stmt); 2517 1.1 mrg tree rhs_type = TREE_TYPE (rhs); 2518 1.1 mrg if (!INTEGRAL_TYPE_P (rhs_type) 2519 1.1 mrg || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type) 2520 1.1 mrg || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode)) 2521 1.1 mrg return NULL; 2522 1.1 mrg 2523 1.1 mrg /* Try to find an unpromoted input. */ 2524 1.1 mrg vect_unpromoted_value unprom; 2525 1.1 mrg if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom) 2526 1.1 mrg || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type)) 2527 1.1 mrg return NULL; 2528 1.1 mrg 2529 1.1 mrg /* If the bits above RHS_TYPE matter, make sure that they're the 2530 1.1 mrg same when extending from UNPROM as they are when extending from RHS. */ 2531 1.1 mrg if (!INTEGRAL_TYPE_P (lhs_type) 2532 1.1 mrg && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type)) 2533 1.1 mrg return NULL; 2534 1.1 mrg 2535 1.1 mrg /* We can get the same result by casting UNPROM directly, to avoid 2536 1.1 mrg the unnecessary widening and narrowing. 
*/ 2537 1.1 mrg vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt); 2538 1.1 mrg 2539 1.1 mrg *type_out = get_vectype_for_scalar_type (vinfo, lhs_type); 2540 1.1 mrg if (!*type_out) 2541 1.1 mrg return NULL; 2542 1.1 mrg 2543 1.1 mrg tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL); 2544 1.1 mrg gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op); 2545 1.1 mrg gimple_set_location (pattern_stmt, gimple_location (last_stmt)); 2546 1.1 mrg 2547 1.1 mrg return pattern_stmt; 2548 1.1 mrg } 2549 1.1 mrg 2550 1.1 mrg /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR 2551 1.1 mrg to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */ 2552 1.1 mrg 2553 1.1 mrg static gimple * 2554 1.1 mrg vect_recog_widen_shift_pattern (vec_info *vinfo, 2555 1.1 mrg stmt_vec_info last_stmt_info, tree *type_out) 2556 1.1 mrg { 2557 1.1 mrg return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out, 2558 1.1 mrg LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true, 2559 1.1 mrg "vect_recog_widen_shift_pattern"); 2560 1.1 mrg } 2561 1.1 mrg 2562 1.1 mrg /* Detect a rotate pattern wouldn't be otherwise vectorized: 2563 1.1 mrg 2564 1.1 mrg type a_t, b_t, c_t; 2565 1.1 mrg 2566 1.1 mrg S0 a_t = b_t r<< c_t; 2567 1.1 mrg 2568 1.1 mrg Input/Output: 2569 1.1 mrg 2570 1.1 mrg * STMT_VINFO: The stmt from which the pattern search begins, 2571 1.1 mrg i.e. the shift/rotate stmt. The original stmt (S0) is replaced 2572 1.1 mrg with a sequence: 2573 1.1 mrg 2574 1.1 mrg S1 d_t = -c_t; 2575 1.1 mrg S2 e_t = d_t & (B - 1); 2576 1.1 mrg S3 f_t = b_t << c_t; 2577 1.1 mrg S4 g_t = b_t >> e_t; 2578 1.1 mrg S0 a_t = f_t | g_t; 2579 1.1 mrg 2580 1.1 mrg where B is element bitsize of type. 2581 1.1 mrg 2582 1.1 mrg Output: 2583 1.1 mrg 2584 1.1 mrg * TYPE_OUT: The type of the output of this pattern. 2585 1.1 mrg 2586 1.1 mrg * Return value: A new stmt that will be used to replace the rotate 2587 1.1 mrg S0 stmt. 
*/ 2588 1.1 mrg 2589 1.1 mrg static gimple * 2590 1.1 mrg vect_recog_rotate_pattern (vec_info *vinfo, 2591 1.1 mrg stmt_vec_info stmt_vinfo, tree *type_out) 2592 1.1 mrg { 2593 1.1 mrg gimple *last_stmt = stmt_vinfo->stmt; 2594 1.1 mrg tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2; 2595 1.1 mrg gimple *pattern_stmt, *def_stmt; 2596 1.1 mrg enum tree_code rhs_code; 2597 1.1 mrg enum vect_def_type dt; 2598 1.1 mrg optab optab1, optab2; 2599 1.1 mrg edge ext_def = NULL; 2600 1.1 mrg bool bswap16_p = false; 2601 1.1 mrg 2602 1.1 mrg if (is_gimple_assign (last_stmt)) 2603 1.1 mrg { 2604 1.1 mrg rhs_code = gimple_assign_rhs_code (last_stmt); 2605 1.1 mrg switch (rhs_code) 2606 1.1 mrg { 2607 1.1 mrg case LROTATE_EXPR: 2608 1.1 mrg case RROTATE_EXPR: 2609 1.1 mrg break; 2610 1.1 mrg default: 2611 1.1 mrg return NULL; 2612 1.1 mrg } 2613 1.1 mrg 2614 1.1 mrg lhs = gimple_assign_lhs (last_stmt); 2615 1.1 mrg oprnd0 = gimple_assign_rhs1 (last_stmt); 2616 1.1 mrg type = TREE_TYPE (oprnd0); 2617 1.1 mrg oprnd1 = gimple_assign_rhs2 (last_stmt); 2618 1.1 mrg } 2619 1.1 mrg else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16)) 2620 1.1 mrg { 2621 1.1 mrg /* __builtin_bswap16 (x) is another form of x r>> 8. 2622 1.1 mrg The vectorizer has bswap support, but only if the argument isn't 2623 1.1 mrg promoted. 
*/ 2624 1.1 mrg lhs = gimple_call_lhs (last_stmt); 2625 1.1 mrg oprnd0 = gimple_call_arg (last_stmt, 0); 2626 1.1 mrg type = TREE_TYPE (oprnd0); 2627 1.1 mrg if (!lhs 2628 1.1 mrg || TYPE_PRECISION (TREE_TYPE (lhs)) != 16 2629 1.1 mrg || TYPE_PRECISION (type) <= 16 2630 1.1 mrg || TREE_CODE (oprnd0) != SSA_NAME 2631 1.1 mrg || BITS_PER_UNIT != 8 2632 1.1 mrg || !TYPE_UNSIGNED (TREE_TYPE (lhs))) 2633 1.1 mrg return NULL; 2634 1.1 mrg 2635 1.1 mrg stmt_vec_info def_stmt_info; 2636 1.1 mrg if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt)) 2637 1.1 mrg return NULL; 2638 1.1 mrg 2639 1.1 mrg if (dt != vect_internal_def) 2640 1.1 mrg return NULL; 2641 1.1 mrg 2642 1.1 mrg if (gimple_assign_cast_p (def_stmt)) 2643 1.1 mrg { 2644 1.1 mrg def = gimple_assign_rhs1 (def_stmt); 2645 1.1 mrg if (INTEGRAL_TYPE_P (TREE_TYPE (def)) 2646 1.1 mrg && TYPE_PRECISION (TREE_TYPE (def)) == 16) 2647 1.1 mrg oprnd0 = def; 2648 1.1 mrg } 2649 1.1 mrg 2650 1.1 mrg type = TREE_TYPE (lhs); 2651 1.1 mrg vectype = get_vectype_for_scalar_type (vinfo, type); 2652 1.1 mrg if (vectype == NULL_TREE) 2653 1.1 mrg return NULL; 2654 1.1 mrg 2655 1.1 mrg if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype)) 2656 1.1 mrg { 2657 1.1 mrg /* The encoding uses one stepped pattern for each byte in the 2658 1.1 mrg 16-bit word. */ 2659 1.1 mrg vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3); 2660 1.1 mrg for (unsigned i = 0; i < 3; ++i) 2661 1.1 mrg for (unsigned j = 0; j < 2; ++j) 2662 1.1 mrg elts.quick_push ((i + 1) * 2 - j - 1); 2663 1.1 mrg 2664 1.1 mrg vec_perm_indices indices (elts, 1, 2665 1.1 mrg TYPE_VECTOR_SUBPARTS (char_vectype)); 2666 1.1 mrg if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices)) 2667 1.1 mrg { 2668 1.1 mrg /* vectorizable_bswap can handle the __builtin_bswap16 if we 2669 1.1 mrg undo the argument promotion. 
*/ 2670 1.1 mrg if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0))) 2671 1.1 mrg { 2672 1.1 mrg def = vect_recog_temp_ssa_var (type, NULL); 2673 1.1 mrg def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0); 2674 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt); 2675 1.1 mrg oprnd0 = def; 2676 1.1 mrg } 2677 1.1 mrg 2678 1.1 mrg /* Pattern detected. */ 2679 1.1 mrg vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt); 2680 1.1 mrg 2681 1.1 mrg *type_out = vectype; 2682 1.1 mrg 2683 1.1 mrg /* Pattern supported. Create a stmt to be used to replace the 2684 1.1 mrg pattern, with the unpromoted argument. */ 2685 1.1 mrg var = vect_recog_temp_ssa_var (type, NULL); 2686 1.1 mrg pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt), 2687 1.1 mrg 1, oprnd0); 2688 1.1 mrg gimple_call_set_lhs (pattern_stmt, var); 2689 1.1 mrg gimple_call_set_fntype (as_a <gcall *> (pattern_stmt), 2690 1.1 mrg gimple_call_fntype (last_stmt)); 2691 1.1 mrg return pattern_stmt; 2692 1.1 mrg } 2693 1.1 mrg } 2694 1.1 mrg 2695 1.1 mrg oprnd1 = build_int_cst (integer_type_node, 8); 2696 1.1 mrg rhs_code = LROTATE_EXPR; 2697 1.1 mrg bswap16_p = true; 2698 1.1 mrg } 2699 1.1 mrg else 2700 1.1 mrg return NULL; 2701 1.1 mrg 2702 1.1 mrg if (TREE_CODE (oprnd0) != SSA_NAME 2703 1.1 mrg || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type) 2704 1.1 mrg || !INTEGRAL_TYPE_P (type) 2705 1.1 mrg || !TYPE_UNSIGNED (type)) 2706 1.1 mrg return NULL; 2707 1.1 mrg 2708 1.1 mrg stmt_vec_info def_stmt_info; 2709 1.1 mrg if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt)) 2710 1.1 mrg return NULL; 2711 1.1 mrg 2712 1.1 mrg if (dt != vect_internal_def 2713 1.1 mrg && dt != vect_constant_def 2714 1.1 mrg && dt != vect_external_def) 2715 1.1 mrg return NULL; 2716 1.1 mrg 2717 1.1 mrg vectype = get_vectype_for_scalar_type (vinfo, type); 2718 1.1 mrg if (vectype == NULL_TREE) 2719 1.1 mrg return NULL; 2720 1.1 mrg 2721 1.1 mrg /* If vector/vector or 
vector/scalar rotate is supported by the target, 2722 1.1 mrg don't do anything here. */ 2723 1.1 mrg optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector); 2724 1.1 mrg if (optab1 2725 1.1 mrg && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing) 2726 1.1 mrg { 2727 1.1 mrg use_rotate: 2728 1.1 mrg if (bswap16_p) 2729 1.1 mrg { 2730 1.1 mrg if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0))) 2731 1.1 mrg { 2732 1.1 mrg def = vect_recog_temp_ssa_var (type, NULL); 2733 1.1 mrg def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0); 2734 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt); 2735 1.1 mrg oprnd0 = def; 2736 1.1 mrg } 2737 1.1 mrg 2738 1.1 mrg /* Pattern detected. */ 2739 1.1 mrg vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt); 2740 1.1 mrg 2741 1.1 mrg *type_out = vectype; 2742 1.1 mrg 2743 1.1 mrg /* Pattern supported. Create a stmt to be used to replace the 2744 1.1 mrg pattern. */ 2745 1.1 mrg var = vect_recog_temp_ssa_var (type, NULL); 2746 1.1 mrg pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0, 2747 1.1 mrg oprnd1); 2748 1.1 mrg return pattern_stmt; 2749 1.1 mrg } 2750 1.1 mrg return NULL; 2751 1.1 mrg } 2752 1.1 mrg 2753 1.1 mrg if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def) 2754 1.1 mrg { 2755 1.1 mrg optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar); 2756 1.1 mrg if (optab2 2757 1.1 mrg && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing) 2758 1.1 mrg goto use_rotate; 2759 1.1 mrg } 2760 1.1 mrg 2761 1.1 mrg /* If vector/vector or vector/scalar shifts aren't supported by the target, 2762 1.1 mrg don't do anything here either. 
*/ 2763 1.1 mrg optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector); 2764 1.1 mrg optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector); 2765 1.1 mrg if (!optab1 2766 1.1 mrg || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing 2767 1.1 mrg || !optab2 2768 1.1 mrg || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing) 2769 1.1 mrg { 2770 1.1 mrg if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def) 2771 1.1 mrg return NULL; 2772 1.1 mrg optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar); 2773 1.1 mrg optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar); 2774 1.1 mrg if (!optab1 2775 1.1 mrg || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing 2776 1.1 mrg || !optab2 2777 1.1 mrg || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing) 2778 1.1 mrg return NULL; 2779 1.1 mrg } 2780 1.1 mrg 2781 1.1 mrg *type_out = vectype; 2782 1.1 mrg 2783 1.1 mrg if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0))) 2784 1.1 mrg { 2785 1.1 mrg def = vect_recog_temp_ssa_var (type, NULL); 2786 1.1 mrg def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0); 2787 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt); 2788 1.1 mrg oprnd0 = def; 2789 1.1 mrg } 2790 1.1 mrg 2791 1.1 mrg if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME) 2792 1.1 mrg ext_def = vect_get_external_def_edge (vinfo, oprnd1); 2793 1.1 mrg 2794 1.1 mrg def = NULL_TREE; 2795 1.1 mrg scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type); 2796 1.1 mrg if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode) 2797 1.1 mrg def = oprnd1; 2798 1.1 mrg else if (def_stmt && gimple_assign_cast_p (def_stmt)) 2799 1.1 mrg { 2800 1.1 mrg tree rhs1 = gimple_assign_rhs1 (def_stmt); 2801 1.1 mrg if (TYPE_MODE (TREE_TYPE (rhs1)) == mode 2802 1.1 mrg && TYPE_PRECISION (TREE_TYPE (rhs1)) 2803 1.1 mrg == TYPE_PRECISION (type)) 2804 1.1 mrg def = rhs1; 2805 1.1 mrg } 2806 1.1 
mrg 2807 1.1 mrg if (def == NULL_TREE) 2808 1.1 mrg { 2809 1.1 mrg def = vect_recog_temp_ssa_var (type, NULL); 2810 1.1 mrg def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1); 2811 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt); 2812 1.1 mrg } 2813 1.1 mrg stype = TREE_TYPE (def); 2814 1.1 mrg 2815 1.1 mrg if (TREE_CODE (def) == INTEGER_CST) 2816 1.1 mrg { 2817 1.1 mrg if (!tree_fits_uhwi_p (def) 2818 1.1 mrg || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode) 2819 1.1 mrg || integer_zerop (def)) 2820 1.1 mrg return NULL; 2821 1.1 mrg def2 = build_int_cst (stype, 2822 1.1 mrg GET_MODE_PRECISION (mode) - tree_to_uhwi (def)); 2823 1.1 mrg } 2824 1.1 mrg else 2825 1.1 mrg { 2826 1.1 mrg tree vecstype = get_vectype_for_scalar_type (vinfo, stype); 2827 1.1 mrg 2828 1.1 mrg if (vecstype == NULL_TREE) 2829 1.1 mrg return NULL; 2830 1.1 mrg def2 = vect_recog_temp_ssa_var (stype, NULL); 2831 1.1 mrg def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def); 2832 1.1 mrg if (ext_def) 2833 1.1 mrg { 2834 1.1 mrg basic_block new_bb 2835 1.1 mrg = gsi_insert_on_edge_immediate (ext_def, def_stmt); 2836 1.1 mrg gcc_assert (!new_bb); 2837 1.1 mrg } 2838 1.1 mrg else 2839 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype); 2840 1.1 mrg 2841 1.1 mrg def2 = vect_recog_temp_ssa_var (stype, NULL); 2842 1.1 mrg tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1); 2843 1.1 mrg def_stmt = gimple_build_assign (def2, BIT_AND_EXPR, 2844 1.1 mrg gimple_assign_lhs (def_stmt), mask); 2845 1.1 mrg if (ext_def) 2846 1.1 mrg { 2847 1.1 mrg basic_block new_bb 2848 1.1 mrg = gsi_insert_on_edge_immediate (ext_def, def_stmt); 2849 1.1 mrg gcc_assert (!new_bb); 2850 1.1 mrg } 2851 1.1 mrg else 2852 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype); 2853 1.1 mrg } 2854 1.1 mrg 2855 1.1 mrg var1 = vect_recog_temp_ssa_var (type, NULL); 2856 1.1 mrg def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR 2857 1.1 mrg ? 
LSHIFT_EXPR : RSHIFT_EXPR, 2858 1.1 mrg oprnd0, def); 2859 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt); 2860 1.1 mrg 2861 1.1 mrg var2 = vect_recog_temp_ssa_var (type, NULL); 2862 1.1 mrg def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR 2863 1.1 mrg ? RSHIFT_EXPR : LSHIFT_EXPR, 2864 1.1 mrg oprnd0, def2); 2865 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt); 2866 1.1 mrg 2867 1.1 mrg /* Pattern detected. */ 2868 1.1 mrg vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt); 2869 1.1 mrg 2870 1.1 mrg /* Pattern supported. Create a stmt to be used to replace the pattern. */ 2871 1.1 mrg var = vect_recog_temp_ssa_var (type, NULL); 2872 1.1 mrg pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2); 2873 1.1 mrg 2874 1.1 mrg return pattern_stmt; 2875 1.1 mrg } 2876 1.1 mrg 2877 1.1 mrg /* Detect a vector by vector shift pattern that wouldn't be otherwise 2878 1.1 mrg vectorized: 2879 1.1 mrg 2880 1.1 mrg type a_t; 2881 1.1 mrg TYPE b_T, res_T; 2882 1.1 mrg 2883 1.1 mrg S1 a_t = ; 2884 1.1 mrg S2 b_T = ; 2885 1.1 mrg S3 res_T = b_T op a_t; 2886 1.1 mrg 2887 1.1 mrg where type 'TYPE' is a type with different size than 'type', 2888 1.1 mrg and op is <<, >> or rotate. 2889 1.1 mrg 2890 1.1 mrg Also detect cases: 2891 1.1 mrg 2892 1.1 mrg type a_t; 2893 1.1 mrg TYPE b_T, c_T, res_T; 2894 1.1 mrg 2895 1.1 mrg S0 c_T = ; 2896 1.1 mrg S1 a_t = (type) c_T; 2897 1.1 mrg S2 b_T = ; 2898 1.1 mrg S3 res_T = b_T op a_t; 2899 1.1 mrg 2900 1.1 mrg Input/Output: 2901 1.1 mrg 2902 1.1 mrg * STMT_VINFO: The stmt from which the pattern search begins, 2903 1.1 mrg i.e. the shift/rotate stmt. The original stmt (S3) is replaced 2904 1.1 mrg with a shift/rotate which has same type on both operands, in the 2905 1.1 mrg second case just b_T op c_T, in the first case with added cast 2906 1.1 mrg from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ. 
2907 1.1 mrg 2908 1.1 mrg Output: 2909 1.1 mrg 2910 1.1 mrg * TYPE_OUT: The type of the output of this pattern. 2911 1.1 mrg 2912 1.1 mrg * Return value: A new stmt that will be used to replace the shift/rotate 2913 1.1 mrg S3 stmt. */ 2914 1.1 mrg 2915 1.1 mrg static gimple * 2916 1.1 mrg vect_recog_vector_vector_shift_pattern (vec_info *vinfo, 2917 1.1 mrg stmt_vec_info stmt_vinfo, 2918 1.1 mrg tree *type_out) 2919 1.1 mrg { 2920 1.1 mrg gimple *last_stmt = stmt_vinfo->stmt; 2921 1.1 mrg tree oprnd0, oprnd1, lhs, var; 2922 1.1 mrg gimple *pattern_stmt; 2923 1.1 mrg enum tree_code rhs_code; 2924 1.1 mrg 2925 1.1 mrg if (!is_gimple_assign (last_stmt)) 2926 1.1 mrg return NULL; 2927 1.1 mrg 2928 1.1 mrg rhs_code = gimple_assign_rhs_code (last_stmt); 2929 1.1 mrg switch (rhs_code) 2930 1.1 mrg { 2931 1.1 mrg case LSHIFT_EXPR: 2932 1.1 mrg case RSHIFT_EXPR: 2933 1.1 mrg case LROTATE_EXPR: 2934 1.1 mrg case RROTATE_EXPR: 2935 1.1 mrg break; 2936 1.1 mrg default: 2937 1.1 mrg return NULL; 2938 1.1 mrg } 2939 1.1 mrg 2940 1.1 mrg lhs = gimple_assign_lhs (last_stmt); 2941 1.1 mrg oprnd0 = gimple_assign_rhs1 (last_stmt); 2942 1.1 mrg oprnd1 = gimple_assign_rhs2 (last_stmt); 2943 1.1 mrg if (TREE_CODE (oprnd0) != SSA_NAME 2944 1.1 mrg || TREE_CODE (oprnd1) != SSA_NAME 2945 1.1 mrg || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1)) 2946 1.1 mrg || !type_has_mode_precision_p (TREE_TYPE (oprnd1)) 2947 1.1 mrg || TYPE_PRECISION (TREE_TYPE (lhs)) 2948 1.1 mrg != TYPE_PRECISION (TREE_TYPE (oprnd0))) 2949 1.1 mrg return NULL; 2950 1.1 mrg 2951 1.1 mrg stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1); 2952 1.1 mrg if (!def_vinfo) 2953 1.1 mrg return NULL; 2954 1.1 mrg 2955 1.1 mrg *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0)); 2956 1.1 mrg if (*type_out == NULL_TREE) 2957 1.1 mrg return NULL; 2958 1.1 mrg 2959 1.1 mrg tree def = NULL_TREE; 2960 1.1 mrg gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt); 2961 1.1 mrg if 
(def_stmt && gimple_assign_cast_p (def_stmt)) 2962 1.1 mrg { 2963 1.1 mrg tree rhs1 = gimple_assign_rhs1 (def_stmt); 2964 1.1 mrg if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0)) 2965 1.1 mrg && TYPE_PRECISION (TREE_TYPE (rhs1)) 2966 1.1 mrg == TYPE_PRECISION (TREE_TYPE (oprnd0))) 2967 1.1 mrg { 2968 1.1 mrg if (TYPE_PRECISION (TREE_TYPE (oprnd1)) 2969 1.1 mrg >= TYPE_PRECISION (TREE_TYPE (rhs1))) 2970 1.1 mrg def = rhs1; 2971 1.1 mrg else 2972 1.1 mrg { 2973 1.1 mrg tree mask 2974 1.1 mrg = build_low_bits_mask (TREE_TYPE (rhs1), 2975 1.1 mrg TYPE_PRECISION (TREE_TYPE (oprnd1))); 2976 1.1 mrg def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL); 2977 1.1 mrg def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask); 2978 1.1 mrg tree vecstype = get_vectype_for_scalar_type (vinfo, 2979 1.1 mrg TREE_TYPE (rhs1)); 2980 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype); 2981 1.1 mrg } 2982 1.1 mrg } 2983 1.1 mrg } 2984 1.1 mrg 2985 1.1 mrg if (def == NULL_TREE) 2986 1.1 mrg { 2987 1.1 mrg def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL); 2988 1.1 mrg def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1); 2989 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt); 2990 1.1 mrg } 2991 1.1 mrg 2992 1.1 mrg /* Pattern detected. */ 2993 1.1 mrg vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt); 2994 1.1 mrg 2995 1.1 mrg /* Pattern supported. Create a stmt to be used to replace the pattern. */ 2996 1.1 mrg var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL); 2997 1.1 mrg pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def); 2998 1.1 mrg 2999 1.1 mrg return pattern_stmt; 3000 1.1 mrg } 3001 1.1 mrg 3002 1.1 mrg /* Return true iff the target has a vector optab implementing the operation 3003 1.1 mrg CODE on type VECTYPE. 
 */

static bool
target_has_vecop_for_code (tree_code code, tree vectype)
{
  /* A vector-vector optab with an insn handler for VECTYPE's mode means
     the target can expand CODE on whole vectors directly.  */
  optab voptab = optab_for_tree_code (code, vectype, optab_vector);
  return voptab
	 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
}

/* Verify that the target has optabs of VECTYPE to perform all the steps
   needed by the multiplication-by-immediate synthesis algorithm described by
   ALG and VAR.  If SYNTH_SHIFT_P is true ensure that vector addition is
   present.  Return true iff the target supports all the steps.  */

static bool
target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
				tree vectype, bool synth_shift_p)
{
  /* Step 0 only seeds the accumulator; anything other than "start from
     zero" or "start from the multiplicand" is not handled here.  */
  if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
    return false;

  bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
  bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);

  if (var == negate_variant
      && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
    return false;

  /* If we must synthesize shifts with additions make sure that vector
     addition is available.  */
  if ((var == add_variant || synth_shift_p) && !supports_vplus)
    return false;

  /* Each remaining step must map onto a supported vector operation.  */
  for (int i = 1; i < alg->ops; i++)
    {
      switch (alg->op[i])
	{
	case alg_shift:
	  /* Shifts are either natively supported or synthesized from
	     additions, which were checked above when SYNTH_SHIFT_P.  */
	  break;
	case alg_add_t_m2:
	case alg_add_t2_m:
	case alg_add_factor:
	  if (!supports_vplus)
	    return false;
	  break;
	case alg_sub_t_m2:
	case alg_sub_t2_m:
	case alg_sub_factor:
	  if (!supports_vminus)
	    return false;
	  break;
	case alg_unknown:
	case alg_m:
	case alg_zero:
	case alg_impossible:
	  return false;
	default:
	  gcc_unreachable ();
	}
    }

  return true;
}

/* Synthesize a left shift of OP by AMNT bits using a series of additions and
   putting the final result in DEST.  Append all statements but the last into
   VINFO.  Return the last statement.  */

static gimple *
synth_lshift_by_additions (vec_info *vinfo,
			   tree dest, tree op, HOST_WIDE_INT amnt,
			   stmt_vec_info stmt_info)
{
  HOST_WIDE_INT i;
  tree itype = TREE_TYPE (op);
  tree prev_res = op;
  gcc_assert (amnt >= 0);
  /* x << AMNT == x doubled AMNT times; each iteration emits
     tmp = prev + prev.  The final doubling writes DEST and is returned
     rather than appended, per this function's contract.  */
  for (i = 0; i < amnt; i++)
    {
      tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
				    : dest;
      gimple *stmt
	= gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
      prev_res = tmp_var;
      if (i < amnt - 1)
	append_pattern_def_seq (vinfo, stmt_info, stmt);
      else
	return stmt;
    }
  /* Unreachable: AMNT == 0 would fall through here, but callers only
     request strictly positive shift amounts.  */
  gcc_unreachable ();
  return NULL;
}

/* Helper for vect_synth_mult_by_constant.  Apply a binary operation
   CODE to operands OP1 and OP2, creating a new temporary SSA var in
   the process if necessary.  Append the resulting assignment statements
   to the sequence in STMT_VINFO.  Return the SSA variable that holds the
   result of the binary operation.  If SYNTH_SHIFT_P is true synthesize
   left shifts using additions.  */

static tree
apply_binop_and_append_stmt (vec_info *vinfo,
			     tree_code code, tree op1, tree op2,
			     stmt_vec_info stmt_vinfo, bool synth_shift_p)
{
  /* x << 0 and x + 0 are no-ops: reuse OP1 without emitting anything.  */
  if (integer_zerop (op2)
      && (code == LSHIFT_EXPR
	  || code == PLUS_EXPR))
    {
      gcc_assert (TREE_CODE (op1) == SSA_NAME);
      return op1;
    }

  gimple *stmt;
  tree itype = TREE_TYPE (op1);
  tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);

  if (code == LSHIFT_EXPR
      && synth_shift_p)
    {
      /* Unlike the caller, here the synthesized shift's last statement
	 is appended too, since the result feeds a later operation.  */
      stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
					TREE_INT_CST_LOW (op2), stmt_vinfo);
      append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
      return tmp_var;
    }

  stmt = gimple_build_assign (tmp_var, code, op1, op2);
  append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
  return tmp_var;
}

/* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
   and simple arithmetic operations to be vectorized.  Record the statements
   produced in STMT_VINFO and return the last statement in the sequence or
   NULL if it's not possible to synthesize such a multiplication.
   This function mirrors the behavior of expand_mult_const in expmed.cc
   but works on tree-ssa form.  */

static gimple *
vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
			     stmt_vec_info stmt_vinfo)
{
  tree itype = TREE_TYPE (op);
  machine_mode mode = TYPE_MODE (itype);
  struct algorithm alg;
  mult_variant variant;
  if (!tree_fits_shwi_p (val))
    return NULL;

  /* Multiplication synthesis by shifts, adds and subs can introduce
     signed overflow where the original operation didn't.  Perform the
     operations on an unsigned type and cast back to avoid this.
     In the future we may want to relax this for synthesis algorithms
     that we can prove do not cause unexpected overflow.  */
  bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);

  tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
  tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
  if (!vectype)
    return NULL;

  /* Targets that don't support vector shifts but support vector additions
     can synthesize shifts that way.  */
  bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);

  HOST_WIDE_INT hwval = tree_to_shwi (val);
  /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
     The vectorizer's benefit analysis will decide whether it's beneficial
     to do this.  */
  bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
				       ? TYPE_MODE (vectype) : mode,
				       hwval, &alg, &variant, MAX_COST);
  if (!possible)
    return NULL;

  if (!target_supports_mult_synth_alg (&alg, variant, vectype, synth_shift_p))
    return NULL;

  tree accumulator;

  /* Clear out the sequence of statements so we can populate it below.  */
  gimple *stmt = NULL;

  /* When widening to unsigned, first cast OP so all intermediate
     arithmetic happens in the wrapping type.  */
  if (cast_to_unsigned_p)
    {
      tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
      stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
      append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
      op = tmp_op;
    }

  /* Seed the accumulator from step 0 of the algorithm (target_supports_
     mult_synth_alg guaranteed it is alg_zero or alg_m).  */
  if (alg.op[0] == alg_zero)
    accumulator = build_int_cst (multtype, 0);
  else
    accumulator = op;

  /* A trailing negate/add fix-up means the loop's last statement is not
     the final pattern statement, so it must be appended like the rest.  */
  bool needs_fixup = (variant == negate_variant)
		     || (variant == add_variant);

  for (int i = 1; i < alg.ops; i++)
    {
      tree shft_log = build_int_cst (multtype, alg.log[i]);
      tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
      tree tmp_var = NULL_TREE;

      switch (alg.op[i])
	{
	case alg_shift:
	  /* accum <<= log[i], synthesized via additions if needed.  */
	  if (synth_shift_p)
	    stmt
	      = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
					   alg.log[i], stmt_vinfo);
	  else
	    stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
					shft_log);
	  break;
	case alg_add_t_m2:
	  /* accum += op << log[i].  */
	  tmp_var
	    = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
					   stmt_vinfo, synth_shift_p);
	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
				      tmp_var);
	  break;
	case alg_sub_t_m2:
	  /* accum -= op << log[i].  */
	  tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
						 shft_log, stmt_vinfo,
						 synth_shift_p);
	  /* In some algorithms the first step involves zeroing the
	     accumulator.  If subtracting from such an accumulator
	     just emit the negation directly.  */
	  if (integer_zerop (accumulator))
	    stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
	  else
	    stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
					tmp_var);
	  break;
	case alg_add_t2_m:
	  /* accum = (accum << log[i]) + op.  */
	  tmp_var
	    = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
					   shft_log, stmt_vinfo, synth_shift_p);
	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
	  break;
	case alg_sub_t2_m:
	  /* accum = (accum << log[i]) - op.  */
	  tmp_var
	    = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
					   shft_log, stmt_vinfo, synth_shift_p);
	  stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
	  break;
	case alg_add_factor:
	  /* accum += accum << log[i].  */
	  tmp_var
	    = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
					   shft_log, stmt_vinfo, synth_shift_p);
	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
				      tmp_var);
	  break;
	case alg_sub_factor:
	  /* accum = (accum << log[i]) - accum.  */
	  tmp_var
	    = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
					   shft_log, stmt_vinfo, synth_shift_p);
	  stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
				      accumulator);
	  break;
	default:
	  gcc_unreachable ();
	}
      /* We don't want to append the last stmt in the sequence to stmt_vinfo
	 but rather return it directly.  */

      if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
	append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
      accumulator = accum_tmp;
    }
  if (variant == negate_variant)
    {
      /* Multiplier was negative: negate the accumulated result.  */
      tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
      stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
      accumulator = accum_tmp;
      /* Only append when a final cast follows; otherwise this is the
	 returned pattern statement.  */
      if (cast_to_unsigned_p)
	append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    }
  else if (variant == add_variant)
    {
      /* Algorithm computed VAL - 1: add one more OP to finish.  */
      tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
      stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
      accumulator = accum_tmp;
      if (cast_to_unsigned_p)
	append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    }
  /* Move back to a signed if needed.  The cast is the final (returned)
     statement, which is why the branches above appended theirs.  */
  if (cast_to_unsigned_p)
    {
      tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
      stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
    }

  return stmt;
}

/* Detect multiplication by constant and convert it into a sequence of
   shifts and additions, subtractions, negations.  We reuse the
   choose_mult_variant algorithms from expmed.cc

   Input/Output:

   STMT_VINFO: The stmt from which the pattern search begins,
   i.e. the mult stmt.

 Output:

  * TYPE_OUT: The type of the output of this pattern.
3314 1.1 mrg 3315 1.1 mrg * Return value: A new stmt that will be used to replace 3316 1.1 mrg the multiplication. */ 3317 1.1 mrg 3318 1.1 mrg static gimple * 3319 1.1 mrg vect_recog_mult_pattern (vec_info *vinfo, 3320 1.1 mrg stmt_vec_info stmt_vinfo, tree *type_out) 3321 1.1 mrg { 3322 1.1 mrg gimple *last_stmt = stmt_vinfo->stmt; 3323 1.1 mrg tree oprnd0, oprnd1, vectype, itype; 3324 1.1 mrg gimple *pattern_stmt; 3325 1.1 mrg 3326 1.1 mrg if (!is_gimple_assign (last_stmt)) 3327 1.1 mrg return NULL; 3328 1.1 mrg 3329 1.1 mrg if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR) 3330 1.1 mrg return NULL; 3331 1.1 mrg 3332 1.1 mrg oprnd0 = gimple_assign_rhs1 (last_stmt); 3333 1.1 mrg oprnd1 = gimple_assign_rhs2 (last_stmt); 3334 1.1 mrg itype = TREE_TYPE (oprnd0); 3335 1.1 mrg 3336 1.1 mrg if (TREE_CODE (oprnd0) != SSA_NAME 3337 1.1 mrg || TREE_CODE (oprnd1) != INTEGER_CST 3338 1.1 mrg || !INTEGRAL_TYPE_P (itype) 3339 1.1 mrg || !type_has_mode_precision_p (itype)) 3340 1.1 mrg return NULL; 3341 1.1 mrg 3342 1.1 mrg vectype = get_vectype_for_scalar_type (vinfo, itype); 3343 1.1 mrg if (vectype == NULL_TREE) 3344 1.1 mrg return NULL; 3345 1.1 mrg 3346 1.1 mrg /* If the target can handle vectorized multiplication natively, 3347 1.1 mrg don't attempt to optimize this. */ 3348 1.1 mrg optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default); 3349 1.1 mrg if (mul_optab != unknown_optab) 3350 1.1 mrg { 3351 1.1 mrg machine_mode vec_mode = TYPE_MODE (vectype); 3352 1.1 mrg int icode = (int) optab_handler (mul_optab, vec_mode); 3353 1.1 mrg if (icode != CODE_FOR_nothing) 3354 1.1 mrg return NULL; 3355 1.1 mrg } 3356 1.1 mrg 3357 1.1 mrg pattern_stmt = vect_synth_mult_by_constant (vinfo, 3358 1.1 mrg oprnd0, oprnd1, stmt_vinfo); 3359 1.1 mrg if (!pattern_stmt) 3360 1.1 mrg return NULL; 3361 1.1 mrg 3362 1.1 mrg /* Pattern detected. 
*/ 3363 1.1 mrg vect_pattern_detected ("vect_recog_mult_pattern", last_stmt); 3364 1.1 mrg 3365 1.1 mrg *type_out = vectype; 3366 1.1 mrg 3367 1.1 mrg return pattern_stmt; 3368 1.1 mrg } 3369 1.1 mrg 3370 1.1 mrg /* Detect a signed division by a constant that wouldn't be 3371 1.1 mrg otherwise vectorized: 3372 1.1 mrg 3373 1.1 mrg type a_t, b_t; 3374 1.1 mrg 3375 1.1 mrg S1 a_t = b_t / N; 3376 1.1 mrg 3377 1.1 mrg where type 'type' is an integral type and N is a constant. 3378 1.1 mrg 3379 1.1 mrg Similarly handle modulo by a constant: 3380 1.1 mrg 3381 1.1 mrg S4 a_t = b_t % N; 3382 1.1 mrg 3383 1.1 mrg Input/Output: 3384 1.1 mrg 3385 1.1 mrg * STMT_VINFO: The stmt from which the pattern search begins, 3386 1.1 mrg i.e. the division stmt. S1 is replaced by if N is a power 3387 1.1 mrg of two constant and type is signed: 3388 1.1 mrg S3 y_t = b_t < 0 ? N - 1 : 0; 3389 1.1 mrg S2 x_t = b_t + y_t; 3390 1.1 mrg S1' a_t = x_t >> log2 (N); 3391 1.1 mrg 3392 1.1 mrg S4 is replaced if N is a power of two constant and 3393 1.1 mrg type is signed by (where *_T temporaries have unsigned type): 3394 1.1 mrg S9 y_T = b_t < 0 ? -1U : 0U; 3395 1.1 mrg S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N)); 3396 1.1 mrg S7 z_t = (type) z_T; 3397 1.1 mrg S6 w_t = b_t + z_t; 3398 1.1 mrg S5 x_t = w_t & (N - 1); 3399 1.1 mrg S4' a_t = x_t - z_t; 3400 1.1 mrg 3401 1.1 mrg Output: 3402 1.1 mrg 3403 1.1 mrg * TYPE_OUT: The type of the output of this pattern. 3404 1.1 mrg 3405 1.1 mrg * Return value: A new stmt that will be used to replace the division 3406 1.1 mrg S1 or modulo S4 stmt. 
 */

static gimple *
vect_recog_divmod_pattern (vec_info *vinfo,
			   stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree oprnd0, oprnd1, vectype, itype, cond;
  gimple *pattern_stmt, *def_stmt;
  enum tree_code rhs_code;
  optab optab;
  tree q;
  int dummy_int, prec;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case TRUNC_DIV_EXPR:
    case EXACT_DIV_EXPR:
    case TRUNC_MOD_EXPR:
      break;
    default:
      return NULL;
    }

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  itype = TREE_TYPE (oprnd0);
  /* Only SSA_NAME / INTEGER_CST in a full-mode-precision integer type.  */
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != INTEGER_CST
      || TREE_CODE (itype) != INTEGER_TYPE
      || !type_has_mode_precision_p (itype))
    return NULL;

  scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
  vectype = get_vectype_for_scalar_type (vinfo, itype);
  if (vectype == NULL_TREE)
    return NULL;

  if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
    {
      /* If the target can handle vectorized division or modulo natively,
	 don't attempt to optimize this, since native division is likely
	 to give smaller code.  */
      optab = optab_for_tree_code (rhs_code, vectype, optab_default);
      if (optab != unknown_optab)
	{
	  machine_mode vec_mode = TYPE_MODE (vectype);
	  int icode = (int) optab_handler (optab, vec_mode);
	  if (icode != CODE_FOR_nothing)
	    return NULL;
	}
    }

  prec = TYPE_PRECISION (itype);
  if (integer_pow2p (oprnd1))
    {
      /* The power-of-two path below handles signed positive divisors
	 only; unsigned power-of-two divmod folds earlier to shifts.  */
      if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
	return NULL;

      /* Pattern detected.  */
      vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);

      *type_out = vectype;

      /* Check if the target supports this internal function.  */
      internal_fn ifn = IFN_DIV_POW2;
      if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
	{
	  tree shift = build_int_cst (itype, tree_log2 (oprnd1));

	  tree var_div = vect_recog_temp_ssa_var (itype, NULL);
	  gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
	  gimple_call_set_lhs (div_stmt, var_div);

	  if (rhs_code == TRUNC_MOD_EXPR)
	    {
	      /* a % N  ==>  a - (a .DIV_POW2 log2(N) << log2(N)).  */
	      append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
	      def_stmt
		= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
				       LSHIFT_EXPR, var_div, shift);
	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
	      pattern_stmt
		= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
				       MINUS_EXPR, oprnd0,
				       gimple_assign_lhs (def_stmt));
	    }
	  else
	    pattern_stmt = div_stmt;
	  gimple_set_location (pattern_stmt, gimple_location (last_stmt));

	  return pattern_stmt;
	}

      /* No IFN support: open-code the S1..S3 / S4..S9 sequences from the
	 function comment, keyed off the sign of oprnd0.  */
      cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
		     build_int_cst (itype, 0));
      if (rhs_code == TRUNC_DIV_EXPR
	  || rhs_code == EXACT_DIV_EXPR)
	{
	  /* a / N  ==>  (a + (a < 0 ? N - 1 : 0)) >> log2(N).  */
	  tree var = vect_recog_temp_ssa_var (itype, NULL);
	  tree shift;
	  def_stmt
	    = gimple_build_assign (var, COND_EXPR, cond,
				   fold_build2 (MINUS_EXPR, itype, oprnd1,
						build_int_cst (itype, 1)),
				   build_int_cst (itype, 0));
	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
	  var = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign (var, PLUS_EXPR, oprnd0,
				   gimple_assign_lhs (def_stmt));
	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

	  shift = build_int_cst (itype, tree_log2 (oprnd1));
	  pattern_stmt
	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
				   RSHIFT_EXPR, var, shift);
	}
      else
	{
	  tree signmask;
	  if (compare_tree_int (oprnd1, 2) == 0)
	    {
	      /* For N == 2 the sign adjustment is just 0 or 1.  */
	      signmask = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
					      build_int_cst (itype, 1),
					      build_int_cst (itype, 0));
	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
	    }
	  else
	    {
	      /* General N: compute z_t = sign-adjust via the unsigned
		 shift sequence S9/S8/S7 from the function comment.  */
	      tree utype
		= build_nonstandard_integer_type (prec, 1);
	      tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
	      tree shift
		= build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
					- tree_log2 (oprnd1));
	      tree var = vect_recog_temp_ssa_var (utype, NULL);

	      def_stmt = gimple_build_assign (var, COND_EXPR, cond,
					      build_int_cst (utype, -1),
					      build_int_cst (utype, 0));
	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
	      var = vect_recog_temp_ssa_var (utype, NULL);
	      def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
					      gimple_assign_lhs (def_stmt),
					      shift);
	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
	      signmask = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt
		= gimple_build_assign (signmask, NOP_EXPR, var);
	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
	    }
	  /* S6/S5/S4': (a + signmask) & (N - 1), then subtract signmask.  */
	  def_stmt
	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
				   PLUS_EXPR, oprnd0, signmask);
	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
	  def_stmt
	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
				   BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
				   fold_build2 (MINUS_EXPR, itype, oprnd1,
						build_int_cst (itype, 1)));
	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

	  pattern_stmt
	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
				   MINUS_EXPR, gimple_assign_lhs (def_stmt),
				   signmask);
	}

      return pattern_stmt;
    }

  if (prec > HOST_BITS_PER_WIDE_INT
      || integer_zerop (oprnd1))
    return NULL;

  /* The general (non power-of-two) paths below rely on a highpart
     multiply being available for the vector mode.  */
  if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
    return NULL;

  if (TYPE_UNSIGNED (itype))
    {
      unsigned HOST_WIDE_INT mh, ml;
      int pre_shift, post_shift;
      unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
				  & GET_MODE_MASK (itype_mode));
      tree t1, t2, t3, t4;

      if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
	/* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
	return NULL;

      /* Find a suitable multiplier and right shift count
	 instead of multiplying with D.  */
      mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);

      /* If the suggested multiplier is more than SIZE bits, we can do better
	 for even divisors, using an initial right shift.  */
      if (mh != 0 && (d & 1) == 0)
	{
	  pre_shift = ctz_or_zero (d);
	  mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
				  &ml, &post_shift, &dummy_int);
	  gcc_assert (!mh);
	}
      else
	pre_shift = 0;

      if (mh != 0)
	{
	  if (post_shift - 1 >= prec)
	    return NULL;

	  /* t1 = oprnd0 h* ml;
	     t2 = oprnd0 - t1;
	     t3 = t2 >> 1;
	     t4 = t1 + t3;
	     q = t4 >> (post_shift - 1);  */
	  t1 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
					  build_int_cst (itype, ml));
	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

	  t2 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

	  t3 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

	  t4 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign (t4, PLUS_EXPR, t1, t3);

	  if (post_shift != 1)
	    {
	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

	      q = vect_recog_temp_ssa_var (itype, NULL);
	      pattern_stmt
		= gimple_build_assign (q, RSHIFT_EXPR, t4,
				       build_int_cst (itype, post_shift - 1));
	    }
	  else
	    {
	      q = t4;
	      pattern_stmt = def_stmt;
	    }
	}
      else
	{
	  if (pre_shift >= prec || post_shift >= prec)
	    return NULL;

	  /* t1 = oprnd0 >> pre_shift;
	     t2 = t1 h* ml;
	     q = t2 >> post_shift;  */
	  if (pre_shift)
	    {
	      t1 = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt
		= gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
				       /* NOTE(review): NULL type here makes
					  build_int_cst default the constant's
					  type — confirm itype wasn't
					  intended, as used elsewhere.  */
				       build_int_cst (NULL, pre_shift));
	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
	    }
	  else
	    t1 = oprnd0;

	  t2 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
					  build_int_cst (itype, ml));

	  if (post_shift)
	    {
	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

	      q = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt
		= gimple_build_assign (q, RSHIFT_EXPR, t2,
				       build_int_cst (itype, post_shift));
	    }
	  else
	    q = t2;

	  pattern_stmt = def_stmt;
	}
    }
  else
    {
      unsigned HOST_WIDE_INT ml;
      int post_shift;
      HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
      unsigned HOST_WIDE_INT abs_d;
      bool add = false;
      tree t1, t2, t3, t4;

      /* Give up for -1.  */
      if (d == -1)
	return NULL;

      /* Since d might be INT_MIN, we have to cast to
	 unsigned HOST_WIDE_INT before negating to avoid
	 undefined signed overflow.  */
      abs_d = (d >= 0
	       ? (unsigned HOST_WIDE_INT) d
	       : - (unsigned HOST_WIDE_INT) d);

      /* n rem d = n rem -d */
      if (rhs_code == TRUNC_MOD_EXPR && d < 0)
	{
	  d = abs_d;
	  oprnd1 = build_int_cst (itype, abs_d);
	}
      if (HOST_BITS_PER_WIDE_INT >= prec
	  && abs_d == HOST_WIDE_INT_1U << (prec - 1))
	/* This case is not handled correctly below.  */
	return NULL;

      choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
      if (ml >= HOST_WIDE_INT_1U << (prec - 1))
	{
	  /* Multiplier doesn't fit in prec-1 bits: use the t1 + oprnd0
	     correction step and sign-extend ML.  */
	  add = true;
	  ml |= HOST_WIDE_INT_M1U << (prec - 1);
	}
      if (post_shift >= prec)
	return NULL;

      /* t1 = oprnd0 h* ml;  */
      t1 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
				      build_int_cst (itype, ml));

      if (add)
	{
	  /* t2 = t1 + oprnd0;  */
	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
	  t2 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
	}
      else
	t2 = t1;

      if (post_shift)
	{
	  /* t3 = t2 >> post_shift;  */
	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
	  t3 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
					  build_int_cst (itype, post_shift));
	}
      else
	t3 = t2;

      /* MSB is 0/-1 when the range query proves oprnd0's sign, else 1
	 meaning "unknown, compute the sign bit at runtime".  */
      int msb = 1;
      value_range r;
      get_range_query (cfun)->range_of_expr (r, oprnd0);
      if (r.kind () == VR_RANGE)
	{
	  if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
	    msb = 0;
	  else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
	    msb = -1;
	}

      if (msb == 0 && d >= 0)
	{
	  /* q = t3;  */
	  q = t3;
	  pattern_stmt = def_stmt;
	}
      else
	{
	  /* t4 = oprnd0 >> (prec - 1);
	     or if we know from VRP that oprnd0 >= 0
	     t4 = 0;
	     or if we know from VRP that oprnd0 < 0
	     t4 = -1;  */
	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
	  t4 = vect_recog_temp_ssa_var (itype, NULL);
	  if (msb != 1)
	    def_stmt = gimple_build_assign (t4, INTEGER_CST,
					    build_int_cst (itype, msb));
	  else
	    def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
					    build_int_cst (itype, prec - 1));
	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

	  /* q = t3 - t4;  or q = t4 - t3;  */
	  q = vect_recog_temp_ssa_var (itype, NULL);
	  pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
					      d < 0 ? t3 : t4);
	}
    }

  if (rhs_code == TRUNC_MOD_EXPR)
    {
      tree r, t1;

      /* We divided.  Now finish by:
	 t1 = q * oprnd1;
	 r = oprnd0 - t1;  */
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);

      t1 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

      r = vect_recog_temp_ssa_var (itype, NULL);
      pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
    }

  /* Pattern detected.  */
  vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);

  *type_out = vectype;
  return pattern_stmt;
}

/* Function vect_recog_mixed_size_cond_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE a_T, b_T, c_T;
   loop:
     S1  a_T = x_t CMP y_t ? b_T : c_T;

   where type 'TYPE' is an integral type which has different size
   from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
   than 'type', the constants need to fit into an integer type
   with the same width as 'type') or results of conversion from 'type'.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.
	Additionally a def_stmt is added.

	a_it = x_t CMP y_t ?
b_it : c_it; 3856 1.1 mrg a_T = (TYPE) a_it; */ 3857 1.1 mrg 3858 1.1 mrg static gimple * 3859 1.1 mrg vect_recog_mixed_size_cond_pattern (vec_info *vinfo, 3860 1.1 mrg stmt_vec_info stmt_vinfo, tree *type_out) 3861 1.1 mrg { 3862 1.1 mrg gimple *last_stmt = stmt_vinfo->stmt; 3863 1.1 mrg tree cond_expr, then_clause, else_clause; 3864 1.1 mrg tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype; 3865 1.1 mrg gimple *pattern_stmt, *def_stmt; 3866 1.1 mrg tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE; 3867 1.1 mrg gimple *def_stmt0 = NULL, *def_stmt1 = NULL; 3868 1.1 mrg bool promotion; 3869 1.1 mrg tree comp_scalar_type; 3870 1.1 mrg 3871 1.1 mrg if (!is_gimple_assign (last_stmt) 3872 1.1 mrg || gimple_assign_rhs_code (last_stmt) != COND_EXPR 3873 1.1 mrg || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def) 3874 1.1 mrg return NULL; 3875 1.1 mrg 3876 1.1 mrg cond_expr = gimple_assign_rhs1 (last_stmt); 3877 1.1 mrg then_clause = gimple_assign_rhs2 (last_stmt); 3878 1.1 mrg else_clause = gimple_assign_rhs3 (last_stmt); 3879 1.1 mrg 3880 1.1 mrg if (!COMPARISON_CLASS_P (cond_expr)) 3881 1.1 mrg return NULL; 3882 1.1 mrg 3883 1.1 mrg comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0)); 3884 1.1 mrg comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type); 3885 1.1 mrg if (comp_vectype == NULL_TREE) 3886 1.1 mrg return NULL; 3887 1.1 mrg 3888 1.1 mrg type = TREE_TYPE (gimple_assign_lhs (last_stmt)); 3889 1.1 mrg if (types_compatible_p (type, comp_scalar_type) 3890 1.1 mrg || ((TREE_CODE (then_clause) != INTEGER_CST 3891 1.1 mrg || TREE_CODE (else_clause) != INTEGER_CST) 3892 1.1 mrg && !INTEGRAL_TYPE_P (comp_scalar_type)) 3893 1.1 mrg || !INTEGRAL_TYPE_P (type)) 3894 1.1 mrg return NULL; 3895 1.1 mrg 3896 1.1 mrg if ((TREE_CODE (then_clause) != INTEGER_CST 3897 1.1 mrg && !type_conversion_p (vinfo, then_clause, false, 3898 1.1 mrg &orig_type0, &def_stmt0, &promotion)) 3899 1.1 mrg || (TREE_CODE (else_clause) != INTEGER_CST 3900 1.1 
mrg && !type_conversion_p (vinfo, else_clause, false, 3901 1.1 mrg &orig_type1, &def_stmt1, &promotion))) 3902 1.1 mrg return NULL; 3903 1.1 mrg 3904 1.1 mrg if (orig_type0 && orig_type1 3905 1.1 mrg && !types_compatible_p (orig_type0, orig_type1)) 3906 1.1 mrg return NULL; 3907 1.1 mrg 3908 1.1 mrg if (orig_type0) 3909 1.1 mrg { 3910 1.1 mrg if (!types_compatible_p (orig_type0, comp_scalar_type)) 3911 1.1 mrg return NULL; 3912 1.1 mrg then_clause = gimple_assign_rhs1 (def_stmt0); 3913 1.1 mrg itype = orig_type0; 3914 1.1 mrg } 3915 1.1 mrg 3916 1.1 mrg if (orig_type1) 3917 1.1 mrg { 3918 1.1 mrg if (!types_compatible_p (orig_type1, comp_scalar_type)) 3919 1.1 mrg return NULL; 3920 1.1 mrg else_clause = gimple_assign_rhs1 (def_stmt1); 3921 1.1 mrg itype = orig_type1; 3922 1.1 mrg } 3923 1.1 mrg 3924 1.1 mrg 3925 1.1 mrg HOST_WIDE_INT cmp_mode_size 3926 1.1 mrg = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype)); 3927 1.1 mrg 3928 1.1 mrg scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type); 3929 1.1 mrg if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size) 3930 1.1 mrg return NULL; 3931 1.1 mrg 3932 1.1 mrg vectype = get_vectype_for_scalar_type (vinfo, type); 3933 1.1 mrg if (vectype == NULL_TREE) 3934 1.1 mrg return NULL; 3935 1.1 mrg 3936 1.1 mrg if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr))) 3937 1.1 mrg return NULL; 3938 1.1 mrg 3939 1.1 mrg if (itype == NULL_TREE) 3940 1.1 mrg itype = build_nonstandard_integer_type (cmp_mode_size, 3941 1.1 mrg TYPE_UNSIGNED (type)); 3942 1.1 mrg 3943 1.1 mrg if (itype == NULL_TREE 3944 1.1 mrg || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size) 3945 1.1 mrg return NULL; 3946 1.1 mrg 3947 1.1 mrg vecitype = get_vectype_for_scalar_type (vinfo, itype); 3948 1.1 mrg if (vecitype == NULL_TREE) 3949 1.1 mrg return NULL; 3950 1.1 mrg 3951 1.1 mrg if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr))) 3952 1.1 mrg return NULL; 3953 1.1 mrg 3954 1.1 mrg if (GET_MODE_BITSIZE 
(type_mode) > cmp_mode_size) 3955 1.1 mrg { 3956 1.1 mrg if ((TREE_CODE (then_clause) == INTEGER_CST 3957 1.1 mrg && !int_fits_type_p (then_clause, itype)) 3958 1.1 mrg || (TREE_CODE (else_clause) == INTEGER_CST 3959 1.1 mrg && !int_fits_type_p (else_clause, itype))) 3960 1.1 mrg return NULL; 3961 1.1 mrg } 3962 1.1 mrg 3963 1.1 mrg def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), 3964 1.1 mrg COND_EXPR, unshare_expr (cond_expr), 3965 1.1 mrg fold_convert (itype, then_clause), 3966 1.1 mrg fold_convert (itype, else_clause)); 3967 1.1 mrg pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL), 3968 1.1 mrg NOP_EXPR, gimple_assign_lhs (def_stmt)); 3969 1.1 mrg 3970 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype); 3971 1.1 mrg *type_out = vectype; 3972 1.1 mrg 3973 1.1 mrg vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt); 3974 1.1 mrg 3975 1.1 mrg return pattern_stmt; 3976 1.1 mrg } 3977 1.1 mrg 3978 1.1 mrg 3979 1.1 mrg /* Helper function of vect_recog_bool_pattern. Called recursively, return 3980 1.1 mrg true if bool VAR can and should be optimized that way. Assume it shouldn't 3981 1.1 mrg in case it's a result of a comparison which can be directly vectorized into 3982 1.1 mrg a vector comparison. Fills in STMTS with all stmts visited during the 3983 1.1 mrg walk. 
*/ 3984 1.1 mrg 3985 1.1 mrg static bool 3986 1.1 mrg check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts) 3987 1.1 mrg { 3988 1.1 mrg tree rhs1; 3989 1.1 mrg enum tree_code rhs_code; 3990 1.1 mrg 3991 1.1 mrg stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var); 3992 1.1 mrg if (!def_stmt_info) 3993 1.1 mrg return false; 3994 1.1 mrg 3995 1.1 mrg gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt); 3996 1.1 mrg if (!def_stmt) 3997 1.1 mrg return false; 3998 1.1 mrg 3999 1.1 mrg if (stmts.contains (def_stmt)) 4000 1.1 mrg return true; 4001 1.1 mrg 4002 1.1 mrg rhs1 = gimple_assign_rhs1 (def_stmt); 4003 1.1 mrg rhs_code = gimple_assign_rhs_code (def_stmt); 4004 1.1 mrg switch (rhs_code) 4005 1.1 mrg { 4006 1.1 mrg case SSA_NAME: 4007 1.1 mrg if (! check_bool_pattern (rhs1, vinfo, stmts)) 4008 1.1 mrg return false; 4009 1.1 mrg break; 4010 1.1 mrg 4011 1.1 mrg CASE_CONVERT: 4012 1.1 mrg if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))) 4013 1.1 mrg return false; 4014 1.1 mrg if (! check_bool_pattern (rhs1, vinfo, stmts)) 4015 1.1 mrg return false; 4016 1.1 mrg break; 4017 1.1 mrg 4018 1.1 mrg case BIT_NOT_EXPR: 4019 1.1 mrg if (! check_bool_pattern (rhs1, vinfo, stmts)) 4020 1.1 mrg return false; 4021 1.1 mrg break; 4022 1.1 mrg 4023 1.1 mrg case BIT_AND_EXPR: 4024 1.1 mrg case BIT_IOR_EXPR: 4025 1.1 mrg case BIT_XOR_EXPR: 4026 1.1 mrg if (! check_bool_pattern (rhs1, vinfo, stmts) 4027 1.1 mrg || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts)) 4028 1.1 mrg return false; 4029 1.1 mrg break; 4030 1.1 mrg 4031 1.1 mrg default: 4032 1.1 mrg if (TREE_CODE_CLASS (rhs_code) == tcc_comparison) 4033 1.1 mrg { 4034 1.1 mrg tree vecitype, comp_vectype; 4035 1.1 mrg 4036 1.1 mrg /* If the comparison can throw, then is_gimple_condexpr will be 4037 1.1 mrg false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. 
*/ 4038 1.1 mrg if (stmt_could_throw_p (cfun, def_stmt)) 4039 1.1 mrg return false; 4040 1.1 mrg 4041 1.1 mrg comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1)); 4042 1.1 mrg if (comp_vectype == NULL_TREE) 4043 1.1 mrg return false; 4044 1.1 mrg 4045 1.1 mrg tree mask_type = get_mask_type_for_scalar_type (vinfo, 4046 1.1 mrg TREE_TYPE (rhs1)); 4047 1.1 mrg if (mask_type 4048 1.1 mrg && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code)) 4049 1.1 mrg return false; 4050 1.1 mrg 4051 1.1 mrg if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE) 4052 1.1 mrg { 4053 1.1 mrg scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1)); 4054 1.1 mrg tree itype 4055 1.1 mrg = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1); 4056 1.1 mrg vecitype = get_vectype_for_scalar_type (vinfo, itype); 4057 1.1 mrg if (vecitype == NULL_TREE) 4058 1.1 mrg return false; 4059 1.1 mrg } 4060 1.1 mrg else 4061 1.1 mrg vecitype = comp_vectype; 4062 1.1 mrg if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code)) 4063 1.1 mrg return false; 4064 1.1 mrg } 4065 1.1 mrg else 4066 1.1 mrg return false; 4067 1.1 mrg break; 4068 1.1 mrg } 4069 1.1 mrg 4070 1.1 mrg bool res = stmts.add (def_stmt); 4071 1.1 mrg /* We can't end up recursing when just visiting SSA defs but not PHIs. */ 4072 1.1 mrg gcc_assert (!res); 4073 1.1 mrg 4074 1.1 mrg return true; 4075 1.1 mrg } 4076 1.1 mrg 4077 1.1 mrg 4078 1.1 mrg /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous 4079 1.1 mrg stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs 4080 1.1 mrg pattern sequence. 
*/ 4081 1.1 mrg 4082 1.1 mrg static tree 4083 1.1 mrg adjust_bool_pattern_cast (vec_info *vinfo, 4084 1.1 mrg tree type, tree var, stmt_vec_info stmt_info) 4085 1.1 mrg { 4086 1.1 mrg gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL), 4087 1.1 mrg NOP_EXPR, var); 4088 1.1 mrg append_pattern_def_seq (vinfo, stmt_info, cast_stmt, 4089 1.1 mrg get_vectype_for_scalar_type (vinfo, type)); 4090 1.1 mrg return gimple_assign_lhs (cast_stmt); 4091 1.1 mrg } 4092 1.1 mrg 4093 1.1 mrg /* Helper function of vect_recog_bool_pattern. Do the actual transformations. 4094 1.1 mrg VAR is an SSA_NAME that should be transformed from bool to a wider integer 4095 1.1 mrg type, OUT_TYPE is the desired final integer type of the whole pattern. 4096 1.1 mrg STMT_INFO is the info of the pattern root and is where pattern stmts should 4097 1.1 mrg be associated with. DEFS is a map of pattern defs. */ 4098 1.1 mrg 4099 1.1 mrg static void 4100 1.1 mrg adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type, 4101 1.1 mrg stmt_vec_info stmt_info, hash_map <tree, tree> &defs) 4102 1.1 mrg { 4103 1.1 mrg gimple *stmt = SSA_NAME_DEF_STMT (var); 4104 1.1 mrg enum tree_code rhs_code, def_rhs_code; 4105 1.1 mrg tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2; 4106 1.1 mrg location_t loc; 4107 1.1 mrg gimple *pattern_stmt, *def_stmt; 4108 1.1 mrg tree trueval = NULL_TREE; 4109 1.1 mrg 4110 1.1 mrg rhs1 = gimple_assign_rhs1 (stmt); 4111 1.1 mrg rhs2 = gimple_assign_rhs2 (stmt); 4112 1.1 mrg rhs_code = gimple_assign_rhs_code (stmt); 4113 1.1 mrg loc = gimple_location (stmt); 4114 1.1 mrg switch (rhs_code) 4115 1.1 mrg { 4116 1.1 mrg case SSA_NAME: 4117 1.1 mrg CASE_CONVERT: 4118 1.1 mrg irhs1 = *defs.get (rhs1); 4119 1.1 mrg itype = TREE_TYPE (irhs1); 4120 1.1 mrg pattern_stmt 4121 1.1 mrg = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), 4122 1.1 mrg SSA_NAME, irhs1); 4123 1.1 mrg break; 4124 1.1 mrg 4125 1.1 mrg case BIT_NOT_EXPR: 4126 1.1 mrg irhs1 = 
*defs.get (rhs1); 4127 1.1 mrg itype = TREE_TYPE (irhs1); 4128 1.1 mrg pattern_stmt 4129 1.1 mrg = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), 4130 1.1 mrg BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1)); 4131 1.1 mrg break; 4132 1.1 mrg 4133 1.1 mrg case BIT_AND_EXPR: 4134 1.1 mrg /* Try to optimize x = y & (a < b ? 1 : 0); into 4135 1.1 mrg x = (a < b ? y : 0); 4136 1.1 mrg 4137 1.1 mrg E.g. for: 4138 1.1 mrg bool a_b, b_b, c_b; 4139 1.1 mrg TYPE d_T; 4140 1.1 mrg 4141 1.1 mrg S1 a_b = x1 CMP1 y1; 4142 1.1 mrg S2 b_b = x2 CMP2 y2; 4143 1.1 mrg S3 c_b = a_b & b_b; 4144 1.1 mrg S4 d_T = (TYPE) c_b; 4145 1.1 mrg 4146 1.1 mrg we would normally emit: 4147 1.1 mrg 4148 1.1 mrg S1' a_T = x1 CMP1 y1 ? 1 : 0; 4149 1.1 mrg S2' b_T = x2 CMP2 y2 ? 1 : 0; 4150 1.1 mrg S3' c_T = a_T & b_T; 4151 1.1 mrg S4' d_T = c_T; 4152 1.1 mrg 4153 1.1 mrg but we can save one stmt by using the 4154 1.1 mrg result of one of the COND_EXPRs in the other COND_EXPR and leave 4155 1.1 mrg BIT_AND_EXPR stmt out: 4156 1.1 mrg 4157 1.1 mrg S1' a_T = x1 CMP1 y1 ? 1 : 0; 4158 1.1 mrg S3' c_T = x2 CMP2 y2 ? a_T : 0; 4159 1.1 mrg S4' f_T = c_T; 4160 1.1 mrg 4161 1.1 mrg At least when VEC_COND_EXPR is implemented using masks 4162 1.1 mrg cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it 4163 1.1 mrg computes the comparison masks and ands it, in one case with 4164 1.1 mrg all ones vector, in the other case with a vector register. 4165 1.1 mrg Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is 4166 1.1 mrg often more expensive. 
*/ 4167 1.1 mrg def_stmt = SSA_NAME_DEF_STMT (rhs2); 4168 1.1 mrg def_rhs_code = gimple_assign_rhs_code (def_stmt); 4169 1.1 mrg if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison) 4170 1.1 mrg { 4171 1.1 mrg irhs1 = *defs.get (rhs1); 4172 1.1 mrg tree def_rhs1 = gimple_assign_rhs1 (def_stmt); 4173 1.1 mrg if (TYPE_PRECISION (TREE_TYPE (irhs1)) 4174 1.1 mrg == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1)))) 4175 1.1 mrg { 4176 1.1 mrg rhs_code = def_rhs_code; 4177 1.1 mrg rhs1 = def_rhs1; 4178 1.1 mrg rhs2 = gimple_assign_rhs2 (def_stmt); 4179 1.1 mrg trueval = irhs1; 4180 1.1 mrg goto do_compare; 4181 1.1 mrg } 4182 1.1 mrg else 4183 1.1 mrg irhs2 = *defs.get (rhs2); 4184 1.1 mrg goto and_ior_xor; 4185 1.1 mrg } 4186 1.1 mrg def_stmt = SSA_NAME_DEF_STMT (rhs1); 4187 1.1 mrg def_rhs_code = gimple_assign_rhs_code (def_stmt); 4188 1.1 mrg if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison) 4189 1.1 mrg { 4190 1.1 mrg irhs2 = *defs.get (rhs2); 4191 1.1 mrg tree def_rhs1 = gimple_assign_rhs1 (def_stmt); 4192 1.1 mrg if (TYPE_PRECISION (TREE_TYPE (irhs2)) 4193 1.1 mrg == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1)))) 4194 1.1 mrg { 4195 1.1 mrg rhs_code = def_rhs_code; 4196 1.1 mrg rhs1 = def_rhs1; 4197 1.1 mrg rhs2 = gimple_assign_rhs2 (def_stmt); 4198 1.1 mrg trueval = irhs2; 4199 1.1 mrg goto do_compare; 4200 1.1 mrg } 4201 1.1 mrg else 4202 1.1 mrg irhs1 = *defs.get (rhs1); 4203 1.1 mrg goto and_ior_xor; 4204 1.1 mrg } 4205 1.1 mrg /* FALLTHRU */ 4206 1.1 mrg case BIT_IOR_EXPR: 4207 1.1 mrg case BIT_XOR_EXPR: 4208 1.1 mrg irhs1 = *defs.get (rhs1); 4209 1.1 mrg irhs2 = *defs.get (rhs2); 4210 1.1 mrg and_ior_xor: 4211 1.1 mrg if (TYPE_PRECISION (TREE_TYPE (irhs1)) 4212 1.1 mrg != TYPE_PRECISION (TREE_TYPE (irhs2))) 4213 1.1 mrg { 4214 1.1 mrg int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1)); 4215 1.1 mrg int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2)); 4216 1.1 mrg int out_prec = TYPE_PRECISION (out_type); 4217 1.1 mrg if (absu_hwi (out_prec 
- prec1) < absu_hwi (out_prec - prec2)) 4218 1.1 mrg irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2, 4219 1.1 mrg stmt_info); 4220 1.1 mrg else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2)) 4221 1.1 mrg irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1, 4222 1.1 mrg stmt_info); 4223 1.1 mrg else 4224 1.1 mrg { 4225 1.1 mrg irhs1 = adjust_bool_pattern_cast (vinfo, 4226 1.1 mrg out_type, irhs1, stmt_info); 4227 1.1 mrg irhs2 = adjust_bool_pattern_cast (vinfo, 4228 1.1 mrg out_type, irhs2, stmt_info); 4229 1.1 mrg } 4230 1.1 mrg } 4231 1.1 mrg itype = TREE_TYPE (irhs1); 4232 1.1 mrg pattern_stmt 4233 1.1 mrg = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), 4234 1.1 mrg rhs_code, irhs1, irhs2); 4235 1.1 mrg break; 4236 1.1 mrg 4237 1.1 mrg default: 4238 1.1 mrg do_compare: 4239 1.1 mrg gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison); 4240 1.1 mrg if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE 4241 1.1 mrg || !TYPE_UNSIGNED (TREE_TYPE (rhs1)) 4242 1.1 mrg || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)), 4243 1.1 mrg GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1))))) 4244 1.1 mrg { 4245 1.1 mrg scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1)); 4246 1.1 mrg itype 4247 1.1 mrg = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1); 4248 1.1 mrg } 4249 1.1 mrg else 4250 1.1 mrg itype = TREE_TYPE (rhs1); 4251 1.1 mrg cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2); 4252 1.1 mrg if (trueval == NULL_TREE) 4253 1.1 mrg trueval = build_int_cst (itype, 1); 4254 1.1 mrg else 4255 1.1 mrg gcc_checking_assert (useless_type_conversion_p (itype, 4256 1.1 mrg TREE_TYPE (trueval))); 4257 1.1 mrg pattern_stmt 4258 1.1 mrg = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), 4259 1.1 mrg COND_EXPR, cond_expr, trueval, 4260 1.1 mrg build_int_cst (itype, 0)); 4261 1.1 mrg break; 4262 1.1 mrg } 4263 1.1 mrg 4264 1.1 mrg gimple_set_location (pattern_stmt, loc); 4265 1.1 mrg 
append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, 4266 1.1 mrg get_vectype_for_scalar_type (vinfo, itype)); 4267 1.1 mrg defs.put (var, gimple_assign_lhs (pattern_stmt)); 4268 1.1 mrg } 4269 1.1 mrg 4270 1.1 mrg /* Comparison function to qsort a vector of gimple stmts after UID. */ 4271 1.1 mrg 4272 1.1 mrg static int 4273 1.1 mrg sort_after_uid (const void *p1, const void *p2) 4274 1.1 mrg { 4275 1.1 mrg const gimple *stmt1 = *(const gimple * const *)p1; 4276 1.1 mrg const gimple *stmt2 = *(const gimple * const *)p2; 4277 1.1 mrg return gimple_uid (stmt1) - gimple_uid (stmt2); 4278 1.1 mrg } 4279 1.1 mrg 4280 1.1 mrg /* Create pattern stmts for all stmts participating in the bool pattern 4281 1.1 mrg specified by BOOL_STMT_SET and its root STMT_INFO with the desired type 4282 1.1 mrg OUT_TYPE. Return the def of the pattern root. */ 4283 1.1 mrg 4284 1.1 mrg static tree 4285 1.1 mrg adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set, 4286 1.1 mrg tree out_type, stmt_vec_info stmt_info) 4287 1.1 mrg { 4288 1.1 mrg /* Gather original stmts in the bool pattern in their order of appearance 4289 1.1 mrg in the IL. */ 4290 1.1 mrg auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ()); 4291 1.1 mrg for (hash_set <gimple *>::iterator i = bool_stmt_set.begin (); 4292 1.1 mrg i != bool_stmt_set.end (); ++i) 4293 1.1 mrg bool_stmts.quick_push (*i); 4294 1.1 mrg bool_stmts.qsort (sort_after_uid); 4295 1.1 mrg 4296 1.1 mrg /* Now process them in that order, producing pattern stmts. */ 4297 1.1 mrg hash_map <tree, tree> defs; 4298 1.1 mrg for (unsigned i = 0; i < bool_stmts.length (); ++i) 4299 1.1 mrg adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]), 4300 1.1 mrg out_type, stmt_info, defs); 4301 1.1 mrg 4302 1.1 mrg /* Pop the last pattern seq stmt and install it as pattern root for STMT. 
*/ 4303 1.1 mrg gimple *pattern_stmt 4304 1.1 mrg = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)); 4305 1.1 mrg return gimple_assign_lhs (pattern_stmt); 4306 1.1 mrg } 4307 1.1 mrg 4308 1.1 mrg /* Return the proper type for converting bool VAR into 4309 1.1 mrg an integer value or NULL_TREE if no such type exists. 4310 1.1 mrg The type is chosen so that the converted value has the 4311 1.1 mrg same number of elements as VAR's vector type. */ 4312 1.1 mrg 4313 1.1 mrg static tree 4314 1.1 mrg integer_type_for_mask (tree var, vec_info *vinfo) 4315 1.1 mrg { 4316 1.1 mrg if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var))) 4317 1.1 mrg return NULL_TREE; 4318 1.1 mrg 4319 1.1 mrg stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var); 4320 1.1 mrg if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info)) 4321 1.1 mrg return NULL_TREE; 4322 1.1 mrg 4323 1.1 mrg return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1); 4324 1.1 mrg } 4325 1.1 mrg 4326 1.1 mrg /* Function vect_recog_bool_pattern 4327 1.1 mrg 4328 1.1 mrg Try to find pattern like following: 4329 1.1 mrg 4330 1.1 mrg bool a_b, b_b, c_b, d_b, e_b; 4331 1.1 mrg TYPE f_T; 4332 1.1 mrg loop: 4333 1.1 mrg S1 a_b = x1 CMP1 y1; 4334 1.1 mrg S2 b_b = x2 CMP2 y2; 4335 1.1 mrg S3 c_b = a_b & b_b; 4336 1.1 mrg S4 d_b = x3 CMP3 y3; 4337 1.1 mrg S5 e_b = c_b | d_b; 4338 1.1 mrg S6 f_T = (TYPE) e_b; 4339 1.1 mrg 4340 1.1 mrg where type 'TYPE' is an integral type. Or a similar pattern 4341 1.1 mrg ending in 4342 1.1 mrg 4343 1.1 mrg S6 f_Y = e_b ? r_Y : s_Y; 4344 1.1 mrg 4345 1.1 mrg as results from if-conversion of a complex condition. 4346 1.1 mrg 4347 1.1 mrg Input: 4348 1.1 mrg 4349 1.1 mrg * STMT_VINFO: The stmt at the end from which the pattern 4350 1.1 mrg search begins, i.e. cast of a bool to 4351 1.1 mrg an integer type. 4352 1.1 mrg 4353 1.1 mrg Output: 4354 1.1 mrg 4355 1.1 mrg * TYPE_OUT: The type of the output of this pattern. 
4356 1.1 mrg 4357 1.1 mrg * Return value: A new stmt that will be used to replace the pattern. 4358 1.1 mrg 4359 1.1 mrg Assuming size of TYPE is the same as size of all comparisons 4360 1.1 mrg (otherwise some casts would be added where needed), the above 4361 1.1 mrg sequence we create related pattern stmts: 4362 1.1 mrg S1' a_T = x1 CMP1 y1 ? 1 : 0; 4363 1.1 mrg S3' c_T = x2 CMP2 y2 ? a_T : 0; 4364 1.1 mrg S4' d_T = x3 CMP3 y3 ? 1 : 0; 4365 1.1 mrg S5' e_T = c_T | d_T; 4366 1.1 mrg S6' f_T = e_T; 4367 1.1 mrg 4368 1.1 mrg Instead of the above S3' we could emit: 4369 1.1 mrg S2' b_T = x2 CMP2 y2 ? 1 : 0; 4370 1.1 mrg S3' c_T = a_T | b_T; 4371 1.1 mrg but the above is more efficient. */ 4372 1.1 mrg 4373 1.1 mrg static gimple * 4374 1.1 mrg vect_recog_bool_pattern (vec_info *vinfo, 4375 1.1 mrg stmt_vec_info stmt_vinfo, tree *type_out) 4376 1.1 mrg { 4377 1.1 mrg gimple *last_stmt = stmt_vinfo->stmt; 4378 1.1 mrg enum tree_code rhs_code; 4379 1.1 mrg tree var, lhs, rhs, vectype; 4380 1.1 mrg gimple *pattern_stmt; 4381 1.1 mrg 4382 1.1 mrg if (!is_gimple_assign (last_stmt)) 4383 1.1 mrg return NULL; 4384 1.1 mrg 4385 1.1 mrg var = gimple_assign_rhs1 (last_stmt); 4386 1.1 mrg lhs = gimple_assign_lhs (last_stmt); 4387 1.1 mrg rhs_code = gimple_assign_rhs_code (last_stmt); 4388 1.1 mrg 4389 1.1 mrg if (rhs_code == VIEW_CONVERT_EXPR) 4390 1.1 mrg var = TREE_OPERAND (var, 0); 4391 1.1 mrg 4392 1.1 mrg if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var))) 4393 1.1 mrg return NULL; 4394 1.1 mrg 4395 1.1 mrg hash_set<gimple *> bool_stmts; 4396 1.1 mrg 4397 1.1 mrg if (CONVERT_EXPR_CODE_P (rhs_code) 4398 1.1 mrg || rhs_code == VIEW_CONVERT_EXPR) 4399 1.1 mrg { 4400 1.1 mrg if (! 
INTEGRAL_TYPE_P (TREE_TYPE (lhs)) 4401 1.1 mrg || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs))) 4402 1.1 mrg return NULL; 4403 1.1 mrg vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs)); 4404 1.1 mrg 4405 1.1 mrg if (check_bool_pattern (var, vinfo, bool_stmts)) 4406 1.1 mrg { 4407 1.1 mrg rhs = adjust_bool_stmts (vinfo, bool_stmts, 4408 1.1 mrg TREE_TYPE (lhs), stmt_vinfo); 4409 1.1 mrg lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); 4410 1.1 mrg if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs))) 4411 1.1 mrg pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs); 4412 1.1 mrg else 4413 1.1 mrg pattern_stmt 4414 1.1 mrg = gimple_build_assign (lhs, NOP_EXPR, rhs); 4415 1.1 mrg } 4416 1.1 mrg else 4417 1.1 mrg { 4418 1.1 mrg tree type = integer_type_for_mask (var, vinfo); 4419 1.1 mrg tree cst0, cst1, tmp; 4420 1.1 mrg 4421 1.1 mrg if (!type) 4422 1.1 mrg return NULL; 4423 1.1 mrg 4424 1.1 mrg /* We may directly use cond with narrowed type to avoid 4425 1.1 mrg multiple cond exprs with following result packing and 4426 1.1 mrg perform single cond with packed mask instead. In case 4427 1.1 mrg of widening we better make cond first and then extract 4428 1.1 mrg results. 
*/ 4429 1.1 mrg if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs))) 4430 1.1 mrg type = TREE_TYPE (lhs); 4431 1.1 mrg 4432 1.1 mrg cst0 = build_int_cst (type, 0); 4433 1.1 mrg cst1 = build_int_cst (type, 1); 4434 1.1 mrg tmp = vect_recog_temp_ssa_var (type, NULL); 4435 1.1 mrg pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0); 4436 1.1 mrg 4437 1.1 mrg if (!useless_type_conversion_p (type, TREE_TYPE (lhs))) 4438 1.1 mrg { 4439 1.1 mrg tree new_vectype = get_vectype_for_scalar_type (vinfo, type); 4440 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, 4441 1.1 mrg pattern_stmt, new_vectype); 4442 1.1 mrg 4443 1.1 mrg lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); 4444 1.1 mrg pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp); 4445 1.1 mrg } 4446 1.1 mrg } 4447 1.1 mrg 4448 1.1 mrg *type_out = vectype; 4449 1.1 mrg vect_pattern_detected ("vect_recog_bool_pattern", last_stmt); 4450 1.1 mrg 4451 1.1 mrg return pattern_stmt; 4452 1.1 mrg } 4453 1.1 mrg else if (rhs_code == COND_EXPR 4454 1.1 mrg && TREE_CODE (var) == SSA_NAME) 4455 1.1 mrg { 4456 1.1 mrg vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs)); 4457 1.1 mrg if (vectype == NULL_TREE) 4458 1.1 mrg return NULL; 4459 1.1 mrg 4460 1.1 mrg /* Build a scalar type for the boolean result that when 4461 1.1 mrg vectorized matches the vector type of the result in 4462 1.1 mrg size and number of elements. 
*/ 4463 1.1 mrg unsigned prec 4464 1.1 mrg = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)), 4465 1.1 mrg TYPE_VECTOR_SUBPARTS (vectype)); 4466 1.1 mrg 4467 1.1 mrg tree type 4468 1.1 mrg = build_nonstandard_integer_type (prec, 4469 1.1 mrg TYPE_UNSIGNED (TREE_TYPE (var))); 4470 1.1 mrg if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE) 4471 1.1 mrg return NULL; 4472 1.1 mrg 4473 1.1 mrg if (!check_bool_pattern (var, vinfo, bool_stmts)) 4474 1.1 mrg return NULL; 4475 1.1 mrg 4476 1.1 mrg rhs = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo); 4477 1.1 mrg 4478 1.1 mrg lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); 4479 1.1 mrg pattern_stmt 4480 1.1 mrg = gimple_build_assign (lhs, COND_EXPR, 4481 1.1 mrg build2 (NE_EXPR, boolean_type_node, 4482 1.1 mrg rhs, build_int_cst (type, 0)), 4483 1.1 mrg gimple_assign_rhs2 (last_stmt), 4484 1.1 mrg gimple_assign_rhs3 (last_stmt)); 4485 1.1 mrg *type_out = vectype; 4486 1.1 mrg vect_pattern_detected ("vect_recog_bool_pattern", last_stmt); 4487 1.1 mrg 4488 1.1 mrg return pattern_stmt; 4489 1.1 mrg } 4490 1.1 mrg else if (rhs_code == SSA_NAME 4491 1.1 mrg && STMT_VINFO_DATA_REF (stmt_vinfo)) 4492 1.1 mrg { 4493 1.1 mrg stmt_vec_info pattern_stmt_info; 4494 1.1 mrg tree nunits_vectype; 4495 1.1 mrg if (!vect_get_vector_types_for_stmt (vinfo, stmt_vinfo, &vectype, 4496 1.1 mrg &nunits_vectype) 4497 1.1 mrg || !VECTOR_MODE_P (TYPE_MODE (vectype))) 4498 1.1 mrg return NULL; 4499 1.1 mrg 4500 1.1 mrg if (check_bool_pattern (var, vinfo, bool_stmts)) 4501 1.1 mrg rhs = adjust_bool_stmts (vinfo, bool_stmts, 4502 1.1 mrg TREE_TYPE (vectype), stmt_vinfo); 4503 1.1 mrg else 4504 1.1 mrg { 4505 1.1 mrg tree type = integer_type_for_mask (var, vinfo); 4506 1.1 mrg tree cst0, cst1, new_vectype; 4507 1.1 mrg 4508 1.1 mrg if (!type) 4509 1.1 mrg return NULL; 4510 1.1 mrg 4511 1.1 mrg if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype))) 4512 1.1 mrg type = TREE_TYPE (vectype); 4513 1.1 mrg 4514 1.1 
mrg cst0 = build_int_cst (type, 0); 4515 1.1 mrg cst1 = build_int_cst (type, 1); 4516 1.1 mrg new_vectype = get_vectype_for_scalar_type (vinfo, type); 4517 1.1 mrg 4518 1.1 mrg rhs = vect_recog_temp_ssa_var (type, NULL); 4519 1.1 mrg pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0); 4520 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype); 4521 1.1 mrg } 4522 1.1 mrg 4523 1.1 mrg lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs); 4524 1.1 mrg if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs))) 4525 1.1 mrg { 4526 1.1 mrg tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); 4527 1.1 mrg gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs); 4528 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt); 4529 1.1 mrg rhs = rhs2; 4530 1.1 mrg } 4531 1.1 mrg pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs); 4532 1.1 mrg pattern_stmt_info = vinfo->add_stmt (pattern_stmt); 4533 1.1 mrg vinfo->move_dr (pattern_stmt_info, stmt_vinfo); 4534 1.1 mrg *type_out = vectype; 4535 1.1 mrg vect_pattern_detected ("vect_recog_bool_pattern", last_stmt); 4536 1.1 mrg 4537 1.1 mrg return pattern_stmt; 4538 1.1 mrg } 4539 1.1 mrg else 4540 1.1 mrg return NULL; 4541 1.1 mrg } 4542 1.1 mrg 4543 1.1 mrg 4544 1.1 mrg /* A helper for vect_recog_mask_conversion_pattern. Build 4545 1.1 mrg conversion of MASK to a type suitable for masking VECTYPE. 4546 1.1 mrg Built statement gets required vectype and is appended to 4547 1.1 mrg a pattern sequence of STMT_VINFO. 4548 1.1 mrg 4549 1.1 mrg Return converted mask. 
*/ 4550 1.1 mrg 4551 1.1 mrg static tree 4552 1.1 mrg build_mask_conversion (vec_info *vinfo, 4553 1.1 mrg tree mask, tree vectype, stmt_vec_info stmt_vinfo) 4554 1.1 mrg { 4555 1.1 mrg gimple *stmt; 4556 1.1 mrg tree masktype, tmp; 4557 1.1 mrg 4558 1.1 mrg masktype = truth_type_for (vectype); 4559 1.1 mrg tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL); 4560 1.1 mrg stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask); 4561 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, 4562 1.1 mrg stmt, masktype, TREE_TYPE (vectype)); 4563 1.1 mrg 4564 1.1 mrg return tmp; 4565 1.1 mrg } 4566 1.1 mrg 4567 1.1 mrg 4568 1.1 mrg /* Function vect_recog_mask_conversion_pattern 4569 1.1 mrg 4570 1.1 mrg Try to find statements which require boolean type 4571 1.1 mrg converison. Additional conversion statements are 4572 1.1 mrg added to handle such cases. For example: 4573 1.1 mrg 4574 1.1 mrg bool m_1, m_2, m_3; 4575 1.1 mrg int i_4, i_5; 4576 1.1 mrg double d_6, d_7; 4577 1.1 mrg char c_1, c_2, c_3; 4578 1.1 mrg 4579 1.1 mrg S1 m_1 = i_4 > i_5; 4580 1.1 mrg S2 m_2 = d_6 < d_7; 4581 1.1 mrg S3 m_3 = m_1 & m_2; 4582 1.1 mrg S4 c_1 = m_3 ? c_2 : c_3; 4583 1.1 mrg 4584 1.1 mrg Will be transformed into: 4585 1.1 mrg 4586 1.1 mrg S1 m_1 = i_4 > i_5; 4587 1.1 mrg S2 m_2 = d_6 < d_7; 4588 1.1 mrg S3'' m_2' = (_Bool[bitsize=32])m_2 4589 1.1 mrg S3' m_3' = m_1 & m_2'; 4590 1.1 mrg S4'' m_3'' = (_Bool[bitsize=8])m_3' 4591 1.1 mrg S4' c_1' = m_3'' ? 
c_2 : c_3;  */

/* Pattern recognizer: insert mask-type conversions where a boolean
   (mask) value is used with a vector type whose element count differs
   from the mask's natural vector type.  Handles three shapes:
   internal MASK_LOAD/MASK_STORE-style calls, COND_EXPRs, and binary
   boolean operations.  Returns the new pattern statement and sets
   *TYPE_OUT to its vector type, or returns NULL if no conversion is
   needed or possible.  */

static gimple *
vect_recog_mask_conversion_pattern (vec_info *vinfo,
				    stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  enum tree_code rhs_code;
  tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
  tree vectype1, vectype2;
  stmt_vec_info pattern_stmt_info;
  tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
  tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;

  /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion.  */
  if (is_gimple_call (last_stmt)
      && gimple_call_internal_p (last_stmt))
    {
      gcall *pattern_stmt;

      internal_fn ifn = gimple_call_internal_fn (last_stmt);
      int mask_argno = internal_fn_mask_index (ifn);
      if (mask_argno < 0)
	return NULL;

      bool store_p = internal_store_fn_p (ifn);
      if (store_p)
	{
	  /* For a store the reference vector type comes from the value
	     being stored.  */
	  int rhs_index = internal_fn_stored_value_index (ifn);
	  tree rhs = gimple_call_arg (last_stmt, rhs_index);
	  vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
	}
      else
	{
	  /* For a load it comes from the loaded value.  */
	  lhs = gimple_call_lhs (last_stmt);
	  if (!lhs)
	    return NULL;
	  vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
	}

      tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
      tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
      if (!mask_arg_type)
	return NULL;
      vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);

      /* No conversion needed when the element counts already agree.  */
      if (!vectype1 || !vectype2
	  || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
		       TYPE_VECTOR_SUBPARTS (vectype2)))
	return NULL;

      tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);

      /* Rebuild the call with the converted mask in the mask slot.  */
      auto_vec<tree, 8> args;
      unsigned int nargs = gimple_call_num_args (last_stmt);
      args.safe_grow (nargs, true);
      for (unsigned int i = 0; i < nargs; ++i)
	args[i] = ((int) i == mask_argno
		   ? tmp
		   : gimple_call_arg (last_stmt, i));
      pattern_stmt = gimple_build_call_internal_vec (ifn, args);

      if (!store_p)
	{
	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  gimple_call_set_lhs (pattern_stmt, lhs);
	}
      gimple_call_set_nothrow (pattern_stmt, true);

      pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
      if (STMT_VINFO_DATA_REF (stmt_vinfo))
	vinfo->move_dr (pattern_stmt_info, stmt_vinfo);

      *type_out = vectype1;
      vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);

      return pattern_stmt;
    }

  if (!is_gimple_assign (last_stmt))
    return NULL;

  gimple *pattern_stmt;
  lhs = gimple_assign_lhs (last_stmt);
  rhs1 = gimple_assign_rhs1 (last_stmt);
  rhs_code = gimple_assign_rhs_code (last_stmt);

  /* Check for cond expression requiring mask conversion.  */
  if (rhs_code == COND_EXPR)
    {
      vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));

      if (TREE_CODE (rhs1) == SSA_NAME)
	{
	  rhs1_type = integer_type_for_mask (rhs1, vinfo);
	  if (!rhs1_type)
	    return NULL;
	}
      else if (COMPARISON_CLASS_P (rhs1))
	{
	  /* Check whether we're comparing scalar booleans and (if so)
	     whether a better mask type exists than the mask associated
	     with boolean-sized elements.  This avoids unnecessary packs
	     and unpacks if the booleans are set from comparisons of
	     wider types.  E.g. in:

	       int x1, x2, x3, x4, y1, y1;
	       ...
	       bool b1 = (x1 == x2);
	       bool b2 = (x3 == x4);
	       ... = b1 == b2 ? y1 : y2;

	     it is better for b1 and b2 to use the mask type associated
	     with int elements rather than bool (byte) elements.  */
	  rhs1_op0 = TREE_OPERAND (rhs1, 0);
	  rhs1_op1 = TREE_OPERAND (rhs1, 1);
	  if (!rhs1_op0 || !rhs1_op1)
	    return NULL;
	  rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
	  rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);

	  if (!rhs1_op0_type)
	    rhs1_type = TREE_TYPE (rhs1_op0);
	  else if (!rhs1_op1_type)
	    rhs1_type = TREE_TYPE (rhs1_op1);
	  else if (TYPE_PRECISION (rhs1_op0_type)
		   != TYPE_PRECISION (rhs1_op1_type))
	    {
	      /* The operand mask precisions disagree; pick the one whose
		 precision is closest to the result's, or fall back to a
		 mask matching the result precision when the result lies
		 between the two.  */
	      int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
		- (int) TYPE_PRECISION (TREE_TYPE (lhs));
	      int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
		- (int) TYPE_PRECISION (TREE_TYPE (lhs));
	      if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
		{
		  if (abs (tmp0) > abs (tmp1))
		    rhs1_type = rhs1_op1_type;
		  else
		    rhs1_type = rhs1_op0_type;
		}
	      else
		rhs1_type = build_nonstandard_integer_type
		  (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
	    }
	  else
	    rhs1_type = rhs1_op0_type;
	}
      else
	return NULL;

      vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);

      if (!vectype1 || !vectype2)
	return NULL;

      /* Continue if a conversion is needed.  Also continue if we have
	 a comparison whose vector type would normally be different from
	 VECTYPE2 when considered in isolation.  In that case we'll
	 replace the comparison with an SSA name (so that we can record
	 its vector type) and behave as though the comparison was an SSA
	 name from the outset.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
		    TYPE_VECTOR_SUBPARTS (vectype2))
	  && !rhs1_op0_type
	  && !rhs1_op1_type)
	return NULL;

      /* If rhs1 is invariant and we can promote it leave the COND_EXPR
	 in place, we can handle it in vectorizable_condition.  This avoids
	 unnecessary promotion stmts and increased vectorization factor.  */
      if (COMPARISON_CLASS_P (rhs1)
	  && INTEGRAL_TYPE_P (rhs1_type)
	  && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
		       TYPE_VECTOR_SUBPARTS (vectype2)))
	{
	  enum vect_def_type dt;
	  if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
	      && dt == vect_external_def
	      && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
	      && (dt == vect_external_def
		  || dt == vect_constant_def))
	    {
	      tree wide_scalar_type = build_nonstandard_integer_type
		(vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
	      tree vectype3 = get_vectype_for_scalar_type (vinfo,
							   wide_scalar_type);
	      if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
		return NULL;
	    }
	}

      /* If rhs1 is a comparison we need to move it into a
	 separate statement.  */
      if (TREE_CODE (rhs1) != SSA_NAME)
	{
	  tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
	  if (rhs1_op0_type
	      && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
	    rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
					      vectype2, stmt_vinfo);
	  if (rhs1_op1_type
	      && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
	    rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
					      vectype2, stmt_vinfo);
	  pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
					      rhs1_op0, rhs1_op1);
	  rhs1 = tmp;
	  append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
				  rhs1_type);
	}

      if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
		    TYPE_VECTOR_SUBPARTS (vectype2)))
	tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
      else
	tmp = rhs1;

      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
					  gimple_assign_rhs2 (last_stmt),
					  gimple_assign_rhs3 (last_stmt));

      *type_out = vectype1;
      vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);

      return pattern_stmt;
    }

  /* Now check for binary boolean operations requiring conversion for
     one of operands.  */
  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    return NULL;

  if (rhs_code != BIT_IOR_EXPR
      && rhs_code != BIT_XOR_EXPR
      && rhs_code != BIT_AND_EXPR
      && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
    return NULL;

  rhs2 = gimple_assign_rhs2 (last_stmt);

  rhs1_type = integer_type_for_mask (rhs1, vinfo);
  rhs2_type = integer_type_for_mask (rhs2, vinfo);

  if (!rhs1_type || !rhs2_type
      || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
    return NULL;

  /* Convert the wider-precision mask operand to the narrower mask type.  */
  if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
    {
      vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
      if (!vectype1)
	return NULL;
      rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
    }
  else
    {
      vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
      if (!vectype1)
	return NULL;
      rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
    }

  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
  pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);

  *type_out = vectype1;
  vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);

  return pattern_stmt;
}

/* STMT_INFO is a load or store.  If the load or store is conditional, return
   the boolean condition under which it occurs, otherwise return null.
*/ 4864 1.1 mrg 4865 1.1 mrg static tree 4866 1.1 mrg vect_get_load_store_mask (stmt_vec_info stmt_info) 4867 1.1 mrg { 4868 1.1 mrg if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt)) 4869 1.1 mrg { 4870 1.1 mrg gcc_assert (gimple_assign_single_p (def_assign)); 4871 1.1 mrg return NULL_TREE; 4872 1.1 mrg } 4873 1.1 mrg 4874 1.1 mrg if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt)) 4875 1.1 mrg { 4876 1.1 mrg internal_fn ifn = gimple_call_internal_fn (def_call); 4877 1.1 mrg int mask_index = internal_fn_mask_index (ifn); 4878 1.1 mrg return gimple_call_arg (def_call, mask_index); 4879 1.1 mrg } 4880 1.1 mrg 4881 1.1 mrg gcc_unreachable (); 4882 1.1 mrg } 4883 1.1 mrg 4884 1.1 mrg /* Return MASK if MASK is suitable for masking an operation on vectors 4885 1.1 mrg of type VECTYPE, otherwise convert it into such a form and return 4886 1.1 mrg the result. Associate any conversion statements with STMT_INFO's 4887 1.1 mrg pattern. */ 4888 1.1 mrg 4889 1.1 mrg static tree 4890 1.1 mrg vect_convert_mask_for_vectype (tree mask, tree vectype, 4891 1.1 mrg stmt_vec_info stmt_info, vec_info *vinfo) 4892 1.1 mrg { 4893 1.1 mrg tree mask_type = integer_type_for_mask (mask, vinfo); 4894 1.1 mrg if (mask_type) 4895 1.1 mrg { 4896 1.1 mrg tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type); 4897 1.1 mrg if (mask_vectype 4898 1.1 mrg && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), 4899 1.1 mrg TYPE_VECTOR_SUBPARTS (mask_vectype))) 4900 1.1 mrg mask = build_mask_conversion (vinfo, mask, vectype, stmt_info); 4901 1.1 mrg } 4902 1.1 mrg return mask; 4903 1.1 mrg } 4904 1.1 mrg 4905 1.1 mrg /* Return the equivalent of: 4906 1.1 mrg 4907 1.1 mrg fold_convert (TYPE, VALUE) 4908 1.1 mrg 4909 1.1 mrg with the expectation that the operation will be vectorized. 4910 1.1 mrg If new statements are needed, add them as pattern statements 4911 1.1 mrg to STMT_INFO. 
*/ 4912 1.1 mrg 4913 1.1 mrg static tree 4914 1.1 mrg vect_add_conversion_to_pattern (vec_info *vinfo, 4915 1.1 mrg tree type, tree value, stmt_vec_info stmt_info) 4916 1.1 mrg { 4917 1.1 mrg if (useless_type_conversion_p (type, TREE_TYPE (value))) 4918 1.1 mrg return value; 4919 1.1 mrg 4920 1.1 mrg tree new_value = vect_recog_temp_ssa_var (type, NULL); 4921 1.1 mrg gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value); 4922 1.1 mrg append_pattern_def_seq (vinfo, stmt_info, conversion, 4923 1.1 mrg get_vectype_for_scalar_type (vinfo, type)); 4924 1.1 mrg return new_value; 4925 1.1 mrg } 4926 1.1 mrg 4927 1.1 mrg /* Try to convert STMT_INFO into a call to a gather load or scatter store 4928 1.1 mrg internal function. Return the final statement on success and set 4929 1.1 mrg *TYPE_OUT to the vector type being loaded or stored. 4930 1.1 mrg 4931 1.1 mrg This function only handles gathers and scatters that were recognized 4932 1.1 mrg as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */ 4933 1.1 mrg 4934 1.1 mrg static gimple * 4935 1.1 mrg vect_recog_gather_scatter_pattern (vec_info *vinfo, 4936 1.1 mrg stmt_vec_info stmt_info, tree *type_out) 4937 1.1 mrg { 4938 1.1 mrg /* Currently we only support this for loop vectorization. */ 4939 1.1 mrg loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); 4940 1.1 mrg if (!loop_vinfo) 4941 1.1 mrg return NULL; 4942 1.1 mrg 4943 1.1 mrg /* Make sure that we're looking at a gather load or scatter store. */ 4944 1.1 mrg data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); 4945 1.1 mrg if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info)) 4946 1.1 mrg return NULL; 4947 1.1 mrg 4948 1.1 mrg /* Get the boolean that controls whether the load or store happens. 4949 1.1 mrg This is null if the operation is unconditional. 
*/ 4950 1.1 mrg tree mask = vect_get_load_store_mask (stmt_info); 4951 1.1 mrg 4952 1.1 mrg /* Make sure that the target supports an appropriate internal 4953 1.1 mrg function for the gather/scatter operation. */ 4954 1.1 mrg gather_scatter_info gs_info; 4955 1.1 mrg if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info) 4956 1.1 mrg || gs_info.ifn == IFN_LAST) 4957 1.1 mrg return NULL; 4958 1.1 mrg 4959 1.1 mrg /* Convert the mask to the right form. */ 4960 1.1 mrg tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo, 4961 1.1 mrg gs_info.element_type); 4962 1.1 mrg if (mask) 4963 1.1 mrg mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info, 4964 1.1 mrg loop_vinfo); 4965 1.1 mrg else if (gs_info.ifn == IFN_MASK_SCATTER_STORE 4966 1.1 mrg || gs_info.ifn == IFN_MASK_GATHER_LOAD) 4967 1.1 mrg mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1); 4968 1.1 mrg 4969 1.1 mrg /* Get the invariant base and non-invariant offset, converting the 4970 1.1 mrg latter to the same width as the vector elements. */ 4971 1.1 mrg tree base = gs_info.base; 4972 1.1 mrg tree offset_type = TREE_TYPE (gs_info.offset_vectype); 4973 1.1 mrg tree offset = vect_add_conversion_to_pattern (vinfo, offset_type, 4974 1.1 mrg gs_info.offset, stmt_info); 4975 1.1 mrg 4976 1.1 mrg /* Build the new pattern statement. 
*/ 4977 1.1 mrg tree scale = size_int (gs_info.scale); 4978 1.1 mrg gcall *pattern_stmt; 4979 1.1 mrg if (DR_IS_READ (dr)) 4980 1.1 mrg { 4981 1.1 mrg tree zero = build_zero_cst (gs_info.element_type); 4982 1.1 mrg if (mask != NULL) 4983 1.1 mrg pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base, 4984 1.1 mrg offset, scale, zero, mask); 4985 1.1 mrg else 4986 1.1 mrg pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base, 4987 1.1 mrg offset, scale, zero); 4988 1.1 mrg tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL); 4989 1.1 mrg gimple_call_set_lhs (pattern_stmt, load_lhs); 4990 1.1 mrg } 4991 1.1 mrg else 4992 1.1 mrg { 4993 1.1 mrg tree rhs = vect_get_store_rhs (stmt_info); 4994 1.1 mrg if (mask != NULL) 4995 1.1 mrg pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, 4996 1.1 mrg base, offset, scale, rhs, 4997 1.1 mrg mask); 4998 1.1 mrg else 4999 1.1 mrg pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, 5000 1.1 mrg base, offset, scale, rhs); 5001 1.1 mrg } 5002 1.1 mrg gimple_call_set_nothrow (pattern_stmt, true); 5003 1.1 mrg 5004 1.1 mrg /* Copy across relevant vectorization info and associate DR with the 5005 1.1 mrg new pattern statement instead of the original statement. */ 5006 1.1 mrg stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt); 5007 1.1 mrg loop_vinfo->move_dr (pattern_stmt_info, stmt_info); 5008 1.1 mrg 5009 1.1 mrg tree vectype = STMT_VINFO_VECTYPE (stmt_info); 5010 1.1 mrg *type_out = vectype; 5011 1.1 mrg vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt); 5012 1.1 mrg 5013 1.1 mrg return pattern_stmt; 5014 1.1 mrg } 5015 1.1 mrg 5016 1.1 mrg /* Return true if TYPE is a non-boolean integer type. These are the types 5017 1.1 mrg that we want to consider for narrowing. 
*/ 5018 1.1 mrg 5019 1.1 mrg static bool 5020 1.1 mrg vect_narrowable_type_p (tree type) 5021 1.1 mrg { 5022 1.1 mrg return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type); 5023 1.1 mrg } 5024 1.1 mrg 5025 1.1 mrg /* Return true if the operation given by CODE can be truncated to N bits 5026 1.1 mrg when only N bits of the output are needed. This is only true if bit N+1 5027 1.1 mrg of the inputs has no effect on the low N bits of the result. */ 5028 1.1 mrg 5029 1.1 mrg static bool 5030 1.1 mrg vect_truncatable_operation_p (tree_code code) 5031 1.1 mrg { 5032 1.1 mrg switch (code) 5033 1.1 mrg { 5034 1.1 mrg case NEGATE_EXPR: 5035 1.1 mrg case PLUS_EXPR: 5036 1.1 mrg case MINUS_EXPR: 5037 1.1 mrg case MULT_EXPR: 5038 1.1 mrg case BIT_NOT_EXPR: 5039 1.1 mrg case BIT_AND_EXPR: 5040 1.1 mrg case BIT_IOR_EXPR: 5041 1.1 mrg case BIT_XOR_EXPR: 5042 1.1 mrg case COND_EXPR: 5043 1.1 mrg return true; 5044 1.1 mrg 5045 1.1 mrg default: 5046 1.1 mrg return false; 5047 1.1 mrg } 5048 1.1 mrg } 5049 1.1 mrg 5050 1.1 mrg /* Record that STMT_INFO could be changed from operating on TYPE to 5051 1.1 mrg operating on a type with the precision and sign given by PRECISION 5052 1.1 mrg and SIGN respectively. PRECISION is an arbitrary bit precision; 5053 1.1 mrg it might not be a whole number of bytes. */ 5054 1.1 mrg 5055 1.1 mrg static void 5056 1.1 mrg vect_set_operation_type (stmt_vec_info stmt_info, tree type, 5057 1.1 mrg unsigned int precision, signop sign) 5058 1.1 mrg { 5059 1.1 mrg /* Round the precision up to a whole number of bytes. 
*/ 5060 1.1 mrg precision = vect_element_precision (precision); 5061 1.1 mrg if (precision < TYPE_PRECISION (type) 5062 1.1 mrg && (!stmt_info->operation_precision 5063 1.1 mrg || stmt_info->operation_precision > precision)) 5064 1.1 mrg { 5065 1.1 mrg stmt_info->operation_precision = precision; 5066 1.1 mrg stmt_info->operation_sign = sign; 5067 1.1 mrg } 5068 1.1 mrg } 5069 1.1 mrg 5070 1.1 mrg /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its 5071 1.1 mrg non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION 5072 1.1 mrg is an arbitrary bit precision; it might not be a whole number of bytes. */ 5073 1.1 mrg 5074 1.1 mrg static void 5075 1.1 mrg vect_set_min_input_precision (stmt_vec_info stmt_info, tree type, 5076 1.1 mrg unsigned int min_input_precision) 5077 1.1 mrg { 5078 1.1 mrg /* This operation in isolation only requires the inputs to have 5079 1.1 mrg MIN_INPUT_PRECISION of precision, However, that doesn't mean 5080 1.1 mrg that MIN_INPUT_PRECISION is a natural precision for the chain 5081 1.1 mrg as a whole. E.g. consider something like: 5082 1.1 mrg 5083 1.1 mrg unsigned short *x, *y; 5084 1.1 mrg *y = ((*x & 0xf0) >> 4) | (*y << 4); 5085 1.1 mrg 5086 1.1 mrg The right shift can be done on unsigned chars, and only requires the 5087 1.1 mrg result of "*x & 0xf0" to be done on unsigned chars. But taking that 5088 1.1 mrg approach would mean turning a natural chain of single-vector unsigned 5089 1.1 mrg short operations into one that truncates "*x" and then extends 5090 1.1 mrg "(*x & 0xf0) >> 4", with two vectors for each unsigned short 5091 1.1 mrg operation and one vector for each unsigned char operation. 5092 1.1 mrg This would be a significant pessimization. 5093 1.1 mrg 5094 1.1 mrg Instead only propagate the maximum of this precision and the precision 5095 1.1 mrg required by the users of the result. 
This means that we don't pessimize 5096 1.1 mrg the case above but continue to optimize things like: 5097 1.1 mrg 5098 1.1 mrg unsigned char *y; 5099 1.1 mrg unsigned short *x; 5100 1.1 mrg *y = ((*x & 0xf0) >> 4) | (*y << 4); 5101 1.1 mrg 5102 1.1 mrg Here we would truncate two vectors of *x to a single vector of 5103 1.1 mrg unsigned chars and use single-vector unsigned char operations for 5104 1.1 mrg everything else, rather than doing two unsigned short copies of 5105 1.1 mrg "(*x & 0xf0) >> 4" and then truncating the result. */ 5106 1.1 mrg min_input_precision = MAX (min_input_precision, 5107 1.1 mrg stmt_info->min_output_precision); 5108 1.1 mrg 5109 1.1 mrg if (min_input_precision < TYPE_PRECISION (type) 5110 1.1 mrg && (!stmt_info->min_input_precision 5111 1.1 mrg || stmt_info->min_input_precision > min_input_precision)) 5112 1.1 mrg stmt_info->min_input_precision = min_input_precision; 5113 1.1 mrg } 5114 1.1 mrg 5115 1.1 mrg /* Subroutine of vect_determine_min_output_precision. Return true if 5116 1.1 mrg we can calculate a reduced number of output bits for STMT_INFO, 5117 1.1 mrg whose result is LHS. */ 5118 1.1 mrg 5119 1.1 mrg static bool 5120 1.1 mrg vect_determine_min_output_precision_1 (vec_info *vinfo, 5121 1.1 mrg stmt_vec_info stmt_info, tree lhs) 5122 1.1 mrg { 5123 1.1 mrg /* Take the maximum precision required by users of the result. */ 5124 1.1 mrg unsigned int precision = 0; 5125 1.1 mrg imm_use_iterator iter; 5126 1.1 mrg use_operand_p use; 5127 1.1 mrg FOR_EACH_IMM_USE_FAST (use, iter, lhs) 5128 1.1 mrg { 5129 1.1 mrg gimple *use_stmt = USE_STMT (use); 5130 1.1 mrg if (is_gimple_debug (use_stmt)) 5131 1.1 mrg continue; 5132 1.1 mrg stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt); 5133 1.1 mrg if (!use_stmt_info || !use_stmt_info->min_input_precision) 5134 1.1 mrg return false; 5135 1.1 mrg /* The input precision recorded for COND_EXPRs applies only to the 5136 1.1 mrg "then" and "else" values. 
*/ 5137 1.1 mrg gassign *assign = dyn_cast <gassign *> (stmt_info->stmt); 5138 1.1 mrg if (assign 5139 1.1 mrg && gimple_assign_rhs_code (assign) == COND_EXPR 5140 1.1 mrg && use->use != gimple_assign_rhs2_ptr (assign) 5141 1.1 mrg && use->use != gimple_assign_rhs3_ptr (assign)) 5142 1.1 mrg return false; 5143 1.1 mrg precision = MAX (precision, use_stmt_info->min_input_precision); 5144 1.1 mrg } 5145 1.1 mrg 5146 1.1 mrg if (dump_enabled_p ()) 5147 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, 5148 1.1 mrg "only the low %d bits of %T are significant\n", 5149 1.1 mrg precision, lhs); 5150 1.1 mrg stmt_info->min_output_precision = precision; 5151 1.1 mrg return true; 5152 1.1 mrg } 5153 1.1 mrg 5154 1.1 mrg /* Calculate min_output_precision for STMT_INFO. */ 5155 1.1 mrg 5156 1.1 mrg static void 5157 1.1 mrg vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info) 5158 1.1 mrg { 5159 1.1 mrg /* We're only interested in statements with a narrowable result. */ 5160 1.1 mrg tree lhs = gimple_get_lhs (stmt_info->stmt); 5161 1.1 mrg if (!lhs 5162 1.1 mrg || TREE_CODE (lhs) != SSA_NAME 5163 1.1 mrg || !vect_narrowable_type_p (TREE_TYPE (lhs))) 5164 1.1 mrg return; 5165 1.1 mrg 5166 1.1 mrg if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs)) 5167 1.1 mrg stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs)); 5168 1.1 mrg } 5169 1.1 mrg 5170 1.1 mrg /* Use range information to decide whether STMT (described by STMT_INFO) 5171 1.1 mrg could be done in a narrower type. This is effectively a forward 5172 1.1 mrg propagation, since it uses context-independent information that applies 5173 1.1 mrg to all users of an SSA name. 
*/ 5174 1.1 mrg 5175 1.1 mrg static void 5176 1.1 mrg vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt) 5177 1.1 mrg { 5178 1.1 mrg tree lhs = gimple_assign_lhs (stmt); 5179 1.1 mrg if (!lhs || TREE_CODE (lhs) != SSA_NAME) 5180 1.1 mrg return; 5181 1.1 mrg 5182 1.1 mrg tree type = TREE_TYPE (lhs); 5183 1.1 mrg if (!vect_narrowable_type_p (type)) 5184 1.1 mrg return; 5185 1.1 mrg 5186 1.1 mrg /* First see whether we have any useful range information for the result. */ 5187 1.1 mrg unsigned int precision = TYPE_PRECISION (type); 5188 1.1 mrg signop sign = TYPE_SIGN (type); 5189 1.1 mrg wide_int min_value, max_value; 5190 1.1 mrg if (!vect_get_range_info (lhs, &min_value, &max_value)) 5191 1.1 mrg return; 5192 1.1 mrg 5193 1.1 mrg tree_code code = gimple_assign_rhs_code (stmt); 5194 1.1 mrg unsigned int nops = gimple_num_ops (stmt); 5195 1.1 mrg 5196 1.1 mrg if (!vect_truncatable_operation_p (code)) 5197 1.1 mrg { 5198 1.1 mrg /* Handle operations that can be computed in type T if all inputs 5199 1.1 mrg and outputs can be represented in type T. Also handle left and 5200 1.1 mrg right shifts, where (in addition) the maximum shift amount must 5201 1.1 mrg be less than the number of bits in T. */ 5202 1.1 mrg bool is_shift; 5203 1.1 mrg switch (code) 5204 1.1 mrg { 5205 1.1 mrg case LSHIFT_EXPR: 5206 1.1 mrg case RSHIFT_EXPR: 5207 1.1 mrg is_shift = true; 5208 1.1 mrg break; 5209 1.1 mrg 5210 1.1 mrg case ABS_EXPR: 5211 1.1 mrg case MIN_EXPR: 5212 1.1 mrg case MAX_EXPR: 5213 1.1 mrg case TRUNC_DIV_EXPR: 5214 1.1 mrg case CEIL_DIV_EXPR: 5215 1.1 mrg case FLOOR_DIV_EXPR: 5216 1.1 mrg case ROUND_DIV_EXPR: 5217 1.1 mrg case EXACT_DIV_EXPR: 5218 1.1 mrg /* Modulus is excluded because it is typically calculated by doing 5219 1.1 mrg a division, for which minimum signed / -1 isn't representable in 5220 1.1 mrg the original signed type. We could take the division range into 5221 1.1 mrg account instead, if handling modulus ever becomes important. 
*/ 5222 1.1 mrg is_shift = false; 5223 1.1 mrg break; 5224 1.1 mrg 5225 1.1 mrg default: 5226 1.1 mrg return; 5227 1.1 mrg } 5228 1.1 mrg for (unsigned int i = 1; i < nops; ++i) 5229 1.1 mrg { 5230 1.1 mrg tree op = gimple_op (stmt, i); 5231 1.1 mrg wide_int op_min_value, op_max_value; 5232 1.1 mrg if (TREE_CODE (op) == INTEGER_CST) 5233 1.1 mrg { 5234 1.1 mrg unsigned int op_precision = TYPE_PRECISION (TREE_TYPE (op)); 5235 1.1 mrg op_min_value = op_max_value = wi::to_wide (op, op_precision); 5236 1.1 mrg } 5237 1.1 mrg else if (TREE_CODE (op) == SSA_NAME) 5238 1.1 mrg { 5239 1.1 mrg if (!vect_get_range_info (op, &op_min_value, &op_max_value)) 5240 1.1 mrg return; 5241 1.1 mrg } 5242 1.1 mrg else 5243 1.1 mrg return; 5244 1.1 mrg 5245 1.1 mrg if (is_shift && i == 2) 5246 1.1 mrg { 5247 1.1 mrg /* There needs to be one more bit than the maximum shift amount. 5248 1.1 mrg 5249 1.1 mrg If the maximum shift amount is already 1 less than PRECISION 5250 1.1 mrg then we can't narrow the shift further. Dealing with that 5251 1.1 mrg case first ensures that we can safely use an unsigned range 5252 1.1 mrg below. 5253 1.1 mrg 5254 1.1 mrg op_min_value isn't relevant, since shifts by negative amounts 5255 1.1 mrg are UB. */ 5256 1.1 mrg if (wi::geu_p (op_max_value, precision - 1)) 5257 1.1 mrg return; 5258 1.1 mrg unsigned int min_bits = op_max_value.to_uhwi () + 1; 5259 1.1 mrg 5260 1.1 mrg /* As explained below, we can convert a signed shift into an 5261 1.1 mrg unsigned shift if the sign bit is always clear. At this 5262 1.1 mrg point we've already processed the ranges of the output and 5263 1.1 mrg the first input. 
*/ 5264 1.1 mrg auto op_sign = sign; 5265 1.1 mrg if (sign == SIGNED && !wi::neg_p (min_value)) 5266 1.1 mrg op_sign = UNSIGNED; 5267 1.1 mrg op_min_value = wide_int::from (wi::min_value (min_bits, op_sign), 5268 1.1 mrg precision, op_sign); 5269 1.1 mrg op_max_value = wide_int::from (wi::max_value (min_bits, op_sign), 5270 1.1 mrg precision, op_sign); 5271 1.1 mrg } 5272 1.1 mrg min_value = wi::min (min_value, op_min_value, sign); 5273 1.1 mrg max_value = wi::max (max_value, op_max_value, sign); 5274 1.1 mrg } 5275 1.1 mrg } 5276 1.1 mrg 5277 1.1 mrg /* Try to switch signed types for unsigned types if we can. 5278 1.1 mrg This is better for two reasons. First, unsigned ops tend 5279 1.1 mrg to be cheaper than signed ops. Second, it means that we can 5280 1.1 mrg handle things like: 5281 1.1 mrg 5282 1.1 mrg signed char c; 5283 1.1 mrg int res = (int) c & 0xff00; // range [0x0000, 0xff00] 5284 1.1 mrg 5285 1.1 mrg as: 5286 1.1 mrg 5287 1.1 mrg signed char c; 5288 1.1 mrg unsigned short res_1 = (unsigned short) c & 0xff00; 5289 1.1 mrg int res = (int) res_1; 5290 1.1 mrg 5291 1.1 mrg where the intermediate result res_1 has unsigned rather than 5292 1.1 mrg signed type. */ 5293 1.1 mrg if (sign == SIGNED && !wi::neg_p (min_value)) 5294 1.1 mrg sign = UNSIGNED; 5295 1.1 mrg 5296 1.1 mrg /* See what precision is required for MIN_VALUE and MAX_VALUE. */ 5297 1.1 mrg unsigned int precision1 = wi::min_precision (min_value, sign); 5298 1.1 mrg unsigned int precision2 = wi::min_precision (max_value, sign); 5299 1.1 mrg unsigned int value_precision = MAX (precision1, precision2); 5300 1.1 mrg if (value_precision >= precision) 5301 1.1 mrg return; 5302 1.1 mrg 5303 1.1 mrg if (dump_enabled_p ()) 5304 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d" 5305 1.1 mrg " without loss of precision: %G", 5306 1.1 mrg sign == SIGNED ? 
"signed" : "unsigned", 5307 1.1 mrg value_precision, stmt); 5308 1.1 mrg 5309 1.1 mrg vect_set_operation_type (stmt_info, type, value_precision, sign); 5310 1.1 mrg vect_set_min_input_precision (stmt_info, type, value_precision); 5311 1.1 mrg } 5312 1.1 mrg 5313 1.1 mrg /* Use information about the users of STMT's result to decide whether 5314 1.1 mrg STMT (described by STMT_INFO) could be done in a narrower type. 5315 1.1 mrg This is effectively a backward propagation. */ 5316 1.1 mrg 5317 1.1 mrg static void 5318 1.1 mrg vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt) 5319 1.1 mrg { 5320 1.1 mrg tree_code code = gimple_assign_rhs_code (stmt); 5321 1.1 mrg unsigned int opno = (code == COND_EXPR ? 2 : 1); 5322 1.1 mrg tree type = TREE_TYPE (gimple_op (stmt, opno)); 5323 1.1 mrg if (!vect_narrowable_type_p (type)) 5324 1.1 mrg return; 5325 1.1 mrg 5326 1.1 mrg unsigned int precision = TYPE_PRECISION (type); 5327 1.1 mrg unsigned int operation_precision, min_input_precision; 5328 1.1 mrg switch (code) 5329 1.1 mrg { 5330 1.1 mrg CASE_CONVERT: 5331 1.1 mrg /* Only the bits that contribute to the output matter. Don't change 5332 1.1 mrg the precision of the operation itself. */ 5333 1.1 mrg operation_precision = precision; 5334 1.1 mrg min_input_precision = stmt_info->min_output_precision; 5335 1.1 mrg break; 5336 1.1 mrg 5337 1.1 mrg case LSHIFT_EXPR: 5338 1.1 mrg case RSHIFT_EXPR: 5339 1.1 mrg { 5340 1.1 mrg tree shift = gimple_assign_rhs2 (stmt); 5341 1.1 mrg if (TREE_CODE (shift) != INTEGER_CST 5342 1.1 mrg || !wi::ltu_p (wi::to_widest (shift), precision)) 5343 1.1 mrg return; 5344 1.1 mrg unsigned int const_shift = TREE_INT_CST_LOW (shift); 5345 1.1 mrg if (code == LSHIFT_EXPR) 5346 1.1 mrg { 5347 1.1 mrg /* Avoid creating an undefined shift. 5348 1.1 mrg 5349 1.1 mrg ??? We could instead use min_output_precision as-is and 5350 1.1 mrg optimize out-of-range shifts to zero. 
However, only 5351 1.1 mrg degenerate testcases shift away all their useful input data, 5352 1.1 mrg and it isn't natural to drop input operations in the middle 5353 1.1 mrg of vectorization. This sort of thing should really be 5354 1.1 mrg handled before vectorization. */ 5355 1.1 mrg operation_precision = MAX (stmt_info->min_output_precision, 5356 1.1 mrg const_shift + 1); 5357 1.1 mrg /* We need CONST_SHIFT fewer bits of the input. */ 5358 1.1 mrg min_input_precision = (MAX (operation_precision, const_shift) 5359 1.1 mrg - const_shift); 5360 1.1 mrg } 5361 1.1 mrg else 5362 1.1 mrg { 5363 1.1 mrg /* We need CONST_SHIFT extra bits to do the operation. */ 5364 1.1 mrg operation_precision = (stmt_info->min_output_precision 5365 1.1 mrg + const_shift); 5366 1.1 mrg min_input_precision = operation_precision; 5367 1.1 mrg } 5368 1.1 mrg break; 5369 1.1 mrg } 5370 1.1 mrg 5371 1.1 mrg default: 5372 1.1 mrg if (vect_truncatable_operation_p (code)) 5373 1.1 mrg { 5374 1.1 mrg /* Input bit N has no effect on output bits N-1 and lower. */ 5375 1.1 mrg operation_precision = stmt_info->min_output_precision; 5376 1.1 mrg min_input_precision = operation_precision; 5377 1.1 mrg break; 5378 1.1 mrg } 5379 1.1 mrg return; 5380 1.1 mrg } 5381 1.1 mrg 5382 1.1 mrg if (operation_precision < precision) 5383 1.1 mrg { 5384 1.1 mrg if (dump_enabled_p ()) 5385 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d" 5386 1.1 mrg " without affecting users: %G", 5387 1.1 mrg TYPE_UNSIGNED (type) ? 
"unsigned" : "signed", 5388 1.1 mrg operation_precision, stmt); 5389 1.1 mrg vect_set_operation_type (stmt_info, type, operation_precision, 5390 1.1 mrg TYPE_SIGN (type)); 5391 1.1 mrg } 5392 1.1 mrg vect_set_min_input_precision (stmt_info, type, min_input_precision); 5393 1.1 mrg } 5394 1.1 mrg 5395 1.1 mrg /* Return true if the statement described by STMT_INFO sets a boolean 5396 1.1 mrg SSA_NAME and if we know how to vectorize this kind of statement using 5397 1.1 mrg vector mask types. */ 5398 1.1 mrg 5399 1.1 mrg static bool 5400 1.1 mrg possible_vector_mask_operation_p (stmt_vec_info stmt_info) 5401 1.1 mrg { 5402 1.1 mrg tree lhs = gimple_get_lhs (stmt_info->stmt); 5403 1.1 mrg if (!lhs 5404 1.1 mrg || TREE_CODE (lhs) != SSA_NAME 5405 1.1 mrg || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs))) 5406 1.1 mrg return false; 5407 1.1 mrg 5408 1.1 mrg if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt)) 5409 1.1 mrg { 5410 1.1 mrg tree_code rhs_code = gimple_assign_rhs_code (assign); 5411 1.1 mrg switch (rhs_code) 5412 1.1 mrg { 5413 1.1 mrg CASE_CONVERT: 5414 1.1 mrg case SSA_NAME: 5415 1.1 mrg case BIT_NOT_EXPR: 5416 1.1 mrg case BIT_IOR_EXPR: 5417 1.1 mrg case BIT_XOR_EXPR: 5418 1.1 mrg case BIT_AND_EXPR: 5419 1.1 mrg return true; 5420 1.1 mrg 5421 1.1 mrg default: 5422 1.1 mrg return TREE_CODE_CLASS (rhs_code) == tcc_comparison; 5423 1.1 mrg } 5424 1.1 mrg } 5425 1.1 mrg else if (is_a <gphi *> (stmt_info->stmt)) 5426 1.1 mrg return true; 5427 1.1 mrg return false; 5428 1.1 mrg } 5429 1.1 mrg 5430 1.1 mrg /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use 5431 1.1 mrg a vector mask type instead of a normal vector type. Record the 5432 1.1 mrg result in STMT_INFO->mask_precision. 
*/

static void
vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
{
  if (!possible_vector_mask_operation_p (stmt_info))
    return;

  /* If at least one boolean input uses a vector mask type,
     pick the mask type with the narrowest elements.

     ??? This is the traditional behavior.  It should always produce
     the smallest number of operations, but isn't necessarily the
     optimal choice.  For example, if we have:

       a = b & c

     where:

     - the user of a wants it to have a mask type for 16-bit elements (M16)
     - b also uses M16
     - c uses a mask type for 8-bit elements (M8)

     then picking M8 gives:

     - 1 M16->M8 pack for b
     - 1 M8 AND for a
     - 2 M8->M16 unpacks for the user of a

     whereas picking M16 would have given:

     - 2 M8->M16 unpacks for c
     - 2 M16 ANDs for a

     The number of operations are equal, but M16 would have given
     a shorter dependency chain and allowed more ILP.  */
  /* ~0U acts as "no mask precision seen yet"; it survives to the end
     when no boolean input uses a mask type.  */
  unsigned int precision = ~0U;
  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      /* Take the minimum mask precision over all boolean rhs operands.  */
      unsigned int nops = gimple_num_ops (assign);
      for (unsigned int i = 1; i < nops; ++i)
	{
	  tree rhs = gimple_op (assign, i);
	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
	    continue;

	  stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
	  if (!def_stmt_info)
	    /* Don't let external or constant operands influence the choice.
	       We can convert them to whichever vector type we pick.  */
	    continue;

	  if (def_stmt_info->mask_precision)
	    {
	      if (precision > def_stmt_info->mask_precision)
		precision = def_stmt_info->mask_precision;
	    }
	}

      /* If the statement compares two values that shouldn't use vector masks,
	 try comparing the values as normal scalars instead.  */
      tree_code rhs_code = gimple_assign_rhs_code (assign);
      if (precision == ~0U
	  && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
	{
	  tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
	  scalar_mode mode;
	  tree vectype, mask_type;
	  /* Only do this when the target can actually expand the vector
	     comparison for the operand type.  */
	  if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
	      && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
	      && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
	      && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
	    precision = GET_MODE_BITSIZE (mode);
	}
    }
  else
    {
      /* PHI case: take the minimum mask precision over all arguments.  */
      gphi *phi = as_a <gphi *> (stmt_info->stmt);
      for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
	{
	  tree rhs = gimple_phi_arg_def (phi, i);

	  stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
	  if (!def_stmt_info)
	    /* Don't let external or constant operands influence the choice.
	       We can convert them to whichever vector type we pick.  */
	    continue;

	  if (def_stmt_info->mask_precision)
	    {
	      if (precision > def_stmt_info->mask_precision)
		precision = def_stmt_info->mask_precision;
	    }
	}
    }

  if (dump_enabled_p ())
    {
      if (precision == ~0U)
	dump_printf_loc (MSG_NOTE, vect_location,
			 "using normal nonmask vectors for %G",
			 stmt_info->stmt);
      else
	dump_printf_loc (MSG_NOTE, vect_location,
			 "using boolean precision %d for %G",
			 precision, stmt_info->stmt);
    }

  stmt_info->mask_precision = precision;
}

/* Handle vect_determine_precisions for STMT_INFO, given that we
   have already done so for the users of its result.  */

void
vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
{
  vect_determine_min_output_precision (vinfo, stmt_info);
  /* Range- and user-based narrowing only applies to assignments.  */
  if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      vect_determine_precisions_from_range (stmt_info, stmt);
      vect_determine_precisions_from_users (stmt_info, stmt);
    }
}

/* Walk backwards through the vectorizable region to determine the
   values of these fields:

   - min_output_precision
   - min_input_precision
   - operation_precision
   - operation_sign.
*/ 5564 1.1 mrg 5565 1.1 mrg void 5566 1.1 mrg vect_determine_precisions (vec_info *vinfo) 5567 1.1 mrg { 5568 1.1 mrg DUMP_VECT_SCOPE ("vect_determine_precisions"); 5569 1.1 mrg 5570 1.1 mrg if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo)) 5571 1.1 mrg { 5572 1.1 mrg class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 5573 1.1 mrg basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); 5574 1.1 mrg unsigned int nbbs = loop->num_nodes; 5575 1.1 mrg 5576 1.1 mrg for (unsigned int i = 0; i < nbbs; i++) 5577 1.1 mrg { 5578 1.1 mrg basic_block bb = bbs[i]; 5579 1.1 mrg for (auto gsi = gsi_start_phis (bb); 5580 1.1 mrg !gsi_end_p (gsi); gsi_next (&gsi)) 5581 1.1 mrg { 5582 1.1 mrg stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ()); 5583 1.1 mrg if (stmt_info) 5584 1.1 mrg vect_determine_mask_precision (vinfo, stmt_info); 5585 1.1 mrg } 5586 1.1 mrg for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) 5587 1.1 mrg if (!is_gimple_debug (gsi_stmt (si))) 5588 1.1 mrg vect_determine_mask_precision 5589 1.1 mrg (vinfo, vinfo->lookup_stmt (gsi_stmt (si))); 5590 1.1 mrg } 5591 1.1 mrg for (unsigned int i = 0; i < nbbs; i++) 5592 1.1 mrg { 5593 1.1 mrg basic_block bb = bbs[nbbs - i - 1]; 5594 1.1 mrg for (gimple_stmt_iterator si = gsi_last_bb (bb); 5595 1.1 mrg !gsi_end_p (si); gsi_prev (&si)) 5596 1.1 mrg if (!is_gimple_debug (gsi_stmt (si))) 5597 1.1 mrg vect_determine_stmt_precisions 5598 1.1 mrg (vinfo, vinfo->lookup_stmt (gsi_stmt (si))); 5599 1.1 mrg for (auto gsi = gsi_start_phis (bb); 5600 1.1 mrg !gsi_end_p (gsi); gsi_next (&gsi)) 5601 1.1 mrg { 5602 1.1 mrg stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ()); 5603 1.1 mrg if (stmt_info) 5604 1.1 mrg vect_determine_stmt_precisions (vinfo, stmt_info); 5605 1.1 mrg } 5606 1.1 mrg } 5607 1.1 mrg } 5608 1.1 mrg else 5609 1.1 mrg { 5610 1.1 mrg bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo); 5611 1.1 mrg for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i) 5612 1.1 mrg { 5613 1.1 mrg 
basic_block bb = bb_vinfo->bbs[i]; 5614 1.1 mrg for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 5615 1.1 mrg { 5616 1.1 mrg stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ()); 5617 1.1 mrg if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info)) 5618 1.1 mrg vect_determine_mask_precision (vinfo, stmt_info); 5619 1.1 mrg } 5620 1.1 mrg for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 5621 1.1 mrg { 5622 1.1 mrg stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi)); 5623 1.1 mrg if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info)) 5624 1.1 mrg vect_determine_mask_precision (vinfo, stmt_info); 5625 1.1 mrg } 5626 1.1 mrg } 5627 1.1 mrg for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i) 5628 1.1 mrg { 5629 1.1 mrg for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]); 5630 1.1 mrg !gsi_end_p (gsi); gsi_prev (&gsi)) 5631 1.1 mrg { 5632 1.1 mrg stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi)); 5633 1.1 mrg if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info)) 5634 1.1 mrg vect_determine_stmt_precisions (vinfo, stmt_info); 5635 1.1 mrg } 5636 1.1 mrg for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]); 5637 1.1 mrg !gsi_end_p (gsi); gsi_next (&gsi)) 5638 1.1 mrg { 5639 1.1 mrg stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ()); 5640 1.1 mrg if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info)) 5641 1.1 mrg vect_determine_stmt_precisions (vinfo, stmt_info); 5642 1.1 mrg } 5643 1.1 mrg } 5644 1.1 mrg } 5645 1.1 mrg } 5646 1.1 mrg 5647 1.1 mrg typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *); 5648 1.1 mrg 5649 1.1 mrg struct vect_recog_func 5650 1.1 mrg { 5651 1.1 mrg vect_recog_func_ptr fn; 5652 1.1 mrg const char *name; 5653 1.1 mrg }; 5654 1.1 mrg 5655 1.1 mrg /* Note that ordering matters - the first pattern matching on a stmt is 5656 1.1 mrg taken which means usually the more complex one needs to preceed the 5657 1.1 mrg less comples onex (widen_sum only 
   after dot_prod or sad for example).  */
static vect_recog_func vect_vect_recog_func_ptrs[] = {
  { vect_recog_over_widening_pattern, "over_widening" },
  /* Must come after over_widening, which narrows the shift as much as
     possible beforehand.  */
  { vect_recog_average_pattern, "average" },
  { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
  { vect_recog_mulhs_pattern, "mult_high" },
  { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
  { vect_recog_widen_mult_pattern, "widen_mult" },
  { vect_recog_dot_prod_pattern, "dot_prod" },
  { vect_recog_sad_pattern, "sad" },
  /* widen_sum must come after dot_prod and sad, which subsume it for
     the statements they match.  */
  { vect_recog_widen_sum_pattern, "widen_sum" },
  { vect_recog_pow_pattern, "pow" },
  { vect_recog_popcount_pattern, "popcount" },
  { vect_recog_widen_shift_pattern, "widen_shift" },
  { vect_recog_rotate_pattern, "rotate" },
  { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
  { vect_recog_divmod_pattern, "divmod" },
  { vect_recog_mult_pattern, "mult" },
  { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
  { vect_recog_bool_pattern, "bool" },
  /* This must come before mask conversion, and includes the parts
     of mask conversion that are needed for gather and scatter
     internal functions.  */
  { vect_recog_gather_scatter_pattern, "gather_scatter" },
  { vect_recog_mask_conversion_pattern, "mask_conversion" },
  { vect_recog_widen_plus_pattern, "widen_plus" },
  { vect_recog_widen_minus_pattern, "widen_minus" },
};

/* Number of entries in the pattern table above.  */
const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);

/* Mark statements that are involved in a pattern.
*/

void
vect_mark_pattern_stmts (vec_info *vinfo,
			 stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
			 tree pattern_vectype)
{
  stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
  gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);

  gimple *orig_pattern_stmt = NULL;
  if (is_pattern_stmt_p (orig_stmt_info))
    {
      /* We're replacing a statement in an existing pattern definition
	 sequence.  */
      orig_pattern_stmt = orig_stmt_info->stmt;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "replacing earlier pattern %G", orig_pattern_stmt);

      /* To keep the book-keeping simple, just swap the lhs of the
	 old and new statements, so that the old one has a valid but
	 unused lhs.  */
      tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
      gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
      gimple_set_lhs (pattern_stmt, old_lhs);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);

      /* Switch to the statement that ORIG replaces.  */
      orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);

      /* We shouldn't be replacing the main pattern statement.  */
      gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
		  != orig_pattern_stmt);
    }

  if (def_seq)
    for (gimple_stmt_iterator si = gsi_start (def_seq);
	 !gsi_end_p (si); gsi_next (&si))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "extra pattern stmt: %G", gsi_stmt (si));
	stmt_vec_info pattern_stmt_info
	  = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
				    orig_stmt_info, pattern_vectype);
	/* Stmts in the def sequence are not vectorizable cycle or
	   induction defs, instead they should all be vect_internal_def
	   feeding the main pattern stmt which retains this def type.  */
	STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
      }

  if (orig_pattern_stmt)
    {
      vect_init_pattern_stmt (vinfo, pattern_stmt,
			      orig_stmt_info, pattern_vectype);

      /* Insert all the new pattern statements before the original one.  */
      gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
      gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
					       orig_def_seq);
      gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
      gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);

      /* Remove the pattern statement that this new pattern replaces.  */
      gsi_remove (&gsi, false);
    }
  else
    vect_set_pattern_stmt (vinfo,
			   pattern_stmt, orig_stmt_info, pattern_vectype);

  /* Transfer reduction path info to the pattern.  */
  if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
    {
      gimple_match_op op;
      if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
	gcc_unreachable ();
      /* LOOKFOR is the SSA name on the reduction path; each time we find
	 it used by a pattern stmt we record the operand index and chase
	 the path onwards through that stmt's lhs.  */
      tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
      /* Search the pattern def sequence and the main pattern stmt.  Note
	 we may have inserted all into a containing pattern def sequence
	 so the following is a bit awkward.  */
      gimple_stmt_iterator si;
      gimple *s;
      if (def_seq)
	{
	  si = gsi_start (def_seq);
	  s = gsi_stmt (si);
	  gsi_next (&si);
	}
      else
	{
	  si = gsi_none ();
	  s = pattern_stmt;
	}
      do
	{
	  bool found = false;
	  if (gimple_extract_op (s, &op))
	    for (unsigned i = 0; i < op.num_ops; ++i)
	      if (op.ops[i] == lookfor)
		{
		  STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
		  lookfor = gimple_get_lhs (s);
		  found = true;
		  break;
		}
	  /* The main pattern stmt is always visited last; stop there.  */
	  if (s == pattern_stmt)
	    {
	      if (!found && dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "failed to update reduction index.\n");
	      break;
	    }
	  if (gsi_end_p (si))
	    s = pattern_stmt;
	  else
	    {
	      s = gsi_stmt (si);
	      if (s == pattern_stmt)
		/* Found the end inside a bigger pattern def seq.  */
		si = gsi_none ();
	      else
		gsi_next (&si);
	    }
	}
      while (1);
    }
}

/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
	computation pattern.
   STMT_INFO: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
   a sequence of statements that has the same functionality and can be
   used to replace STMT_INFO.  It returns the last statement in the sequence
   and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
   PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
   statement, having first checked that the target supports the new operation
   in that type.

   This function also does some bookkeeping, as explained in the documentation
   for vect_recog_pattern.  */

static void
vect_pattern_recog_1 (vec_info *vinfo,
		      vect_recog_func *recog_func, stmt_vec_info stmt_info)
{
  gimple *pattern_stmt;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;

  /* If this statement has already been replaced with pattern statements,
     leave the original statement alone, since the first match wins.
     Instead try to match against the definition statements that feed
     the main pattern statement.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
	   !gsi_end_p (gsi); gsi_next (&gsi))
	vect_pattern_recog_1 (vinfo, recog_func,
			      vinfo->lookup_stmt (gsi_stmt (gsi)));
      return;
    }

  gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
  pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
  if (!pattern_stmt)
    {
      /* Clear any half-formed pattern definition sequence.  */
      STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
      return;
    }

  loop_vinfo = dyn_cast <loop_vec_info> (vinfo);

  /* Found a vectorizable pattern.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "%s pattern recognized: %G",
		     recog_func->name, pattern_stmt);

  /* Mark the stmts that are involved in the pattern.  */
  vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  if (loop_vinfo)
    {
      unsigned ix, ix2;
      stmt_vec_info *elem_ptr;
      VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
			     elem_ptr, *elem_ptr == stmt_info);
    }
}


/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
	computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
	that provides the same functionality and that can be vectorized.  We
	also record some information in the struct_stmt_info of the relevant
	stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

         stmt                     in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              -
           '---> S6: a_new = ....       -       S4              -
         S5: ... = ..use(a_0)..         -       -               -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         > VS6: va_new = ....           -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              VS6
           '---> S6: a_new = ....       -       S4              VS6
         > VS5: ... = ..vuse(va_new)..  -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

	 VS6: va_new = ....
	 VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statements, e.g., widen-mult with
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
       '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
       '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */

void
vect_pattern_recog (vec_info *vinfo)
{
  class loop *loop;
  basic_block *bbs;
  unsigned int nbbs;
  gimple_stmt_iterator si;
  unsigned int i, j;

  /* Precision information is consumed by several of the pattern
     recognizers, so compute it up front.  */
  vect_determine_precisions (vinfo);

  DUMP_VECT_SCOPE ("vect_pattern_recog");

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      bbs = LOOP_VINFO_BBS (loop_vinfo);
      nbbs = loop->num_nodes;

      /* Scan through the loop stmts, applying the pattern recognition
	 functions starting at each stmt visited: */
      for (i = 0; i < nbbs; i++)
	{
	  basic_block bb = bbs[i];
	  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	    {
	      if (is_gimple_debug (gsi_stmt (si)))
		continue;
	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
	      /* Scan over all generic vect_recog_xxx_pattern functions.  */
	      for (j = 0; j < NUM_PATTERNS; j++)
		vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
				      stmt_info);
	    }
	}
    }
  else
    {
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
      for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
	for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
	     !gsi_end_p (gsi); gsi_next (&gsi))
	  {
	    stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
	    if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
	      continue;

	    /* Scan over all generic vect_recog_xxx_pattern functions.  */
	    for (j = 0; j < NUM_PATTERNS; j++)
	      vect_pattern_recog_1 (vinfo,
				    &vect_vect_recog_func_ptrs[j], stmt_info);
	  }
    }

  /* After this no more add_stmt calls are allowed.  */
  vinfo->stmt_vec_info_ro = true;
}