/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2022 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit (at) il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
20 1.1 mrg
21 1.1 mrg #include "config.h"
22 1.1 mrg #include "system.h"
23 1.1 mrg #include "coretypes.h"
24 1.1 mrg #include "backend.h"
25 1.1 mrg #include "rtl.h"
26 1.1 mrg #include "tree.h"
27 1.1 mrg #include "gimple.h"
28 1.1 mrg #include "ssa.h"
29 1.1 mrg #include "expmed.h"
30 1.1 mrg #include "optabs-tree.h"
31 1.1 mrg #include "insn-config.h"
32 1.1 mrg #include "recog.h" /* FIXME: for insn_data */
33 1.1 mrg #include "fold-const.h"
34 1.1 mrg #include "stor-layout.h"
35 1.1 mrg #include "tree-eh.h"
36 1.1 mrg #include "gimplify.h"
37 1.1 mrg #include "gimple-iterator.h"
38 1.1 mrg #include "cfgloop.h"
39 1.1 mrg #include "tree-vectorizer.h"
40 1.1 mrg #include "dumpfile.h"
41 1.1 mrg #include "builtins.h"
42 1.1 mrg #include "internal-fn.h"
43 1.1 mrg #include "case-cfn-macros.h"
44 1.1 mrg #include "fold-const-call.h"
45 1.1 mrg #include "attribs.h"
46 1.1 mrg #include "cgraph.h"
47 1.1 mrg #include "omp-simd-clone.h"
48 1.1 mrg #include "predict.h"
49 1.1 mrg #include "tree-vector-builder.h"
50 1.1 mrg #include "vec-perm-indices.h"
51 1.1 mrg #include "gimple-range.h"
52 1.1 mrg
53 1.1 mrg /* Return true if we have a useful VR_RANGE range for VAR, storing it
54 1.1 mrg in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
55 1.1 mrg
56 1.1 mrg static bool
57 1.1 mrg vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
58 1.1 mrg {
59 1.1 mrg value_range vr;
60 1.1 mrg get_range_query (cfun)->range_of_expr (vr, var);
61 1.1 mrg if (vr.undefined_p ())
62 1.1 mrg vr.set_varying (TREE_TYPE (var));
63 1.1 mrg *min_value = wi::to_wide (vr.min ());
64 1.1 mrg *max_value = wi::to_wide (vr.max ());
65 1.1 mrg value_range_kind vr_type = vr.kind ();
66 1.1 mrg wide_int nonzero = get_nonzero_bits (var);
67 1.1 mrg signop sgn = TYPE_SIGN (TREE_TYPE (var));
68 1.1 mrg if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
69 1.1 mrg nonzero, sgn) == VR_RANGE)
70 1.1 mrg {
71 1.1 mrg if (dump_enabled_p ())
72 1.1 mrg {
73 1.1 mrg dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
74 1.1 mrg dump_printf (MSG_NOTE, " has range [");
75 1.1 mrg dump_hex (MSG_NOTE, *min_value);
76 1.1 mrg dump_printf (MSG_NOTE, ", ");
77 1.1 mrg dump_hex (MSG_NOTE, *max_value);
78 1.1 mrg dump_printf (MSG_NOTE, "]\n");
79 1.1 mrg }
80 1.1 mrg return true;
81 1.1 mrg }
82 1.1 mrg else
83 1.1 mrg {
84 1.1 mrg if (dump_enabled_p ())
85 1.1 mrg {
86 1.1 mrg dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
87 1.1 mrg dump_printf (MSG_NOTE, " has no range info\n");
88 1.1 mrg }
89 1.1 mrg return false;
90 1.1 mrg }
91 1.1 mrg }
92 1.1 mrg
93 1.1 mrg /* Report that we've found an instance of pattern PATTERN in
94 1.1 mrg statement STMT. */
95 1.1 mrg
96 1.1 mrg static void
97 1.1 mrg vect_pattern_detected (const char *name, gimple *stmt)
98 1.1 mrg {
99 1.1 mrg if (dump_enabled_p ())
100 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
101 1.1 mrg }
102 1.1 mrg
103 1.1 mrg /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
104 1.1 mrg return the pattern statement's stmt_vec_info. Set its vector type to
105 1.1 mrg VECTYPE if it doesn't have one already. */
106 1.1 mrg
107 1.1 mrg static stmt_vec_info
108 1.1 mrg vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
109 1.1 mrg stmt_vec_info orig_stmt_info, tree vectype)
110 1.1 mrg {
111 1.1 mrg stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
112 1.1 mrg if (pattern_stmt_info == NULL)
113 1.1 mrg pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
114 1.1 mrg gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
115 1.1 mrg
116 1.1 mrg pattern_stmt_info->pattern_stmt_p = true;
117 1.1 mrg STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
118 1.1 mrg STMT_VINFO_DEF_TYPE (pattern_stmt_info)
119 1.1 mrg = STMT_VINFO_DEF_TYPE (orig_stmt_info);
120 1.1 mrg if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
121 1.1 mrg {
122 1.1 mrg gcc_assert (!vectype
123 1.1 mrg || (VECTOR_BOOLEAN_TYPE_P (vectype)
124 1.1 mrg == vect_use_mask_type_p (orig_stmt_info)));
125 1.1 mrg STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
126 1.1 mrg pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
127 1.1 mrg }
128 1.1 mrg return pattern_stmt_info;
129 1.1 mrg }
130 1.1 mrg
131 1.1 mrg /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
132 1.1 mrg Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
133 1.1 mrg have one already. */
134 1.1 mrg
135 1.1 mrg static void
136 1.1 mrg vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
137 1.1 mrg stmt_vec_info orig_stmt_info, tree vectype)
138 1.1 mrg {
139 1.1 mrg STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
140 1.1 mrg STMT_VINFO_RELATED_STMT (orig_stmt_info)
141 1.1 mrg = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
142 1.1 mrg }
143 1.1 mrg
144 1.1 mrg /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
145 1.1 mrg is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
146 1.1 mrg be different from the vector type of the final pattern statement.
147 1.1 mrg If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
148 1.1 mrg from which it was derived. */
149 1.1 mrg
150 1.1 mrg static inline void
151 1.1 mrg append_pattern_def_seq (vec_info *vinfo,
152 1.1 mrg stmt_vec_info stmt_info, gimple *new_stmt,
153 1.1 mrg tree vectype = NULL_TREE,
154 1.1 mrg tree scalar_type_for_mask = NULL_TREE)
155 1.1 mrg {
156 1.1 mrg gcc_assert (!scalar_type_for_mask
157 1.1 mrg == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
158 1.1 mrg if (vectype)
159 1.1 mrg {
160 1.1 mrg stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
161 1.1 mrg STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
162 1.1 mrg if (scalar_type_for_mask)
163 1.1 mrg new_stmt_info->mask_precision
164 1.1 mrg = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
165 1.1 mrg }
166 1.1 mrg gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
167 1.1 mrg new_stmt);
168 1.1 mrg }
169 1.1 mrg
170 1.1 mrg /* The caller wants to perform new operations on vect_external variable
171 1.1 mrg VAR, so that the result of the operations would also be vect_external.
172 1.1 mrg Return the edge on which the operations can be performed, if one exists.
173 1.1 mrg Return null if the operations should instead be treated as part of
174 1.1 mrg the pattern that needs them. */
175 1.1 mrg
176 1.1 mrg static edge
177 1.1 mrg vect_get_external_def_edge (vec_info *vinfo, tree var)
178 1.1 mrg {
179 1.1 mrg edge e = NULL;
180 1.1 mrg if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
181 1.1 mrg {
182 1.1 mrg e = loop_preheader_edge (loop_vinfo->loop);
183 1.1 mrg if (!SSA_NAME_IS_DEFAULT_DEF (var))
184 1.1 mrg {
185 1.1 mrg basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
186 1.1 mrg if (bb == NULL
187 1.1 mrg || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
188 1.1 mrg e = NULL;
189 1.1 mrg }
190 1.1 mrg }
191 1.1 mrg return e;
192 1.1 mrg }
193 1.1 mrg
194 1.1 mrg /* Return true if the target supports a vector version of CODE,
195 1.1 mrg where CODE is known to map to a direct optab with the given SUBTYPE.
196 1.1 mrg ITYPE specifies the type of (some of) the scalar inputs and OTYPE
197 1.1 mrg specifies the type of the scalar result.
198 1.1 mrg
199 1.1 mrg If CODE allows the inputs and outputs to have different type
200 1.1 mrg (such as for WIDEN_SUM_EXPR), it is the input mode rather
201 1.1 mrg than the output mode that determines the appropriate target pattern.
202 1.1 mrg Operand 0 of the target pattern then specifies the mode that the output
203 1.1 mrg must have.
204 1.1 mrg
205 1.1 mrg When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
206 1.1 mrg Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
207 1.1 mrg is nonnull. */
208 1.1 mrg
209 1.1 mrg static bool
210 1.1 mrg vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
211 1.1 mrg tree itype, tree *vecotype_out,
212 1.1 mrg tree *vecitype_out = NULL,
213 1.1 mrg enum optab_subtype subtype = optab_default)
214 1.1 mrg {
215 1.1 mrg tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
216 1.1 mrg if (!vecitype)
217 1.1 mrg return false;
218 1.1 mrg
219 1.1 mrg tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
220 1.1 mrg if (!vecotype)
221 1.1 mrg return false;
222 1.1 mrg
223 1.1 mrg optab optab = optab_for_tree_code (code, vecitype, subtype);
224 1.1 mrg if (!optab)
225 1.1 mrg return false;
226 1.1 mrg
227 1.1 mrg insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
228 1.1 mrg if (icode == CODE_FOR_nothing
229 1.1 mrg || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
230 1.1 mrg return false;
231 1.1 mrg
232 1.1 mrg *vecotype_out = vecotype;
233 1.1 mrg if (vecitype_out)
234 1.1 mrg *vecitype_out = vecitype;
235 1.1 mrg return true;
236 1.1 mrg }
237 1.1 mrg
238 1.1 mrg /* Round bit precision PRECISION up to a full element. */
239 1.1 mrg
240 1.1 mrg static unsigned int
241 1.1 mrg vect_element_precision (unsigned int precision)
242 1.1 mrg {
243 1.1 mrg precision = 1 << ceil_log2 (precision);
244 1.1 mrg return MAX (precision, BITS_PER_UNIT);
245 1.1 mrg }
246 1.1 mrg
247 1.1 mrg /* If OP is defined by a statement that's being considered for vectorization,
248 1.1 mrg return information about that statement, otherwise return NULL. */
249 1.1 mrg
250 1.1 mrg static stmt_vec_info
251 1.1 mrg vect_get_internal_def (vec_info *vinfo, tree op)
252 1.1 mrg {
253 1.1 mrg stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
254 1.1 mrg if (def_stmt_info
255 1.1 mrg && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
256 1.1 mrg return def_stmt_info;
257 1.1 mrg return NULL;
258 1.1 mrg }
259 1.1 mrg
/* Check whether NAME, an ssa-name used in STMT_VINFO,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.

   On success set *ORIG_TYPE to the type of name0, *DEF_STMT to the
   defining conversion, and *PROMOTION to whether NAME is at least
   twice as wide as name0.  NOTE(review): *ORIG_TYPE and *PROMOTION may
   already have been written when false is returned.  */

static bool
type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
		   tree *orig_type, gimple **def_stmt, bool *promotion)
{
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;

  /* NAME must be something the vectorizer can handle...  */
  stmt_vec_info def_stmt_info;
  if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  /* ...and must be defined by an integer conversion.  */
  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  /* Only count conversions that at least double the precision as
     promotions.  */
  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  /* The conversion input must itself be usable by the vectorizer.  */
  if (!vect_is_simple_use (oprnd0, vinfo, &dt))
    return false;

  return true;
}
308 1.1 mrg
/* Holds information about an input operand after some sign changes
   and type promotions have been peeled away.  */
class vect_unpromoted_value {
public:
  vect_unpromoted_value ();

  /* Record OP, its definition type and (optionally) the statement
     that casts it; see the member comments below.  */
  void set_op (tree, vect_def_type, stmt_vec_info = NULL);

  /* The value obtained after peeling away zero or more casts.  */
  tree op;

  /* The type of OP.  */
  tree type;

  /* The definition type of OP.  */
  vect_def_type dt;

  /* If OP is the result of peeling at least one cast, and if the cast
     of OP itself is a vectorizable statement, CASTER identifies that
     statement, otherwise it is null.  */
  stmt_vec_info caster;
};
331 1.1 mrg
332 1.1 mrg inline vect_unpromoted_value::vect_unpromoted_value ()
333 1.1 mrg : op (NULL_TREE),
334 1.1 mrg type (NULL_TREE),
335 1.1 mrg dt (vect_uninitialized_def),
336 1.1 mrg caster (NULL)
337 1.1 mrg {
338 1.1 mrg }
339 1.1 mrg
340 1.1 mrg /* Set the operand to OP_IN, its definition type to DT_IN, and the
341 1.1 mrg statement that casts it to CASTER_IN. */
342 1.1 mrg
343 1.1 mrg inline void
344 1.1 mrg vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
345 1.1 mrg stmt_vec_info caster_in)
346 1.1 mrg {
347 1.1 mrg op = op_in;
348 1.1 mrg type = TREE_TYPE (op);
349 1.1 mrg dt = dt_in;
350 1.1 mrg caster = caster_in;
351 1.1 mrg }
352 1.1 mrg
/* If OP is a vectorizable SSA name, strip a sequence of integer conversions
   to reach some vectorizable inner operand OP', continuing as long as it
   is possible to convert OP' back to OP using a possible sign change
   followed by a possible promotion P.  Return this OP', or null if OP is
   not a vectorizable SSA name.  If there is a promotion P, describe its
   input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
   is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
   have more than one user.

   A successful return means that it is possible to go from OP' to OP
   via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
   whereas the cast from UNPROM to OP might be a promotion, a sign
   change, or a nop.

   E.g. say we have:

       signed short *ptr = ...;
       signed short C = *ptr;
       unsigned short B = (unsigned short) C;    // sign change
       signed int A = (signed int) B;            // unsigned promotion
       ...possible other uses of A...
       unsigned int OP = (unsigned int) A;       // sign change

   In this case it's possible to go directly from C to OP using:

       OP = (unsigned int) (unsigned short) C;
	    +------------+ +--------------+
	       promotion      sign change

   so OP' would be C.  The input to the promotion is B, so UNPROM
   would describe B.  */

static tree
vect_look_through_possible_promotion (vec_info *vinfo, tree op,
				      vect_unpromoted_value *unprom,
				      bool *single_use_p = NULL)
{
  /* RES is the innermost value reached so far (null until the first
     acceptable conversion is peeled); MIN_PRECISION is the narrowest
     precision seen on the walk; CASTER is the stmt_vec_info of the
     cast that consumed the current OP, if any.  */
  tree res = NULL_TREE;
  tree op_type = TREE_TYPE (op);
  unsigned int orig_precision = TYPE_PRECISION (op_type);
  unsigned int min_precision = orig_precision;
  stmt_vec_info caster = NULL;
  while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
    {
      /* See whether OP is simple enough to vectorize.  */
      stmt_vec_info def_stmt_info;
      gimple *def_stmt;
      vect_def_type dt;
      if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
	break;

      /* If OP is the input of a demotion, skip over it to see whether
	 OP is itself the result of a promotion.  If so, the combined
	 effect of the promotion and the demotion might fit the required
	 pattern, otherwise neither operation fits.

	 This copes with cases such as the result of an arithmetic
	 operation being truncated before being stored, and where that
	 arithmetic operation has been recognized as an over-widened one.  */
      if (TYPE_PRECISION (op_type) <= min_precision)
	{
	  /* Use OP as the UNPROM described above if we haven't yet
	     found a promotion, or if using the new input preserves the
	     sign of the previous promotion.  */
	  if (!res
	      || TYPE_PRECISION (unprom->type) == orig_precision
	      || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
	    {
	      unprom->set_op (op, dt, caster);
	      min_precision = TYPE_PRECISION (op_type);
	    }
	  /* Stop if we've already seen a promotion and if this
	     conversion does more than change the sign.  */
	  else if (TYPE_PRECISION (op_type)
		   != TYPE_PRECISION (unprom->type))
	    break;

	  /* The sequence now extends to OP.  */
	  res = op;
	}

      /* See whether OP is defined by a cast.  Record it as CASTER if
	 the cast is potentially vectorizable.  */
      if (!def_stmt)
	break;
      caster = def_stmt_info;

      /* Ignore pattern statements, since we don't link uses for them.  */
      if (caster
	  && single_use_p
	  && !STMT_VINFO_RELATED_STMT (caster)
	  && !has_single_use (res))
	*single_use_p = false;

      /* Only integer conversions can be peeled further.  */
      gassign *assign = dyn_cast <gassign *> (def_stmt);
      if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
	break;

      /* Continue with the input to the cast.  */
      op = gimple_assign_rhs1 (def_stmt);
      op_type = TREE_TYPE (op);
    }
  return res;
}
457 1.1 mrg
458 1.1 mrg /* OP is an integer operand to an operation that returns TYPE, and we
459 1.1 mrg want to treat the operation as a widening one. So far we can treat
460 1.1 mrg it as widening from *COMMON_TYPE.
461 1.1 mrg
462 1.1 mrg Return true if OP is suitable for such a widening operation,
463 1.1 mrg either widening from *COMMON_TYPE or from some supertype of it.
464 1.1 mrg Update *COMMON_TYPE to the supertype in the latter case.
465 1.1 mrg
466 1.1 mrg SHIFT_P is true if OP is a shift amount. */
467 1.1 mrg
468 1.1 mrg static bool
469 1.1 mrg vect_joust_widened_integer (tree type, bool shift_p, tree op,
470 1.1 mrg tree *common_type)
471 1.1 mrg {
472 1.1 mrg /* Calculate the minimum precision required by OP, without changing
473 1.1 mrg the sign of either operand. */
474 1.1 mrg unsigned int precision;
475 1.1 mrg if (shift_p)
476 1.1 mrg {
477 1.1 mrg if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
478 1.1 mrg return false;
479 1.1 mrg precision = TREE_INT_CST_LOW (op);
480 1.1 mrg }
481 1.1 mrg else
482 1.1 mrg {
483 1.1 mrg precision = wi::min_precision (wi::to_widest (op),
484 1.1 mrg TYPE_SIGN (*common_type));
485 1.1 mrg if (precision * 2 > TYPE_PRECISION (type))
486 1.1 mrg return false;
487 1.1 mrg }
488 1.1 mrg
489 1.1 mrg /* If OP requires a wider type, switch to that type. The checks
490 1.1 mrg above ensure that this is still narrower than the result. */
491 1.1 mrg precision = vect_element_precision (precision);
492 1.1 mrg if (TYPE_PRECISION (*common_type) < precision)
493 1.1 mrg *common_type = build_nonstandard_integer_type
494 1.1 mrg (precision, TYPE_UNSIGNED (*common_type));
495 1.1 mrg return true;
496 1.1 mrg }
497 1.1 mrg
498 1.1 mrg /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
499 1.1 mrg is narrower than type, storing the supertype in *COMMON_TYPE if so. */
500 1.1 mrg
501 1.1 mrg static bool
502 1.1 mrg vect_joust_widened_type (tree type, tree new_type, tree *common_type)
503 1.1 mrg {
504 1.1 mrg if (types_compatible_p (*common_type, new_type))
505 1.1 mrg return true;
506 1.1 mrg
507 1.1 mrg /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
508 1.1 mrg if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
509 1.1 mrg && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
510 1.1 mrg return true;
511 1.1 mrg
512 1.1 mrg /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
513 1.1 mrg if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
514 1.1 mrg && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
515 1.1 mrg {
516 1.1 mrg *common_type = new_type;
517 1.1 mrg return true;
518 1.1 mrg }
519 1.1 mrg
520 1.1 mrg /* We have mismatched signs, with the signed type being
521 1.1 mrg no wider than the unsigned type. In this case we need
522 1.1 mrg a wider signed type. */
523 1.1 mrg unsigned int precision = MAX (TYPE_PRECISION (*common_type),
524 1.1 mrg TYPE_PRECISION (new_type));
525 1.1 mrg precision *= 2;
526 1.1 mrg
527 1.1 mrg if (precision * 2 > TYPE_PRECISION (type))
528 1.1 mrg return false;
529 1.1 mrg
530 1.1 mrg *common_type = build_nonstandard_integer_type (precision, false);
531 1.1 mrg return true;
532 1.1 mrg }
533 1.1 mrg
/* Check whether STMT_INFO can be viewed as a tree of integer operations
   in which each node either performs CODE or WIDENED_CODE, and where
   each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
   specifies the maximum number of leaf operands.  SHIFT_P says whether
   CODE and WIDENED_CODE are some sort of shift.

   If STMT_INFO is such a tree, return the number of leaf operands
   and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
   to a type that (a) is narrower than the result of STMT_INFO and
   (b) can hold all leaf operand values.

   If SUBTYPE then allow that the signs of the operands
   may differ in signs but not in precision.  SUBTYPE is updated to reflect
   this.

   Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
   exists.  */

static unsigned int
vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
		      tree_code widened_code, bool shift_p,
		      unsigned int max_nops,
		      vect_unpromoted_value *unprom, tree *common_type,
		      enum optab_subtype *subtype = NULL)
{
  /* Check for an integer operation with the right code.  */
  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign)
    return 0;

  tree_code rhs_code = gimple_assign_rhs_code (assign);
  if (rhs_code != code && rhs_code != widened_code)
    return 0;

  tree type = TREE_TYPE (gimple_assign_lhs (assign));
  if (!INTEGRAL_TYPE_P (type))
    return 0;

  /* Assume that both operands will be leaf operands.  */
  max_nops -= 2;

  /* Check the operands.  NEXT_OP indexes the next free UNPROM slot;
     NOPS counts how many leaves the current operand contributed.  */
  unsigned int next_op = 0;
  for (unsigned int i = 0; i < 2; ++i)
    {
      vect_unpromoted_value *this_unprom = &unprom[next_op];
      unsigned int nops = 1;
      tree op = gimple_op (assign, i + 1);
      if (i == 1 && TREE_CODE (op) == INTEGER_CST)
	{
	  /* We already have a common type from earlier operands.
	     Update it to account for OP.  */
	  this_unprom->set_op (op, vect_constant_def);
	  if (!vect_joust_widened_integer (type, shift_p, op, common_type))
	    return 0;
	}
      else
	{
	  /* Only allow shifts by constants.  */
	  if (shift_p && i == 1)
	    return 0;

	  if (rhs_code != code)
	    {
	      /* If rhs_code is widened_code, don't look through further
		 possible promotions, there is a promotion already embedded
		 in the WIDEN_*_EXPR.  */
	      if (TREE_CODE (op) != SSA_NAME
		  || !INTEGRAL_TYPE_P (TREE_TYPE (op)))
		return 0;

	      stmt_vec_info def_stmt_info;
	      gimple *def_stmt;
	      vect_def_type dt;
	      if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info,
				       &def_stmt))
		return 0;
	      this_unprom->set_op (op, dt, NULL);
	    }
	  else if (!vect_look_through_possible_promotion (vinfo, op,
							  this_unprom))
	    return 0;

	  if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
	    {
	      /* The operand isn't widened.  If STMT_INFO has the code
		 for an unwidened operation, recursively check whether
		 this operand is a node of the tree.  */
	      if (rhs_code != code
		  || max_nops == 0
		  || this_unprom->dt != vect_internal_def)
		return 0;

	      /* Give back the leaf slot allocated above now that we're
		 not treating this as a leaf operand.  */
	      max_nops += 1;

	      /* Recursively process the definition of the operand.  */
	      stmt_vec_info def_stmt_info
		= vinfo->lookup_def (this_unprom->op);
	      nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
					   widened_code, shift_p, max_nops,
					   this_unprom, common_type,
					   subtype);
	      if (nops == 0)
		return 0;

	      max_nops -= nops;
	    }
	  else
	    {
	      /* Make sure that the operand is narrower than the result.  */
	      if (TYPE_PRECISION (this_unprom->type) * 2
		  > TYPE_PRECISION (type))
		return 0;

	      /* Update COMMON_TYPE for the new operand.  */
	      if (i == 0)
		*common_type = this_unprom->type;
	      else if (!vect_joust_widened_type (type, this_unprom->type,
						 common_type))
		{
		  if (subtype)
		    {
		      /* See if we can sign extend the smaller type.  */
		      if (TYPE_PRECISION (this_unprom->type)
			  > TYPE_PRECISION (*common_type))
			*common_type = this_unprom->type;
		      *subtype = optab_vector_mixed_sign;
		    }
		  else
		    return 0;
		}
	    }
	}
      next_op += nops;
    }
  return next_op;
}
673 1.1 mrg
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.
   The "patt" prefix marks the name as pattern-generated in dumps.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple *stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}
682 1.1 mrg
/* STMT2_INFO describes a type conversion that could be split into STMT1
   followed by a version of STMT2_INFO that takes NEW_RHS as its first
   input.  Try to do this using pattern statements, returning true on
   success.  VECTYPE is the vector type for STMT1's result.  */

static bool
vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
		      gimple *stmt1, tree vectype)
{
  if (is_pattern_stmt_p (stmt2_info))
    {
      /* STMT2_INFO is part of a pattern.  Get the statement to which
	 the pattern is attached.  */
      stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
      vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Splitting pattern statement: %G", stmt2_info->stmt);

      /* Since STMT2_INFO is a pattern statement, we can change it
	 in-situ without worrying about changing the code for the
	 containing block.  */
      gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
			   stmt2_info->stmt);
	}

      gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
      if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
	/* STMT2_INFO is the actual pattern statement.  Add STMT1
	   to the end of the definition sequence.  */
	gimple_seq_add_stmt_without_update (def_seq, stmt1);
      else
	{
	  /* STMT2_INFO belongs to the definition sequence.  Insert STMT1
	     before it.  */
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
	  gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
	}
      return true;
    }
  else
    {
      /* STMT2_INFO doesn't yet have a pattern.  Try to create a
	 two-statement pattern now.  */
      gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
      tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
      tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
      if (!lhs_vectype)
	return false;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Splitting statement: %G", stmt2_info->stmt);

      /* Add STMT1 as a singleton pattern definition sequence.  */
      gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
      vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
      gimple_seq_add_stmt_without_update (def_seq, stmt1);

      /* Build the second of the two pattern statements.  */
      tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
      gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
      vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "into pattern statements: %G", stmt1);
	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
	}

      return true;
    }
}
763 1.1 mrg
/* Convert UNPROM to TYPE and return the result, adding new statements
   to STMT_INFO's pattern definition statements if no better way is
   available.  VECTYPE is the vector form of TYPE.

   UNPROM describes the unpromoted input value and, when it was produced
   by an existing cast, records that cast statement in UNPROM->caster so
   it can be reused or split rather than duplicated.

   If SUBTYPE then convert the type based on the subtype.  */

static tree
vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
		    vect_unpromoted_value *unprom, tree vectype,
		    enum optab_subtype subtype = optab_default)
{

  /* Update the type if the signs differ.  For mixed-sign operations the
     converted input keeps the sign of the unpromoted value.  */
  if (subtype == optab_vector_mixed_sign
      && TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (unprom->op)))
    type = build_nonstandard_integer_type (TYPE_PRECISION (type),
					   TYPE_SIGN (unprom->type));

  /* Check for a no-op conversion.  */
  if (types_compatible_p (type, TREE_TYPE (unprom->op)))
    return unprom->op;

  /* Allow the caller to create constant vect_unpromoted_values.  */
  if (TREE_CODE (unprom->op) == INTEGER_CST)
    return wide_int_to_tree (type, wi::to_widest (unprom->op));

  tree input = unprom->op;
  if (unprom->caster)
    {
      tree lhs = gimple_get_lhs (unprom->caster->stmt);
      tree lhs_type = TREE_TYPE (lhs);

      /* If the result of the existing cast is the right width, use it
	 instead of the source of the cast.  */
      if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
	input = lhs;
      /* If the precision we want is between the source and result
	 precisions of the existing cast, try splitting the cast into
	 two and tapping into a mid-way point.  */
      else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
	       && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
	{
	  /* In order to preserve the semantics of the original cast,
	     give the mid-way point the same signedness as the input value.

	     It would be possible to use a signed type here instead if
	     TYPE is signed and UNPROM->TYPE is unsigned, but that would
	     make the sign of the midtype sensitive to the order in
	     which we process the statements, since the signedness of
	     TYPE is the signedness required by just one of possibly
	     many users.  Also, unsigned promotions are usually as cheap
	     as or cheaper than signed ones, so it's better to keep an
	     unsigned promotion.  */
	  tree midtype = build_nonstandard_integer_type
	    (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
	  tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
	  if (vec_midtype)
	    {
	      /* Cast the unpromoted value to the mid-way type.  If the
		 existing cast cannot be split around this new statement,
		 fall back to queueing it on the pattern sequence.  */
	      input = vect_recog_temp_ssa_var (midtype, NULL);
	      gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
						       unprom->op);
	      if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
					 vec_midtype))
		append_pattern_def_seq (vinfo, stmt_info,
					new_stmt, vec_midtype);
	    }
	}

      /* See if we can reuse an existing result.  */
      if (types_compatible_p (type, TREE_TYPE (input)))
	return input;
    }

  /* We need a new conversion statement.  */
  tree new_op = vect_recog_temp_ssa_var (type, NULL);
  gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);

  /* If OP is an external value, see if we can insert the new statement
     on an incoming edge.  */
  if (input == unprom->op && unprom->dt == vect_external_def)
    if (edge e = vect_get_external_def_edge (vinfo, input))
      {
	basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
	gcc_assert (!new_bb);
	return new_op;
      }

  /* As a (common) last resort, add the statement to the pattern itself.  */
  append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
  return new_op;
}
855 1.1 mrg
856 1.1 mrg /* Invoke vect_convert_input for N elements of UNPROM and store the
857 1.1 mrg result in the corresponding elements of RESULT.
858 1.1 mrg
859 1.1 mrg If SUBTYPE then convert the type based on the subtype. */
860 1.1 mrg
861 1.1 mrg static void
862 1.1 mrg vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
863 1.1 mrg tree *result, tree type, vect_unpromoted_value *unprom,
864 1.1 mrg tree vectype, enum optab_subtype subtype = optab_default)
865 1.1 mrg {
866 1.1 mrg for (unsigned int i = 0; i < n; ++i)
867 1.1 mrg {
868 1.1 mrg unsigned int j;
869 1.1 mrg for (j = 0; j < i; ++j)
870 1.1 mrg if (unprom[j].op == unprom[i].op)
871 1.1 mrg break;
872 1.1 mrg
873 1.1 mrg if (j < i)
874 1.1 mrg result[i] = result[j];
875 1.1 mrg else
876 1.1 mrg result[i] = vect_convert_input (vinfo, stmt_info,
877 1.1 mrg type, &unprom[i], vectype, subtype);
878 1.1 mrg }
879 1.1 mrg }
880 1.1 mrg
881 1.1 mrg /* The caller has created a (possibly empty) sequence of pattern definition
882 1.1 mrg statements followed by a single statement PATTERN_STMT. Cast the result
883 1.1 mrg of this final statement to TYPE. If a new statement is needed, add
884 1.1 mrg PATTERN_STMT to the end of STMT_INFO's pattern definition statements
885 1.1 mrg and return the new statement, otherwise return PATTERN_STMT as-is.
886 1.1 mrg VECITYPE is the vector form of PATTERN_STMT's result type. */
887 1.1 mrg
888 1.1 mrg static gimple *
889 1.1 mrg vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
890 1.1 mrg gimple *pattern_stmt, tree vecitype)
891 1.1 mrg {
892 1.1 mrg tree lhs = gimple_get_lhs (pattern_stmt);
893 1.1 mrg if (!types_compatible_p (type, TREE_TYPE (lhs)))
894 1.1 mrg {
895 1.1 mrg append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
896 1.1 mrg tree cast_var = vect_recog_temp_ssa_var (type, NULL);
897 1.1 mrg pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
898 1.1 mrg }
899 1.1 mrg return pattern_stmt;
900 1.1 mrg }
901 1.1 mrg
902 1.1 mrg /* Return true if STMT_VINFO describes a reduction for which reassociation
903 1.1 mrg is allowed. If STMT_INFO is part of a group, assume that it's part of
904 1.1 mrg a reduction chain and optimistically assume that all statements
905 1.1 mrg except the last allow reassociation.
906 1.1 mrg Also require it to have code CODE and to be a reduction
907 1.1 mrg in the outermost loop. When returning true, store the operands in
908 1.1 mrg *OP0_OUT and *OP1_OUT. */
909 1.1 mrg
910 1.1 mrg static bool
911 1.1 mrg vect_reassociating_reduction_p (vec_info *vinfo,
912 1.1 mrg stmt_vec_info stmt_info, tree_code code,
913 1.1 mrg tree *op0_out, tree *op1_out)
914 1.1 mrg {
915 1.1 mrg loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
916 1.1 mrg if (!loop_info)
917 1.1 mrg return false;
918 1.1 mrg
919 1.1 mrg gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
920 1.1 mrg if (!assign || gimple_assign_rhs_code (assign) != code)
921 1.1 mrg return false;
922 1.1 mrg
923 1.1 mrg /* We don't allow changing the order of the computation in the inner-loop
924 1.1 mrg when doing outer-loop vectorization. */
925 1.1 mrg class loop *loop = LOOP_VINFO_LOOP (loop_info);
926 1.1 mrg if (loop && nested_in_vect_loop_p (loop, stmt_info))
927 1.1 mrg return false;
928 1.1 mrg
929 1.1 mrg if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
930 1.1 mrg {
931 1.1 mrg if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
932 1.1 mrg code))
933 1.1 mrg return false;
934 1.1 mrg }
935 1.1 mrg else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
936 1.1 mrg return false;
937 1.1 mrg
938 1.1 mrg *op0_out = gimple_assign_rhs1 (assign);
939 1.1 mrg *op1_out = gimple_assign_rhs2 (assign);
940 1.1 mrg if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
941 1.1 mrg std::swap (*op0_out, *op1_out);
942 1.1 mrg return true;
943 1.1 mrg }
944 1.1 mrg
/* match.pd function to match
   (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
   with conditions:
   1) @1, @2, c, d, a and b are all of integral type.
   2) Both @1 and @2 have a single use.
   3) a and c have the same precision.
   4) c and @1 have different precisions.
   5) c and d have the same type, or they may differ in sign when the
   conversion is a truncation.

   Record a, c, d and @3.  */
956 1.1 mrg
957 1.1 mrg extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
958 1.1 mrg
/* Function vect_recog_cond_expr_convert

   Try to find the following pattern:

   TYPE_AB A,B;
   TYPE_CD C,D;
   TYPE_E E;
   TYPE_E op_true = (TYPE_E) A;
   TYPE_E op_false = (TYPE_E) B;

   E = C cmp D ? op_true : op_false;

   where
   TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
   TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
   single_use of op_true and op_false.
   TYPE_AB could differ in sign when (TYPE_E) A is a truncation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.
   here it starts with E = C cmp D ? op_true : op_false;

   Output:

   TYPE1 E' = C cmp D ? A : B;
   TYPE3 E = (TYPE3) E';

   There may be an extra nop_convert for A or B to handle different
   signedness.

   * TYPE_OUT: The vector type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
   E = (TYPE3) E';
   E' = C cmp D ? A : B; is recorded in pattern definition statements;  */

static gimple *
vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
				      stmt_vec_info stmt_vinfo, tree *type_out)
{
  gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
  tree lhs, match[4], temp, type, new_lhs, op2;
  gimple *cond_stmt;
  gimple *pattern_stmt;

  if (!last_stmt)
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);

  /* Find E = C cmp D ? (TYPE3) A : (TYPE3) B;
     TYPE_PRECISION (A) == TYPE_PRECISION (C).  */
  if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
    return NULL;

  vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);

  op2 = match[2];
  type = TREE_TYPE (match[1]);
  /* If the two arms differ in sign, first convert the false arm
     (match[2]) to the type of the true arm so the COND_EXPR below is
     well-typed.  */
  if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
    {
      op2 = vect_recog_temp_ssa_var (type, NULL);
      gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
      append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
			      get_vectype_for_scalar_type (vinfo, type));
    }

  /* Build E' = C cmp D ? A : B in the narrower type as a pattern
     definition statement ...  */
  temp = vect_recog_temp_ssa_var (type, NULL);
  cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
						 match[1], op2));
  append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
			  get_vectype_for_scalar_type (vinfo, type));
  /* ... and make the conversion back to the original type the main
     pattern statement.  */
  new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
  pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
  *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created pattern stmt: %G", pattern_stmt);
  return pattern_stmt;
}
1041 1.1 mrg
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type1a x_t;
     type1b y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
   the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of
   'type1a' and 'type1b' can differ.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */

static gimple *
vect_recog_dot_prod_pattern (vec_info *vinfo,
			     stmt_vec_info stmt_vinfo, tree *type_out)
{
  tree oprnd0, oprnd1;
  gimple *last_stmt = stmt_vinfo->stmt;
  tree type, half_type;
  gimple *pattern_stmt;
  tree var;

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type but the sign
       between X, Y and DPROD can differ.
     - sum is the same size of DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
				       &oprnd0, &oprnd1))
    return NULL;

  type = TREE_TYPE (gimple_get_lhs (last_stmt));

  vect_unpromoted_value unprom_mult;
  oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (!oprnd0)
    return NULL;

  stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
  if (!mult_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom0[2];
  enum optab_subtype subtype = optab_vector;
  if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
			     false, 2, unprom0, &half_type, &subtype))
    return NULL;

  /* If there are two widening operations, make sure they agree on the sign
     of the extension.  The result of an optab_vector_mixed_sign operation
     is signed; otherwise, the result has the same sign as the operands.  */
  if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
      && (subtype == optab_vector_mixed_sign
	  ? TYPE_UNSIGNED (unprom_mult.type)
	  : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
    return NULL;

  vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);

  /* Check that the target supports DOT_PROD_EXPR for these types.  */
  tree half_vectype;
  if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
					type_out, &half_vectype, subtype))
    return NULL;

  /* Get the inputs in the appropriate types.  */
  tree mult_oprnd[2];
  vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
		       unprom0, half_vectype, subtype);

  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
				      mult_oprnd[0], mult_oprnd[1], oprnd1);

  return pattern_stmt;
}
1176 1.1 mrg
1177 1.1 mrg
/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
   same size of 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>
  */

static gimple *
vect_recog_sad_pattern (vec_info *vinfo,
			stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree half_type;

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          [DAD = (TYPE2) DAD;  #optional]
          sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size of DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  tree plus_oprnd0, plus_oprnd1;
  if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
				       &plus_oprnd0, &plus_oprnd1))
    return NULL;

  tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));

  /* Any non-truncating sequence of conversions is OK here, since
     with a successful match, the result of the ABS(U) is known to fit
     within the nonnegative range of the result type.  (It cannot be the
     negative of the minimum signed value due to the range of the widening
     MINUS_EXPR.)  */
  vect_unpromoted_value unprom_abs;
  plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
						      &unprom_abs);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a loop-header
     phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
     Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (!plus_oprnd0)
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
  if (!abs_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
  if (!abs_stmt
      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
    return NULL;

  /* The input of the ABS must be signed; an unsigned operand would have
     no absolute value to take.  */
  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  tree abs_type = TREE_TYPE (abs_oprnd);
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* Peel off conversions from the ABS input.  This can involve sign
     changes (e.g. from an unsigned subtraction to a signed ABS input)
     or signed promotion, but it can't include unsigned promotion.
     (Note that ABS of an unsigned promotion should have been folded
     away before now anyway.)  */
  vect_unpromoted_value unprom_diff;
  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
						    &unprom_diff);
  if (!abs_oprnd)
    return NULL;
  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
      && TYPE_UNSIGNED (unprom_diff.type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
  if (!diff_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom[2];
  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
			     false, 2, unprom, &half_type))
    return NULL;

  vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);

  /* Check that the target supports SAD_EXPR for these types.  */
  tree half_vectype;
  if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
					type_out, &half_vectype))
    return NULL;

  /* Get the inputs to the SAD_EXPR in the appropriate types.  */
  tree sad_oprnd[2];
  vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
		       unprom, half_vectype);

  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
					      sad_oprnd[1], plus_oprnd1);

  return pattern_stmt;
}
1334 1.1 mrg
/* Recognize an operation that performs ORIG_CODE on widened inputs,
   so that it can be treated as though it had the form:

      A_TYPE a;
      B_TYPE b;
      HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
      HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
    | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
    | RES_TYPE res = a_extend ORIG_CODE b_extend;

   Try to replace the pattern with:

      A_TYPE a;
      B_TYPE b;
      HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
      HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
    | RES_TYPE res = (EXT_TYPE) ext;  // possible no-op

   where EXT_TYPE is wider than HALF_TYPE but has the same signedness.

   SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
   name of the pattern being matched, for dump purposes.  */

static gimple *
vect_recog_widen_op_pattern (vec_info *vinfo,
			     stmt_vec_info last_stmt_info, tree *type_out,
			     tree_code orig_code, tree_code wide_code,
			     bool shift_p, const char *name)
{
  gimple *last_stmt = last_stmt_info->stmt;

  /* Check that the statement operates on inputs widened from HALF_TYPE.  */
  vect_unpromoted_value unprom[2];
  tree half_type;
  if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
			     shift_p, 2, unprom, &half_type))
    return NULL;

  /* Pattern detected.  */
  vect_pattern_detected (name, last_stmt);

  /* ITYPE is the type of the widening operation's result: twice the
     precision of HALF_TYPE, with the same sign.  Reuse TYPE when it
     already has that form.  */
  tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
  tree itype = type;
  if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
      || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
    itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
					    TYPE_UNSIGNED (half_type));

  /* Check target support  */
  tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
  tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
  tree ctype = itype;
  tree vecctype = vecitype;
  if (orig_code == MINUS_EXPR
      && TYPE_UNSIGNED (itype)
      && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
    {
      /* Subtraction is special, even if half_type is unsigned and no matter
	 whether type is signed or unsigned, if type is wider than itype,
	 we need to sign-extend from the widening operation result to the
	 result type.
	 Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
	 itype unsigned short and type either int or unsigned int.
	 Widened (unsigned short) 0xfe - (unsigned short) 0xff is
	 (unsigned short) 0xffff, but for type int we want the result -1
	 and for type unsigned int 0xffffffff rather than 0xffff.  */
      ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
      vecctype = get_vectype_for_scalar_type (vinfo, ctype);
    }

  enum tree_code dummy_code;
  int dummy_int;
  auto_vec<tree> dummy_vec;
  if (!vectype
      || !vecitype
      || !vecctype
      || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_out = get_vectype_for_scalar_type (vinfo, type);
  if (!*type_out)
    return NULL;

  /* Convert the operands to HALF_TYPE and build the widening statement.  */
  tree oprnd[2];
  vect_convert_inputs (vinfo, last_stmt_info,
		       2, oprnd, half_type, unprom, vectype);

  tree var = vect_recog_temp_ssa_var (itype, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, wide_code,
					      oprnd[0], oprnd[1]);

  /* First sign-extend to CTYPE if required (see the MINUS_EXPR comment
     above), then convert to the final result type.  */
  if (vecctype != vecitype)
    pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
					pattern_stmt, vecitype);

  return vect_convert_output (vinfo, last_stmt_info,
			      type, pattern_stmt, vecctype);
}
1437 1.1 mrg
1438 1.1 mrg /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1439 1.1 mrg to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1440 1.1 mrg
1441 1.1 mrg static gimple *
1442 1.1 mrg vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1443 1.1 mrg tree *type_out)
1444 1.1 mrg {
1445 1.1 mrg return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1446 1.1 mrg MULT_EXPR, WIDEN_MULT_EXPR, false,
1447 1.1 mrg "vect_recog_widen_mult_pattern");
1448 1.1 mrg }
1449 1.1 mrg
1450 1.1 mrg /* Try to detect addition on widened inputs, converting PLUS_EXPR
1451 1.1 mrg to WIDEN_PLUS_EXPR. See vect_recog_widen_op_pattern for details. */
1452 1.1 mrg
1453 1.1 mrg static gimple *
1454 1.1 mrg vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1455 1.1 mrg tree *type_out)
1456 1.1 mrg {
1457 1.1 mrg return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1458 1.1 mrg PLUS_EXPR, WIDEN_PLUS_EXPR, false,
1459 1.1 mrg "vect_recog_widen_plus_pattern");
1460 1.1 mrg }
1461 1.1 mrg
1462 1.1 mrg /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1463 1.1 mrg to WIDEN_MINUS_EXPR. See vect_recog_widen_op_pattern for details. */
1464 1.1 mrg static gimple *
1465 1.1 mrg vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1466 1.1 mrg tree *type_out)
1467 1.1 mrg {
1468 1.1 mrg return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1469 1.1 mrg MINUS_EXPR, WIDEN_MINUS_EXPR, false,
1470 1.1 mrg "vect_recog_widen_minus_pattern");
1471 1.1 mrg }
1472 1.1 mrg
1473 1.1 mrg /* Function vect_recog_popcount_pattern
1474 1.1 mrg
1475 1.1 mrg Try to find the following pattern:
1476 1.1 mrg
1477 1.1 mrg UTYPE1 A;
1478 1.1 mrg TYPE1 B;
1479 1.1 mrg UTYPE2 temp_in;
1480 1.1 mrg TYPE3 temp_out;
1481 1.1 mrg temp_in = (UTYPE2)A;
1482 1.1 mrg
1483 1.1 mrg temp_out = __builtin_popcount{,l,ll} (temp_in);
1484 1.1 mrg B = (TYPE1) temp_out;
1485 1.1 mrg
1486 1.1 mrg TYPE2 may or may not be equal to TYPE3.
1487 1.1 mrg i.e. TYPE2 is equal to TYPE3 for __builtin_popcount
1488 1.1 mrg i.e. TYPE2 is not equal to TYPE3 for __builtin_popcountll
1489 1.1 mrg
1490 1.1 mrg Input:
1491 1.1 mrg
1492 1.1 mrg * STMT_VINFO: The stmt from which the pattern search begins.
1493 1.1 mrg here it starts with B = (TYPE1) temp_out;
1494 1.1 mrg
1495 1.1 mrg Output:
1496 1.1 mrg
1497 1.1 mrg * TYPE_OUT: The vector type of the output of this pattern.
1498 1.1 mrg
1499 1.1 mrg * Return value: A new stmt that will be used to replace the sequence of
1500 1.1 mrg stmts that constitute the pattern. In this case it will be:
1501 1.1 mrg B = .POPCOUNT (A);
1502 1.1 mrg */
1503 1.1 mrg
1504 1.1 mrg static gimple *
1505 1.1 mrg vect_recog_popcount_pattern (vec_info *vinfo,
1506 1.1 mrg stmt_vec_info stmt_vinfo, tree *type_out)
1507 1.1 mrg {
1508 1.1 mrg gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
1509 1.1 mrg gimple *popcount_stmt, *pattern_stmt;
1510 1.1 mrg tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
1511 1.1 mrg auto_vec<tree> vargs;
1512 1.1 mrg
1513 1.1 mrg /* Find B = (TYPE1) temp_out. */
1514 1.1 mrg if (!last_stmt)
1515 1.1 mrg return NULL;
1516 1.1 mrg tree_code code = gimple_assign_rhs_code (last_stmt);
1517 1.1 mrg if (!CONVERT_EXPR_CODE_P (code))
1518 1.1 mrg return NULL;
1519 1.1 mrg
1520 1.1 mrg lhs_oprnd = gimple_assign_lhs (last_stmt);
1521 1.1 mrg lhs_type = TREE_TYPE (lhs_oprnd);
1522 1.1 mrg if (!INTEGRAL_TYPE_P (lhs_type))
1523 1.1 mrg return NULL;
1524 1.1 mrg
1525 1.1 mrg rhs_oprnd = gimple_assign_rhs1 (last_stmt);
1526 1.1 mrg if (TREE_CODE (rhs_oprnd) != SSA_NAME
1527 1.1 mrg || !has_single_use (rhs_oprnd))
1528 1.1 mrg return NULL;
1529 1.1 mrg popcount_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
1530 1.1 mrg
1531 1.1 mrg /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
1532 1.1 mrg if (!is_gimple_call (popcount_stmt))
1533 1.1 mrg return NULL;
1534 1.1 mrg switch (gimple_call_combined_fn (popcount_stmt))
1535 1.1 mrg {
1536 1.1 mrg CASE_CFN_POPCOUNT:
1537 1.1 mrg break;
1538 1.1 mrg default:
1539 1.1 mrg return NULL;
1540 1.1 mrg }
1541 1.1 mrg
1542 1.1 mrg if (gimple_call_num_args (popcount_stmt) != 1)
1543 1.1 mrg return NULL;
1544 1.1 mrg
1545 1.1 mrg rhs_oprnd = gimple_call_arg (popcount_stmt, 0);
1546 1.1 mrg vect_unpromoted_value unprom_diff;
1547 1.1 mrg rhs_origin = vect_look_through_possible_promotion (vinfo, rhs_oprnd,
1548 1.1 mrg &unprom_diff);
1549 1.1 mrg
1550 1.1 mrg if (!rhs_origin)
1551 1.1 mrg return NULL;
1552 1.1 mrg
1553 1.1 mrg /* Input and output of .POPCOUNT should be same-precision integer.
1554 1.1 mrg Also A should be unsigned or same precision as temp_in,
1555 1.1 mrg otherwise there would be sign_extend from A to temp_in. */
1556 1.1 mrg if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type)
1557 1.1 mrg || (!TYPE_UNSIGNED (unprom_diff.type)
1558 1.1 mrg && (TYPE_PRECISION (unprom_diff.type)
1559 1.1 mrg != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))))
1560 1.1 mrg return NULL;
1561 1.1 mrg vargs.safe_push (unprom_diff.op);
1562 1.1 mrg
1563 1.1 mrg vect_pattern_detected ("vec_regcog_popcount_pattern", popcount_stmt);
1564 1.1 mrg vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
1565 1.1 mrg /* Do it only if the backend has popcount<vector_mode>2 pattern. */
1566 1.1 mrg if (!vec_type
1567 1.1 mrg || !direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
1568 1.1 mrg OPTIMIZE_FOR_SPEED))
1569 1.1 mrg return NULL;
1570 1.1 mrg
1571 1.1 mrg /* Create B = .POPCOUNT (A). */
1572 1.1 mrg new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1573 1.1 mrg pattern_stmt = gimple_build_call_internal_vec (IFN_POPCOUNT, vargs);
1574 1.1 mrg gimple_call_set_lhs (pattern_stmt, new_var);
1575 1.1 mrg gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1576 1.1 mrg *type_out = vec_type;
1577 1.1 mrg
1578 1.1 mrg if (dump_enabled_p ())
1579 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location,
1580 1.1 mrg "created pattern stmt: %G", pattern_stmt);
1581 1.1 mrg return pattern_stmt;
1582 1.1 mrg }
1583 1.1 mrg
1584 1.1 mrg /* Function vect_recog_pow_pattern
1585 1.1 mrg
1586 1.1 mrg Try to find the following pattern:
1587 1.1 mrg
1588 1.1 mrg x = POW (y, N);
1589 1.1 mrg
1590 1.1 mrg with POW being one of pow, powf, powi, powif and N being
1591 1.1 mrg either 2 or 0.5.
1592 1.1 mrg
1593 1.1 mrg Input:
1594 1.1 mrg
1595 1.1 mrg * STMT_VINFO: The stmt from which the pattern search begins.
1596 1.1 mrg
1597 1.1 mrg Output:
1598 1.1 mrg
1599 1.1 mrg * TYPE_OUT: The type of the output of this pattern.
1600 1.1 mrg
1601 1.1 mrg * Return value: A new stmt that will be used to replace the sequence of
1602 1.1 mrg stmts that constitute the pattern. In this case it will be:
1603 1.1 mrg x = x * x
1604 1.1 mrg or
1605 1.1 mrg x = sqrt (x)
1606 1.1 mrg */
1607 1.1 mrg
static gimple *
vect_recog_pow_pattern (vec_info *vinfo,
			stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree base, exp;
  gimple *stmt;
  tree var;

  /* Only calls whose result is actually used (have an lhs) can be
     replaced by a pattern statement.  */
  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  switch (gimple_call_combined_fn (last_stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      break;

    default:
      return NULL;
    }

  base = gimple_call_arg (last_stmt, 0);
  exp = gimple_call_arg (last_stmt, 1);
  if (TREE_CODE (exp) != REAL_CST
      && TREE_CODE (exp) != INTEGER_CST)
    {
      /* Non-constant exponent: the only case handled is pow (C, x)
	 with constant base C, rewritten as exp (log (C) * x) when a
	 SIMD clone of exp is available.  */
      if (flag_unsafe_math_optimizations
	  && TREE_CODE (base) == REAL_CST
	  && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
	{
	  combined_fn log_cfn;
	  built_in_function exp_bfn;
	  /* Pick the log/exp pair matching the pow variant's type.  */
	  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
	    {
	    case BUILT_IN_POW:
	      log_cfn = CFN_BUILT_IN_LOG;
	      exp_bfn = BUILT_IN_EXP;
	      break;
	    case BUILT_IN_POWF:
	      log_cfn = CFN_BUILT_IN_LOGF;
	      exp_bfn = BUILT_IN_EXPF;
	      break;
	    case BUILT_IN_POWL:
	      log_cfn = CFN_BUILT_IN_LOGL;
	      exp_bfn = BUILT_IN_EXPL;
	      break;
	    default:
	      return NULL;
	    }
	  /* log (C) folded to a constant at compile time.  */
	  tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
	  tree exp_decl = builtin_decl_implicit (exp_bfn);
	  /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
	     does that, but if C is a power of 2, we want to use
	     exp2 (log2 (C) * x) in the non-vectorized version, but for
	     vectorization we don't have vectorized exp2.  */
	  if (logc
	      && TREE_CODE (logc) == REAL_CST
	      && exp_decl
	      && lookup_attribute ("omp declare simd",
				   DECL_ATTRIBUTES (exp_decl)))
	    {
	      /* Make sure a usable SIMD clone of exp exists (or can be
		 created) before committing to the rewrite.  */
	      cgraph_node *node = cgraph_node::get_create (exp_decl);
	      if (node->simd_clones == NULL)
		{
		  if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
		      || node->definition)
		    return NULL;
		  expand_simd_clones (node);
		  if (node->simd_clones == NULL)
		    return NULL;
		}
	      *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
	      if (!*type_out)
		return NULL;
	      /* def = log (C) * x, emitted into the pattern def
		 sequence; the returned stmt is res = exp (def).  */
	      tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
	      gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
	      append_pattern_def_seq (vinfo, stmt_vinfo, g);
	      tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
	      g = gimple_build_call (exp_decl, 1, def);
	      gimple_call_set_lhs (g, res);
	      return g;
	    }
	}

      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
	  && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    {
      if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
					    TREE_TYPE (base), type_out))
	return NULL;

      /* Replace pow (x, 2) with x * x.  */
      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign (var, MULT_EXPR, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    {
      *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
      if (*type_out
	  && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
					     OPTIMIZE_FOR_SPEED))
	{
	  /* Replace pow (x, 0.5) with .SQRT (x).  */
	  gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
	  var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	  gimple_call_set_lhs (stmt, var);
	  gimple_call_set_nothrow (stmt, true);
	  return stmt;
	}
    }

  return NULL;
}
1733 1.1 mrg
1734 1.1 mrg
1735 1.1 mrg /* Function vect_recog_widen_sum_pattern
1736 1.1 mrg
1737 1.1 mrg Try to find the following pattern:
1738 1.1 mrg
1739 1.1 mrg type x_t;
1740 1.1 mrg TYPE x_T, sum = init;
1741 1.1 mrg loop:
1742 1.1 mrg sum_0 = phi <init, sum_1>
1743 1.1 mrg S1 x_t = *p;
1744 1.1 mrg S2 x_T = (TYPE) x_t;
1745 1.1 mrg S3 sum_1 = x_T + sum_0;
1746 1.1 mrg
1747 1.1 mrg where type 'TYPE' is at least double the size of type 'type', i.e - we're
1748 1.1 mrg summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1749 1.1 mrg a special case of a reduction computation.
1750 1.1 mrg
1751 1.1 mrg Input:
1752 1.1 mrg
1753 1.1 mrg * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1754 1.1 mrg when this function is called with S3, the pattern {S2,S3} will be detected.
1755 1.1 mrg
1756 1.1 mrg Output:
1757 1.1 mrg
1758 1.1 mrg * TYPE_OUT: The type of the output of this pattern.
1759 1.1 mrg
1760 1.1 mrg * Return value: A new stmt that will be used to replace the sequence of
1761 1.1 mrg stmts that constitute the pattern. In this case it will be:
1762 1.1 mrg WIDEN_SUM <x_t, sum_0>
1763 1.1 mrg
1764 1.1 mrg Note: The widening-sum idiom is a widening reduction pattern that is
1765 1.1 mrg vectorized without preserving all the intermediate results. It
1766 1.1 mrg produces only N/2 (widened) results (by summing up pairs of
1767 1.1 mrg intermediate results) rather than all N results. Therefore, we
1768 1.1 mrg cannot allow this pattern when we want to get all the results and in
1769 1.1 mrg the correct order (as is the case when this computation is in an
   inner-loop nested in an outer-loop that is being vectorized).  */
1771 1.1 mrg
1772 1.1 mrg static gimple *
1773 1.1 mrg vect_recog_widen_sum_pattern (vec_info *vinfo,
1774 1.1 mrg stmt_vec_info stmt_vinfo, tree *type_out)
1775 1.1 mrg {
1776 1.1 mrg gimple *last_stmt = stmt_vinfo->stmt;
1777 1.1 mrg tree oprnd0, oprnd1;
1778 1.1 mrg tree type;
1779 1.1 mrg gimple *pattern_stmt;
1780 1.1 mrg tree var;
1781 1.1 mrg
1782 1.1 mrg /* Look for the following pattern
1783 1.1 mrg DX = (TYPE) X;
1784 1.1 mrg sum_1 = DX + sum_0;
1785 1.1 mrg In which DX is at least double the size of X, and sum_1 has been
1786 1.1 mrg recognized as a reduction variable.
1787 1.1 mrg */
1788 1.1 mrg
1789 1.1 mrg /* Starting from LAST_STMT, follow the defs of its uses in search
1790 1.1 mrg of the above pattern. */
1791 1.1 mrg
1792 1.1 mrg if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1793 1.1 mrg &oprnd0, &oprnd1)
1794 1.1 mrg || TREE_CODE (oprnd0) != SSA_NAME
1795 1.1 mrg || !vinfo->lookup_def (oprnd0))
1796 1.1 mrg return NULL;
1797 1.1 mrg
1798 1.1 mrg type = TREE_TYPE (gimple_get_lhs (last_stmt));
1799 1.1 mrg
1800 1.1 mrg /* So far so good. Since last_stmt was detected as a (summation) reduction,
1801 1.1 mrg we know that oprnd1 is the reduction variable (defined by a loop-header
1802 1.1 mrg phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1803 1.1 mrg Left to check that oprnd0 is defined by a cast from type 'type' to type
1804 1.1 mrg 'TYPE'. */
1805 1.1 mrg
1806 1.1 mrg vect_unpromoted_value unprom0;
1807 1.1 mrg if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1808 1.1 mrg || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1809 1.1 mrg return NULL;
1810 1.1 mrg
1811 1.1 mrg vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1812 1.1 mrg
1813 1.1 mrg if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
1814 1.1 mrg unprom0.type, type_out))
1815 1.1 mrg return NULL;
1816 1.1 mrg
1817 1.1 mrg var = vect_recog_temp_ssa_var (type, NULL);
1818 1.1 mrg pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1819 1.1 mrg
1820 1.1 mrg return pattern_stmt;
1821 1.1 mrg }
1822 1.1 mrg
1823 1.1 mrg /* Recognize cases in which an operation is performed in one type WTYPE
1824 1.1 mrg but could be done more efficiently in a narrower type NTYPE. For example,
1825 1.1 mrg if we have:
1826 1.1 mrg
1827 1.1 mrg ATYPE a; // narrower than NTYPE
1828 1.1 mrg BTYPE b; // narrower than NTYPE
1829 1.1 mrg WTYPE aw = (WTYPE) a;
1830 1.1 mrg WTYPE bw = (WTYPE) b;
1831 1.1 mrg WTYPE res = aw + bw; // only uses of aw and bw
1832 1.1 mrg
1833 1.1 mrg then it would be more efficient to do:
1834 1.1 mrg
1835 1.1 mrg NTYPE an = (NTYPE) a;
1836 1.1 mrg NTYPE bn = (NTYPE) b;
1837 1.1 mrg NTYPE resn = an + bn;
1838 1.1 mrg WTYPE res = (WTYPE) resn;
1839 1.1 mrg
1840 1.1 mrg Other situations include things like:
1841 1.1 mrg
1842 1.1 mrg ATYPE a; // NTYPE or narrower
1843 1.1 mrg WTYPE aw = (WTYPE) a;
1844 1.1 mrg WTYPE res = aw + b;
1845 1.1 mrg
1846 1.1 mrg when only "(NTYPE) res" is significant. In that case it's more efficient
1847 1.1 mrg to truncate "b" and do the operation on NTYPE instead:
1848 1.1 mrg
1849 1.1 mrg NTYPE an = (NTYPE) a;
1850 1.1 mrg NTYPE bn = (NTYPE) b; // truncation
1851 1.1 mrg NTYPE resn = an + bn;
1852 1.1 mrg WTYPE res = (WTYPE) resn;
1853 1.1 mrg
1854 1.1 mrg All users of "res" should then use "resn" instead, making the final
1855 1.1 mrg statement dead (not marked as relevant). The final statement is still
1856 1.1 mrg needed to maintain the type correctness of the IR.
1857 1.1 mrg
1858 1.1 mrg vect_determine_precisions has already determined the minimum
   precision of the operation and the minimum precision required
1860 1.1 mrg by users of the result. */
1861 1.1 mrg
static gimple *
vect_recog_over_widening_pattern (vec_info *vinfo,
				  stmt_vec_info last_stmt_info, tree *type_out)
{
  gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
  if (!last_stmt)
    return NULL;

  /* See whether we have found that this operation can be done on a
     narrower type without changing its semantics.  */
  unsigned int new_precision = last_stmt_info->operation_precision;
  if (!new_precision)
    return NULL;

  tree lhs = gimple_assign_lhs (last_stmt);
  tree type = TREE_TYPE (lhs);
  tree_code code = gimple_assign_rhs_code (last_stmt);

  /* Punt for reductions where we don't handle the type conversions.  */
  if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
    return NULL;

  /* Keep the first operand of a COND_EXPR as-is: only the other two
     operands are interesting.  */
  unsigned int first_op = (code == COND_EXPR ? 2 : 1);

  /* Check the operands.  For each one, record the value it was widened
     from (or the operand itself for constants) in UNPROM.  */
  unsigned int nops = gimple_num_ops (last_stmt) - first_op;
  auto_vec <vect_unpromoted_value, 3> unprom (nops);
  unprom.quick_grow (nops);
  unsigned int min_precision = 0;
  bool single_use_p = false;
  for (unsigned int i = 0; i < nops; ++i)
    {
      tree op = gimple_op (last_stmt, first_op + i);
      if (TREE_CODE (op) == INTEGER_CST)
	unprom[i].set_op (op, vect_constant_def);
      else if (TREE_CODE (op) == SSA_NAME)
	{
	  bool op_single_use_p = true;
	  if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
						     &op_single_use_p))
	    return NULL;
	  /* If:

	     (1) N bits of the result are needed;
	     (2) all inputs are widened from M<N bits; and
	     (3) one operand OP is a single-use SSA name

	     we can shift the M->N widening from OP to the output
	     without changing the number or type of extensions involved.
	     This then reduces the number of copies of STMT_INFO.

	     If instead of (3) more than one operand is a single-use SSA name,
	     shifting the extension to the output is even more of a win.

	     If instead:

	     (1) N bits of the result are needed;
	     (2) one operand OP2 is widened from M2<N bits;
	     (3) another operand OP1 is widened from M1<M2 bits; and
	     (4) both OP1 and OP2 are single-use

	     the choice is between:

	     (a) truncating OP2 to M1, doing the operation on M1,
		 and then widening the result to N

	     (b) widening OP1 to M2, doing the operation on M2, and then
		 widening the result to N

	     Both shift the M2->N widening of the inputs to the output.
	     (a) additionally shifts the M1->M2 widening to the output;
	     it requires fewer copies of STMT_INFO but requires an extra
	     M2->M1 truncation.

	     Which is better will depend on the complexity and cost of
	     STMT_INFO, which is hard to predict at this stage.  However,
	     a clear tie-breaker in favor of (b) is the fact that the
	     truncation in (a) increases the length of the operation chain.

	     If instead of (4) only one of OP1 or OP2 is single-use,
	     (b) is still a win over doing the operation in N bits:
	     it still shifts the M2->N widening on the single-use operand
	     to the output and reduces the number of STMT_INFO copies.

	     If neither operand is single-use then operating on fewer than
	     N bits might lead to more extensions overall.  Whether it does
	     or not depends on global information about the vectorization
	     region, and whether that's a good trade-off would again
	     depend on the complexity and cost of the statements involved,
	     as well as things like register pressure that are not normally
	     modelled at this stage.  We therefore ignore these cases
	     and just optimize the clear single-use wins above.

	     Thus we take the maximum precision of the unpromoted operands
	     and record whether any operand is single-use.  */
	  if (unprom[i].dt == vect_internal_def)
	    {
	      min_precision = MAX (min_precision,
				   TYPE_PRECISION (unprom[i].type));
	      single_use_p |= op_single_use_p;
	    }
	}
      else
	return NULL;
    }

  /* Although the operation could be done in operation_precision, we have
     to balance that against introducing extra truncations or extensions.
     Calculate the minimum precision that can be handled efficiently.

     The loop above determined that the operation could be handled
     efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
     extension from the inputs to the output without introducing more
     instructions, and would reduce the number of instructions required
     for STMT_INFO itself.

     vect_determine_precisions has also determined that the result only
     needs min_output_precision bits.  Truncating by a factor of N times
     requires a tree of N - 1 instructions, so if TYPE is N times wider
     than min_output_precision, doing the operation in TYPE and truncating
     the result requires N + (N - 1) = 2N - 1 instructions per output vector.
     In contrast:

     - truncating the input to a unary operation and doing the operation
       in the new type requires at most N - 1 + 1 = N instructions per
       output vector

     - doing the same for a binary operation requires at most
       (N - 1) * 2 + 1 = 2N - 1 instructions per output vector

     Both unary and binary operations require fewer instructions than
     this if the operands were extended from a suitable truncated form.
     Thus there is usually nothing to lose by doing operations in
     min_output_precision bits, but there can be something to gain.  */
  if (!single_use_p)
    min_precision = last_stmt_info->min_output_precision;
  else
    min_precision = MIN (min_precision, last_stmt_info->min_output_precision);

  /* Apply the minimum efficient precision we just calculated.  */
  if (new_precision < min_precision)
    new_precision = min_precision;
  new_precision = vect_element_precision (new_precision);
  if (new_precision >= TYPE_PRECISION (type))
    return NULL;

  vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);

  *type_out = get_vectype_for_scalar_type (vinfo, type);
  if (!*type_out)
    return NULL;

  /* We've found a viable pattern.  Get the new type of the operation.  */
  bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
  tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);

  /* If we're truncating an operation, we need to make sure that we
     don't introduce new undefined overflow.  The codes tested here are
     a subset of those accepted by vect_truncatable_operation_p.  */
  tree op_type = new_type;
  if (TYPE_OVERFLOW_UNDEFINED (new_type)
      && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
    op_type = build_nonstandard_integer_type (new_precision, true);

  /* We specifically don't check here whether the target supports the
     new operation, since it might be something that a later pattern
     wants to rewrite anyway.  If targets have a minimum element size
     for some optabs, we should pattern-match smaller ops to larger ops
     where beneficial.  */
  tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
  tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
  if (!new_vectype || !op_vectype)
    return NULL;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
		     type, new_type);

  /* Calculate the rhs operands for an operation on OP_TYPE.  The
     COND_EXPR condition (operands before FIRST_OP) is copied across
     unchanged; the remaining operands are converted to OP_TYPE.  */
  tree ops[3] = {};
  for (unsigned int i = 1; i < first_op; ++i)
    ops[i - 1] = gimple_op (last_stmt, i);
  vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
		       op_type, &unprom[0], op_vectype);

  /* Use the operation to produce a result of type OP_TYPE.  */
  tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (new_var, code,
					      ops[0], ops[1], ops[2]);
  gimple_set_location (pattern_stmt, gimple_location (last_stmt));

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created pattern stmt: %G", pattern_stmt);

  /* Convert back to the original signedness, if OP_TYPE is different
     from NEW_TYPE.  */
  if (op_type != new_type)
    pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
					pattern_stmt, op_vectype);

  /* Promote the result to the original type.  */
  pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
				      pattern_stmt, new_vectype);

  return pattern_stmt;
}
2071 1.1 mrg
2072 1.1 mrg /* Recognize the following patterns:
2073 1.1 mrg
2074 1.1 mrg ATYPE a; // narrower than TYPE
2075 1.1 mrg BTYPE b; // narrower than TYPE
2076 1.1 mrg
2077 1.1 mrg 1) Multiply high with scaling
2078 1.1 mrg TYPE res = ((TYPE) a * (TYPE) b) >> c;
2079 1.1 mrg Here, c is bitsize (TYPE) / 2 - 1.
2080 1.1 mrg
2081 1.1 mrg 2) ... or also with rounding
2082 1.1 mrg TYPE res = (((TYPE) a * (TYPE) b) >> d + 1) >> 1;
2083 1.1 mrg Here, d is bitsize (TYPE) / 2 - 2.
2084 1.1 mrg
2085 1.1 mrg 3) Normal multiply high
2086 1.1 mrg TYPE res = ((TYPE) a * (TYPE) b) >> e;
2087 1.1 mrg Here, e is bitsize (TYPE) / 2.
2088 1.1 mrg
2089 1.1 mrg where only the bottom half of res is used. */
2090 1.1 mrg
static gimple *
vect_recog_mulhs_pattern (vec_info *vinfo,
			  stmt_vec_info last_stmt_info, tree *type_out)
{
  /* Check for a right shift.  */
  gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
  if (!last_stmt
      || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
    return NULL;

  /* Check that the shift result is wider than the users of the
     result need (i.e. that narrowing would be a natural choice).  */
  tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
  unsigned int target_precision
    = vect_element_precision (last_stmt_info->min_output_precision);
  if (!INTEGRAL_TYPE_P (lhs_type)
      || target_precision >= TYPE_PRECISION (lhs_type))
    return NULL;

  /* Look through any change in sign on the outer shift input.  */
  vect_unpromoted_value unprom_rshift_input;
  tree rshift_input = vect_look_through_possible_promotion
    (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
  if (!rshift_input
      || TYPE_PRECISION (TREE_TYPE (rshift_input))
	 != TYPE_PRECISION (lhs_type))
    return NULL;

  /* Get the definition of the shift input.  */
  stmt_vec_info rshift_input_stmt_info
    = vect_get_internal_def (vinfo, rshift_input);
  if (!rshift_input_stmt_info)
    return NULL;
  gassign *rshift_input_stmt
    = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
  if (!rshift_input_stmt)
    return NULL;

  stmt_vec_info mulh_stmt_info;
  tree scale_term;
  bool rounding_p = false;

  /* Check for the presence of the rounding term, i.e. pattern 2):
     (((TYPE) a * (TYPE) b) >> d + 1) >> 1.  */
  if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
    {
      /* Check that the outer shift was by 1.  */
      if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
	return NULL;

      /* Check that the second operand of the PLUS_EXPR is 1.  */
      if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
	return NULL;

      /* Look through any change in sign on the addition input.  */
      vect_unpromoted_value unprom_plus_input;
      tree plus_input = vect_look_through_possible_promotion
	(vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
      if (!plus_input
	  || TYPE_PRECISION (TREE_TYPE (plus_input))
	     != TYPE_PRECISION (TREE_TYPE (rshift_input)))
	return NULL;

      /* Get the definition of the multiply-high-scale part.  */
      stmt_vec_info plus_input_stmt_info
	= vect_get_internal_def (vinfo, plus_input);
      if (!plus_input_stmt_info)
	return NULL;
      gassign *plus_input_stmt
	= dyn_cast <gassign *> (plus_input_stmt_info->stmt);
      if (!plus_input_stmt
	  || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
	return NULL;

      /* Look through any change in sign on the scaling input.  */
      vect_unpromoted_value unprom_scale_input;
      tree scale_input = vect_look_through_possible_promotion
	(vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
      if (!scale_input
	  || TYPE_PRECISION (TREE_TYPE (scale_input))
	     != TYPE_PRECISION (TREE_TYPE (plus_input)))
	return NULL;

      /* Get the definition of the multiply-high part.  */
      mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
      if (!mulh_stmt_info)
	return NULL;

      /* Get the scaling term.  */
      scale_term = gimple_assign_rhs2 (plus_input_stmt);
      rounding_p = true;
    }
  else
    {
      /* No rounding term: the shift input itself is the multiply-high
	 part and the outer shift amount is the scaling term.  */
      mulh_stmt_info = rshift_input_stmt_info;
      scale_term = gimple_assign_rhs2 (last_stmt);
    }

  /* Check that the scaling factor is constant.  */
  if (TREE_CODE (scale_term) != INTEGER_CST)
    return NULL;

  /* Check whether the scaling input term can be seen as two widened
     inputs multiplied together.  */
  vect_unpromoted_value unprom_mult[2];
  tree new_type;
  unsigned int nops
    = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
			    false, 2, unprom_mult, &new_type);
  if (nops != 2)
    return NULL;

  /* Adjust output precision.  */
  if (TYPE_PRECISION (new_type) < target_precision)
    new_type = build_nonstandard_integer_type
      (target_precision, TYPE_UNSIGNED (new_type));

  unsigned mult_precision = TYPE_PRECISION (new_type);
  internal_fn ifn;
  /* Check that the scaling factor is expected.  Instead of
     target_precision, we should use the one that we actually
     use for internal function.  */
  if (rounding_p)
    {
      /* Check pattern 2).  */
      if (wi::to_widest (scale_term) + mult_precision + 2
	  != TYPE_PRECISION (lhs_type))
	return NULL;

      ifn = IFN_MULHRS;
    }
  else
    {
      /* Check for pattern 1).  */
      if (wi::to_widest (scale_term) + mult_precision + 1
	  == TYPE_PRECISION (lhs_type))
	ifn = IFN_MULHS;
      /* Check for pattern 3).  */
      else if (wi::to_widest (scale_term) + mult_precision
	       == TYPE_PRECISION (lhs_type))
	ifn = IFN_MULH;
      else
	return NULL;
    }

  vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);

  /* Check for target support.  */
  tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
  if (!new_vectype
      || !direct_internal_fn_supported_p
	   (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
    return NULL;

  /* The IR requires a valid vector type for the cast result, even though
     it's likely to be discarded.  */
  *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
  if (!*type_out)
    return NULL;

  /* Generate the IFN_MULHRS call.  */
  tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
  tree new_ops[2];
  vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
		       unprom_mult, new_vectype);
  gcall *mulhrs_stmt
    = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
  gimple_call_set_lhs (mulhrs_stmt, new_var);
  gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created pattern stmt: %G", mulhrs_stmt);

  return vect_convert_output (vinfo, last_stmt_info, lhs_type,
			      mulhrs_stmt, new_vectype);
}
2267 1.1 mrg
2268 1.1 mrg /* Recognize the patterns:
2269 1.1 mrg
2270 1.1 mrg ATYPE a; // narrower than TYPE
2271 1.1 mrg BTYPE b; // narrower than TYPE
2272 1.1 mrg (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
2273 1.1 mrg or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
2274 1.1 mrg
2275 1.1 mrg where only the bottom half of avg is used. Try to transform them into:
2276 1.1 mrg
2277 1.1 mrg (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
2278 1.1 mrg or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
2279 1.1 mrg
2280 1.1 mrg followed by:
2281 1.1 mrg
2282 1.1 mrg TYPE avg = (TYPE) avg';
2283 1.1 mrg
2284 1.1 mrg where NTYPE is no wider than half of TYPE. Since only the bottom half
2285 1.1 mrg of avg is used, all or part of the cast of avg' should become redundant.
2286 1.1 mrg
2287 1.1 mrg If there is no target support available, generate code to distribute rshift
2288 1.1 mrg over plus and add a carry. */
2289 1.1 mrg
static gimple *
vect_recog_average_pattern (vec_info *vinfo,
			    stmt_vec_info last_stmt_info, tree *type_out)
{
  /* Check for a shift right by one bit.  */
  gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
  if (!last_stmt
      || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
      || !integer_onep (gimple_assign_rhs2 (last_stmt)))
    return NULL;

  /* Check that the shift result is wider than the users of the
     result need (i.e. that narrowing would be a natural choice).
     TARGET_PRECISION comes from the over-widening analysis recorded
     in MIN_OUTPUT_PRECISION, rounded to an element precision.  */
  tree lhs = gimple_assign_lhs (last_stmt);
  tree type = TREE_TYPE (lhs);
  unsigned int target_precision
    = vect_element_precision (last_stmt_info->min_output_precision);
  if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
    return NULL;

  /* Look through any change in sign on the shift input.  */
  tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
  vect_unpromoted_value unprom_plus;
  rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
						     &unprom_plus);
  if (!rshift_rhs
      || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
    return NULL;

  /* Get the definition of the shift input.  */
  stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
  if (!plus_stmt_info)
    return NULL;

  /* Check whether the shift input can be seen as a tree of additions on
     2 or 3 widened inputs.  Two inputs correspond to pattern (1)
     (AVG_FLOOR); a third input, which must be the constant 1, gives the
     rounding form (2) (AVG_CEIL).

     Note that the pattern should be a win even if the result of one or
     more additions is reused elsewhere: if the pattern matches, we'd be
     replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s.  */
  internal_fn ifn = IFN_AVG_FLOOR;
  vect_unpromoted_value unprom[3];
  tree new_type;
  unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
					    WIDEN_PLUS_EXPR, false, 3,
					    unprom, &new_type);
  if (nops == 0)
    return NULL;
  if (nops == 3)
    {
      /* Check that one operand is 1.  */
      unsigned int i;
      for (i = 0; i < 3; ++i)
	if (integer_onep (unprom[i].op))
	  break;
      if (i == 3)
	return NULL;
      /* Throw away the 1 operand and keep the other two.  */
      if (i < 2)
	unprom[i] = unprom[2];
      ifn = IFN_AVG_CEIL;
    }

  vect_pattern_detected ("vect_recog_average_pattern", last_stmt);

  /* We know that:

     (a) the operation can be viewed as:

	   TYPE widened0 = (TYPE) UNPROM[0];
	   TYPE widened1 = (TYPE) UNPROM[1];
	   TYPE tmp1 = widened0 + widened1 {+ 1};
	   TYPE tmp2 = tmp1 >> 1;   // LAST_STMT_INFO

     (b) the first two statements are equivalent to:

	   TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
	   TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];

     (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
	 where sensible;

     (d) all the operations can be performed correctly at twice the width of
	 NEW_TYPE, due to the nature of the average operation; and

     (e) users of the result of the right shift need only TARGET_PRECISION
	 bits, where TARGET_PRECISION is no more than half of TYPE's
	 precision.

     Under these circumstances, the only situation in which NEW_TYPE
     could be narrower than TARGET_PRECISION is if widened0, widened1
     and an addition result are all used more than once.  Thus we can
     treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
     as "free", whereas widening the result of the average instruction
     from NEW_TYPE to TARGET_PRECISION would be a new operation.  It's
     therefore better not to go narrower than TARGET_PRECISION.  */
  if (TYPE_PRECISION (new_type) < target_precision)
    new_type = build_nonstandard_integer_type (target_precision,
					       TYPE_UNSIGNED (new_type));

  /* Check for target support.  */
  tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
  if (!new_vectype)
    return NULL;

  bool fallback_p = false;

  /* Prefer a direct IFN_AVG_* instruction; otherwise, for unsigned types
     only, fall back to open-coding the average with shift/add/mask,
     provided the target supports each individual vector operation.  */
  if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
    ;
  else if (TYPE_UNSIGNED (new_type)
	   && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
	   && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
	   && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
	   && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
    fallback_p = true;
  else
    return NULL;

  /* The IR requires a valid vector type for the cast result, even though
     it's likely to be discarded.  */
  *type_out = get_vectype_for_scalar_type (vinfo, type);
  if (!*type_out)
    return NULL;

  tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
  tree new_ops[2];
  vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
		       unprom, new_vectype);

  if (fallback_p)
    {
      /* As a fallback, generate code for following sequence:

	 shifted_op0 = new_ops[0] >> 1;
	 shifted_op1 = new_ops[1] >> 1;
	 sum_of_shifted = shifted_op0 + shifted_op1;
	 unmasked_carry = new_ops[0] and/or new_ops[1];
	 carry = unmasked_carry & 1;
	 new_var = sum_of_shifted + carry;

	 For AVG_CEIL the carry is set when either input has its low bit
	 set (BIT_IOR_EXPR); for AVG_FLOOR it is set only when both do
	 (BIT_AND_EXPR).  */

      tree one_cst = build_one_cst (new_type);
      gassign *g;

      tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
      g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
      append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);

      tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
      g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
      append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);

      tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
      g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
			       shifted_op0, shifted_op1);
      append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);

      tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
      tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
      g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
      append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);

      tree carry = vect_recog_temp_ssa_var (new_type, NULL);
      g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
      append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);

      g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
      return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
    }

  /* Generate the IFN_AVG* call.  */
  gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
						    new_ops[1]);
  gimple_call_set_lhs (average_stmt, new_var);
  gimple_set_location (average_stmt, gimple_location (last_stmt));

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created pattern stmt: %G", average_stmt);

  /* Cast the NEW_TYPE result back to TYPE for the pattern root.  */
  return vect_convert_output (vinfo, last_stmt_info,
			      type, average_stmt, new_vectype);
}
2473 1.1 mrg
2474 1.1 mrg /* Recognize cases in which the input to a cast is wider than its
2475 1.1 mrg output, and the input is fed by a widening operation. Fold this
2476 1.1 mrg by removing the unnecessary intermediate widening. E.g.:
2477 1.1 mrg
2478 1.1 mrg unsigned char a;
2479 1.1 mrg unsigned int b = (unsigned int) a;
2480 1.1 mrg unsigned short c = (unsigned short) b;
2481 1.1 mrg
2482 1.1 mrg -->
2483 1.1 mrg
2484 1.1 mrg unsigned short c = (unsigned short) a;
2485 1.1 mrg
2486 1.1 mrg Although this is rare in input IR, it is an expected side-effect
2487 1.1 mrg of the over-widening pattern above.
2488 1.1 mrg
2489 1.1 mrg This is beneficial also for integer-to-float conversions, if the
2490 1.1 mrg widened integer has more bits than the float, and if the unwidened
2491 1.1 mrg input doesn't. */
2492 1.1 mrg
static gimple *
vect_recog_cast_forwprop_pattern (vec_info *vinfo,
				  stmt_vec_info last_stmt_info, tree *type_out)
{
  /* Check for a cast, including an integer-to-float conversion.  */
  gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
  if (!last_stmt)
    return NULL;
  tree_code code = gimple_assign_rhs_code (last_stmt);
  if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
    return NULL;

  /* Make sure that the rhs is a scalar with a natural bitsize.  */
  tree lhs = gimple_assign_lhs (last_stmt);
  if (!lhs)
    return NULL;
  tree lhs_type = TREE_TYPE (lhs);
  scalar_mode lhs_mode;
  if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
      || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
    return NULL;

  /* Check for a narrowing operation (from a vector point of view).
     The cast input must be an integer strictly wider than the output
     mode for the intermediate widening to be removable.  */
  tree rhs = gimple_assign_rhs1 (last_stmt);
  tree rhs_type = TREE_TYPE (rhs);
  if (!INTEGRAL_TYPE_P (rhs_type)
      || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
      || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
    return NULL;

  /* Try to find an unpromoted input, i.e. the value before the
     widening conversion that produced RHS.  */
  vect_unpromoted_value unprom;
  if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
      || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
    return NULL;

  /* If the bits above RHS_TYPE matter (e.g. for float conversions,
     where LHS is not integral), make sure that they're the
     same when extending from UNPROM as they are when extending from RHS.  */
  if (!INTEGRAL_TYPE_P (lhs_type)
      && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
    return NULL;

  /* We can get the same result by casting UNPROM directly, to avoid
     the unnecessary widening and narrowing.  */
  vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);

  *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
  if (!*type_out)
    return NULL;

  /* Build the replacement cast with the same conversion code as the
     original statement, applied to the unpromoted operand.  */
  tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
  gimple_set_location (pattern_stmt, gimple_location (last_stmt));

  return pattern_stmt;
}
2549 1.1 mrg
2550 1.1 mrg /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
2551 1.1 mrg to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
2552 1.1 mrg
2553 1.1 mrg static gimple *
2554 1.1 mrg vect_recog_widen_shift_pattern (vec_info *vinfo,
2555 1.1 mrg stmt_vec_info last_stmt_info, tree *type_out)
2556 1.1 mrg {
2557 1.1 mrg return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
2558 1.1 mrg LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
2559 1.1 mrg "vect_recog_widen_shift_pattern");
2560 1.1 mrg }
2561 1.1 mrg
2562 1.1 mrg /* Detect a rotate pattern wouldn't be otherwise vectorized:
2563 1.1 mrg
2564 1.1 mrg type a_t, b_t, c_t;
2565 1.1 mrg
2566 1.1 mrg S0 a_t = b_t r<< c_t;
2567 1.1 mrg
2568 1.1 mrg Input/Output:
2569 1.1 mrg
2570 1.1 mrg * STMT_VINFO: The stmt from which the pattern search begins,
2571 1.1 mrg i.e. the shift/rotate stmt. The original stmt (S0) is replaced
2572 1.1 mrg with a sequence:
2573 1.1 mrg
2574 1.1 mrg S1 d_t = -c_t;
2575 1.1 mrg S2 e_t = d_t & (B - 1);
2576 1.1 mrg S3 f_t = b_t << c_t;
2577 1.1 mrg S4 g_t = b_t >> e_t;
2578 1.1 mrg S0 a_t = f_t | g_t;
2579 1.1 mrg
2580 1.1 mrg where B is element bitsize of type.
2581 1.1 mrg
2582 1.1 mrg Output:
2583 1.1 mrg
2584 1.1 mrg * TYPE_OUT: The type of the output of this pattern.
2585 1.1 mrg
2586 1.1 mrg * Return value: A new stmt that will be used to replace the rotate
2587 1.1 mrg S0 stmt. */
2588 1.1 mrg
static gimple *
vect_recog_rotate_pattern (vec_info *vinfo,
			   stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
  gimple *pattern_stmt, *def_stmt;
  enum tree_code rhs_code;
  enum vect_def_type dt;
  optab optab1, optab2;
  edge ext_def = NULL;
  bool bswap16_p = false;

  /* Accept either an explicit rotate assignment or a __builtin_bswap16
     call (handled below as a rotate by 8).  */
  if (is_gimple_assign (last_stmt))
    {
      rhs_code = gimple_assign_rhs_code (last_stmt);
      switch (rhs_code)
	{
	case LROTATE_EXPR:
	case RROTATE_EXPR:
	  break;
	default:
	  return NULL;
	}

      lhs = gimple_assign_lhs (last_stmt);
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      type = TREE_TYPE (oprnd0);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
    }
  else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
    {
      /* __builtin_bswap16 (x) is another form of x r>> 8.
	 The vectorizer has bswap support, but only if the argument isn't
	 promoted.  */
      lhs = gimple_call_lhs (last_stmt);
      oprnd0 = gimple_call_arg (last_stmt, 0);
      type = TREE_TYPE (oprnd0);
      if (!lhs
	  || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
	  || TYPE_PRECISION (type) <= 16
	  || TREE_CODE (oprnd0) != SSA_NAME
	  || BITS_PER_UNIT != 8
	  || !TYPE_UNSIGNED (TREE_TYPE (lhs)))
	return NULL;

      stmt_vec_info def_stmt_info;
      if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
	return NULL;

      if (dt != vect_internal_def)
	return NULL;

      /* If the argument was promoted from a 16-bit value, use the
	 unpromoted value directly.  */
      if (gimple_assign_cast_p (def_stmt))
	{
	  def = gimple_assign_rhs1 (def_stmt);
	  if (INTEGRAL_TYPE_P (TREE_TYPE (def))
	      && TYPE_PRECISION (TREE_TYPE (def)) == 16)
	    oprnd0 = def;
	}

      type = TREE_TYPE (lhs);
      vectype = get_vectype_for_scalar_type (vinfo, type);
      if (vectype == NULL_TREE)
	return NULL;

      /* Try to handle the bswap via a byte permutation first; only if
	 that is unsupported do we fall through to the rotate lowering.  */
      if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
	{
	  /* The encoding uses one stepped pattern for each byte in the
	     16-bit word.  */
	  vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
	  for (unsigned i = 0; i < 3; ++i)
	    for (unsigned j = 0; j < 2; ++j)
	      elts.quick_push ((i + 1) * 2 - j - 1);

	  vec_perm_indices indices (elts, 1,
				    TYPE_VECTOR_SUBPARTS (char_vectype));
	  if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
	    {
	      /* vectorizable_bswap can handle the __builtin_bswap16 if we
		 undo the argument promotion.  */
	      if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
		{
		  def = vect_recog_temp_ssa_var (type, NULL);
		  def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
		  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
		  oprnd0 = def;
		}

	      /* Pattern detected.  */
	      vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);

	      *type_out = vectype;

	      /* Pattern supported.  Create a stmt to be used to replace the
		 pattern, with the unpromoted argument.  */
	      var = vect_recog_temp_ssa_var (type, NULL);
	      pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
						1, oprnd0);
	      gimple_call_set_lhs (pattern_stmt, var);
	      gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
				      gimple_call_fntype (last_stmt));
	      return pattern_stmt;
	    }
	}

      /* Treat the bswap16 as a left rotate by 8 from here on.  */
      oprnd1 = build_int_cst (integer_type_node, 8);
      rhs_code = LROTATE_EXPR;
      bswap16_p = true;
    }
  else
    return NULL;

  /* The lowering below is only valid for unsigned types with no excess
     precision in the rotated operand.  */
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
      || !INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type))
    return NULL;

  stmt_vec_info def_stmt_info;
  if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
    return NULL;

  if (dt != vect_internal_def
      && dt != vect_constant_def
      && dt != vect_external_def)
    return NULL;

  vectype = get_vectype_for_scalar_type (vinfo, type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If vector/vector or vector/scalar rotate is supported by the target,
     don't do anything here.  */
  optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
  if (optab1
      && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    {
     use_rotate:
      /* A native rotate exists.  For the bswap16 form we still need to
	 emit the rotate statement (plus any demotion of the operand);
	 for a plain rotate stmt nothing needs to change.  */
      if (bswap16_p)
	{
	  if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
	    {
	      def = vect_recog_temp_ssa_var (type, NULL);
	      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
	      oprnd0 = def;
	    }

	  /* Pattern detected.  */
	  vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);

	  *type_out = vectype;

	  /* Pattern supported.  Create a stmt to be used to replace the
	     pattern.  */
	  var = vect_recog_temp_ssa_var (type, NULL);
	  pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
					      oprnd1);
	  return pattern_stmt;
	}
      return NULL;
    }

  /* A vector/scalar rotate only helps when the shift amount is
     invariant (BB vectorization or a non-internal def).  */
  if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
    {
      optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
      if (optab2
	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	goto use_rotate;
    }

  /* If vector/vector or vector/scalar shifts aren't supported by the target,
     don't do anything here either.  */
  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
  if (!optab1
      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
      || !optab2
      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
	return NULL;
      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
      if (!optab1
	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
	  || !optab2
	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
	return NULL;
    }

  *type_out = vectype;

  /* For the bswap16 form, demote the operand to the 16-bit type if it
     was promoted.  */
  if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
    {
      def = vect_recog_temp_ssa_var (type, NULL);
      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
      oprnd0 = def;
    }

  /* For an external shift amount, remember the preheader edge so that
     the amount computation can be hoisted out of the loop.  */
  if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
    ext_def = vect_get_external_def_edge (vinfo, oprnd1);

  /* Find (or build) DEF, the shift amount in TYPE's mode.  */
  def = NULL_TREE;
  scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
  if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
    def = oprnd1;
  else if (def_stmt && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (type))
	def = rhs1;
    }

  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (type, NULL);
      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    }
  stype = TREE_TYPE (def);

  /* Compute DEF2, the complementary shift amount B - c.  */
  if (TREE_CODE (def) == INTEGER_CST)
    {
      /* Reject out-of-range or zero rotate counts; a zero count would
	 make the complementary shift equal to the full bit width, which
	 is undefined.  */
      if (!tree_fits_uhwi_p (def)
	  || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
	  || integer_zerop (def))
	return NULL;
      def2 = build_int_cst (stype,
			    GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
    }
  else
    {
      tree vecstype = get_vectype_for_scalar_type (vinfo, stype);

      if (vecstype == NULL_TREE)
	return NULL;
      /* Compute B - c as (-c) & (B - 1), which also maps c == 0 to 0
	 instead of B (S1 and S2 in the function comment).  Insert on the
	 external-def edge when possible so the statements are loop
	 invariant; otherwise add them to the pattern def sequence.  */
      def2 = vect_recog_temp_ssa_var (stype, NULL);
      def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);

      def2 = vect_recog_temp_ssa_var (stype, NULL);
      tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
      def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
				      gimple_assign_lhs (def_stmt), mask);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
    }

  /* Emit the two shifts (S3 and S4): the rotate direction shifts by
     DEF, the opposite direction by DEF2.  */
  var1 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
					? LSHIFT_EXPR : RSHIFT_EXPR,
				  oprnd0, def);
  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

  var2 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
					? RSHIFT_EXPR : LSHIFT_EXPR,
				  oprnd0, def2);
  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

  /* Pattern detected.  */
  vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);

  /* Pattern supported.  Create a stmt to be used to replace the pattern:
     the final OR of the two shifts (S0).  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);

  return pattern_stmt;
}
2876 1.1 mrg
2877 1.1 mrg /* Detect a vector by vector shift pattern that wouldn't be otherwise
2878 1.1 mrg vectorized:
2879 1.1 mrg
2880 1.1 mrg type a_t;
2881 1.1 mrg TYPE b_T, res_T;
2882 1.1 mrg
2883 1.1 mrg S1 a_t = ;
2884 1.1 mrg S2 b_T = ;
2885 1.1 mrg S3 res_T = b_T op a_t;
2886 1.1 mrg
2887 1.1 mrg where type 'TYPE' is a type with different size than 'type',
2888 1.1 mrg and op is <<, >> or rotate.
2889 1.1 mrg
2890 1.1 mrg Also detect cases:
2891 1.1 mrg
2892 1.1 mrg type a_t;
2893 1.1 mrg TYPE b_T, c_T, res_T;
2894 1.1 mrg
2895 1.1 mrg S0 c_T = ;
2896 1.1 mrg S1 a_t = (type) c_T;
2897 1.1 mrg S2 b_T = ;
2898 1.1 mrg S3 res_T = b_T op a_t;
2899 1.1 mrg
2900 1.1 mrg Input/Output:
2901 1.1 mrg
2902 1.1 mrg * STMT_VINFO: The stmt from which the pattern search begins,
2903 1.1 mrg i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2904 1.1 mrg with a shift/rotate which has same type on both operands, in the
2905 1.1 mrg second case just b_T op c_T, in the first case with added cast
2906 1.1 mrg from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2907 1.1 mrg
2908 1.1 mrg Output:
2909 1.1 mrg
2910 1.1 mrg * TYPE_OUT: The type of the output of this pattern.
2911 1.1 mrg
2912 1.1 mrg * Return value: A new stmt that will be used to replace the shift/rotate
2913 1.1 mrg S3 stmt. */
2914 1.1 mrg
static gimple *
vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
					stmt_vec_info stmt_vinfo,
					tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree oprnd0, oprnd1, lhs, var;
  gimple *pattern_stmt;
  enum tree_code rhs_code;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  /* Only interesting when the shift amount's mode differs from the
     shifted operand's mode (a same-mode shift needs no fixup).  */
  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (lhs))
	 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    return NULL;

  /* The shift amount must be defined inside the vectorized region.  */
  stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
  if (!def_vinfo)
    return NULL;

  *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
  if (*type_out == NULL_TREE)
    return NULL;

  /* If the shift amount is itself a cast from a value of OPRND0's mode
     (case S0/S1 in the function comment), try to use that value DEF
     directly instead of introducing a new conversion.  */
  tree def = NULL_TREE;
  gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
  if (def_stmt && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (TREE_TYPE (oprnd0)))
	{
	  if (TYPE_PRECISION (TREE_TYPE (oprnd1))
	      >= TYPE_PRECISION (TREE_TYPE (rhs1)))
	    def = rhs1;
	  else
	    {
	      /* The cast truncated the value, so mask RHS1 down to the
		 bits that the original narrow shift amount kept.  */
	      tree mask
		= build_low_bits_mask (TREE_TYPE (rhs1),
				       TYPE_PRECISION (TREE_TYPE (oprnd1)));
	      def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
	      def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
	      tree vecstype = get_vectype_for_scalar_type (vinfo,
							   TREE_TYPE (rhs1));
	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
	    }
	}
    }

  /* Otherwise convert the shift amount to OPRND0's type.  */
  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    }

  /* Pattern detected.  */
  vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);

  /* Pattern supported.  Create a stmt to be used to replace the pattern:
     the same shift/rotate but with both operands in the same type.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);

  return pattern_stmt;
}
3001 1.1 mrg
3002 1.1 mrg /* Return true iff the target has a vector optab implementing the operation
3003 1.1 mrg CODE on type VECTYPE. */
3004 1.1 mrg
3005 1.1 mrg static bool
3006 1.1 mrg target_has_vecop_for_code (tree_code code, tree vectype)
3007 1.1 mrg {
3008 1.1 mrg optab voptab = optab_for_tree_code (code, vectype, optab_vector);
3009 1.1 mrg return voptab
3010 1.1 mrg && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
3011 1.1 mrg }
3012 1.1 mrg
3013 1.1 mrg /* Verify that the target has optabs of VECTYPE to perform all the steps
3014 1.1 mrg needed by the multiplication-by-immediate synthesis algorithm described by
3015 1.1 mrg ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
3016 1.1 mrg present. Return true iff the target supports all the steps. */
3017 1.1 mrg
3018 1.1 mrg static bool
3019 1.1 mrg target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
3020 1.1 mrg tree vectype, bool synth_shift_p)
3021 1.1 mrg {
3022 1.1 mrg if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
3023 1.1 mrg return false;
3024 1.1 mrg
3025 1.1 mrg bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
3026 1.1 mrg bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
3027 1.1 mrg
3028 1.1 mrg if (var == negate_variant
3029 1.1 mrg && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
3030 1.1 mrg return false;
3031 1.1 mrg
3032 1.1 mrg /* If we must synthesize shifts with additions make sure that vector
3033 1.1 mrg addition is available. */
3034 1.1 mrg if ((var == add_variant || synth_shift_p) && !supports_vplus)
3035 1.1 mrg return false;
3036 1.1 mrg
3037 1.1 mrg for (int i = 1; i < alg->ops; i++)
3038 1.1 mrg {
3039 1.1 mrg switch (alg->op[i])
3040 1.1 mrg {
3041 1.1 mrg case alg_shift:
3042 1.1 mrg break;
3043 1.1 mrg case alg_add_t_m2:
3044 1.1 mrg case alg_add_t2_m:
3045 1.1 mrg case alg_add_factor:
3046 1.1 mrg if (!supports_vplus)
3047 1.1 mrg return false;
3048 1.1 mrg break;
3049 1.1 mrg case alg_sub_t_m2:
3050 1.1 mrg case alg_sub_t2_m:
3051 1.1 mrg case alg_sub_factor:
3052 1.1 mrg if (!supports_vminus)
3053 1.1 mrg return false;
3054 1.1 mrg break;
3055 1.1 mrg case alg_unknown:
3056 1.1 mrg case alg_m:
3057 1.1 mrg case alg_zero:
3058 1.1 mrg case alg_impossible:
3059 1.1 mrg return false;
3060 1.1 mrg default:
3061 1.1 mrg gcc_unreachable ();
3062 1.1 mrg }
3063 1.1 mrg }
3064 1.1 mrg
3065 1.1 mrg return true;
3066 1.1 mrg }
3067 1.1 mrg
3068 1.1 mrg /* Synthesize a left shift of OP by AMNT bits using a series of additions and
3069 1.1 mrg putting the final result in DEST. Append all statements but the last into
3070 1.1 mrg VINFO. Return the last statement. */
3071 1.1 mrg
3072 1.1 mrg static gimple *
3073 1.1 mrg synth_lshift_by_additions (vec_info *vinfo,
3074 1.1 mrg tree dest, tree op, HOST_WIDE_INT amnt,
3075 1.1 mrg stmt_vec_info stmt_info)
3076 1.1 mrg {
3077 1.1 mrg HOST_WIDE_INT i;
3078 1.1 mrg tree itype = TREE_TYPE (op);
3079 1.1 mrg tree prev_res = op;
3080 1.1 mrg gcc_assert (amnt >= 0);
3081 1.1 mrg for (i = 0; i < amnt; i++)
3082 1.1 mrg {
3083 1.1 mrg tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
3084 1.1 mrg : dest;
3085 1.1 mrg gimple *stmt
3086 1.1 mrg = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
3087 1.1 mrg prev_res = tmp_var;
3088 1.1 mrg if (i < amnt - 1)
3089 1.1 mrg append_pattern_def_seq (vinfo, stmt_info, stmt);
3090 1.1 mrg else
3091 1.1 mrg return stmt;
3092 1.1 mrg }
3093 1.1 mrg gcc_unreachable ();
3094 1.1 mrg return NULL;
3095 1.1 mrg }
3096 1.1 mrg
3097 1.1 mrg /* Helper for vect_synth_mult_by_constant. Apply a binary operation
3098 1.1 mrg CODE to operands OP1 and OP2, creating a new temporary SSA var in
3099 1.1 mrg the process if necessary. Append the resulting assignment statements
3100 1.1 mrg to the sequence in STMT_VINFO. Return the SSA variable that holds the
3101 1.1 mrg result of the binary operation. If SYNTH_SHIFT_P is true synthesize
3102 1.1 mrg left shifts using additions. */
3103 1.1 mrg
3104 1.1 mrg static tree
3105 1.1 mrg apply_binop_and_append_stmt (vec_info *vinfo,
3106 1.1 mrg tree_code code, tree op1, tree op2,
3107 1.1 mrg stmt_vec_info stmt_vinfo, bool synth_shift_p)
3108 1.1 mrg {
3109 1.1 mrg if (integer_zerop (op2)
3110 1.1 mrg && (code == LSHIFT_EXPR
3111 1.1 mrg || code == PLUS_EXPR))
3112 1.1 mrg {
3113 1.1 mrg gcc_assert (TREE_CODE (op1) == SSA_NAME);
3114 1.1 mrg return op1;
3115 1.1 mrg }
3116 1.1 mrg
3117 1.1 mrg gimple *stmt;
3118 1.1 mrg tree itype = TREE_TYPE (op1);
3119 1.1 mrg tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
3120 1.1 mrg
3121 1.1 mrg if (code == LSHIFT_EXPR
3122 1.1 mrg && synth_shift_p)
3123 1.1 mrg {
3124 1.1 mrg stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
3125 1.1 mrg TREE_INT_CST_LOW (op2), stmt_vinfo);
3126 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3127 1.1 mrg return tmp_var;
3128 1.1 mrg }
3129 1.1 mrg
3130 1.1 mrg stmt = gimple_build_assign (tmp_var, code, op1, op2);
3131 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3132 1.1 mrg return tmp_var;
3133 1.1 mrg }
3134 1.1 mrg
3135 1.1 mrg /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
3136 1.1 mrg and simple arithmetic operations to be vectorized. Record the statements
3137 1.1 mrg produced in STMT_VINFO and return the last statement in the sequence or
3138 1.1 mrg NULL if it's not possible to synthesize such a multiplication.
3139 1.1 mrg This function mirrors the behavior of expand_mult_const in expmed.cc but
3140 1.1 mrg works on tree-ssa form. */
3141 1.1 mrg
3142 1.1 mrg static gimple *
3143 1.1 mrg vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
3144 1.1 mrg stmt_vec_info stmt_vinfo)
3145 1.1 mrg {
3146 1.1 mrg tree itype = TREE_TYPE (op);
3147 1.1 mrg machine_mode mode = TYPE_MODE (itype);
3148 1.1 mrg struct algorithm alg;
3149 1.1 mrg mult_variant variant;
3150 1.1 mrg if (!tree_fits_shwi_p (val))
3151 1.1 mrg return NULL;
3152 1.1 mrg
3153 1.1 mrg /* Multiplication synthesis by shifts, adds and subs can introduce
3154 1.1 mrg signed overflow where the original operation didn't. Perform the
3155 1.1 mrg operations on an unsigned type and cast back to avoid this.
3156 1.1 mrg In the future we may want to relax this for synthesis algorithms
3157 1.1 mrg that we can prove do not cause unexpected overflow. */
3158 1.1 mrg bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
3159 1.1 mrg
3160 1.1 mrg tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
3161 1.1 mrg tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
3162 1.1 mrg if (!vectype)
3163 1.1 mrg return NULL;
3164 1.1 mrg
3165 1.1 mrg /* Targets that don't support vector shifts but support vector additions
3166 1.1 mrg can synthesize shifts that way. */
3167 1.1 mrg bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
3168 1.1 mrg
3169 1.1 mrg HOST_WIDE_INT hwval = tree_to_shwi (val);
3170 1.1 mrg /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
3171 1.1 mrg The vectorizer's benefit analysis will decide whether it's beneficial
3172 1.1 mrg to do this. */
3173 1.1 mrg bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
3174 1.1 mrg ? TYPE_MODE (vectype) : mode,
3175 1.1 mrg hwval, &alg, &variant, MAX_COST);
3176 1.1 mrg if (!possible)
3177 1.1 mrg return NULL;
3178 1.1 mrg
3179 1.1 mrg if (!target_supports_mult_synth_alg (&alg, variant, vectype, synth_shift_p))
3180 1.1 mrg return NULL;
3181 1.1 mrg
3182 1.1 mrg tree accumulator;
3183 1.1 mrg
3184 1.1 mrg /* Clear out the sequence of statements so we can populate it below. */
3185 1.1 mrg gimple *stmt = NULL;
3186 1.1 mrg
3187 1.1 mrg if (cast_to_unsigned_p)
3188 1.1 mrg {
3189 1.1 mrg tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
3190 1.1 mrg stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
3191 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3192 1.1 mrg op = tmp_op;
3193 1.1 mrg }
3194 1.1 mrg
3195 1.1 mrg if (alg.op[0] == alg_zero)
3196 1.1 mrg accumulator = build_int_cst (multtype, 0);
3197 1.1 mrg else
3198 1.1 mrg accumulator = op;
3199 1.1 mrg
3200 1.1 mrg bool needs_fixup = (variant == negate_variant)
3201 1.1 mrg || (variant == add_variant);
3202 1.1 mrg
3203 1.1 mrg for (int i = 1; i < alg.ops; i++)
3204 1.1 mrg {
3205 1.1 mrg tree shft_log = build_int_cst (multtype, alg.log[i]);
3206 1.1 mrg tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3207 1.1 mrg tree tmp_var = NULL_TREE;
3208 1.1 mrg
3209 1.1 mrg switch (alg.op[i])
3210 1.1 mrg {
3211 1.1 mrg case alg_shift:
3212 1.1 mrg if (synth_shift_p)
3213 1.1 mrg stmt
3214 1.1 mrg = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
3215 1.1 mrg alg.log[i], stmt_vinfo);
3216 1.1 mrg else
3217 1.1 mrg stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
3218 1.1 mrg shft_log);
3219 1.1 mrg break;
3220 1.1 mrg case alg_add_t_m2:
3221 1.1 mrg tmp_var
3222 1.1 mrg = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
3223 1.1 mrg stmt_vinfo, synth_shift_p);
3224 1.1 mrg stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
3225 1.1 mrg tmp_var);
3226 1.1 mrg break;
3227 1.1 mrg case alg_sub_t_m2:
3228 1.1 mrg tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
3229 1.1 mrg shft_log, stmt_vinfo,
3230 1.1 mrg synth_shift_p);
3231 1.1 mrg /* In some algorithms the first step involves zeroing the
3232 1.1 mrg accumulator. If subtracting from such an accumulator
3233 1.1 mrg just emit the negation directly. */
3234 1.1 mrg if (integer_zerop (accumulator))
3235 1.1 mrg stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
3236 1.1 mrg else
3237 1.1 mrg stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
3238 1.1 mrg tmp_var);
3239 1.1 mrg break;
3240 1.1 mrg case alg_add_t2_m:
3241 1.1 mrg tmp_var
3242 1.1 mrg = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3243 1.1 mrg shft_log, stmt_vinfo, synth_shift_p);
3244 1.1 mrg stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
3245 1.1 mrg break;
3246 1.1 mrg case alg_sub_t2_m:
3247 1.1 mrg tmp_var
3248 1.1 mrg = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3249 1.1 mrg shft_log, stmt_vinfo, synth_shift_p);
3250 1.1 mrg stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
3251 1.1 mrg break;
3252 1.1 mrg case alg_add_factor:
3253 1.1 mrg tmp_var
3254 1.1 mrg = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3255 1.1 mrg shft_log, stmt_vinfo, synth_shift_p);
3256 1.1 mrg stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
3257 1.1 mrg tmp_var);
3258 1.1 mrg break;
3259 1.1 mrg case alg_sub_factor:
3260 1.1 mrg tmp_var
3261 1.1 mrg = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3262 1.1 mrg shft_log, stmt_vinfo, synth_shift_p);
3263 1.1 mrg stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
3264 1.1 mrg accumulator);
3265 1.1 mrg break;
3266 1.1 mrg default:
3267 1.1 mrg gcc_unreachable ();
3268 1.1 mrg }
3269 1.1 mrg /* We don't want to append the last stmt in the sequence to stmt_vinfo
3270 1.1 mrg but rather return it directly. */
3271 1.1 mrg
3272 1.1 mrg if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
3273 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3274 1.1 mrg accumulator = accum_tmp;
3275 1.1 mrg }
3276 1.1 mrg if (variant == negate_variant)
3277 1.1 mrg {
3278 1.1 mrg tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3279 1.1 mrg stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
3280 1.1 mrg accumulator = accum_tmp;
3281 1.1 mrg if (cast_to_unsigned_p)
3282 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3283 1.1 mrg }
3284 1.1 mrg else if (variant == add_variant)
3285 1.1 mrg {
3286 1.1 mrg tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3287 1.1 mrg stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
3288 1.1 mrg accumulator = accum_tmp;
3289 1.1 mrg if (cast_to_unsigned_p)
3290 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3291 1.1 mrg }
3292 1.1 mrg /* Move back to a signed if needed. */
3293 1.1 mrg if (cast_to_unsigned_p)
3294 1.1 mrg {
3295 1.1 mrg tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
3296 1.1 mrg stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
3297 1.1 mrg }
3298 1.1 mrg
3299 1.1 mrg return stmt;
3300 1.1 mrg }
3301 1.1 mrg
3302 1.1 mrg /* Detect multiplication by constant and convert it into a sequence of
3303 1.1 mrg shifts and additions, subtractions, negations. We reuse the
3304 1.1 mrg choose_mult_variant algorithms from expmed.cc
3305 1.1 mrg
3306 1.1 mrg Input/Output:
3307 1.1 mrg
3308 1.1 mrg STMT_VINFO: The stmt from which the pattern search begins,
3309 1.1 mrg i.e. the mult stmt.
3310 1.1 mrg
3311 1.1 mrg Output:
3312 1.1 mrg
3313 1.1 mrg * TYPE_OUT: The type of the output of this pattern.
3314 1.1 mrg
3315 1.1 mrg * Return value: A new stmt that will be used to replace
3316 1.1 mrg the multiplication. */
3317 1.1 mrg
3318 1.1 mrg static gimple *
3319 1.1 mrg vect_recog_mult_pattern (vec_info *vinfo,
3320 1.1 mrg stmt_vec_info stmt_vinfo, tree *type_out)
3321 1.1 mrg {
3322 1.1 mrg gimple *last_stmt = stmt_vinfo->stmt;
3323 1.1 mrg tree oprnd0, oprnd1, vectype, itype;
3324 1.1 mrg gimple *pattern_stmt;
3325 1.1 mrg
3326 1.1 mrg if (!is_gimple_assign (last_stmt))
3327 1.1 mrg return NULL;
3328 1.1 mrg
3329 1.1 mrg if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
3330 1.1 mrg return NULL;
3331 1.1 mrg
3332 1.1 mrg oprnd0 = gimple_assign_rhs1 (last_stmt);
3333 1.1 mrg oprnd1 = gimple_assign_rhs2 (last_stmt);
3334 1.1 mrg itype = TREE_TYPE (oprnd0);
3335 1.1 mrg
3336 1.1 mrg if (TREE_CODE (oprnd0) != SSA_NAME
3337 1.1 mrg || TREE_CODE (oprnd1) != INTEGER_CST
3338 1.1 mrg || !INTEGRAL_TYPE_P (itype)
3339 1.1 mrg || !type_has_mode_precision_p (itype))
3340 1.1 mrg return NULL;
3341 1.1 mrg
3342 1.1 mrg vectype = get_vectype_for_scalar_type (vinfo, itype);
3343 1.1 mrg if (vectype == NULL_TREE)
3344 1.1 mrg return NULL;
3345 1.1 mrg
3346 1.1 mrg /* If the target can handle vectorized multiplication natively,
3347 1.1 mrg don't attempt to optimize this. */
3348 1.1 mrg optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
3349 1.1 mrg if (mul_optab != unknown_optab)
3350 1.1 mrg {
3351 1.1 mrg machine_mode vec_mode = TYPE_MODE (vectype);
3352 1.1 mrg int icode = (int) optab_handler (mul_optab, vec_mode);
3353 1.1 mrg if (icode != CODE_FOR_nothing)
3354 1.1 mrg return NULL;
3355 1.1 mrg }
3356 1.1 mrg
3357 1.1 mrg pattern_stmt = vect_synth_mult_by_constant (vinfo,
3358 1.1 mrg oprnd0, oprnd1, stmt_vinfo);
3359 1.1 mrg if (!pattern_stmt)
3360 1.1 mrg return NULL;
3361 1.1 mrg
3362 1.1 mrg /* Pattern detected. */
3363 1.1 mrg vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
3364 1.1 mrg
3365 1.1 mrg *type_out = vectype;
3366 1.1 mrg
3367 1.1 mrg return pattern_stmt;
3368 1.1 mrg }
3369 1.1 mrg
3370 1.1 mrg /* Detect a signed division by a constant that wouldn't be
3371 1.1 mrg otherwise vectorized:
3372 1.1 mrg
3373 1.1 mrg type a_t, b_t;
3374 1.1 mrg
3375 1.1 mrg S1 a_t = b_t / N;
3376 1.1 mrg
3377 1.1 mrg where type 'type' is an integral type and N is a constant.
3378 1.1 mrg
3379 1.1 mrg Similarly handle modulo by a constant:
3380 1.1 mrg
3381 1.1 mrg S4 a_t = b_t % N;
3382 1.1 mrg
3383 1.1 mrg Input/Output:
3384 1.1 mrg
3385 1.1 mrg * STMT_VINFO: The stmt from which the pattern search begins,
3386 1.1 mrg i.e. the division stmt. S1 is replaced by if N is a power
3387 1.1 mrg of two constant and type is signed:
3388 1.1 mrg S3 y_t = b_t < 0 ? N - 1 : 0;
3389 1.1 mrg S2 x_t = b_t + y_t;
3390 1.1 mrg S1' a_t = x_t >> log2 (N);
3391 1.1 mrg
3392 1.1 mrg S4 is replaced if N is a power of two constant and
3393 1.1 mrg type is signed by (where *_T temporaries have unsigned type):
3394 1.1 mrg S9 y_T = b_t < 0 ? -1U : 0U;
3395 1.1 mrg S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
3396 1.1 mrg S7 z_t = (type) z_T;
3397 1.1 mrg S6 w_t = b_t + z_t;
3398 1.1 mrg S5 x_t = w_t & (N - 1);
3399 1.1 mrg S4' a_t = x_t - z_t;
3400 1.1 mrg
3401 1.1 mrg Output:
3402 1.1 mrg
3403 1.1 mrg * TYPE_OUT: The type of the output of this pattern.
3404 1.1 mrg
3405 1.1 mrg * Return value: A new stmt that will be used to replace the division
3406 1.1 mrg S1 or modulo S4 stmt. */
3407 1.1 mrg
3408 1.1 mrg static gimple *
3409 1.1 mrg vect_recog_divmod_pattern (vec_info *vinfo,
3410 1.1 mrg stmt_vec_info stmt_vinfo, tree *type_out)
3411 1.1 mrg {
3412 1.1 mrg gimple *last_stmt = stmt_vinfo->stmt;
3413 1.1 mrg tree oprnd0, oprnd1, vectype, itype, cond;
3414 1.1 mrg gimple *pattern_stmt, *def_stmt;
3415 1.1 mrg enum tree_code rhs_code;
3416 1.1 mrg optab optab;
3417 1.1 mrg tree q;
3418 1.1 mrg int dummy_int, prec;
3419 1.1 mrg
3420 1.1 mrg if (!is_gimple_assign (last_stmt))
3421 1.1 mrg return NULL;
3422 1.1 mrg
3423 1.1 mrg rhs_code = gimple_assign_rhs_code (last_stmt);
3424 1.1 mrg switch (rhs_code)
3425 1.1 mrg {
3426 1.1 mrg case TRUNC_DIV_EXPR:
3427 1.1 mrg case EXACT_DIV_EXPR:
3428 1.1 mrg case TRUNC_MOD_EXPR:
3429 1.1 mrg break;
3430 1.1 mrg default:
3431 1.1 mrg return NULL;
3432 1.1 mrg }
3433 1.1 mrg
3434 1.1 mrg oprnd0 = gimple_assign_rhs1 (last_stmt);
3435 1.1 mrg oprnd1 = gimple_assign_rhs2 (last_stmt);
3436 1.1 mrg itype = TREE_TYPE (oprnd0);
3437 1.1 mrg if (TREE_CODE (oprnd0) != SSA_NAME
3438 1.1 mrg || TREE_CODE (oprnd1) != INTEGER_CST
3439 1.1 mrg || TREE_CODE (itype) != INTEGER_TYPE
3440 1.1 mrg || !type_has_mode_precision_p (itype))
3441 1.1 mrg return NULL;
3442 1.1 mrg
3443 1.1 mrg scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
3444 1.1 mrg vectype = get_vectype_for_scalar_type (vinfo, itype);
3445 1.1 mrg if (vectype == NULL_TREE)
3446 1.1 mrg return NULL;
3447 1.1 mrg
3448 1.1 mrg if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
3449 1.1 mrg {
3450 1.1 mrg /* If the target can handle vectorized division or modulo natively,
3451 1.1 mrg don't attempt to optimize this, since native division is likely
3452 1.1 mrg to give smaller code. */
3453 1.1 mrg optab = optab_for_tree_code (rhs_code, vectype, optab_default);
3454 1.1 mrg if (optab != unknown_optab)
3455 1.1 mrg {
3456 1.1 mrg machine_mode vec_mode = TYPE_MODE (vectype);
3457 1.1 mrg int icode = (int) optab_handler (optab, vec_mode);
3458 1.1 mrg if (icode != CODE_FOR_nothing)
3459 1.1 mrg return NULL;
3460 1.1 mrg }
3461 1.1 mrg }
3462 1.1 mrg
3463 1.1 mrg prec = TYPE_PRECISION (itype);
3464 1.1 mrg if (integer_pow2p (oprnd1))
3465 1.1 mrg {
3466 1.1 mrg if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
3467 1.1 mrg return NULL;
3468 1.1 mrg
3469 1.1 mrg /* Pattern detected. */
3470 1.1 mrg vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3471 1.1 mrg
3472 1.1 mrg *type_out = vectype;
3473 1.1 mrg
3474 1.1 mrg /* Check if the target supports this internal function. */
3475 1.1 mrg internal_fn ifn = IFN_DIV_POW2;
3476 1.1 mrg if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
3477 1.1 mrg {
3478 1.1 mrg tree shift = build_int_cst (itype, tree_log2 (oprnd1));
3479 1.1 mrg
3480 1.1 mrg tree var_div = vect_recog_temp_ssa_var (itype, NULL);
3481 1.1 mrg gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
3482 1.1 mrg gimple_call_set_lhs (div_stmt, var_div);
3483 1.1 mrg
3484 1.1 mrg if (rhs_code == TRUNC_MOD_EXPR)
3485 1.1 mrg {
3486 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
3487 1.1 mrg def_stmt
3488 1.1 mrg = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3489 1.1 mrg LSHIFT_EXPR, var_div, shift);
3490 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3491 1.1 mrg pattern_stmt
3492 1.1 mrg = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3493 1.1 mrg MINUS_EXPR, oprnd0,
3494 1.1 mrg gimple_assign_lhs (def_stmt));
3495 1.1 mrg }
3496 1.1 mrg else
3497 1.1 mrg pattern_stmt = div_stmt;
3498 1.1 mrg gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3499 1.1 mrg
3500 1.1 mrg return pattern_stmt;
3501 1.1 mrg }
3502 1.1 mrg
3503 1.1 mrg cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
3504 1.1 mrg build_int_cst (itype, 0));
3505 1.1 mrg if (rhs_code == TRUNC_DIV_EXPR
3506 1.1 mrg || rhs_code == EXACT_DIV_EXPR)
3507 1.1 mrg {
3508 1.1 mrg tree var = vect_recog_temp_ssa_var (itype, NULL);
3509 1.1 mrg tree shift;
3510 1.1 mrg def_stmt
3511 1.1 mrg = gimple_build_assign (var, COND_EXPR, cond,
3512 1.1 mrg fold_build2 (MINUS_EXPR, itype, oprnd1,
3513 1.1 mrg build_int_cst (itype, 1)),
3514 1.1 mrg build_int_cst (itype, 0));
3515 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3516 1.1 mrg var = vect_recog_temp_ssa_var (itype, NULL);
3517 1.1 mrg def_stmt
3518 1.1 mrg = gimple_build_assign (var, PLUS_EXPR, oprnd0,
3519 1.1 mrg gimple_assign_lhs (def_stmt));
3520 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3521 1.1 mrg
3522 1.1 mrg shift = build_int_cst (itype, tree_log2 (oprnd1));
3523 1.1 mrg pattern_stmt
3524 1.1 mrg = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3525 1.1 mrg RSHIFT_EXPR, var, shift);
3526 1.1 mrg }
3527 1.1 mrg else
3528 1.1 mrg {
3529 1.1 mrg tree signmask;
3530 1.1 mrg if (compare_tree_int (oprnd1, 2) == 0)
3531 1.1 mrg {
3532 1.1 mrg signmask = vect_recog_temp_ssa_var (itype, NULL);
3533 1.1 mrg def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
3534 1.1 mrg build_int_cst (itype, 1),
3535 1.1 mrg build_int_cst (itype, 0));
3536 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3537 1.1 mrg }
3538 1.1 mrg else
3539 1.1 mrg {
3540 1.1 mrg tree utype
3541 1.1 mrg = build_nonstandard_integer_type (prec, 1);
3542 1.1 mrg tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
3543 1.1 mrg tree shift
3544 1.1 mrg = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
3545 1.1 mrg - tree_log2 (oprnd1));
3546 1.1 mrg tree var = vect_recog_temp_ssa_var (utype, NULL);
3547 1.1 mrg
3548 1.1 mrg def_stmt = gimple_build_assign (var, COND_EXPR, cond,
3549 1.1 mrg build_int_cst (utype, -1),
3550 1.1 mrg build_int_cst (utype, 0));
3551 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3552 1.1 mrg var = vect_recog_temp_ssa_var (utype, NULL);
3553 1.1 mrg def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
3554 1.1 mrg gimple_assign_lhs (def_stmt),
3555 1.1 mrg shift);
3556 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3557 1.1 mrg signmask = vect_recog_temp_ssa_var (itype, NULL);
3558 1.1 mrg def_stmt
3559 1.1 mrg = gimple_build_assign (signmask, NOP_EXPR, var);
3560 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3561 1.1 mrg }
3562 1.1 mrg def_stmt
3563 1.1 mrg = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3564 1.1 mrg PLUS_EXPR, oprnd0, signmask);
3565 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3566 1.1 mrg def_stmt
3567 1.1 mrg = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3568 1.1 mrg BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
3569 1.1 mrg fold_build2 (MINUS_EXPR, itype, oprnd1,
3570 1.1 mrg build_int_cst (itype, 1)));
3571 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3572 1.1 mrg
3573 1.1 mrg pattern_stmt
3574 1.1 mrg = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3575 1.1 mrg MINUS_EXPR, gimple_assign_lhs (def_stmt),
3576 1.1 mrg signmask);
3577 1.1 mrg }
3578 1.1 mrg
3579 1.1 mrg return pattern_stmt;
3580 1.1 mrg }
3581 1.1 mrg
3582 1.1 mrg if (prec > HOST_BITS_PER_WIDE_INT
3583 1.1 mrg || integer_zerop (oprnd1))
3584 1.1 mrg return NULL;
3585 1.1 mrg
3586 1.1 mrg if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
3587 1.1 mrg return NULL;
3588 1.1 mrg
3589 1.1 mrg if (TYPE_UNSIGNED (itype))
3590 1.1 mrg {
3591 1.1 mrg unsigned HOST_WIDE_INT mh, ml;
3592 1.1 mrg int pre_shift, post_shift;
3593 1.1 mrg unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
3594 1.1 mrg & GET_MODE_MASK (itype_mode));
3595 1.1 mrg tree t1, t2, t3, t4;
3596 1.1 mrg
3597 1.1 mrg if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
3598 1.1 mrg /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
3599 1.1 mrg return NULL;
3600 1.1 mrg
3601 1.1 mrg /* Find a suitable multiplier and right shift count
3602 1.1 mrg instead of multiplying with D. */
3603 1.1 mrg mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
3604 1.1 mrg
3605 1.1 mrg /* If the suggested multiplier is more than SIZE bits, we can do better
3606 1.1 mrg for even divisors, using an initial right shift. */
3607 1.1 mrg if (mh != 0 && (d & 1) == 0)
3608 1.1 mrg {
3609 1.1 mrg pre_shift = ctz_or_zero (d);
3610 1.1 mrg mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
3611 1.1 mrg &ml, &post_shift, &dummy_int);
3612 1.1 mrg gcc_assert (!mh);
3613 1.1 mrg }
3614 1.1 mrg else
3615 1.1 mrg pre_shift = 0;
3616 1.1 mrg
3617 1.1 mrg if (mh != 0)
3618 1.1 mrg {
3619 1.1 mrg if (post_shift - 1 >= prec)
3620 1.1 mrg return NULL;
3621 1.1 mrg
3622 1.1 mrg /* t1 = oprnd0 h* ml;
3623 1.1 mrg t2 = oprnd0 - t1;
3624 1.1 mrg t3 = t2 >> 1;
3625 1.1 mrg t4 = t1 + t3;
3626 1.1 mrg q = t4 >> (post_shift - 1); */
3627 1.1 mrg t1 = vect_recog_temp_ssa_var (itype, NULL);
3628 1.1 mrg def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3629 1.1 mrg build_int_cst (itype, ml));
3630 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3631 1.1 mrg
3632 1.1 mrg t2 = vect_recog_temp_ssa_var (itype, NULL);
3633 1.1 mrg def_stmt
3634 1.1 mrg = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
3635 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3636 1.1 mrg
3637 1.1 mrg t3 = vect_recog_temp_ssa_var (itype, NULL);
3638 1.1 mrg def_stmt
3639 1.1 mrg = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
3640 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3641 1.1 mrg
3642 1.1 mrg t4 = vect_recog_temp_ssa_var (itype, NULL);
3643 1.1 mrg def_stmt
3644 1.1 mrg = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
3645 1.1 mrg
3646 1.1 mrg if (post_shift != 1)
3647 1.1 mrg {
3648 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3649 1.1 mrg
3650 1.1 mrg q = vect_recog_temp_ssa_var (itype, NULL);
3651 1.1 mrg pattern_stmt
3652 1.1 mrg = gimple_build_assign (q, RSHIFT_EXPR, t4,
3653 1.1 mrg build_int_cst (itype, post_shift - 1));
3654 1.1 mrg }
3655 1.1 mrg else
3656 1.1 mrg {
3657 1.1 mrg q = t4;
3658 1.1 mrg pattern_stmt = def_stmt;
3659 1.1 mrg }
3660 1.1 mrg }
3661 1.1 mrg else
3662 1.1 mrg {
3663 1.1 mrg if (pre_shift >= prec || post_shift >= prec)
3664 1.1 mrg return NULL;
3665 1.1 mrg
3666 1.1 mrg /* t1 = oprnd0 >> pre_shift;
3667 1.1 mrg t2 = t1 h* ml;
3668 1.1 mrg q = t2 >> post_shift; */
3669 1.1 mrg if (pre_shift)
3670 1.1 mrg {
3671 1.1 mrg t1 = vect_recog_temp_ssa_var (itype, NULL);
3672 1.1 mrg def_stmt
3673 1.1 mrg = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
3674 1.1 mrg build_int_cst (NULL, pre_shift));
3675 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3676 1.1 mrg }
3677 1.1 mrg else
3678 1.1 mrg t1 = oprnd0;
3679 1.1 mrg
3680 1.1 mrg t2 = vect_recog_temp_ssa_var (itype, NULL);
3681 1.1 mrg def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
3682 1.1 mrg build_int_cst (itype, ml));
3683 1.1 mrg
3684 1.1 mrg if (post_shift)
3685 1.1 mrg {
3686 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3687 1.1 mrg
3688 1.1 mrg q = vect_recog_temp_ssa_var (itype, NULL);
3689 1.1 mrg def_stmt
3690 1.1 mrg = gimple_build_assign (q, RSHIFT_EXPR, t2,
3691 1.1 mrg build_int_cst (itype, post_shift));
3692 1.1 mrg }
3693 1.1 mrg else
3694 1.1 mrg q = t2;
3695 1.1 mrg
3696 1.1 mrg pattern_stmt = def_stmt;
3697 1.1 mrg }
3698 1.1 mrg }
3699 1.1 mrg else
3700 1.1 mrg {
3701 1.1 mrg unsigned HOST_WIDE_INT ml;
3702 1.1 mrg int post_shift;
3703 1.1 mrg HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
3704 1.1 mrg unsigned HOST_WIDE_INT abs_d;
3705 1.1 mrg bool add = false;
3706 1.1 mrg tree t1, t2, t3, t4;
3707 1.1 mrg
3708 1.1 mrg /* Give up for -1. */
3709 1.1 mrg if (d == -1)
3710 1.1 mrg return NULL;
3711 1.1 mrg
3712 1.1 mrg /* Since d might be INT_MIN, we have to cast to
3713 1.1 mrg unsigned HOST_WIDE_INT before negating to avoid
3714 1.1 mrg undefined signed overflow. */
3715 1.1 mrg abs_d = (d >= 0
3716 1.1 mrg ? (unsigned HOST_WIDE_INT) d
3717 1.1 mrg : - (unsigned HOST_WIDE_INT) d);
3718 1.1 mrg
3719 1.1 mrg /* n rem d = n rem -d */
3720 1.1 mrg if (rhs_code == TRUNC_MOD_EXPR && d < 0)
3721 1.1 mrg {
3722 1.1 mrg d = abs_d;
3723 1.1 mrg oprnd1 = build_int_cst (itype, abs_d);
3724 1.1 mrg }
3725 1.1 mrg if (HOST_BITS_PER_WIDE_INT >= prec
3726 1.1 mrg && abs_d == HOST_WIDE_INT_1U << (prec - 1))
3727 1.1 mrg /* This case is not handled correctly below. */
3728 1.1 mrg return NULL;
3729 1.1 mrg
3730 1.1 mrg choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
3731 1.1 mrg if (ml >= HOST_WIDE_INT_1U << (prec - 1))
3732 1.1 mrg {
3733 1.1 mrg add = true;
3734 1.1 mrg ml |= HOST_WIDE_INT_M1U << (prec - 1);
3735 1.1 mrg }
3736 1.1 mrg if (post_shift >= prec)
3737 1.1 mrg return NULL;
3738 1.1 mrg
3739 1.1 mrg /* t1 = oprnd0 h* ml; */
3740 1.1 mrg t1 = vect_recog_temp_ssa_var (itype, NULL);
3741 1.1 mrg def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3742 1.1 mrg build_int_cst (itype, ml));
3743 1.1 mrg
3744 1.1 mrg if (add)
3745 1.1 mrg {
3746 1.1 mrg /* t2 = t1 + oprnd0; */
3747 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3748 1.1 mrg t2 = vect_recog_temp_ssa_var (itype, NULL);
3749 1.1 mrg def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
3750 1.1 mrg }
3751 1.1 mrg else
3752 1.1 mrg t2 = t1;
3753 1.1 mrg
3754 1.1 mrg if (post_shift)
3755 1.1 mrg {
3756 1.1 mrg /* t3 = t2 >> post_shift; */
3757 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3758 1.1 mrg t3 = vect_recog_temp_ssa_var (itype, NULL);
3759 1.1 mrg def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3760 1.1 mrg build_int_cst (itype, post_shift));
3761 1.1 mrg }
3762 1.1 mrg else
3763 1.1 mrg t3 = t2;
3764 1.1 mrg
3765 1.1 mrg int msb = 1;
3766 1.1 mrg value_range r;
3767 1.1 mrg get_range_query (cfun)->range_of_expr (r, oprnd0);
3768 1.1 mrg if (r.kind () == VR_RANGE)
3769 1.1 mrg {
3770 1.1 mrg if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
3771 1.1 mrg msb = 0;
3772 1.1 mrg else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
3773 1.1 mrg msb = -1;
3774 1.1 mrg }
3775 1.1 mrg
3776 1.1 mrg if (msb == 0 && d >= 0)
3777 1.1 mrg {
3778 1.1 mrg /* q = t3; */
3779 1.1 mrg q = t3;
3780 1.1 mrg pattern_stmt = def_stmt;
3781 1.1 mrg }
3782 1.1 mrg else
3783 1.1 mrg {
3784 1.1 mrg /* t4 = oprnd0 >> (prec - 1);
3785 1.1 mrg or if we know from VRP that oprnd0 >= 0
3786 1.1 mrg t4 = 0;
3787 1.1 mrg or if we know from VRP that oprnd0 < 0
3788 1.1 mrg t4 = -1; */
3789 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3790 1.1 mrg t4 = vect_recog_temp_ssa_var (itype, NULL);
3791 1.1 mrg if (msb != 1)
3792 1.1 mrg def_stmt = gimple_build_assign (t4, INTEGER_CST,
3793 1.1 mrg build_int_cst (itype, msb));
3794 1.1 mrg else
3795 1.1 mrg def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3796 1.1 mrg build_int_cst (itype, prec - 1));
3797 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3798 1.1 mrg
3799 1.1 mrg /* q = t3 - t4; or q = t4 - t3; */
3800 1.1 mrg q = vect_recog_temp_ssa_var (itype, NULL);
3801 1.1 mrg pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3802 1.1 mrg d < 0 ? t3 : t4);
3803 1.1 mrg }
3804 1.1 mrg }
3805 1.1 mrg
3806 1.1 mrg if (rhs_code == TRUNC_MOD_EXPR)
3807 1.1 mrg {
3808 1.1 mrg tree r, t1;
3809 1.1 mrg
3810 1.1 mrg /* We divided. Now finish by:
3811 1.1 mrg t1 = q * oprnd1;
3812 1.1 mrg r = oprnd0 - t1; */
3813 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
3814 1.1 mrg
3815 1.1 mrg t1 = vect_recog_temp_ssa_var (itype, NULL);
3816 1.1 mrg def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3817 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3818 1.1 mrg
3819 1.1 mrg r = vect_recog_temp_ssa_var (itype, NULL);
3820 1.1 mrg pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3821 1.1 mrg }
3822 1.1 mrg
3823 1.1 mrg /* Pattern detected. */
3824 1.1 mrg vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3825 1.1 mrg
3826 1.1 mrg *type_out = vectype;
3827 1.1 mrg return pattern_stmt;
3828 1.1 mrg }
3829 1.1 mrg
/* Function vect_recog_mixed_size_cond_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE a_T, b_T, c_T;
   loop:
     S1  a_T = x_t CMP y_t ? b_T : c_T;

   where type 'TYPE' is an integral type which has different size
   from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
   than 'type', the constants need to fit into an integer type
   with the same width as 'type') or results of conversion from 'type'.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.
	Additionally a def_stmt is added.

	a_it = x_t CMP y_t ? b_it : c_it;
	a_T = (TYPE) a_it;  */

static gimple *
vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
				    stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree cond_expr, then_clause, else_clause;
  tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
  gimple *pattern_stmt, *def_stmt;
  tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
  gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
  bool promotion;
  tree comp_scalar_type;

  /* Only an internally-defined COND_EXPR assignment can start the
     pattern.  */
  if (!is_gimple_assign (last_stmt)
      || gimple_assign_rhs_code (last_stmt) != COND_EXPR
      || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;

  cond_expr = gimple_assign_rhs1 (last_stmt);
  then_clause = gimple_assign_rhs2 (last_stmt);
  else_clause = gimple_assign_rhs3 (last_stmt);

  /* The condition must be an embedded comparison, not an SSA name.  */
  if (!COMPARISON_CLASS_P (cond_expr))
    return NULL;

  /* The comparison operand type must be vectorizable.  */
  comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
  comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
  if (comp_vectype == NULL_TREE)
    return NULL;

  /* The result type must be integral and differ from the comparison
     type; if either arm is not a constant, the comparison type must be
     integral too (so a conversion from it makes sense).  */
  type = TREE_TYPE (gimple_assign_lhs (last_stmt));
  if (types_compatible_p (type, comp_scalar_type)
      || ((TREE_CODE (then_clause) != INTEGER_CST
	   || TREE_CODE (else_clause) != INTEGER_CST)
	  && !INTEGRAL_TYPE_P (comp_scalar_type))
      || !INTEGRAL_TYPE_P (type))
    return NULL;

  /* Each non-constant arm must be the result of a conversion; record
     the pre-conversion type and defining stmt for each.  */
  if ((TREE_CODE (then_clause) != INTEGER_CST
       && !type_conversion_p (vinfo, then_clause, false,
			      &orig_type0, &def_stmt0, &promotion))
      || (TREE_CODE (else_clause) != INTEGER_CST
	  && !type_conversion_p (vinfo, else_clause, false,
				 &orig_type1, &def_stmt1, &promotion)))
    return NULL;

  /* If both arms were converted, they must come from the same type.  */
  if (orig_type0 && orig_type1
      && !types_compatible_p (orig_type0, orig_type1))
    return NULL;

  /* Strip the conversion from the THEN arm; the original (narrow) type
     becomes the intermediate type of the pattern.  */
  if (orig_type0)
    {
      if (!types_compatible_p (orig_type0, comp_scalar_type))
	return NULL;
      then_clause = gimple_assign_rhs1 (def_stmt0);
      itype = orig_type0;
    }

  /* Likewise for the ELSE arm.  */
  if (orig_type1)
    {
      if (!types_compatible_p (orig_type1, comp_scalar_type))
	return NULL;
      else_clause = gimple_assign_rhs1 (def_stmt1);
      itype = orig_type1;
    }


  /* Element size (in bits) of the comparison's vector type.  */
  HOST_WIDE_INT cmp_mode_size
    = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));

  /* The pattern only helps when the result and comparison element sizes
     actually differ.  */
  scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
  if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
    return NULL;

  vectype = get_vectype_for_scalar_type (vinfo, type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If the target can expand the mixed-size VEC_COND_EXPR directly,
     no pattern is needed.  */
  if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
    return NULL;

  /* Without converted arms, make the intermediate type an integer of
     the comparison's element size.  */
  if (itype == NULL_TREE)
    itype = build_nonstandard_integer_type (cmp_mode_size,
					    TYPE_UNSIGNED (type));

  if (itype == NULL_TREE
      || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
    return NULL;

  vecitype = get_vectype_for_scalar_type (vinfo, itype);
  if (vecitype == NULL_TREE)
    return NULL;

  /* The intermediate same-size VEC_COND_EXPR must be expandable.  */
  if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
    return NULL;

  /* When narrowing to ITYPE, constant arms must still be
     representable.  */
  if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
    {
      if ((TREE_CODE (then_clause) == INTEGER_CST
	   && !int_fits_type_p (then_clause, itype))
	  || (TREE_CODE (else_clause) == INTEGER_CST
	      && !int_fits_type_p (else_clause, itype)))
	return NULL;
    }

  /* Emit:  tmp_it = cond ? then_it : else_it;  lhs_T = (TYPE) tmp_it;  */
  def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
				  COND_EXPR, unshare_expr (cond_expr),
				  fold_convert (itype, then_clause),
				  fold_convert (itype, else_clause));
  pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
				      NOP_EXPR, gimple_assign_lhs (def_stmt));

  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
  *type_out = vectype;

  vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);

  return pattern_stmt;
}
3977 1.1 mrg
3978 1.1 mrg
3979 1.1 mrg /* Helper function of vect_recog_bool_pattern. Called recursively, return
3980 1.1 mrg true if bool VAR can and should be optimized that way. Assume it shouldn't
3981 1.1 mrg in case it's a result of a comparison which can be directly vectorized into
3982 1.1 mrg a vector comparison. Fills in STMTS with all stmts visited during the
3983 1.1 mrg walk. */
3984 1.1 mrg
3985 1.1 mrg static bool
3986 1.1 mrg check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3987 1.1 mrg {
3988 1.1 mrg tree rhs1;
3989 1.1 mrg enum tree_code rhs_code;
3990 1.1 mrg
3991 1.1 mrg stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3992 1.1 mrg if (!def_stmt_info)
3993 1.1 mrg return false;
3994 1.1 mrg
3995 1.1 mrg gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3996 1.1 mrg if (!def_stmt)
3997 1.1 mrg return false;
3998 1.1 mrg
3999 1.1 mrg if (stmts.contains (def_stmt))
4000 1.1 mrg return true;
4001 1.1 mrg
4002 1.1 mrg rhs1 = gimple_assign_rhs1 (def_stmt);
4003 1.1 mrg rhs_code = gimple_assign_rhs_code (def_stmt);
4004 1.1 mrg switch (rhs_code)
4005 1.1 mrg {
4006 1.1 mrg case SSA_NAME:
4007 1.1 mrg if (! check_bool_pattern (rhs1, vinfo, stmts))
4008 1.1 mrg return false;
4009 1.1 mrg break;
4010 1.1 mrg
4011 1.1 mrg CASE_CONVERT:
4012 1.1 mrg if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
4013 1.1 mrg return false;
4014 1.1 mrg if (! check_bool_pattern (rhs1, vinfo, stmts))
4015 1.1 mrg return false;
4016 1.1 mrg break;
4017 1.1 mrg
4018 1.1 mrg case BIT_NOT_EXPR:
4019 1.1 mrg if (! check_bool_pattern (rhs1, vinfo, stmts))
4020 1.1 mrg return false;
4021 1.1 mrg break;
4022 1.1 mrg
4023 1.1 mrg case BIT_AND_EXPR:
4024 1.1 mrg case BIT_IOR_EXPR:
4025 1.1 mrg case BIT_XOR_EXPR:
4026 1.1 mrg if (! check_bool_pattern (rhs1, vinfo, stmts)
4027 1.1 mrg || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
4028 1.1 mrg return false;
4029 1.1 mrg break;
4030 1.1 mrg
4031 1.1 mrg default:
4032 1.1 mrg if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
4033 1.1 mrg {
4034 1.1 mrg tree vecitype, comp_vectype;
4035 1.1 mrg
4036 1.1 mrg /* If the comparison can throw, then is_gimple_condexpr will be
4037 1.1 mrg false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
4038 1.1 mrg if (stmt_could_throw_p (cfun, def_stmt))
4039 1.1 mrg return false;
4040 1.1 mrg
4041 1.1 mrg comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
4042 1.1 mrg if (comp_vectype == NULL_TREE)
4043 1.1 mrg return false;
4044 1.1 mrg
4045 1.1 mrg tree mask_type = get_mask_type_for_scalar_type (vinfo,
4046 1.1 mrg TREE_TYPE (rhs1));
4047 1.1 mrg if (mask_type
4048 1.1 mrg && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
4049 1.1 mrg return false;
4050 1.1 mrg
4051 1.1 mrg if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
4052 1.1 mrg {
4053 1.1 mrg scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
4054 1.1 mrg tree itype
4055 1.1 mrg = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
4056 1.1 mrg vecitype = get_vectype_for_scalar_type (vinfo, itype);
4057 1.1 mrg if (vecitype == NULL_TREE)
4058 1.1 mrg return false;
4059 1.1 mrg }
4060 1.1 mrg else
4061 1.1 mrg vecitype = comp_vectype;
4062 1.1 mrg if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
4063 1.1 mrg return false;
4064 1.1 mrg }
4065 1.1 mrg else
4066 1.1 mrg return false;
4067 1.1 mrg break;
4068 1.1 mrg }
4069 1.1 mrg
4070 1.1 mrg bool res = stmts.add (def_stmt);
4071 1.1 mrg /* We can't end up recursing when just visiting SSA defs but not PHIs. */
4072 1.1 mrg gcc_assert (!res);
4073 1.1 mrg
4074 1.1 mrg return true;
4075 1.1 mrg }
4076 1.1 mrg
4077 1.1 mrg
4078 1.1 mrg /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
4079 1.1 mrg stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs
4080 1.1 mrg pattern sequence. */
4081 1.1 mrg
4082 1.1 mrg static tree
4083 1.1 mrg adjust_bool_pattern_cast (vec_info *vinfo,
4084 1.1 mrg tree type, tree var, stmt_vec_info stmt_info)
4085 1.1 mrg {
4086 1.1 mrg gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
4087 1.1 mrg NOP_EXPR, var);
4088 1.1 mrg append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
4089 1.1 mrg get_vectype_for_scalar_type (vinfo, type));
4090 1.1 mrg return gimple_assign_lhs (cast_stmt);
4091 1.1 mrg }
4092 1.1 mrg
/* Helper function of vect_recog_bool_pattern.  Do the actual transformations.
   VAR is an SSA_NAME that should be transformed from bool to a wider integer
   type, OUT_TYPE is the desired final integer type of the whole pattern.
   STMT_INFO is the info of the pattern root and is where pattern stmts should
   be associated with.  DEFS is a map of pattern defs.  */

static void
adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
		     stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
{
  gimple *stmt = SSA_NAME_DEF_STMT (var);
  enum tree_code rhs_code, def_rhs_code;
  tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
  location_t loc;
  gimple *pattern_stmt, *def_stmt;
  /* Non-NULL when a comparison's COND_EXPR should produce an existing
     pattern def instead of constant 1 (see the BIT_AND_EXPR case).  */
  tree trueval = NULL_TREE;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);
  rhs_code = gimple_assign_rhs_code (stmt);
  loc = gimple_location (stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
    CASE_CONVERT:
      /* A bool copy/conversion just forwards the already-adjusted def.
	 Operands are visited before uses, so DEFS must contain RHS1.  */
      irhs1 = *defs.get (rhs1);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       SSA_NAME, irhs1);
      break;

    case BIT_NOT_EXPR:
      /* Boolean negation becomes XOR with 1 in the integer type.  */
      irhs1 = *defs.get (rhs1);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
      break;

    case BIT_AND_EXPR:
      /* Try to optimize x = y & (a < b ? 1 : 0); into
	 x = (a < b ? y : 0);

	 E.g. for:
	   bool a_b, b_b, c_b;
	   TYPE d_T;

	   S1  a_b = x1 CMP1 y1;
	   S2  b_b = x2 CMP2 y2;
	   S3  c_b = a_b & b_b;
	   S4  d_T = (TYPE) c_b;

	 we would normally emit:

	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	   S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	   S3'  c_T = a_T & b_T;
	   S4'  d_T = c_T;

	 but we can save one stmt by using the
	 result of one of the COND_EXPRs in the other COND_EXPR and leave
	 BIT_AND_EXPR stmt out:

	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	   S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	   S4'  f_T = c_T;

	 At least when VEC_COND_EXPR is implemented using masks
	 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
	 computes the comparison masks and ands it, in one case with
	 all ones vector, in the other case with a vector register.
	 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
	 often more expensive.  */
      /* First try the second operand as the comparison to fold in.  */
      def_stmt = SSA_NAME_DEF_STMT (rhs2);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
	{
	  irhs1 = *defs.get (rhs1);
	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
	  /* Only fold in when the precisions match; otherwise fall back
	     to the plain bitwise-op path.  */
	  if (TYPE_PRECISION (TREE_TYPE (irhs1))
	      == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
	    {
	      rhs_code = def_rhs_code;
	      rhs1 = def_rhs1;
	      rhs2 = gimple_assign_rhs2 (def_stmt);
	      trueval = irhs1;
	      goto do_compare;
	    }
	  else
	    irhs2 = *defs.get (rhs2);
	  goto and_ior_xor;
	}
      /* Otherwise try the first operand as the comparison.  */
      def_stmt = SSA_NAME_DEF_STMT (rhs1);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
	{
	  irhs2 = *defs.get (rhs2);
	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
	  if (TYPE_PRECISION (TREE_TYPE (irhs2))
	      == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
	    {
	      rhs_code = def_rhs_code;
	      rhs1 = def_rhs1;
	      rhs2 = gimple_assign_rhs2 (def_stmt);
	      trueval = irhs2;
	      goto do_compare;
	    }
	  else
	    irhs1 = *defs.get (rhs1);
	  goto and_ior_xor;
	}
      /* FALLTHRU */
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      irhs1 = *defs.get (rhs1);
      irhs2 = *defs.get (rhs2);
    and_ior_xor:
      /* If the adjusted operands have different precisions, cast the
	 one whose precision is further from OUT_TYPE's, or both to
	 OUT_TYPE when the distances are equal.  */
      if (TYPE_PRECISION (TREE_TYPE (irhs1))
	  != TYPE_PRECISION (TREE_TYPE (irhs2)))
	{
	  int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
	  int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
	  int out_prec = TYPE_PRECISION (out_type);
	  if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
	    irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
					      stmt_info);
	  else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
	    irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
					      stmt_info);
	  else
	    {
	      irhs1 = adjust_bool_pattern_cast (vinfo,
						out_type, irhs1, stmt_info);
	      irhs2 = adjust_bool_pattern_cast (vinfo,
						out_type, irhs2, stmt_info);
	    }
	}
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       rhs_code, irhs1, irhs2);
      break;

    default:
    do_compare:
      /* A comparison becomes "cond ? trueval : 0" in an unsigned
	 integer type matching the operand's mode.  */
      gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
      if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
	  || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
	  || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
		       GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
	{
	  scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
	  itype
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	}
      else
	itype = TREE_TYPE (rhs1);
      cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
      if (trueval == NULL_TREE)
	trueval = build_int_cst (itype, 1);
      else
	gcc_checking_assert (useless_type_conversion_p (itype,
							TREE_TYPE (trueval)));
      pattern_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       COND_EXPR, cond_expr, trueval,
			       build_int_cst (itype, 0));
      break;
    }

  /* Queue the new stmt and record it as the adjusted def of VAR.  */
  gimple_set_location (pattern_stmt, loc);
  append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
			  get_vectype_for_scalar_type (vinfo, itype));
  defs.put (var, gimple_assign_lhs (pattern_stmt));
}
4269 1.1 mrg
4270 1.1 mrg /* Comparison function to qsort a vector of gimple stmts after UID. */
4271 1.1 mrg
4272 1.1 mrg static int
4273 1.1 mrg sort_after_uid (const void *p1, const void *p2)
4274 1.1 mrg {
4275 1.1 mrg const gimple *stmt1 = *(const gimple * const *)p1;
4276 1.1 mrg const gimple *stmt2 = *(const gimple * const *)p2;
4277 1.1 mrg return gimple_uid (stmt1) - gimple_uid (stmt2);
4278 1.1 mrg }
4279 1.1 mrg
4280 1.1 mrg /* Create pattern stmts for all stmts participating in the bool pattern
4281 1.1 mrg specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
4282 1.1 mrg OUT_TYPE. Return the def of the pattern root. */
4283 1.1 mrg
4284 1.1 mrg static tree
4285 1.1 mrg adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
4286 1.1 mrg tree out_type, stmt_vec_info stmt_info)
4287 1.1 mrg {
4288 1.1 mrg /* Gather original stmts in the bool pattern in their order of appearance
4289 1.1 mrg in the IL. */
4290 1.1 mrg auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
4291 1.1 mrg for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
4292 1.1 mrg i != bool_stmt_set.end (); ++i)
4293 1.1 mrg bool_stmts.quick_push (*i);
4294 1.1 mrg bool_stmts.qsort (sort_after_uid);
4295 1.1 mrg
4296 1.1 mrg /* Now process them in that order, producing pattern stmts. */
4297 1.1 mrg hash_map <tree, tree> defs;
4298 1.1 mrg for (unsigned i = 0; i < bool_stmts.length (); ++i)
4299 1.1 mrg adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
4300 1.1 mrg out_type, stmt_info, defs);
4301 1.1 mrg
4302 1.1 mrg /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
4303 1.1 mrg gimple *pattern_stmt
4304 1.1 mrg = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4305 1.1 mrg return gimple_assign_lhs (pattern_stmt);
4306 1.1 mrg }
4307 1.1 mrg
4308 1.1 mrg /* Return the proper type for converting bool VAR into
4309 1.1 mrg an integer value or NULL_TREE if no such type exists.
4310 1.1 mrg The type is chosen so that the converted value has the
4311 1.1 mrg same number of elements as VAR's vector type. */
4312 1.1 mrg
4313 1.1 mrg static tree
4314 1.1 mrg integer_type_for_mask (tree var, vec_info *vinfo)
4315 1.1 mrg {
4316 1.1 mrg if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4317 1.1 mrg return NULL_TREE;
4318 1.1 mrg
4319 1.1 mrg stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
4320 1.1 mrg if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
4321 1.1 mrg return NULL_TREE;
4322 1.1 mrg
4323 1.1 mrg return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
4324 1.1 mrg }
4325 1.1 mrg
/* Function vect_recog_bool_pattern

   Try to find pattern like following:

     bool a_b, b_b, c_b, d_b, e_b;
     TYPE f_T;
   loop:
     S1  a_b = x1 CMP1 y1;
     S2  b_b = x2 CMP2 y2;
     S3  c_b = a_b & b_b;
     S4  d_b = x3 CMP3 y3;
     S5  e_b = c_b | d_b;
     S6  f_T = (TYPE) e_b;

   where type 'TYPE' is an integral type.  Or a similar pattern
   ending in

     S6  f_Y = e_b ? r_Y : s_Y;

   as results from if-conversion of a complex condition.

   Input:

   * STMT_VINFO: The stmt at the end from which the pattern
		 search begins, i.e. cast of a bool to
		 an integer type.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.

	Assuming size of TYPE is the same as size of all comparisons
	(otherwise some casts would be added where needed), the above
	sequence we create related pattern stmts:
	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
	S5'  e_T = c_T | d_T;
	S6'  f_T = e_T;

	Instead of the above S3' we could emit:
	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	S3'  c_T = a_T | b_T;
	but the above is more efficient.  */

static gimple *
vect_recog_bool_pattern (vec_info *vinfo,
			 stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  enum tree_code rhs_code;
  tree var, lhs, rhs, vectype;
  gimple *pattern_stmt;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  var = gimple_assign_rhs1 (last_stmt);
  lhs = gimple_assign_lhs (last_stmt);
  rhs_code = gimple_assign_rhs_code (last_stmt);

  /* Look through a VIEW_CONVERT_EXPR to its operand.  */
  if (rhs_code == VIEW_CONVERT_EXPR)
    var = TREE_OPERAND (var, 0);

  /* The pattern root must consume a scalar boolean.  */
  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
    return NULL;

  hash_set<gimple *> bool_stmts;

  /* Case 1: a bool-to-integer conversion (S6 above).  */
  if (CONVERT_EXPR_CODE_P (rhs_code)
      || rhs_code == VIEW_CONVERT_EXPR)
    {
      if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
	  || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
	return NULL;
      vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));

      if (check_bool_pattern (var, vinfo, bool_stmts))
	{
	  /* Rewrite the whole bool computation in TREE_TYPE (lhs) and
	     forward/convert its result.  */
	  rhs = adjust_bool_stmts (vinfo, bool_stmts,
				   TREE_TYPE (lhs), stmt_vinfo);
	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	    pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
	  else
	    pattern_stmt
	      = gimple_build_assign (lhs, NOP_EXPR, rhs);
	}
      else
	{
	  /* VAR stays a mask; select 1/0 from it in a matching integer
	     type instead.  */
	  tree type = integer_type_for_mask (var, vinfo);
	  tree cst0, cst1, tmp;

	  if (!type)
	    return NULL;

	  /* We may directly use cond with narrowed type to avoid
	     multiple cond exprs with following result packing and
	     perform single cond with packed mask instead.  In case
	     of widening we better make cond first and then extract
	     results.  */
	  if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
	    type = TREE_TYPE (lhs);

	  cst0 = build_int_cst (type, 0);
	  cst1 = build_int_cst (type, 1);
	  tmp = vect_recog_temp_ssa_var (type, NULL);
	  pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);

	  /* Widen the COND_EXPR result to the lhs type if needed.  */
	  if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
	    {
	      tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
	      append_pattern_def_seq (vinfo, stmt_vinfo,
				      pattern_stmt, new_vectype);

	      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	      pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
	    }
	}

      *type_out = vectype;
      vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);

      return pattern_stmt;
    }
  /* Case 2: a COND_EXPR selected by a bool SSA name (the f_Y form).  */
  else if (rhs_code == COND_EXPR
	   && TREE_CODE (var) == SSA_NAME)
    {
      vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
	return NULL;

      /* Build a scalar type for the boolean result that when
	 vectorized matches the vector type of the result in
	 size and number of elements.  */
      unsigned prec
	= vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
			       TYPE_VECTOR_SUBPARTS (vectype));

      tree type
	= build_nonstandard_integer_type (prec,
					  TYPE_UNSIGNED (TREE_TYPE (var)));
      if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
	return NULL;

      if (!check_bool_pattern (var, vinfo, bool_stmts))
	return NULL;

      rhs = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);

      /* Replace the bool condition by "adjusted-rhs != 0".  */
      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      pattern_stmt
	= gimple_build_assign (lhs, COND_EXPR,
			       build2 (NE_EXPR, boolean_type_node,
				       rhs, build_int_cst (type, 0)),
			       gimple_assign_rhs2 (last_stmt),
			       gimple_assign_rhs3 (last_stmt));
      *type_out = vectype;
      vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);

      return pattern_stmt;
    }
  /* Case 3: a bool value stored to memory (the stmt has a data ref).  */
  else if (rhs_code == SSA_NAME
	   && STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      stmt_vec_info pattern_stmt_info;
      tree nunits_vectype;
      if (!vect_get_vector_types_for_stmt (vinfo, stmt_vinfo, &vectype,
					   &nunits_vectype)
	  || !VECTOR_MODE_P (TYPE_MODE (vectype)))
	return NULL;

      if (check_bool_pattern (var, vinfo, bool_stmts))
	rhs = adjust_bool_stmts (vinfo, bool_stmts,
				 TREE_TYPE (vectype), stmt_vinfo);
      else
	{
	  /* Keep VAR as a mask and select 1/0 from it, as in case 1.  */
	  tree type = integer_type_for_mask (var, vinfo);
	  tree cst0, cst1, new_vectype;

	  if (!type)
	    return NULL;

	  if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
	    type = TREE_TYPE (vectype);

	  cst0 = build_int_cst (type, 0);
	  cst1 = build_int_cst (type, 1);
	  new_vectype = get_vectype_for_scalar_type (vinfo, type);

	  rhs = vect_recog_temp_ssa_var (type, NULL);
	  pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
	  append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
	}

      /* Store through a view-convert of the original lhs, converting
	 the rhs to the vector element type first if necessary.  */
      lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	{
	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
	  append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
	  rhs = rhs2;
	}
      pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
      pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
      /* Transfer the data reference to the new store stmt.  */
      vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
      *type_out = vectype;
      vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);

      return pattern_stmt;
    }
  else
    return NULL;
}
4542 1.1 mrg
4543 1.1 mrg
4544 1.1 mrg /* A helper for vect_recog_mask_conversion_pattern. Build
4545 1.1 mrg conversion of MASK to a type suitable for masking VECTYPE.
4546 1.1 mrg Built statement gets required vectype and is appended to
4547 1.1 mrg a pattern sequence of STMT_VINFO.
4548 1.1 mrg
4549 1.1 mrg Return converted mask. */
4550 1.1 mrg
4551 1.1 mrg static tree
4552 1.1 mrg build_mask_conversion (vec_info *vinfo,
4553 1.1 mrg tree mask, tree vectype, stmt_vec_info stmt_vinfo)
4554 1.1 mrg {
4555 1.1 mrg gimple *stmt;
4556 1.1 mrg tree masktype, tmp;
4557 1.1 mrg
4558 1.1 mrg masktype = truth_type_for (vectype);
4559 1.1 mrg tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
4560 1.1 mrg stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
4561 1.1 mrg append_pattern_def_seq (vinfo, stmt_vinfo,
4562 1.1 mrg stmt, masktype, TREE_TYPE (vectype));
4563 1.1 mrg
4564 1.1 mrg return tmp;
4565 1.1 mrg }
4566 1.1 mrg
4567 1.1 mrg
/* Function vect_recog_mask_conversion_pattern

   Try to find statements which require boolean type
   conversion.  Additional conversion statements are
   added to handle such cases.  For example:

   bool m_1, m_2, m_3;
   int i_4, i_5;
   double d_6, d_7;
   char c_1, c_2, c_3;

   S1   m_1 = i_4 > i_5;
   S2   m_2 = d_6 < d_7;
   S3   m_3 = m_1 & m_2;
   S4   c_1 = m_3 ? c_2 : c_3;

   Will be transformed into:

   S1   m_1 = i_4 > i_5;
   S2   m_2 = d_6 < d_7;
   S3'' m_2' = (_Bool[bitsize=32])m_2
   S3'  m_3' = m_1 & m_2';
   S4'' m_3'' = (_Bool[bitsize=8])m_3'
   S4'  c_1' = m_3'' ? c_2 : c_3;  */

static gimple *
vect_recog_mask_conversion_pattern (vec_info *vinfo,
				    stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  enum tree_code rhs_code;
  tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
  tree vectype1, vectype2;
  stmt_vec_info pattern_stmt_info;
  tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
  tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;

  /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion.  */
  if (is_gimple_call (last_stmt)
      && gimple_call_internal_p (last_stmt))
    {
      gcall *pattern_stmt;

      internal_fn ifn = gimple_call_internal_fn (last_stmt);
      int mask_argno = internal_fn_mask_index (ifn);
      /* A negative index means the internal function takes no mask.  */
      if (mask_argno < 0)
	return NULL;

      bool store_p = internal_store_fn_p (ifn);
      /* VECTYPE1 is the vector type of the value being loaded or stored;
	 the mask must have a matching number of elements.  */
      if (store_p)
	{
	  int rhs_index = internal_fn_stored_value_index (ifn);
	  tree rhs = gimple_call_arg (last_stmt, rhs_index);
	  vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
	}
      else
	{
	  lhs = gimple_call_lhs (last_stmt);
	  if (!lhs)
	    return NULL;
	  vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
	}

      /* VECTYPE2 is the natural mask type of the current mask argument.  */
      tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
      tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
      if (!mask_arg_type)
	return NULL;
      vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);

      /* No conversion is needed when the element counts already agree.  */
      if (!vectype1 || !vectype2
	  || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
		       TYPE_VECTOR_SUBPARTS (vectype2)))
	return NULL;

      tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);

      /* Rebuild the call with the converted mask in place of the
	 original mask argument.  */
      auto_vec<tree, 8> args;
      unsigned int nargs = gimple_call_num_args (last_stmt);
      args.safe_grow (nargs, true);
      for (unsigned int i = 0; i < nargs; ++i)
	args[i] = ((int) i == mask_argno
		   ? tmp
		   : gimple_call_arg (last_stmt, i));
      pattern_stmt = gimple_build_call_internal_vec (ifn, args);

      if (!store_p)
	{
	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  gimple_call_set_lhs (pattern_stmt, lhs);
	}
      gimple_call_set_nothrow (pattern_stmt, true);

      /* Transfer the data reference from the original statement to the
	 new pattern statement.  */
      pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
      if (STMT_VINFO_DATA_REF (stmt_vinfo))
	vinfo->move_dr (pattern_stmt_info, stmt_vinfo);

      *type_out = vectype1;
      vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);

      return pattern_stmt;
    }

  if (!is_gimple_assign (last_stmt))
    return NULL;

  gimple *pattern_stmt;
  lhs = gimple_assign_lhs (last_stmt);
  rhs1 = gimple_assign_rhs1 (last_stmt);
  rhs_code = gimple_assign_rhs_code (last_stmt);

  /* Check for cond expression requiring mask conversion.  */
  if (rhs_code == COND_EXPR)
    {
      vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));

      if (TREE_CODE (rhs1) == SSA_NAME)
	{
	  rhs1_type = integer_type_for_mask (rhs1, vinfo);
	  if (!rhs1_type)
	    return NULL;
	}
      else if (COMPARISON_CLASS_P (rhs1))
	{
	  /* Check whether we're comparing scalar booleans and (if so)
	     whether a better mask type exists than the mask associated
	     with boolean-sized elements.  This avoids unnecessary packs
	     and unpacks if the booleans are set from comparisons of
	     wider types.  E.g. in:

	       int x1, x2, x3, x4, y1, y2;
	       ...
	       bool b1 = (x1 == x2);
	       bool b2 = (x3 == x4);
	       ... = b1 == b2 ? y1 : y2;

	     it is better for b1 and b2 to use the mask type associated
	     with int elements rather than bool (byte) elements.  */
	  rhs1_op0 = TREE_OPERAND (rhs1, 0);
	  rhs1_op1 = TREE_OPERAND (rhs1, 1);
	  if (!rhs1_op0 || !rhs1_op1)
	    return NULL;
	  rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
	  rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);

	  /* Pick the comparison operand type whose precision is closest
	     to that of the COND_EXPR result, preferring mask types when
	     both operands are themselves masks.  */
	  if (!rhs1_op0_type)
	    rhs1_type = TREE_TYPE (rhs1_op0);
	  else if (!rhs1_op1_type)
	    rhs1_type = TREE_TYPE (rhs1_op1);
	  else if (TYPE_PRECISION (rhs1_op0_type)
		   != TYPE_PRECISION (rhs1_op1_type))
	    {
	      int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
			 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
	      int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
			 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
	      if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
		{
		  if (abs (tmp0) > abs (tmp1))
		    rhs1_type = rhs1_op1_type;
		  else
		    rhs1_type = rhs1_op0_type;
		}
	      else
		rhs1_type = build_nonstandard_integer_type
		  (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
	    }
	  else
	    rhs1_type = rhs1_op0_type;
	}
      else
	return NULL;

      vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);

      if (!vectype1 || !vectype2)
	return NULL;

      /* Continue if a conversion is needed.  Also continue if we have
	 a comparison whose vector type would normally be different from
	 VECTYPE2 when considered in isolation.  In that case we'll
	 replace the comparison with an SSA name (so that we can record
	 its vector type) and behave as though the comparison was an SSA
	 name from the outset.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
		    TYPE_VECTOR_SUBPARTS (vectype2))
	  && !rhs1_op0_type
	  && !rhs1_op1_type)
	return NULL;

      /* If rhs1 is invariant and we can promote it leave the COND_EXPR
	 in place, we can handle it in vectorizable_condition.  This avoids
	 unnecessary promotion stmts and increased vectorization factor.  */
      if (COMPARISON_CLASS_P (rhs1)
	  && INTEGRAL_TYPE_P (rhs1_type)
	  && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
		       TYPE_VECTOR_SUBPARTS (vectype2)))
	{
	  enum vect_def_type dt;
	  if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
	      && dt == vect_external_def
	      && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
	      && (dt == vect_external_def
		  || dt == vect_constant_def))
	    {
	      tree wide_scalar_type = build_nonstandard_integer_type
		(vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
	      tree vectype3 = get_vectype_for_scalar_type (vinfo,
							   wide_scalar_type);
	      if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
		return NULL;
	    }
	}

      /* If rhs1 is a comparison we need to move it into a
	 separate statement.  */
      if (TREE_CODE (rhs1) != SSA_NAME)
	{
	  tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
	  /* Convert the comparison operands first when their mask
	     precision differs from the chosen RHS1_TYPE.  */
	  if (rhs1_op0_type
	      && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
	    rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
					      vectype2, stmt_vinfo);
	  if (rhs1_op1_type
	      && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
	    rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
					      vectype2, stmt_vinfo);
	  pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
					      rhs1_op0, rhs1_op1);
	  rhs1 = tmp;
	  append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
				  rhs1_type);
	}

      /* Convert the mask to the element count of the COND_EXPR's
	 vector type if necessary.  */
      if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
		    TYPE_VECTOR_SUBPARTS (vectype2)))
	tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
      else
	tmp = rhs1;

      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
					  gimple_assign_rhs2 (last_stmt),
					  gimple_assign_rhs3 (last_stmt));

      *type_out = vectype1;
      vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);

      return pattern_stmt;
    }

  /* Now check for binary boolean operations requiring conversion for
     one of operands.  */
  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    return NULL;

  if (rhs_code != BIT_IOR_EXPR
      && rhs_code != BIT_XOR_EXPR
      && rhs_code != BIT_AND_EXPR
      && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
    return NULL;

  rhs2 = gimple_assign_rhs2 (last_stmt);

  rhs1_type = integer_type_for_mask (rhs1, vinfo);
  rhs2_type = integer_type_for_mask (rhs2, vinfo);

  /* Both operands must be masks, and of different precisions, for a
     conversion to be useful.  */
  if (!rhs1_type || !rhs2_type
      || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
    return NULL;

  /* Convert the wider-element mask to the narrower-element mask type.  */
  if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
    {
      vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
      if (!vectype1)
	return NULL;
      rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
    }
  else
    {
      vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
      if (!vectype1)
	return NULL;
      rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
    }

  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
  pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);

  *type_out = vectype1;
  vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);

  return pattern_stmt;
}
4861 1.1 mrg
4862 1.1 mrg /* STMT_INFO is a load or store. If the load or store is conditional, return
4863 1.1 mrg the boolean condition under which it occurs, otherwise return null. */
4864 1.1 mrg
4865 1.1 mrg static tree
4866 1.1 mrg vect_get_load_store_mask (stmt_vec_info stmt_info)
4867 1.1 mrg {
4868 1.1 mrg if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
4869 1.1 mrg {
4870 1.1 mrg gcc_assert (gimple_assign_single_p (def_assign));
4871 1.1 mrg return NULL_TREE;
4872 1.1 mrg }
4873 1.1 mrg
4874 1.1 mrg if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
4875 1.1 mrg {
4876 1.1 mrg internal_fn ifn = gimple_call_internal_fn (def_call);
4877 1.1 mrg int mask_index = internal_fn_mask_index (ifn);
4878 1.1 mrg return gimple_call_arg (def_call, mask_index);
4879 1.1 mrg }
4880 1.1 mrg
4881 1.1 mrg gcc_unreachable ();
4882 1.1 mrg }
4883 1.1 mrg
4884 1.1 mrg /* Return MASK if MASK is suitable for masking an operation on vectors
4885 1.1 mrg of type VECTYPE, otherwise convert it into such a form and return
4886 1.1 mrg the result. Associate any conversion statements with STMT_INFO's
4887 1.1 mrg pattern. */
4888 1.1 mrg
4889 1.1 mrg static tree
4890 1.1 mrg vect_convert_mask_for_vectype (tree mask, tree vectype,
4891 1.1 mrg stmt_vec_info stmt_info, vec_info *vinfo)
4892 1.1 mrg {
4893 1.1 mrg tree mask_type = integer_type_for_mask (mask, vinfo);
4894 1.1 mrg if (mask_type)
4895 1.1 mrg {
4896 1.1 mrg tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
4897 1.1 mrg if (mask_vectype
4898 1.1 mrg && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4899 1.1 mrg TYPE_VECTOR_SUBPARTS (mask_vectype)))
4900 1.1 mrg mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
4901 1.1 mrg }
4902 1.1 mrg return mask;
4903 1.1 mrg }
4904 1.1 mrg
4905 1.1 mrg /* Return the equivalent of:
4906 1.1 mrg
4907 1.1 mrg fold_convert (TYPE, VALUE)
4908 1.1 mrg
4909 1.1 mrg with the expectation that the operation will be vectorized.
4910 1.1 mrg If new statements are needed, add them as pattern statements
4911 1.1 mrg to STMT_INFO. */
4912 1.1 mrg
4913 1.1 mrg static tree
4914 1.1 mrg vect_add_conversion_to_pattern (vec_info *vinfo,
4915 1.1 mrg tree type, tree value, stmt_vec_info stmt_info)
4916 1.1 mrg {
4917 1.1 mrg if (useless_type_conversion_p (type, TREE_TYPE (value)))
4918 1.1 mrg return value;
4919 1.1 mrg
4920 1.1 mrg tree new_value = vect_recog_temp_ssa_var (type, NULL);
4921 1.1 mrg gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4922 1.1 mrg append_pattern_def_seq (vinfo, stmt_info, conversion,
4923 1.1 mrg get_vectype_for_scalar_type (vinfo, type));
4924 1.1 mrg return new_value;
4925 1.1 mrg }
4926 1.1 mrg
/* Try to convert STMT_INFO into a call to a gather load or scatter store
   internal function.  Return the final statement on success and set
   *TYPE_OUT to the vector type being loaded or stored.

   This function only handles gathers and scatters that were recognized
   as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P).  */

static gimple *
vect_recog_gather_scatter_pattern (vec_info *vinfo,
				   stmt_vec_info stmt_info, tree *type_out)
{
  /* Currently we only support this for loop vectorization.  */
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  if (!loop_vinfo)
    return NULL;

  /* Make sure that we're looking at a gather load or scatter store.  */
  data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    return NULL;

  /* Get the boolean that controls whether the load or store happens.
     This is null if the operation is unconditional.  */
  tree mask = vect_get_load_store_mask (stmt_info);

  /* Make sure that the target supports an appropriate internal
     function for the gather/scatter operation.  */
  gather_scatter_info gs_info;
  if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
      || gs_info.ifn == IFN_LAST)
    return NULL;

  /* Convert the mask to the right form.  If the access was
     unconditional but the chosen IFN is a masked variant, use an
     all-ones mask.  */
  tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
						 gs_info.element_type);
  if (mask)
    mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
					  loop_vinfo);
  else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
	   || gs_info.ifn == IFN_MASK_GATHER_LOAD)
    mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);

  /* Get the invariant base and non-invariant offset, converting the
     latter to the same width as the vector elements.  */
  tree base = gs_info.base;
  tree offset_type = TREE_TYPE (gs_info.offset_vectype);
  tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
						gs_info.offset, stmt_info);

  /* Build the new pattern statement.  */
  tree scale = size_int (gs_info.scale);
  gcall *pattern_stmt;
  if (DR_IS_READ (dr))
    {
      /* Gather load: the zero constant supplies the value for inactive
	 lanes of the masked variant.  */
      tree zero = build_zero_cst (gs_info.element_type);
      if (mask != NULL)
	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
						   offset, scale, zero, mask);
      else
	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
						   offset, scale, zero);
      tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
      gimple_call_set_lhs (pattern_stmt, load_lhs);
    }
  else
    {
      /* Scatter store: the value to store replaces the zero operand.  */
      tree rhs = vect_get_store_rhs (stmt_info);
      if (mask != NULL)
	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
						   base, offset, scale, rhs,
						   mask);
      else
	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
						   base, offset, scale, rhs);
    }
  gimple_call_set_nothrow (pattern_stmt, true);

  /* Copy across relevant vectorization info and associate DR with the
     new pattern statement instead of the original statement.  */
  stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
  loop_vinfo->move_dr (pattern_stmt_info, stmt_info);

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  *type_out = vectype;
  vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);

  return pattern_stmt;
}
5015 1.1 mrg
5016 1.1 mrg /* Return true if TYPE is a non-boolean integer type. These are the types
5017 1.1 mrg that we want to consider for narrowing. */
5018 1.1 mrg
5019 1.1 mrg static bool
5020 1.1 mrg vect_narrowable_type_p (tree type)
5021 1.1 mrg {
5022 1.1 mrg return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
5023 1.1 mrg }
5024 1.1 mrg
5025 1.1 mrg /* Return true if the operation given by CODE can be truncated to N bits
5026 1.1 mrg when only N bits of the output are needed. This is only true if bit N+1
5027 1.1 mrg of the inputs has no effect on the low N bits of the result. */
5028 1.1 mrg
5029 1.1 mrg static bool
5030 1.1 mrg vect_truncatable_operation_p (tree_code code)
5031 1.1 mrg {
5032 1.1 mrg switch (code)
5033 1.1 mrg {
5034 1.1 mrg case NEGATE_EXPR:
5035 1.1 mrg case PLUS_EXPR:
5036 1.1 mrg case MINUS_EXPR:
5037 1.1 mrg case MULT_EXPR:
5038 1.1 mrg case BIT_NOT_EXPR:
5039 1.1 mrg case BIT_AND_EXPR:
5040 1.1 mrg case BIT_IOR_EXPR:
5041 1.1 mrg case BIT_XOR_EXPR:
5042 1.1 mrg case COND_EXPR:
5043 1.1 mrg return true;
5044 1.1 mrg
5045 1.1 mrg default:
5046 1.1 mrg return false;
5047 1.1 mrg }
5048 1.1 mrg }
5049 1.1 mrg
5050 1.1 mrg /* Record that STMT_INFO could be changed from operating on TYPE to
5051 1.1 mrg operating on a type with the precision and sign given by PRECISION
5052 1.1 mrg and SIGN respectively. PRECISION is an arbitrary bit precision;
5053 1.1 mrg it might not be a whole number of bytes. */
5054 1.1 mrg
5055 1.1 mrg static void
5056 1.1 mrg vect_set_operation_type (stmt_vec_info stmt_info, tree type,
5057 1.1 mrg unsigned int precision, signop sign)
5058 1.1 mrg {
5059 1.1 mrg /* Round the precision up to a whole number of bytes. */
5060 1.1 mrg precision = vect_element_precision (precision);
5061 1.1 mrg if (precision < TYPE_PRECISION (type)
5062 1.1 mrg && (!stmt_info->operation_precision
5063 1.1 mrg || stmt_info->operation_precision > precision))
5064 1.1 mrg {
5065 1.1 mrg stmt_info->operation_precision = precision;
5066 1.1 mrg stmt_info->operation_sign = sign;
5067 1.1 mrg }
5068 1.1 mrg }
5069 1.1 mrg
5070 1.1 mrg /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
5071 1.1 mrg non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
5072 1.1 mrg is an arbitrary bit precision; it might not be a whole number of bytes. */
5073 1.1 mrg
5074 1.1 mrg static void
5075 1.1 mrg vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
5076 1.1 mrg unsigned int min_input_precision)
5077 1.1 mrg {
5078 1.1 mrg /* This operation in isolation only requires the inputs to have
5079 1.1 mrg MIN_INPUT_PRECISION of precision, However, that doesn't mean
5080 1.1 mrg that MIN_INPUT_PRECISION is a natural precision for the chain
5081 1.1 mrg as a whole. E.g. consider something like:
5082 1.1 mrg
5083 1.1 mrg unsigned short *x, *y;
5084 1.1 mrg *y = ((*x & 0xf0) >> 4) | (*y << 4);
5085 1.1 mrg
5086 1.1 mrg The right shift can be done on unsigned chars, and only requires the
5087 1.1 mrg result of "*x & 0xf0" to be done on unsigned chars. But taking that
5088 1.1 mrg approach would mean turning a natural chain of single-vector unsigned
5089 1.1 mrg short operations into one that truncates "*x" and then extends
5090 1.1 mrg "(*x & 0xf0) >> 4", with two vectors for each unsigned short
5091 1.1 mrg operation and one vector for each unsigned char operation.
5092 1.1 mrg This would be a significant pessimization.
5093 1.1 mrg
5094 1.1 mrg Instead only propagate the maximum of this precision and the precision
5095 1.1 mrg required by the users of the result. This means that we don't pessimize
5096 1.1 mrg the case above but continue to optimize things like:
5097 1.1 mrg
5098 1.1 mrg unsigned char *y;
5099 1.1 mrg unsigned short *x;
5100 1.1 mrg *y = ((*x & 0xf0) >> 4) | (*y << 4);
5101 1.1 mrg
5102 1.1 mrg Here we would truncate two vectors of *x to a single vector of
5103 1.1 mrg unsigned chars and use single-vector unsigned char operations for
5104 1.1 mrg everything else, rather than doing two unsigned short copies of
5105 1.1 mrg "(*x & 0xf0) >> 4" and then truncating the result. */
5106 1.1 mrg min_input_precision = MAX (min_input_precision,
5107 1.1 mrg stmt_info->min_output_precision);
5108 1.1 mrg
5109 1.1 mrg if (min_input_precision < TYPE_PRECISION (type)
5110 1.1 mrg && (!stmt_info->min_input_precision
5111 1.1 mrg || stmt_info->min_input_precision > min_input_precision))
5112 1.1 mrg stmt_info->min_input_precision = min_input_precision;
5113 1.1 mrg }
5114 1.1 mrg
/* Subroutine of vect_determine_min_output_precision.  Return true if
   we can calculate a reduced number of output bits for STMT_INFO,
   whose result is LHS.  */

static bool
vect_determine_min_output_precision_1 (vec_info *vinfo,
				       stmt_vec_info stmt_info, tree lhs)
{
  /* Take the maximum precision required by users of the result.  */
  unsigned int precision = 0;
  imm_use_iterator iter;
  use_operand_p use;
  FOR_EACH_IMM_USE_FAST (use, iter, lhs)
    {
      gimple *use_stmt = USE_STMT (use);
      /* Debug statements don't constrain the required precision.  */
      if (is_gimple_debug (use_stmt))
	continue;
      /* Bail out if any user is outside the region being vectorized or
	 has no recorded input requirement.  */
      stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
      if (!use_stmt_info || !use_stmt_info->min_input_precision)
	return false;
      /* The input precision recorded for COND_EXPRs applies only to the
	 "then" and "else" values.  */
      gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
      /* NOTE(review): USE->use points into USE_STMT's operand array, but
	 ASSIGN above is taken from STMT_INFO (the defining statement).
	 When the two statements differ, the pointer comparisons below can
	 never match, so any COND_EXPR definition with a real use bails
	 out here.  This looks like it was meant to inspect USE_STMT
	 instead -- confirm against upstream before changing.  */
      if (assign
	  && gimple_assign_rhs_code (assign) == COND_EXPR
	  && use->use != gimple_assign_rhs2_ptr (assign)
	  && use->use != gimple_assign_rhs3_ptr (assign))
	return false;
      precision = MAX (precision, use_stmt_info->min_input_precision);
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "only the low %d bits of %T are significant\n",
		     precision, lhs);
  stmt_info->min_output_precision = precision;
  return true;
}
5153 1.1 mrg
5154 1.1 mrg /* Calculate min_output_precision for STMT_INFO. */
5155 1.1 mrg
5156 1.1 mrg static void
5157 1.1 mrg vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
5158 1.1 mrg {
5159 1.1 mrg /* We're only interested in statements with a narrowable result. */
5160 1.1 mrg tree lhs = gimple_get_lhs (stmt_info->stmt);
5161 1.1 mrg if (!lhs
5162 1.1 mrg || TREE_CODE (lhs) != SSA_NAME
5163 1.1 mrg || !vect_narrowable_type_p (TREE_TYPE (lhs)))
5164 1.1 mrg return;
5165 1.1 mrg
5166 1.1 mrg if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
5167 1.1 mrg stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
5168 1.1 mrg }
5169 1.1 mrg
/* Use range information to decide whether STMT (described by STMT_INFO)
   could be done in a narrower type.  This is effectively a forward
   propagation, since it uses context-independent information that applies
   to all users of an SSA name.  */

static void
vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
{
  tree lhs = gimple_assign_lhs (stmt);
  if (!lhs || TREE_CODE (lhs) != SSA_NAME)
    return;

  tree type = TREE_TYPE (lhs);
  if (!vect_narrowable_type_p (type))
    return;

  /* First see whether we have any useful range information for the result.  */
  unsigned int precision = TYPE_PRECISION (type);
  signop sign = TYPE_SIGN (type);
  wide_int min_value, max_value;
  if (!vect_get_range_info (lhs, &min_value, &max_value))
    return;

  tree_code code = gimple_assign_rhs_code (stmt);
  unsigned int nops = gimple_num_ops (stmt);

  if (!vect_truncatable_operation_p (code))
    {
      /* Handle operations that can be computed in type T if all inputs
	 and outputs can be represented in type T.  Also handle left and
	 right shifts, where (in addition) the maximum shift amount must
	 be less than the number of bits in T.  */
      bool is_shift;
      switch (code)
	{
	case LSHIFT_EXPR:
	case RSHIFT_EXPR:
	  is_shift = true;
	  break;

	case ABS_EXPR:
	case MIN_EXPR:
	case MAX_EXPR:
	case TRUNC_DIV_EXPR:
	case CEIL_DIV_EXPR:
	case FLOOR_DIV_EXPR:
	case ROUND_DIV_EXPR:
	case EXACT_DIV_EXPR:
	  /* Modulus is excluded because it is typically calculated by doing
	     a division, for which minimum signed / -1 isn't representable in
	     the original signed type.  We could take the division range into
	     account instead, if handling modulus ever becomes important.  */
	  is_shift = false;
	  break;

	default:
	  return;
	}
      /* Widen [MIN_VALUE, MAX_VALUE] so that it also covers the ranges
	 of all input operands.  */
      for (unsigned int i = 1; i < nops; ++i)
	{
	  tree op = gimple_op (stmt, i);
	  wide_int op_min_value, op_max_value;
	  if (TREE_CODE (op) == INTEGER_CST)
	    {
	      unsigned int op_precision = TYPE_PRECISION (TREE_TYPE (op));
	      op_min_value = op_max_value = wi::to_wide (op, op_precision);
	    }
	  else if (TREE_CODE (op) == SSA_NAME)
	    {
	      if (!vect_get_range_info (op, &op_min_value, &op_max_value))
		return;
	    }
	  else
	    return;

	  if (is_shift && i == 2)
	    {
	      /* There needs to be one more bit than the maximum shift amount.

		 If the maximum shift amount is already 1 less than PRECISION
		 then we can't narrow the shift further.  Dealing with that
		 case first ensures that we can safely use an unsigned range
		 below.

		 op_min_value isn't relevant, since shifts by negative amounts
		 are UB.  */
	      if (wi::geu_p (op_max_value, precision - 1))
		return;
	      unsigned int min_bits = op_max_value.to_uhwi () + 1;

	      /* As explained below, we can convert a signed shift into an
		 unsigned shift if the sign bit is always clear.  At this
		 point we've already processed the ranges of the output and
		 the first input.  */
	      auto op_sign = sign;
	      if (sign == SIGNED && !wi::neg_p (min_value))
		op_sign = UNSIGNED;
	      /* Treat the shift amount as if it spanned the full range of
		 a MIN_BITS-wide value.  */
	      op_min_value = wide_int::from (wi::min_value (min_bits, op_sign),
					     precision, op_sign);
	      op_max_value = wide_int::from (wi::max_value (min_bits, op_sign),
					     precision, op_sign);
	    }
	  min_value = wi::min (min_value, op_min_value, sign);
	  max_value = wi::max (max_value, op_max_value, sign);
	}
    }

  /* Try to switch signed types for unsigned types if we can.
     This is better for two reasons.  First, unsigned ops tend
     to be cheaper than signed ops.  Second, it means that we can
     handle things like:

	signed char c;
	int res = (int) c & 0xff00; // range [0x0000, 0xff00]

     as:

	signed char c;
	unsigned short res_1 = (unsigned short) c & 0xff00;
	int res = (int) res_1;

     where the intermediate result res_1 has unsigned rather than
     signed type.  */
  if (sign == SIGNED && !wi::neg_p (min_value))
    sign = UNSIGNED;

  /* See what precision is required for MIN_VALUE and MAX_VALUE.  */
  unsigned int precision1 = wi::min_precision (min_value, sign);
  unsigned int precision2 = wi::min_precision (max_value, sign);
  unsigned int value_precision = MAX (precision1, precision2);
  if (value_precision >= precision)
    return;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
		     " without loss of precision: %G",
		     sign == SIGNED ? "signed" : "unsigned",
		     value_precision, stmt);

  /* Record both the precision the operation itself can use and the
     precision it requires of its inputs.  */
  vect_set_operation_type (stmt_info, type, value_precision, sign);
  vect_set_min_input_precision (stmt_info, type, value_precision);
}
5312 1.1 mrg
/* Use information about the users of STMT's result to decide whether
   STMT (described by STMT_INFO) could be done in a narrower type.
   This is effectively a backward propagation.  */

static void
vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
{
  tree_code code = gimple_assign_rhs_code (stmt);
  /* For COND_EXPRs the precision applies to the "then"/"else" values
     (operand 2 onwards), not to the condition.  */
  unsigned int opno = (code == COND_EXPR ? 2 : 1);
  tree type = TREE_TYPE (gimple_op (stmt, opno));
  if (!vect_narrowable_type_p (type))
    return;

  unsigned int precision = TYPE_PRECISION (type);
  unsigned int operation_precision, min_input_precision;
  switch (code)
    {
    CASE_CONVERT:
      /* Only the bits that contribute to the output matter.  Don't change
	 the precision of the operation itself.  */
      operation_precision = precision;
      min_input_precision = stmt_info->min_output_precision;
      break;

    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
      {
	/* Only constant in-range shift amounts can be analyzed.  */
	tree shift = gimple_assign_rhs2 (stmt);
	if (TREE_CODE (shift) != INTEGER_CST
	    || !wi::ltu_p (wi::to_widest (shift), precision))
	  return;
	unsigned int const_shift = TREE_INT_CST_LOW (shift);
	if (code == LSHIFT_EXPR)
	  {
	    /* Avoid creating an undefined shift.

	       ??? We could instead use min_output_precision as-is and
	       optimize out-of-range shifts to zero.  However, only
	       degenerate testcases shift away all their useful input data,
	       and it isn't natural to drop input operations in the middle
	       of vectorization.  This sort of thing should really be
	       handled before vectorization.  */
	    operation_precision = MAX (stmt_info->min_output_precision,
				       const_shift + 1);
	    /* We need CONST_SHIFT fewer bits of the input.  */
	    min_input_precision = (MAX (operation_precision, const_shift)
				   - const_shift);
	  }
	else
	  {
	    /* We need CONST_SHIFT extra bits to do the operation.  */
	    operation_precision = (stmt_info->min_output_precision
				   + const_shift);
	    min_input_precision = operation_precision;
	  }
	break;
      }

    default:
      if (vect_truncatable_operation_p (code))
	{
	  /* Input bit N has no effect on output bits N-1 and lower.  */
	  operation_precision = stmt_info->min_output_precision;
	  min_input_precision = operation_precision;
	  break;
	}
      return;
    }

  if (operation_precision < precision)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
			 " without affecting users: %G",
			 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
			 operation_precision, stmt);
      vect_set_operation_type (stmt_info, type, operation_precision,
			       TYPE_SIGN (type));
    }
  vect_set_min_input_precision (stmt_info, type, min_input_precision);
}
5394 1.1 mrg
5395 1.1 mrg /* Return true if the statement described by STMT_INFO sets a boolean
5396 1.1 mrg SSA_NAME and if we know how to vectorize this kind of statement using
5397 1.1 mrg vector mask types. */
5398 1.1 mrg
5399 1.1 mrg static bool
5400 1.1 mrg possible_vector_mask_operation_p (stmt_vec_info stmt_info)
5401 1.1 mrg {
5402 1.1 mrg tree lhs = gimple_get_lhs (stmt_info->stmt);
5403 1.1 mrg if (!lhs
5404 1.1 mrg || TREE_CODE (lhs) != SSA_NAME
5405 1.1 mrg || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5406 1.1 mrg return false;
5407 1.1 mrg
5408 1.1 mrg if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5409 1.1 mrg {
5410 1.1 mrg tree_code rhs_code = gimple_assign_rhs_code (assign);
5411 1.1 mrg switch (rhs_code)
5412 1.1 mrg {
5413 1.1 mrg CASE_CONVERT:
5414 1.1 mrg case SSA_NAME:
5415 1.1 mrg case BIT_NOT_EXPR:
5416 1.1 mrg case BIT_IOR_EXPR:
5417 1.1 mrg case BIT_XOR_EXPR:
5418 1.1 mrg case BIT_AND_EXPR:
5419 1.1 mrg return true;
5420 1.1 mrg
5421 1.1 mrg default:
5422 1.1 mrg return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
5423 1.1 mrg }
5424 1.1 mrg }
5425 1.1 mrg else if (is_a <gphi *> (stmt_info->stmt))
5426 1.1 mrg return true;
5427 1.1 mrg return false;
5428 1.1 mrg }
5429 1.1 mrg
/* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
   a vector mask type instead of a normal vector type.  Record the
   result in STMT_INFO->mask_precision.  */

static void
vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
{
  if (!possible_vector_mask_operation_p (stmt_info))
    return;

  /* If at least one boolean input uses a vector mask type,
     pick the mask type with the narrowest elements.

     ??? This is the traditional behavior.  It should always produce
     the smallest number of operations, but isn't necessarily the
     optimal choice.  For example, if we have:

       a = b & c

     where:

     - the user of a wants it to have a mask type for 16-bit elements (M16)
     - b also uses M16
     - c uses a mask type for 8-bit elements (M8)

     then picking M8 gives:

     - 1 M16->M8 pack for b
     - 1 M8 AND for a
     - 2 M8->M16 unpacks for the user of a

     whereas picking M16 would have given:

     - 2 M8->M16 unpacks for c
     - 2 M16 ANDs for a

     The number of operations are equal, but M16 would have given
     a shorter dependency chain and allowed more ILP.  */
  /* ~0U is a sentinel meaning "no mask-typed input found"; if it survives
     to the end we fall back to a normal (nonmask) vector type.  */
  unsigned int precision = ~0U;
  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      /* Take the minimum recorded mask precision over all boolean
	 operands that have an internal definition.  */
      unsigned int nops = gimple_num_ops (assign);
      for (unsigned int i = 1; i < nops; ++i)
	{
	  tree rhs = gimple_op (assign, i);
	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
	    continue;

	  stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
	  if (!def_stmt_info)
	    /* Don't let external or constant operands influence the choice.
	       We can convert them to whichever vector type we pick.  */
	    continue;

	  /* NOTE(review): a zero mask_precision presumably means the def
	     doesn't use a mask type, hence the truthiness guard — confirm
	     against the field's definition.  */
	  if (def_stmt_info->mask_precision)
	    {
	      if (precision > def_stmt_info->mask_precision)
		precision = def_stmt_info->mask_precision;
	    }
	}

      /* If the statement compares two values that shouldn't use vector masks,
	 try comparing the values as normal scalars instead.  */
      tree_code rhs_code = gimple_assign_rhs_code (assign);
      if (precision == ~0U
	  && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
	{
	  tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
	  scalar_mode mode;
	  tree vectype, mask_type;
	  /* Only do this when the target can actually expand the vector
	     comparison for the operand type.  */
	  if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
	      && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
	      && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
	      && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
	    precision = GET_MODE_BITSIZE (mode);
	}
    }
  else
    {
      /* PHI: minimize over the mask precisions of all internally-defined
	 arguments, mirroring the assignment case above.  */
      gphi *phi = as_a <gphi *> (stmt_info->stmt);
      for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
	{
	  tree rhs = gimple_phi_arg_def (phi, i);

	  stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
	  if (!def_stmt_info)
	    /* Don't let external or constant operands influence the choice.
	       We can convert them to whichever vector type we pick.  */
	    continue;

	  if (def_stmt_info->mask_precision)
	    {
	      if (precision > def_stmt_info->mask_precision)
		precision = def_stmt_info->mask_precision;
	    }
	}
    }

  if (dump_enabled_p ())
    {
      if (precision == ~0U)
	dump_printf_loc (MSG_NOTE, vect_location,
			 "using normal nonmask vectors for %G",
			 stmt_info->stmt);
      else
	dump_printf_loc (MSG_NOTE, vect_location,
			 "using boolean precision %d for %G",
			 precision, stmt_info->stmt);
    }

  stmt_info->mask_precision = precision;
}
5542 1.1 mrg
5543 1.1 mrg /* Handle vect_determine_precisions for STMT_INFO, given that we
5544 1.1 mrg have already done so for the users of its result. */
5545 1.1 mrg
5546 1.1 mrg void
5547 1.1 mrg vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
5548 1.1 mrg {
5549 1.1 mrg vect_determine_min_output_precision (vinfo, stmt_info);
5550 1.1 mrg if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
5551 1.1 mrg {
5552 1.1 mrg vect_determine_precisions_from_range (stmt_info, stmt);
5553 1.1 mrg vect_determine_precisions_from_users (stmt_info, stmt);
5554 1.1 mrg }
5555 1.1 mrg }
5556 1.1 mrg
/* Walk backwards through the vectorizable region to determine the
   values of these fields:

   - min_output_precision
   - min_input_precision
   - operation_precision
   - operation_sign.  */

void
vect_determine_precisions (vec_info *vinfo)
{
  DUMP_VECT_SCOPE ("vect_determine_precisions");

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
      unsigned int nbbs = loop->num_nodes;

      /* First pass, forward over the region: mask precision is computed
	 from a statement's operands (vect_determine_mask_precision reads
	 each operand's def), so visit definitions before uses.  */
      for (unsigned int i = 0; i < nbbs; i++)
	{
	  basic_block bb = bbs[i];
	  for (auto gsi = gsi_start_phis (bb);
	       !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
	      if (stmt_info)
		vect_determine_mask_precision (vinfo, stmt_info);
	    }
	  for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	    if (!is_gimple_debug (gsi_stmt (si)))
	      vect_determine_mask_precision
		(vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
	}
      /* Second pass, backward: statement precisions are derived from the
	 users of each result, so visit uses before definitions.  */
      for (unsigned int i = 0; i < nbbs; i++)
	{
	  basic_block bb = bbs[nbbs - i - 1];
	  for (gimple_stmt_iterator si = gsi_last_bb (bb);
	       !gsi_end_p (si); gsi_prev (&si))
	    if (!is_gimple_debug (gsi_stmt (si)))
	      vect_determine_stmt_precisions
		(vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
	  for (auto gsi = gsi_start_phis (bb);
	       !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
	      if (stmt_info)
		vect_determine_stmt_precisions (vinfo, stmt_info);
	    }
	}
    }
  else
    {
      /* Basic-block vectorization: same two passes, but restricted to
	 statements marked vectorizable.  */
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
      for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
	{
	  basic_block bb = bb_vinfo->bbs[i];
	  for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
	      if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
		vect_determine_mask_precision (vinfo, stmt_info);
	    }
	  for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
	      if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
		vect_determine_mask_precision (vinfo, stmt_info);
	    }
	}
      for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
	{
	  for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
	       !gsi_end_p (gsi); gsi_prev (&gsi))
	    {
	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
	      if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
		vect_determine_stmt_precisions (vinfo, stmt_info);
	    }
	  for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
	       !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
	      if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
		vect_determine_stmt_precisions (vinfo, stmt_info);
	    }
	}
    }
}
5646 1.1 mrg
/* Type of a pattern recognizer: given the vec_info and a starting
   statement, on success it returns the main pattern statement and sets
   the tree* out-parameter to the vector type of that statement;
   on failure it returns NULL.  */
typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);

/* A pattern recognizer paired with the name used in dump output.  */
struct vect_recog_func
{
  vect_recog_func_ptr fn;
  const char *name;
};
5654 1.1 mrg
/* Note that ordering matters - the first pattern matching on a stmt is
   taken which means usually the more complex one needs to precede the
   less complex ones (widen_sum only after dot_prod or sad for example).  */
static vect_recog_func vect_vect_recog_func_ptrs[] = {
  { vect_recog_over_widening_pattern, "over_widening" },
  /* Must come after over_widening, which narrows the shift as much as
     possible beforehand.  */
  { vect_recog_average_pattern, "average" },
  { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
  { vect_recog_mulhs_pattern, "mult_high" },
  { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
  { vect_recog_widen_mult_pattern, "widen_mult" },
  { vect_recog_dot_prod_pattern, "dot_prod" },
  { vect_recog_sad_pattern, "sad" },
  { vect_recog_widen_sum_pattern, "widen_sum" },
  { vect_recog_pow_pattern, "pow" },
  { vect_recog_popcount_pattern, "popcount" },
  { vect_recog_widen_shift_pattern, "widen_shift" },
  { vect_recog_rotate_pattern, "rotate" },
  { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
  { vect_recog_divmod_pattern, "divmod" },
  { vect_recog_mult_pattern, "mult" },
  { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
  { vect_recog_bool_pattern, "bool" },
  /* This must come before mask conversion, and includes the parts
     of mask conversion that are needed for gather and scatter
     internal functions.  */
  { vect_recog_gather_scatter_pattern, "gather_scatter" },
  { vect_recog_mask_conversion_pattern, "mask_conversion" },
  { vect_recog_widen_plus_pattern, "widen_plus" },
  { vect_recog_widen_minus_pattern, "widen_minus" },
};

/* Number of entries in vect_vect_recog_func_ptrs.  */
const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
5689 1.1 mrg
/* Mark statements that are involved in a pattern.  */

void
vect_mark_pattern_stmts (vec_info *vinfo,
			 stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
			 tree pattern_vectype)
{
  /* Remember the statement we were asked about; ORIG_STMT_INFO itself may
     be redirected below when it is part of an existing pattern.  */
  stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
  gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);

  gimple *orig_pattern_stmt = NULL;
  if (is_pattern_stmt_p (orig_stmt_info))
    {
      /* We're replacing a statement in an existing pattern definition
	 sequence.  */
      orig_pattern_stmt = orig_stmt_info->stmt;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "replacing earlier pattern %G", orig_pattern_stmt);

      /* To keep the book-keeping simple, just swap the lhs of the
	 old and new statements, so that the old one has a valid but
	 unused lhs.  */
      tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
      gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
      gimple_set_lhs (pattern_stmt, old_lhs);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);

      /* Switch to the statement that ORIG replaces.  */
      orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);

      /* We shouldn't be replacing the main pattern statement.  */
      gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
		  != orig_pattern_stmt);
    }

  /* Give every statement in the definition sequence its own stmt_vec_info,
     linked back to the original scalar statement.  */
  if (def_seq)
    for (gimple_stmt_iterator si = gsi_start (def_seq);
	 !gsi_end_p (si); gsi_next (&si))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "extra pattern stmt: %G", gsi_stmt (si));
	stmt_vec_info pattern_stmt_info
	  = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
				    orig_stmt_info, pattern_vectype);
	/* Stmts in the def sequence are not vectorizable cycle or
	   induction defs, instead they should all be vect_internal_def
	   feeding the main pattern stmt which retains this def type.  */
	STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
      }

  if (orig_pattern_stmt)
    {
      vect_init_pattern_stmt (vinfo, pattern_stmt,
			      orig_stmt_info, pattern_vectype);

      /* Insert all the new pattern statements before the original one.  */
      gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
      gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
					       orig_def_seq);
      gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
      gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);

      /* Remove the pattern statement that this new pattern replaces.  */
      gsi_remove (&gsi, false);
    }
  else
    vect_set_pattern_stmt (vinfo,
			   pattern_stmt, orig_stmt_info, pattern_vectype);

  /* Transfer reduction path info to the pattern.  */
  if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
    {
      gimple_match_op op;
      if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
	gcc_unreachable ();
      /* LOOKFOR starts as the operand on the reduction path; each time a
	 pattern statement uses it we record the operand index and chase
	 the chain through that statement's lhs.  */
      tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
      /* Search the pattern def sequence and the main pattern stmt.  Note
	 we may have inserted all into a containing pattern def sequence
	 so the following is a bit awkward.  */
      gimple_stmt_iterator si;
      gimple *s;
      if (def_seq)
	{
	  si = gsi_start (def_seq);
	  s = gsi_stmt (si);
	  gsi_next (&si);
	}
      else
	{
	  si = gsi_none ();
	  s = pattern_stmt;
	}
      do
	{
	  bool found = false;
	  if (gimple_extract_op (s, &op))
	    for (unsigned i = 0; i < op.num_ops; ++i)
	      if (op.ops[i] == lookfor)
		{
		  STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
		  lookfor = gimple_get_lhs (s);
		  found = true;
		  break;
		}
	  /* The main pattern statement is the last one to consider;
	     stop there whether or not the chain was completed.  */
	  if (s == pattern_stmt)
	    {
	      if (!found && dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "failed to update reduction index.\n");
	      break;
	    }
	  if (gsi_end_p (si))
	    s = pattern_stmt;
	  else
	    {
	      s = gsi_stmt (si);
	      if (s == pattern_stmt)
		/* Found the end inside a bigger pattern def seq.  */
		si = gsi_none ();
	      else
		gsi_next (&si);
	    }
	} while (1);
    }
}
5819 1.1 mrg
5820 1.1 mrg /* Function vect_pattern_recog_1
5821 1.1 mrg
5822 1.1 mrg Input:
5823 1.1 mrg PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
5824 1.1 mrg computation pattern.
5825 1.1 mrg STMT_INFO: A stmt from which the pattern search should start.
5826 1.1 mrg
5827 1.1 mrg If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
5828 1.1 mrg a sequence of statements that has the same functionality and can be
5829 1.1 mrg used to replace STMT_INFO. It returns the last statement in the sequence
5830 1.1 mrg and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
5831 1.1 mrg PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
5832 1.1 mrg statement, having first checked that the target supports the new operation
5833 1.1 mrg in that type.
5834 1.1 mrg
5835 1.1 mrg This function also does some bookkeeping, as explained in the documentation
5836 1.1 mrg for vect_recog_pattern. */
5837 1.1 mrg
5838 1.1 mrg static void
5839 1.1 mrg vect_pattern_recog_1 (vec_info *vinfo,
5840 1.1 mrg vect_recog_func *recog_func, stmt_vec_info stmt_info)
5841 1.1 mrg {
5842 1.1 mrg gimple *pattern_stmt;
5843 1.1 mrg loop_vec_info loop_vinfo;
5844 1.1 mrg tree pattern_vectype;
5845 1.1 mrg
5846 1.1 mrg /* If this statement has already been replaced with pattern statements,
5847 1.1 mrg leave the original statement alone, since the first match wins.
5848 1.1 mrg Instead try to match against the definition statements that feed
5849 1.1 mrg the main pattern statement. */
5850 1.1 mrg if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5851 1.1 mrg {
5852 1.1 mrg gimple_stmt_iterator gsi;
5853 1.1 mrg for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5854 1.1 mrg !gsi_end_p (gsi); gsi_next (&gsi))
5855 1.1 mrg vect_pattern_recog_1 (vinfo, recog_func,
5856 1.1 mrg vinfo->lookup_stmt (gsi_stmt (gsi)));
5857 1.1 mrg return;
5858 1.1 mrg }
5859 1.1 mrg
5860 1.1 mrg gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5861 1.1 mrg pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
5862 1.1 mrg if (!pattern_stmt)
5863 1.1 mrg {
5864 1.1 mrg /* Clear any half-formed pattern definition sequence. */
5865 1.1 mrg STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
5866 1.1 mrg return;
5867 1.1 mrg }
5868 1.1 mrg
5869 1.1 mrg loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5870 1.1 mrg
5871 1.1 mrg /* Found a vectorizable pattern. */
5872 1.1 mrg if (dump_enabled_p ())
5873 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location,
5874 1.1 mrg "%s pattern recognized: %G",
5875 1.1 mrg recog_func->name, pattern_stmt);
5876 1.1 mrg
5877 1.1 mrg /* Mark the stmts that are involved in the pattern. */
5878 1.1 mrg vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
5879 1.1 mrg
5880 1.1 mrg /* Patterns cannot be vectorized using SLP, because they change the order of
5881 1.1 mrg computation. */
5882 1.1 mrg if (loop_vinfo)
5883 1.1 mrg {
5884 1.1 mrg unsigned ix, ix2;
5885 1.1 mrg stmt_vec_info *elem_ptr;
5886 1.1 mrg VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
5887 1.1 mrg elem_ptr, *elem_ptr == stmt_info);
5888 1.1 mrg }
5889 1.1 mrg }
5890 1.1 mrg
5891 1.1 mrg
5892 1.1 mrg /* Function vect_pattern_recog
5893 1.1 mrg
5894 1.1 mrg Input:
5895 1.1 mrg LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
5896 1.1 mrg computation idioms.
5897 1.1 mrg
5898 1.1 mrg Output - for each computation idiom that is detected we create a new stmt
5899 1.1 mrg that provides the same functionality and that can be vectorized. We
5900 1.1 mrg also record some information in the struct_stmt_info of the relevant
5901 1.1 mrg stmts, as explained below:
5902 1.1 mrg
5903 1.1 mrg At the entry to this function we have the following stmts, with the
5904 1.1 mrg following initial value in the STMT_VINFO fields:
5905 1.1 mrg
5906 1.1 mrg stmt in_pattern_p related_stmt vec_stmt
5907 1.1 mrg S1: a_i = .... - - -
5908 1.1 mrg S2: a_2 = ..use(a_i).. - - -
5909 1.1 mrg S3: a_1 = ..use(a_2).. - - -
5910 1.1 mrg S4: a_0 = ..use(a_1).. - - -
5911 1.1 mrg S5: ... = ..use(a_0).. - - -
5912 1.1 mrg
5913 1.1 mrg Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
5914 1.1 mrg represented by a single stmt. We then:
5915 1.1 mrg - create a new stmt S6 equivalent to the pattern (the stmt is not
5916 1.1 mrg inserted into the code)
5917 1.1 mrg - fill in the STMT_VINFO fields as follows:
5918 1.1 mrg
5919 1.1 mrg in_pattern_p related_stmt vec_stmt
5920 1.1 mrg S1: a_i = .... - - -
5921 1.1 mrg S2: a_2 = ..use(a_i).. - - -
5922 1.1 mrg S3: a_1 = ..use(a_2).. - - -
5923 1.1 mrg S4: a_0 = ..use(a_1).. true S6 -
5924 1.1 mrg '---> S6: a_new = .... - S4 -
5925 1.1 mrg S5: ... = ..use(a_0).. - - -
5926 1.1 mrg
5927 1.1 mrg (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
5928 1.1 mrg to each other through the RELATED_STMT field).
5929 1.1 mrg
5930 1.1 mrg S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
5931 1.1 mrg of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
5932 1.1 mrg remain irrelevant unless used by stmts other than S4.
5933 1.1 mrg
5934 1.1 mrg If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
5935 1.1 mrg (because they are marked as irrelevant). It will vectorize S6, and record
5936 1.1 mrg a pointer to the new vector stmt VS6 from S6 (as usual).
5937 1.1 mrg S4 will be skipped, and S5 will be vectorized as usual:
5938 1.1 mrg
5939 1.1 mrg in_pattern_p related_stmt vec_stmt
5940 1.1 mrg S1: a_i = .... - - -
5941 1.1 mrg S2: a_2 = ..use(a_i).. - - -
5942 1.1 mrg S3: a_1 = ..use(a_2).. - - -
5943 1.1 mrg > VS6: va_new = .... - - -
5944 1.1 mrg S4: a_0 = ..use(a_1).. true S6 VS6
5945 1.1 mrg '---> S6: a_new = .... - S4 VS6
5946 1.1 mrg > VS5: ... = ..vuse(va_new).. - - -
5947 1.1 mrg S5: ... = ..use(a_0).. - - -
5948 1.1 mrg
5949 1.1 mrg DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
5950 1.1 mrg elsewhere), and we'll end up with:
5951 1.1 mrg
5952 1.1 mrg VS6: va_new = ....
5953 1.1 mrg VS5: ... = ..vuse(va_new)..
5954 1.1 mrg
5955 1.1 mrg In case of more than one pattern statements, e.g., widen-mult with
5956 1.1 mrg intermediate type:
5957 1.1 mrg
5958 1.1 mrg S1 a_t = ;
5959 1.1 mrg S2 a_T = (TYPE) a_t;
5960 1.1 mrg '--> S3: a_it = (interm_type) a_t;
5961 1.1 mrg S4 prod_T = a_T * CONST;
5962 1.1 mrg '--> S5: prod_T' = a_it w* CONST;
5963 1.1 mrg
5964 1.1 mrg there may be other users of a_T outside the pattern. In that case S2 will
5965 1.1 mrg be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
5966 1.1 mrg and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
5967 1.1 mrg be recorded in S3. */
5968 1.1 mrg
5969 1.1 mrg void
5970 1.1 mrg vect_pattern_recog (vec_info *vinfo)
5971 1.1 mrg {
5972 1.1 mrg class loop *loop;
5973 1.1 mrg basic_block *bbs;
5974 1.1 mrg unsigned int nbbs;
5975 1.1 mrg gimple_stmt_iterator si;
5976 1.1 mrg unsigned int i, j;
5977 1.1 mrg
5978 1.1 mrg vect_determine_precisions (vinfo);
5979 1.1 mrg
5980 1.1 mrg DUMP_VECT_SCOPE ("vect_pattern_recog");
5981 1.1 mrg
5982 1.1 mrg if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5983 1.1 mrg {
5984 1.1 mrg loop = LOOP_VINFO_LOOP (loop_vinfo);
5985 1.1 mrg bbs = LOOP_VINFO_BBS (loop_vinfo);
5986 1.1 mrg nbbs = loop->num_nodes;
5987 1.1 mrg
5988 1.1 mrg /* Scan through the loop stmts, applying the pattern recognition
5989 1.1 mrg functions starting at each stmt visited: */
5990 1.1 mrg for (i = 0; i < nbbs; i++)
5991 1.1 mrg {
5992 1.1 mrg basic_block bb = bbs[i];
5993 1.1 mrg for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5994 1.1 mrg {
5995 1.1 mrg if (is_gimple_debug (gsi_stmt (si)))
5996 1.1 mrg continue;
5997 1.1 mrg stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
5998 1.1 mrg /* Scan over all generic vect_recog_xxx_pattern functions. */
5999 1.1 mrg for (j = 0; j < NUM_PATTERNS; j++)
6000 1.1 mrg vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
6001 1.1 mrg stmt_info);
6002 1.1 mrg }
6003 1.1 mrg }
6004 1.1 mrg }
6005 1.1 mrg else
6006 1.1 mrg {
6007 1.1 mrg bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
6008 1.1 mrg for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
6009 1.1 mrg for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
6010 1.1 mrg !gsi_end_p (gsi); gsi_next (&gsi))
6011 1.1 mrg {
6012 1.1 mrg stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
6013 1.1 mrg if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
6014 1.1 mrg continue;
6015 1.1 mrg
6016 1.1 mrg /* Scan over all generic vect_recog_xxx_pattern functions. */
6017 1.1 mrg for (j = 0; j < NUM_PATTERNS; j++)
6018 1.1 mrg vect_pattern_recog_1 (vinfo,
6019 1.1 mrg &vect_vect_recog_func_ptrs[j], stmt_info);
6020 1.1 mrg }
6021 1.1 mrg }
6022 1.1 mrg
6023 1.1 mrg /* After this no more add_stmt calls are allowed. */
6024 1.1 mrg vinfo->stmt_vec_info_ro = true;
6025 1.1 mrg }
6026