/* Lower vector operations to scalar operations.
2 Copyright (C) 2004-2022 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "diagnostic.h"
32 #include "fold-const.h"
33 #include "stor-layout.h"
34 #include "langhooks.h"
35 #include "tree-eh.h"
36 #include "gimple-iterator.h"
37 #include "gimplify-me.h"
38 #include "gimplify.h"
39 #include "tree-cfg.h"
40 #include "tree-vector-builder.h"
41 #include "vec-perm-indices.h"
42 #include "insn-config.h"
43 #include "tree-ssa-dce.h"
44 #include "gimple-fold.h"
45 #include "gimple-match.h"
46 #include "recog.h" /* FIXME: for insn_data */
47
48
49 /* Build a ternary operation and gimplify it. Emit code before GSI.
50 Return the gimple_val holding the result. */
51
52 static tree
53 gimplify_build3 (gimple_stmt_iterator *gsi, enum tree_code code,
54 tree type, tree a, tree b, tree c)
55 {
56 location_t loc = gimple_location (gsi_stmt (*gsi));
57 gimple_seq stmts = NULL;
58 tree ret = gimple_build (&stmts, loc, code, type, a, b, c);
59 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
60 return ret;
61 }
62
63 /* Build a binary operation and gimplify it. Emit code before GSI.
64 Return the gimple_val holding the result. */
65
66 static tree
67 gimplify_build2 (gimple_stmt_iterator *gsi, enum tree_code code,
68 tree type, tree a, tree b)
69 {
70 location_t loc = gimple_location (gsi_stmt (*gsi));
71 gimple_seq stmts = NULL;
72 tree ret = gimple_build (&stmts, loc, code, type, a, b);
73 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
74 return ret;
75 }
76
77 /* Build a unary operation and gimplify it. Emit code before GSI.
78 Return the gimple_val holding the result. */
79
80 static tree
81 gimplify_build1 (gimple_stmt_iterator *gsi, enum tree_code code, tree type,
82 tree a)
83 {
84 location_t loc = gimple_location (gsi_stmt (*gsi));
85 gimple_seq stmts = NULL;
86 tree ret = gimple_build (&stmts, loc, code, type, a);
87 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
88 return ret;
89 }
90
91
92 static void expand_vector_operations_1 (gimple_stmt_iterator *, bitmap);
93
/* Return the number of elements in a vector type TYPE that we have
   already decided needs to be expanded piecewise.  We don't support
   this kind of expansion for variable-length vectors, since we should
   always check for target support before introducing uses of those.  */
static unsigned int
nunits_for_known_piecewise_op (const_tree type)
{
  /* to_constant () asserts the subpart count is a compile-time constant,
     which the contract above guarantees.  */
  return TYPE_VECTOR_SUBPARTS (type).to_constant ();
}
103
104 /* Return true if TYPE1 has more elements than TYPE2, where either
105 type may be a vector or a scalar. */
106
107 static inline bool
108 subparts_gt (tree type1, tree type2)
109 {
110 poly_uint64 n1 = VECTOR_TYPE_P (type1) ? TYPE_VECTOR_SUBPARTS (type1) : 1;
111 poly_uint64 n2 = VECTOR_TYPE_P (type2) ? TYPE_VECTOR_SUBPARTS (type2) : 1;
112 return known_gt (n1, n2);
113 }
114
/* Build a constant of type TYPE, made of VALUE's bits replicated
   every WIDTH bits to fit TYPE's precision.  */
static tree
build_replicated_const (tree type, unsigned int width, HOST_WIDE_INT value)
{
  /* Number of HOST_WIDE_INT chunks needed to cover TYPE's precision.  */
  int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
    / HOST_BITS_PER_WIDE_INT;
  unsigned HOST_WIDE_INT low, mask;
  HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
  int i;

  gcc_assert (n && n <= WIDE_INT_MAX_ELTS);

  if (width == HOST_BITS_PER_WIDE_INT)
    low = value;
  else
    {
      /* ~0 / mask is 0x..0101 with one set bit every WIDTH bits;
	 multiplying by the truncated VALUE replicates it into every
	 WIDTH-bit slot of the host word.  */
      mask = ((HOST_WIDE_INT)1 << width) - 1;
      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
    }

  /* Fill each host word of the wide int with the replicated pattern.  */
  for (i = 0; i < n; i++)
    a[i] = low;

  gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
  return wide_int_to_tree
    (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
}
143
144 static GTY(()) tree vector_inner_type;
145 static GTY(()) tree vector_last_type;
146 static GTY(()) int vector_last_nunits;
147
148 /* Return a suitable vector types made of SUBPARTS units each of mode
149 "word_mode" (the global variable). */
150 static tree
151 build_word_mode_vector_type (int nunits)
152 {
153 if (!vector_inner_type)
154 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
155 else if (vector_last_nunits == nunits)
156 {
157 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
158 return vector_last_type;
159 }
160
161 vector_last_nunits = nunits;
162 vector_last_type = build_vector_type (vector_inner_type, nunits);
163 return vector_last_type;
164 }
165
166 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
167 tree, tree, tree, tree, tree, enum tree_code,
168 tree);
169
/* Extract the vector element of type TYPE at BITPOS with BITSIZE from T
   and return it.  */

tree
tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
		  tree t, tree bitsize, tree bitpos)
{
  /* We're using the resimplify API and maybe_push_res_to_seq to
     simplify the BIT_FIELD_REF but restrict the simplification to
     a single stmt while at the same time following SSA edges for
     simplification with already emitted CTORs.  */
  gimple_match_op opr;
  opr.set_op (BIT_FIELD_REF, type, t, bitsize, bitpos);
  opr.resimplify (NULL, follow_all_ssa_edges);
  gimple_seq stmts = NULL;
  tree res = maybe_push_res_to_seq (&opr, &stmts);
  if (!res)
    {
      /* This can happen if SSA_NAME_OCCURS_IN_ABNORMAL_PHI are
	 used.  Build BIT_FIELD_REF manually otherwise.  */
      t = build3 (BIT_FIELD_REF, type, t, bitsize, bitpos);
      res = make_ssa_name (type);
      gimple *g = gimple_build_assign (res, t);
      gsi_insert_before (gsi, g, GSI_SAME_STMT);
      return res;
    }
  /* Emit whatever statements the simplification produced before GSI.  */
  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
  return res;
}
199
200 static tree
201 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
202 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
203 enum tree_code code, tree type ATTRIBUTE_UNUSED)
204 {
205 tree rhs_type = inner_type;
206
207 /* For ABSU_EXPR, use the signed type for the rhs if the rhs was signed. */
208 if (code == ABSU_EXPR
209 && ANY_INTEGRAL_TYPE_P (TREE_TYPE (a))
210 && !TYPE_UNSIGNED (TREE_TYPE (a)))
211 rhs_type = signed_type_for (rhs_type);
212
213 a = tree_vec_extract (gsi, rhs_type, a, bitsize, bitpos);
214 return gimplify_build1 (gsi, code, inner_type, a);
215 }
216
217 static tree
218 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
219 tree bitpos, tree bitsize, enum tree_code code,
220 tree type ATTRIBUTE_UNUSED)
221 {
222 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
223 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
224 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
225 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
226 return gimplify_build2 (gsi, code, inner_type, a, b);
227 }
228
229 /* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0
230
231 INNER_TYPE is the type of A and B elements
232
233 returned expression is of signed integer type with the
234 size equal to the size of INNER_TYPE. */
235 static tree
236 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
237 tree bitpos, tree bitsize, enum tree_code code, tree type)
238 {
239 tree stype = TREE_TYPE (type);
240 tree cst_false = build_zero_cst (stype);
241 tree cst_true = build_all_ones_cst (stype);
242 tree cmp;
243
244 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
245 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
246
247 cmp = build2 (code, boolean_type_node, a, b);
248 return gimplify_build3 (gsi, COND_EXPR, stype, cmp, cst_true, cst_false);
249 }
250
251 /* Expand vector addition to scalars. This does bit twiddling
252 in order to increase parallelism:
253
254 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
255 (a ^ b) & 0x80808080
256
257 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
258 (a ^ ~b) & 0x80808080
259
260 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
261
262 This optimization should be done only if 4 vector items or more
263 fit into a word. */
264 static tree
265 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
266 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
267 enum tree_code code, tree type ATTRIBUTE_UNUSED)
268 {
269 unsigned int width = vector_element_bits (TREE_TYPE (a));
270 tree inner_type = TREE_TYPE (TREE_TYPE (a));
271 unsigned HOST_WIDE_INT max;
272 tree low_bits, high_bits, a_low, b_low, result_low, signs;
273
274 max = GET_MODE_MASK (TYPE_MODE (inner_type));
275 low_bits = build_replicated_const (word_type, width, max >> 1);
276 high_bits = build_replicated_const (word_type, width, max & ~(max >> 1));
277
278 a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
279 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
280
281 signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
282 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
283 if (code == PLUS_EXPR)
284 a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
285 else
286 {
287 a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
288 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
289 }
290
291 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
292 result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
293 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
294 }
295
296 static tree
297 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
298 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
299 tree bitsize ATTRIBUTE_UNUSED,
300 enum tree_code code ATTRIBUTE_UNUSED,
301 tree type ATTRIBUTE_UNUSED)
302 {
303 unsigned int width = vector_element_bits (TREE_TYPE (b));
304 tree inner_type = TREE_TYPE (TREE_TYPE (b));
305 HOST_WIDE_INT max;
306 tree low_bits, high_bits, b_low, result_low, signs;
307
308 max = GET_MODE_MASK (TYPE_MODE (inner_type));
309 low_bits = build_replicated_const (word_type, width, max >> 1);
310 high_bits = build_replicated_const (word_type, width, max & ~(max >> 1));
311
312 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
313
314 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
315 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
316 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
317 result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
318 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
319 }
320
/* Expand a vector operation to scalars, by using many operations
   whose type is the vector type's inner type.  */
static tree
expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
			 tree type, tree inner_type,
			 tree a, tree b, enum tree_code code,
			 bool parallel_p, tree ret_type = NULL_TREE)
{
  vec<constructor_elt, va_gc> *v;
  tree part_width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = nunits_for_known_piecewise_op (type);
  /* Number of original vector elements covered by one call to F
     (more than 1 when INNER_TYPE is wider than the element type).  */
  int delta = tree_to_uhwi (part_width) / vector_element_bits (type);
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  if (nunits == 1
      || warning_suppressed_p (gsi_stmt (*gsi),
			       OPT_Wvector_operation_performance))
    /* Do not diagnose decomposing single element vectors or when
       decomposing vectorizer produced operations.  */
    ;
  else if (ret_type || !parallel_p)
    warning_at (loc, OPT_Wvector_operation_performance,
		"vector operation will be expanded piecewise");
  else
    warning_at (loc, OPT_Wvector_operation_performance,
		"vector operation will be expanded in parallel");

  if (!ret_type)
    ret_type = type;
  vec_alloc (v, (nunits + delta - 1) / delta);
  bool constant_p = true;
  /* Apply F to each PART_WIDTH-sized chunk, collecting the results
     as constructor elements.  */
  for (i = 0; i < nunits;
       i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
    {
      tree result = f (gsi, inner_type, a, b, index, part_width, code,
		       ret_type);
      if (!CONSTANT_CLASS_P (result))
	constant_p = false;
      constructor_elt ce = {NULL_TREE, result};
      v->quick_push (ce);
    }

  /* If every chunk folded to a constant, return a VECTOR_CST instead
     of a CONSTRUCTOR.  */
  if (constant_p)
    return build_vector_from_ctor (ret_type, v);
  else
    return build_constructor (ret_type, v);
}
370
/* Expand a vector operation to scalars with the freedom to use
   a scalar integer type, or to use a different size for the items
   in the vector type.  */
static tree
expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
			tree a, tree b, enum tree_code code)
{
  tree result, compute_type;
  int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  /* We have three strategies.  If the type is already correct, just do
     the operation an element at a time.  Else, if the vector is wider than
     one word, do it a word at a time; finally, if the vector is smaller
     than one word, do it as a scalar.  */
  if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
     return expand_vector_piecewise (gsi, f,
				     type, TREE_TYPE (type),
				     a, b, code, true);
  else if (n_words > 1)
    {
      /* Reinterpret the vector as N_WORDS word-mode elements and expand
	 one word at a time.  */
      tree word_type = build_word_mode_vector_type (n_words);
      result = expand_vector_piecewise (gsi, f,
					word_type, TREE_TYPE (word_type),
					a, b, code, true);
      result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
					 GSI_SAME_STMT);
    }
  else
    {
      /* Use a single scalar operation with a mode no wider than word_mode.  */
      if (!warning_suppressed_p (gsi_stmt (*gsi),
				 OPT_Wvector_operation_performance))
	warning_at (loc, OPT_Wvector_operation_performance,
		    "vector operation will be expanded with a "
		    "single scalar operation");
      scalar_int_mode mode
	= int_mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), 0).require ();
      compute_type = lang_hooks.types.type_for_mode (mode, 1);
      result = f (gsi, compute_type, a, b, bitsize_zero_node,
		  TYPE_SIZE (compute_type), code, type);
    }

  return result;
}
416
417 /* Expand a vector operation to scalars; for integer types we can use
418 special bit twiddling tricks to do the sums a word at a time, using
419 function F_PARALLEL instead of F. These tricks are done only if
420 they can process at least four items, that is, only if the vector
421 holds at least four items and if a word can hold four items. */
422 static tree
423 expand_vector_addition (gimple_stmt_iterator *gsi,
424 elem_op_func f, elem_op_func f_parallel,
425 tree type, tree a, tree b, enum tree_code code)
426 {
427 int parts_per_word = BITS_PER_WORD / vector_element_bits (type);
428
429 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
430 && parts_per_word >= 4
431 && nunits_for_known_piecewise_op (type) >= 4)
432 return expand_vector_parallel (gsi, f_parallel,
433 type, a, b, code);
434 else
435 return expand_vector_piecewise (gsi, f,
436 type, TREE_TYPE (type),
437 a, b, code, false);
438 }
439
440 static bool
441 expand_vector_condition (gimple_stmt_iterator *gsi, bitmap dce_ssa_names);
442
/* Try to expand vector comparison expression OP0 CODE OP1 by
   querying optab if the following expression:
   VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
   can be expanded.  */
static tree
expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
			  tree op1, enum tree_code code,
			  bitmap dce_ssa_names)
{
  tree lhs = gimple_assign_lhs (gsi_stmt (*gsi));
  use_operand_p use_p;
  imm_use_iterator iterator;
  bool vec_cond_expr_only = true;

  /* As seen in PR95830, we should not expand comparisons that are only
     feeding a VEC_COND_EXPR statement.  */
  auto_vec<gimple *> uses;
  FOR_EACH_IMM_USE_FAST (use_p, iterator, lhs)
    {
      gimple *use = USE_STMT (use_p);
      if (is_gimple_debug (use))
	continue;
      /* Only count uses where LHS is strictly the condition operand of
	 a VEC_COND_EXPR, not one of its value operands.  */
      if (is_gimple_assign (use)
	  && gimple_assign_rhs_code (use) == VEC_COND_EXPR
	  && gimple_assign_rhs1 (use) == lhs
	  && gimple_assign_rhs2 (use) != lhs
	  && gimple_assign_rhs3 (use) != lhs)
	uses.safe_push (use);
      else
	vec_cond_expr_only = false;
    }

  /* If every use is such a VEC_COND_EXPR, try expanding those instead;
     if any of them fails, fall back to expanding the comparison.  */
  if (vec_cond_expr_only)
    for (gimple *use : uses)
      {
	gimple_stmt_iterator it = gsi_for_stmt (use);
	if (!expand_vector_condition (&it, dce_ssa_names))
	  {
	    vec_cond_expr_only = false;
	    break;
	  }
      }

  if (!uses.is_empty () && vec_cond_expr_only)
    return NULL_TREE;

  tree t;
  if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code))
    {
      /* Special case: a boolean vector result stored as a scalar integer
	 mask (one bit per element, narrower than the element count times
	 the element size).  Build the mask bit by bit.  */
      if (VECTOR_BOOLEAN_TYPE_P (type)
	  && SCALAR_INT_MODE_P (TYPE_MODE (type))
	  && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
		       TYPE_VECTOR_SUBPARTS (type)
		       * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
						(TREE_TYPE (type)))))
	{
	  tree inner_type = TREE_TYPE (TREE_TYPE (op0));
	  tree part_width = vector_element_bits_tree (TREE_TYPE (op0));
	  tree index = bitsize_int (0);
	  int nunits = nunits_for_known_piecewise_op (TREE_TYPE (op0));
	  int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (type));
	  tree ret_type = build_nonstandard_integer_type (prec, 1);
	  tree ret_inner_type = boolean_type_node;
	  int i;
	  location_t loc = gimple_location (gsi_stmt (*gsi));
	  t = build_zero_cst (ret_type);

	  if (TYPE_PRECISION (ret_inner_type) != 1)
	    ret_inner_type = build_nonstandard_integer_type (1, 1);
	  if (!warning_suppressed_p (gsi_stmt (*gsi),
				     OPT_Wvector_operation_performance))
	    warning_at (loc, OPT_Wvector_operation_performance,
			"vector operation will be expanded piecewise");
	  /* Compare one element pair at a time and insert each 1-bit
	     result at bit position I of the accumulated mask.  */
	  for (i = 0; i < nunits;
	       i++, index = int_const_binop (PLUS_EXPR, index, part_width))
	    {
	      tree a = tree_vec_extract (gsi, inner_type, op0, part_width,
					 index);
	      tree b = tree_vec_extract (gsi, inner_type, op1, part_width,
					 index);
	      tree result = gimplify_build2 (gsi, code, ret_inner_type, a, b);
	      t = gimplify_build3 (gsi, BIT_INSERT_EXPR, ret_type, t, result,
				   bitsize_int (i));
	    }
	  t = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
	}
      else
	t = expand_vector_piecewise (gsi, do_compare, type,
				     TREE_TYPE (TREE_TYPE (op0)), op0, op1,
				     code, false);
    }
  else
    t = NULL_TREE;

  return t;
}
539
540 /* Helper function of expand_vector_divmod. Gimplify a RSHIFT_EXPR in type
541 of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
542 the result if successful, otherwise return NULL_TREE. */
543 static tree
544 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
545 {
546 optab op;
547 unsigned int i, nunits = nunits_for_known_piecewise_op (type);
548 bool scalar_shift = true;
549
550 for (i = 1; i < nunits; i++)
551 {
552 if (shiftcnts[i] != shiftcnts[0])
553 scalar_shift = false;
554 }
555
556 if (scalar_shift && shiftcnts[0] == 0)
557 return op0;
558
559 if (scalar_shift)
560 {
561 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
562 if (op != unknown_optab
563 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
564 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
565 build_int_cst (NULL_TREE, shiftcnts[0]));
566 }
567
568 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
569 if (op != unknown_optab
570 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
571 {
572 tree_vector_builder vec (type, nunits, 1);
573 for (i = 0; i < nunits; i++)
574 vec.quick_push (build_int_cst (TREE_TYPE (type), shiftcnts[i]));
575 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, vec.build ());
576 }
577
578 return NULL_TREE;
579 }
580
/* Try to expand integer vector division by constant using
   widening multiply, shifts and additions.  */
static tree
expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
		      tree op1, enum tree_code code)
{
  bool use_pow2 = true;
  bool has_vector_shift = true;
  bool use_abs_op1 = false;
  /* MODE encodes the multiplier strategy that every element must agree
     on: -1 undetermined yet, -2 unusable, 0 and 1 the two unsigned
     variants (without/with the add-and-halve fixup), 2-5 the four
     signed variants (see the comments in the switches below).  */
  int mode = -1, this_mode;
  int pre_shift = -1, post_shift;
  unsigned int nunits = nunits_for_known_piecewise_op (type);
  /* One scratch allocation carved into four per-element arrays.  */
  int *shifts = XALLOCAVEC (int, nunits * 4);
  int *pre_shifts = shifts + nunits;
  int *post_shifts = pre_shifts + nunits;
  int *shift_temps = post_shifts + nunits;
  unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
  int prec = TYPE_PRECISION (TREE_TYPE (type));
  int dummy_int;
  unsigned int i;
  signop sign_p = TYPE_SIGN (TREE_TYPE (type));
  unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
  tree cur_op, mulcst, tem;
  optab op;

  if (prec > HOST_BITS_PER_WIDE_INT)
    return NULL_TREE;

  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    has_vector_shift = false;

  /* Analysis phase.  Determine if all op1 elements are either power
     of two and it is possible to expand it using shifts (or for remainder
     using masking).  Additionally compute the multiplicative constants
     and pre and post shifts if the division is to be expanded using
     widening or high part multiplication plus shifts.  */
  for (i = 0; i < nunits; i++)
    {
      tree cst = VECTOR_CST_ELT (op1, i);
      unsigned HOST_WIDE_INT ml;

      if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
	return NULL_TREE;
      pre_shifts[i] = 0;
      post_shifts[i] = 0;
      mulc[i] = 0;
      if (use_pow2
	  && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
	use_pow2 = false;
      if (use_pow2)
	{
	  shifts[i] = tree_log2 (cst);
	  /* Non-uniform shifts for division need a vector shift insn.  */
	  if (shifts[i] != shifts[0]
	      && code == TRUNC_DIV_EXPR
	      && !has_vector_shift)
	    use_pow2 = false;
	}
      /* Once the multiplier strategy is known-unusable, only keep
	 checking the power-of-two path.  */
      if (mode == -2)
	continue;
      if (sign_p == UNSIGNED)
	{
	  unsigned HOST_WIDE_INT mh;
	  unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;

	  if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
	    /* FIXME: Can transform this into op0 >= op1 ? 1 : 0.  */
	    return NULL_TREE;

	  if (d <= 1)
	    {
	      mode = -2;
	      continue;
	    }

	  /* Find a suitable multiplier and right shift count
	     instead of multiplying with D.  */
	  mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);

	  /* If the suggested multiplier is more than SIZE bits, we can
	     do better for even divisors, using an initial right shift.  */
	  if ((mh != 0 && (d & 1) == 0)
	      || (!has_vector_shift && pre_shift != -1))
	    {
	      if (has_vector_shift)
		pre_shift = ctz_or_zero (d);
	      else if (pre_shift == -1)
		{
		  /* Without a vector shift the pre-shift must be uniform;
		     pick the smallest trailing-zero count of any divisor.  */
		  unsigned int j;
		  for (j = 0; j < nunits; j++)
		    {
		      tree cst2 = VECTOR_CST_ELT (op1, j);
		      unsigned HOST_WIDE_INT d2;
		      int this_pre_shift;

		      if (!tree_fits_uhwi_p (cst2))
			return NULL_TREE;
		      d2 = tree_to_uhwi (cst2) & mask;
		      if (d2 == 0)
			return NULL_TREE;
		      this_pre_shift = floor_log2 (d2 & -d2);
		      if (pre_shift == -1 || this_pre_shift < pre_shift)
			pre_shift = this_pre_shift;
		    }
		  if (i != 0 && pre_shift != 0)
		    {
		      /* Restart.  */
		      i = -1U;
		      mode = -1;
		      continue;
		    }
		}
	      if (pre_shift != 0)
		{
		  if ((d >> pre_shift) <= 1)
		    {
		      mode = -2;
		      continue;
		    }
		  mh = choose_multiplier (d >> pre_shift, prec,
					  prec - pre_shift,
					  &ml, &post_shift, &dummy_int);
		  gcc_assert (!mh);
		  pre_shifts[i] = pre_shift;
		}
	    }
	  if (!mh)
	    this_mode = 0;
	  else
	    this_mode = 1;
	}
      else
	{
	  HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
	  unsigned HOST_WIDE_INT abs_d;

	  if (d == -1)
	    return NULL_TREE;

	  /* Since d might be INT_MIN, we have to cast to
	     unsigned HOST_WIDE_INT before negating to avoid
	     undefined signed overflow.  */
	  abs_d = (d >= 0
		   ? (unsigned HOST_WIDE_INT) d
		   : - (unsigned HOST_WIDE_INT) d);

	  /* n rem d = n rem -d */
	  if (code == TRUNC_MOD_EXPR && d < 0)
	    {
	      d = abs_d;
	      use_abs_op1 = true;
	    }
	  if (abs_d == HOST_WIDE_INT_1U << (prec - 1))
	    {
	      /* This case is not handled correctly below.  */
	      mode = -2;
	      continue;
	    }
	  if (abs_d <= 1)
	    {
	      mode = -2;
	      continue;
	    }

	  choose_multiplier (abs_d, prec, prec - 1, &ml,
			     &post_shift, &dummy_int);
	  if (ml >= HOST_WIDE_INT_1U << (prec - 1))
	    {
	      /* Multiplier doesn't fit in PREC-1 bits: use the variant
		 that adds op0 after the highpart multiply.  */
	      this_mode = 4 + (d < 0);
	      ml |= HOST_WIDE_INT_M1U << (prec - 1);
	    }
	  else
	    this_mode = 2 + (d < 0);
	}
      mulc[i] = ml;
      post_shifts[i] = post_shift;
      /* All elements must share one strategy (and, without a vector
	 shift, one post-shift); otherwise the multiplier path fails.  */
      if ((i && !has_vector_shift && post_shifts[0] != post_shift)
	  || post_shift >= prec
	  || pre_shifts[i] >= prec)
	this_mode = -2;

      if (i == 0)
	mode = this_mode;
      else if (mode != this_mode)
	mode = -2;
    }

  if (use_pow2)
    {
      /* Power-of-two path: expand with shifts (division) or masking
	 (remainder), with a sign fixup ADDEND for signed operands.  */
      tree addend = NULL_TREE;
      if (sign_p == SIGNED)
	{
	  tree uns_type;

	  /* Both division and remainder sequences need
	     op0 < 0 ? mask : 0 computed.  It can be either computed as
	     (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
	     if none of the shifts is 0, or as the conditional.  */
	  for (i = 0; i < nunits; i++)
	    if (shifts[i] == 0)
	      break;
	  uns_type
	    = build_vector_type (build_nonstandard_integer_type (prec, 1),
				 nunits);
	  if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
	    {
	      for (i = 0; i < nunits; i++)
		shift_temps[i] = prec - 1;
	      cur_op = add_rshift (gsi, type, op0, shift_temps);
	      if (cur_op != NULL_TREE)
		{
		  cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
					    uns_type, cur_op);
		  for (i = 0; i < nunits; i++)
		    shift_temps[i] = prec - shifts[i];
		  cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
		  if (cur_op != NULL_TREE)
		    addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
					      type, cur_op);
		}
	    }
	  /* Fall back to ADDEND = op0 < 0 ? (1 << shift) - 1 : 0
	     via VEC_COND_EXPR when the shift sequence didn't work.  */
	  if (addend == NULL_TREE
	      && expand_vec_cond_expr_p (type, type, LT_EXPR))
	    {
	      tree zero, cst, mask_type, mask;
	      gimple *stmt, *cond;

	      mask_type = truth_type_for (type);
	      zero = build_zero_cst (type);
	      mask = make_ssa_name (mask_type);
	      cond = gimple_build_assign (mask, LT_EXPR, op0, zero);
	      gsi_insert_before (gsi, cond, GSI_SAME_STMT);
	      tree_vector_builder vec (type, nunits, 1);
	      for (i = 0; i < nunits; i++)
		vec.quick_push (build_int_cst (TREE_TYPE (type),
					       (HOST_WIDE_INT_1U
						<< shifts[i]) - 1));
	      cst = vec.build ();
	      addend = make_ssa_name (type);
	      stmt
		= gimple_build_assign (addend, VEC_COND_EXPR, mask, cst, zero);
	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
	    }
	}
      if (code == TRUNC_DIV_EXPR)
	{
	  if (sign_p == UNSIGNED)
	    {
	      /* q = op0 >> shift;  */
	      cur_op = add_rshift (gsi, type, op0, shifts);
	      if (cur_op != NULL_TREE)
		return cur_op;
	    }
	  else if (addend != NULL_TREE)
	    {
	      /* t1 = op0 + addend;
		 q = t1 >> shift;  */
	      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
	      if (op != unknown_optab
		  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
		{
		  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
		  cur_op = add_rshift (gsi, type, cur_op, shifts);
		  if (cur_op != NULL_TREE)
		    return cur_op;
		}
	    }
	}
      else
	{
	  tree mask;
	  tree_vector_builder vec (type, nunits, 1);
	  for (i = 0; i < nunits; i++)
	    vec.quick_push (build_int_cst (TREE_TYPE (type),
					   (HOST_WIDE_INT_1U
					    << shifts[i]) - 1));
	  mask = vec.build ();
	  op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
	  if (op != unknown_optab
	      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
	    {
	      if (sign_p == UNSIGNED)
		/* r = op0 & mask;  */
		return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
	      else if (addend != NULL_TREE)
		{
		  /* t1 = op0 + addend;
		     t2 = t1 & mask;
		     r = t2 - addend;  */
		  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
		  if (op != unknown_optab
		      && optab_handler (op, TYPE_MODE (type))
			 != CODE_FOR_nothing)
		    {
		      cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
						addend);
		      cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
						cur_op, mask);
		      op = optab_for_tree_code (MINUS_EXPR, type,
						optab_default);
		      if (op != unknown_optab
			  && optab_handler (op, TYPE_MODE (type))
			     != CODE_FOR_nothing)
			return gimplify_build2 (gsi, MINUS_EXPR, type,
						cur_op, addend);
		    }
		}
	    }
	}
    }

  if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    return NULL_TREE;

  /* The multiplier path needs a highpart-multiply instruction.  */
  if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
    return NULL_TREE;

  cur_op = op0;

  /* Pre-multiply setup, per strategy selected during analysis.  */
  switch (mode)
    {
    case 0:
      gcc_assert (sign_p == UNSIGNED);
      /* t1 = oprnd0 >> pre_shift;
	 t2 = t1 h* ml;
	 q = t2 >> post_shift;  */
      cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      break;
    case 1:
      gcc_assert (sign_p == UNSIGNED);
      for (i = 0; i < nunits; i++)
	{
	  shift_temps[i] = 1;
	  post_shifts[i]--;
	}
      break;
    case 2:
    case 3:
    case 4:
    case 5:
      gcc_assert (sign_p == SIGNED);
      for (i = 0; i < nunits; i++)
	shift_temps[i] = prec - 1;
      break;
    default:
      return NULL_TREE;
    }

  tree_vector_builder vec (type, nunits, 1);
  for (i = 0; i < nunits; i++)
    vec.quick_push (build_int_cst (TREE_TYPE (type), mulc[i]));
  mulcst = vec.build ();

  cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);

  /* Post-multiply fixup, per strategy.  */
  switch (mode)
    {
    case 0:
      /* t1 = oprnd0 >> pre_shift;
	 t2 = t1 h* ml;
	 q = t2 >> post_shift;  */
      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
      break;
    case 1:
      /* t1 = oprnd0 h* ml;
	 t2 = oprnd0 - t1;
	 t3 = t2 >> 1;
	 t4 = t1 + t3;
	 q = t4 >> (post_shift - 1);  */
      op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
      tem = add_rshift (gsi, type, tem, shift_temps);
      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
      cur_op = add_rshift (gsi, type, tem, post_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      break;
    case 2:
    case 3:
    case 4:
    case 5:
      /* t1 = oprnd0 h* ml;
	 t2 = t1; [ iff (mode & 2) != 0 ]
	 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
	 t3 = t2 >> post_shift;
	 t4 = oprnd0 >> (prec - 1);
	 q = t3 - t4; [ iff (mode & 1) == 0 ]
	 q = t4 - t3; [ iff (mode & 1) != 0 ]  */
      if ((mode & 2) == 0)
	{
	  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
	  if (op == unknown_optab
	      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	    return NULL_TREE;
	  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
	}
      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      tem = add_rshift (gsi, type, op0, shift_temps);
      if (tem == NULL_TREE)
	return NULL_TREE;
      op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      if ((mode & 1) == 0)
	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
      else
	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
      break;
    default:
      gcc_unreachable ();
    }

  if (code == TRUNC_DIV_EXPR)
    return cur_op;

  /* We divided.  Now finish by:
     t1 = q * oprnd1;
     r = oprnd0 - t1;  */
  op = optab_for_tree_code (MULT_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;
  if (use_abs_op1)
    {
      /* n rem d = n rem -d was used above; multiply back by |op1|.  */
      tree_vector_builder elts;
      if (!elts.new_unary_operation (type, op1, false))
	return NULL_TREE;
      unsigned int count = elts.encoded_nelts ();
      for (unsigned int i = 0; i < count; ++i)
	{
	  tree elem1 = VECTOR_CST_ELT (op1, i);

	  tree elt = const_unop (ABS_EXPR, TREE_TYPE (elem1), elem1);
	  if (elt == NULL_TREE)
	    return NULL_TREE;
	  elts.quick_push (elt);
	}
      op1 = elts.build ();
    }
  tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
  op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;
  return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
}
1040
/* Expand a vector condition to scalars, by using many conditions
   on the vector's elements.  */

static bool
expand_vector_condition (gimple_stmt_iterator *gsi, bitmap dce_ssa_names)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree type = TREE_TYPE (gimple_assign_lhs (stmt));
  tree a = gimple_assign_rhs1 (stmt);
  tree a1 = a;
  tree a2 = NULL_TREE;
  bool a_is_comparison = false;
  bool a_is_scalar_bitmask = false;
  tree b = gimple_assign_rhs2 (stmt);
  tree c = gimple_assign_rhs3 (stmt);
  vec<constructor_elt, va_gc> *v;
  tree constr;
  tree inner_type = TREE_TYPE (type);
  tree width = vector_element_bits_tree (type);
  tree cond_type = TREE_TYPE (TREE_TYPE (a));
  tree comp_inner_type = cond_type;
  tree index = bitsize_int (0);
  tree comp_width = width;
  tree comp_index = index;
  location_t loc = gimple_location (gsi_stmt (*gsi));
  tree_code code = TREE_CODE (a);
  gassign *assign = NULL;

  /* If the condition is an SSA name defined by a vector comparison,
     fold that comparison into the expansion: A1/A2 become the
     comparison operands and CODE the comparison code.  */
  if (code == SSA_NAME)
    {
      assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (a));
      if (assign != NULL
	  && TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison)
	{
	  a_is_comparison = true;
	  a1 = gimple_assign_rhs1 (assign);
	  a2 = gimple_assign_rhs2 (assign);
	  code = gimple_assign_rhs_code (assign);
	  comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
	  comp_width = vector_element_bits_tree (TREE_TYPE (a1));
	}
    }

  /* Nothing to do if the target supports the VEC_COND_EXPR directly,
     or supports the comparison when B/C select all-ones/all-zeros.  */
  if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), code)
      || (integer_all_onesp (b) && integer_zerop (c)
	  && expand_vec_cmp_expr_p (type, TREE_TYPE (a1), code)))
    {
      gcc_assert (TREE_CODE (a) == SSA_NAME || TREE_CODE (a) == VECTOR_CST);
      return true;
    }

  /* If a has vector boolean type and is a comparison, above
     expand_vec_cond_expr_p might fail, even if both the comparison and
     VEC_COND_EXPR could be supported individually.  See PR109176.  */
  if (a_is_comparison
      && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a))
      && expand_vec_cond_expr_p (type, TREE_TYPE (a), SSA_NAME)
      && expand_vec_cmp_expr_p (TREE_TYPE (a1), TREE_TYPE (a), code))
    return true;

  /* Handle vector boolean types with bitmasks.  If there is a comparison
     and we can expand the comparison into the vector boolean bitmask,
     or otherwise if it is compatible with type, we can transform
      vbfld_1 = x_2 < y_3 ? vbfld_4 : vbfld_5;
     into
      tmp_6 = x_2 < y_3;
      tmp_7 = tmp_6 & vbfld_4;
      tmp_8 = ~tmp_6;
      tmp_9 = tmp_8 & vbfld_5;
      vbfld_1 = tmp_7 | tmp_9;
     Similarly for vbfld_10 instead of x_2 < y_3.  */
  if (VECTOR_BOOLEAN_TYPE_P (type)
      && SCALAR_INT_MODE_P (TYPE_MODE (type))
      && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
		   TYPE_VECTOR_SUBPARTS (type)
		   * GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (type))))
      && (a_is_comparison
	  ? useless_type_conversion_p (type, TREE_TYPE (a))
	  : expand_vec_cmp_expr_p (TREE_TYPE (a1), type, TREE_CODE (a))))
    {
      if (a_is_comparison)
	a = gimplify_build2 (gsi, code, type, a1, a2);
      a1 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a, b);
      a2 = gimplify_build1 (gsi, BIT_NOT_EXPR, type, a);
      a2 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a2, c);
      a = gimplify_build2 (gsi, BIT_IOR_EXPR, type, a1, a2);
      gimple_assign_set_rhs_from_tree (gsi, a);
      update_stmt (gsi_stmt (*gsi));
      return true;
    }

  /* TODO: try and find a smaller vector type.  */

  if (!warning_suppressed_p (stmt, OPT_Wvector_operation_performance))
    warning_at (loc, OPT_Wvector_operation_performance,
		"vector condition will be expanded piecewise");

  /* A vector boolean narrower than one element per bit-position lives
     in a scalar integer mode; test it one bit at a time instead of
     extracting elements.  */
  if (!a_is_comparison
      && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a))
      && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (a)))
      && known_lt (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (a))),
		   TYPE_VECTOR_SUBPARTS (TREE_TYPE (a))
		   * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
						(TREE_TYPE (TREE_TYPE (a))))))
    {
      a_is_scalar_bitmask = true;
      int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (a)));
      tree atype = build_nonstandard_integer_type (prec, 1);
      a = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, atype, a);
    }
  else if (!a_is_comparison
	   && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a)))
    comp_width = vector_element_bits_tree (TREE_TYPE (a));

  int nunits = nunits_for_known_piecewise_op (type);
  vec_alloc (v, nunits);
  bool constant_p = true;
  /* Build one scalar COND_EXPR per element, collecting the results in
     a CONSTRUCTOR.  INDEX walks the result; COMP_INDEX walks the
     (possibly differently-sized) condition elements.  */
  for (int i = 0; i < nunits; i++)
    {
      tree aa, result;
      tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
      tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
      if (a_is_comparison)
	{
	  tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1,
				       comp_width, comp_index);
	  tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2,
				       comp_width, comp_index);
	  aa = build2 (code, cond_type, aa1, aa2);
	}
      else if (a_is_scalar_bitmask)
	{
	  /* Test bit I of the scalar bitmask.  */
	  wide_int w = wi::set_bit_in_zero (i, TYPE_PRECISION (TREE_TYPE (a)));
	  result = gimplify_build2 (gsi, BIT_AND_EXPR, TREE_TYPE (a),
				    a, wide_int_to_tree (TREE_TYPE (a), w));
	  aa = build2 (NE_EXPR, boolean_type_node, result,
		       build_zero_cst (TREE_TYPE (a)));
	}
      else
	aa = tree_vec_extract (gsi, cond_type, a, comp_width, comp_index);
      result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
      if (!CONSTANT_CLASS_P (result))
	constant_p = false;
      constructor_elt ce = {NULL_TREE, result};
      v->quick_push (ce);
      index = int_const_binop (PLUS_EXPR, index, width);
      if (width == comp_width)
	comp_index = index;
      else
	comp_index = int_const_binop (PLUS_EXPR, comp_index, comp_width);
    }

  if (constant_p)
    constr = build_vector_from_ctor (type, v);
  else
    constr = build_constructor (type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));

  /* The comparison feeding the VEC_COND_EXPR was folded into the
     per-element conditions above; mark its result for DCE.  */
  if (a_is_comparison)
    bitmap_set_bit (dce_ssa_names,
		    SSA_NAME_VERSION (gimple_assign_lhs (assign)));

  return false;
}
1206
/* Expand the vector operation CODE of vector type TYPE from statement
   ASSIGN, computing in chunks of COMPUTE_TYPE.  Returns the tree to use
   as the replacement RHS.  SSA names made dead by folding a comparison
   are recorded in DCE_SSA_NAMES (via expand_vector_comparison).  */

static tree
expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
			 gassign *assign, enum tree_code code,
			 bitmap dce_ssa_names)
{
  machine_mode compute_mode = TYPE_MODE (compute_type);

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (!VECTOR_MODE_P (compute_mode))
    switch (code)
      {
      case PLUS_EXPR:
      case MINUS_EXPR:
	/* Word-parallel add/sub is only valid when overflow can't trap.  */
	if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
	  return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
					 gimple_assign_rhs1 (assign),
					 gimple_assign_rhs2 (assign), code);
	break;

      case NEGATE_EXPR:
	if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
	  return expand_vector_addition (gsi, do_unop, do_negate, type,
					 gimple_assign_rhs1 (assign),
					 NULL_TREE, code);
	break;

      case BIT_AND_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
	/* Bitwise operations never cross element boundaries, so they can
	   always be done on whole words in parallel.  */
	return expand_vector_parallel (gsi, do_binop, type,
				       gimple_assign_rhs1 (assign),
				       gimple_assign_rhs2 (assign), code);

      case BIT_NOT_EXPR:
	return expand_vector_parallel (gsi, do_unop, type,
				       gimple_assign_rhs1 (assign),
				       NULL_TREE, code);
      case EQ_EXPR:
      case NE_EXPR:
      case GT_EXPR:
      case LT_EXPR:
      case GE_EXPR:
      case LE_EXPR:
      case UNEQ_EXPR:
      case UNGT_EXPR:
      case UNLT_EXPR:
      case UNGE_EXPR:
      case UNLE_EXPR:
      case LTGT_EXPR:
      case ORDERED_EXPR:
      case UNORDERED_EXPR:
	{
	  tree rhs1 = gimple_assign_rhs1 (assign);
	  tree rhs2 = gimple_assign_rhs2 (assign);

	  return expand_vector_comparison (gsi, type, rhs1, rhs2, code,
					   dce_ssa_names);
	}

      case TRUNC_DIV_EXPR:
      case TRUNC_MOD_EXPR:
	{
	  tree rhs1 = gimple_assign_rhs1 (assign);
	  tree rhs2 = gimple_assign_rhs2 (assign);
	  tree ret;

	  /* Division by a constant vector may be replaceable by a
	     multiply-highpart/shift sequence; only worthwhile when
	     optimizing and when the type has a hardware vector mode.  */
	  if (!optimize
	      || !VECTOR_INTEGER_TYPE_P (type)
	      || TREE_CODE (rhs2) != VECTOR_CST
	      || !VECTOR_MODE_P (TYPE_MODE (type)))
	    break;

	  ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
	  if (ret != NULL_TREE)
	    return ret;
	  break;
	}

      default:
	break;
      }

  /* Fall back to expanding element by element.  */
  if (TREE_CODE_CLASS (code) == tcc_unary)
    return expand_vector_piecewise (gsi, do_unop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    NULL_TREE, code, false);
  else
    return expand_vector_piecewise (gsi, do_binop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    gimple_assign_rhs2 (assign), code, false);
}
1300
/* Try to optimize
   a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
   style stmts into:
   _9 = { b_7, b_7, b_7, b_7 };
   a_5 = _9 + { 0, 3, 6, 9 };
   because vector splat operation is usually more efficient
   than piecewise initialization of the vector.  */

static void
optimize_vector_constructor (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);
  tree type = TREE_TYPE (rhs);
  unsigned int i, j;
  unsigned HOST_WIDE_INT nelts;
  bool all_same = true;
  constructor_elt *elt;
  gimple *g;
  tree base = NULL_TREE;
  optab op;

  /* Only fully-populated constructors of constant-length vectors with
     more than two elements are worth transforming.  */
  if (!TYPE_VECTOR_SUBPARTS (type).is_constant (&nelts)
      || nelts <= 2
      || CONSTRUCTOR_NELTS (rhs) != nelts)
    return;
  /* The rewritten form needs hardware support for vector addition.  */
  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return;
  /* Check that every element is an SSA name reaching the same BASE
     through a chain of "x = y + CST" statements (chains are walked at
     most NELTS + 1 steps deep).  */
  FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
    if (TREE_CODE (elt->value) != SSA_NAME
	|| TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
      return;
    else
      {
	tree this_base = elt->value;
	if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
	  all_same = false;
	for (j = 0; j < nelts + 1; j++)
	  {
	    g = SSA_NAME_DEF_STMT (this_base);
	    if (is_gimple_assign (g)
		&& gimple_assign_rhs_code (g) == PLUS_EXPR
		&& TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
		&& TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
		&& !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
	      this_base = gimple_assign_rhs1 (g);
	    else
	      break;
	  }
	if (i == 0)
	  base = this_base;
	else if (this_base != base)
	  return;
      }
  /* All-identical elements are already a splat; nothing to gain.  */
  if (all_same)
    return;
  /* Accumulate each element's constant offset from BASE by re-walking
     its PLUS_EXPR chain; bail out on overflow or non-constant folds.  */
  tree_vector_builder cst (type, nelts, 1);
  for (i = 0; i < nelts; i++)
    {
      tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;
      tree elt = build_zero_cst (TREE_TYPE (base));
      while (this_base != base)
	{
	  g = SSA_NAME_DEF_STMT (this_base);
	  elt = fold_binary (PLUS_EXPR, TREE_TYPE (base),
			     elt, gimple_assign_rhs2 (g));
	  if (elt == NULL_TREE
	      || TREE_CODE (elt) != INTEGER_CST
	      || TREE_OVERFLOW (elt))
	    return;
	  this_base = gimple_assign_rhs1 (g);
	}
      cst.quick_push (elt);
    }
  /* Rewrite as a splat of BASE plus a constant offset vector.  */
  for (i = 0; i < nelts; i++)
    CONSTRUCTOR_ELT (rhs, i)->value = base;
  g = gimple_build_assign (make_ssa_name (type), rhs);
  gsi_insert_before (gsi, g, GSI_SAME_STMT);
  g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
			   cst.build ());
  gsi_replace (gsi, g, false);
}
1386
1387 /* Return a type for the widest vector mode with the same element type as
1389 type ORIGINAL_VECTOR_TYPE, with at most the same number of elements as type
1390 ORIGINAL_VECTOR_TYPE and that is supported by the target for an operation
1391 with optab OP, or return NULL_TREE if none is found. */
1392
1393 static tree
1394 type_for_widest_vector_mode (tree original_vector_type, optab op)
1395 {
1396 gcc_assert (VECTOR_TYPE_P (original_vector_type));
1397 tree type = TREE_TYPE (original_vector_type);
1398 machine_mode inner_mode = TYPE_MODE (type);
1399 machine_mode best_mode = VOIDmode, mode;
1400 poly_int64 best_nunits = 0;
1401
1402 if (SCALAR_FLOAT_MODE_P (inner_mode))
1403 mode = MIN_MODE_VECTOR_FLOAT;
1404 else if (SCALAR_FRACT_MODE_P (inner_mode))
1405 mode = MIN_MODE_VECTOR_FRACT;
1406 else if (SCALAR_UFRACT_MODE_P (inner_mode))
1407 mode = MIN_MODE_VECTOR_UFRACT;
1408 else if (SCALAR_ACCUM_MODE_P (inner_mode))
1409 mode = MIN_MODE_VECTOR_ACCUM;
1410 else if (SCALAR_UACCUM_MODE_P (inner_mode))
1411 mode = MIN_MODE_VECTOR_UACCUM;
1412 else if (inner_mode == BImode)
1413 mode = MIN_MODE_VECTOR_BOOL;
1414 else
1415 mode = MIN_MODE_VECTOR_INT;
1416
1417 FOR_EACH_MODE_FROM (mode, mode)
1418 if (GET_MODE_INNER (mode) == inner_mode
1419 && maybe_gt (GET_MODE_NUNITS (mode), best_nunits)
1420 && optab_handler (op, mode) != CODE_FOR_nothing
1421 && known_le (GET_MODE_NUNITS (mode),
1422 TYPE_VECTOR_SUBPARTS (original_vector_type)))
1423 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
1424
1425 if (best_mode == VOIDmode)
1426 return NULL_TREE;
1427 else
1428 return build_vector_type_for_mode (type, best_mode);
1429 }
1430
1431
/* Build a reference to the element of the vector VECT.  Function
   returns either the element itself, either BIT_FIELD_REF, or an
   ARRAY_REF expression.

   GSI is required to insert temporary variables while building a
   reference to the element of the vector VECT.

   PTMPVEC is a pointer to the temporary variable for caching
   purposes.  In case when PTMPVEC is NULL new temporary variable
   will be created.  */
static tree
vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
{
  tree vect_type, vect_elt_type;
  gimple *asgn;
  tree tmpvec;
  tree arraytype;
  bool need_asgn = true;
  unsigned int elements;

  vect_type = TREE_TYPE (vect);
  vect_elt_type = TREE_TYPE (vect_type);
  elements = nunits_for_known_piecewise_op (vect_type);

  /* Constant index: resolve the element at compile time if possible.  */
  if (TREE_CODE (idx) == INTEGER_CST)
    {
      unsigned HOST_WIDE_INT index;

      /* Given that we're about to compute a binary modulus,
	 we don't care about the high bits of the value.  */
      index = TREE_INT_CST_LOW (idx);
      if (!tree_fits_uhwi_p (idx) || index >= elements)
	{
	  /* Out-of-range indices wrap modulo the element count.  */
	  index &= elements - 1;
	  idx = build_int_cst (TREE_TYPE (idx), index);
	}

      /* When lowering a vector statement sequence do some easy
         simplification by looking through intermediate vector results.  */
      if (TREE_CODE (vect) == SSA_NAME)
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (vect);
	  if (is_gimple_assign (def_stmt)
	      && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
		  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
	    vect = gimple_assign_rhs1 (def_stmt);
	}

      if (TREE_CODE (vect) == VECTOR_CST)
	return VECTOR_CST_ELT (vect, index);
      else if (TREE_CODE (vect) == CONSTRUCTOR
	       && (CONSTRUCTOR_NELTS (vect) == 0
		   || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
		      != VECTOR_TYPE))
	{
	  /* Elements missing from a scalar-element CONSTRUCTOR are
	     implicitly zero.  */
	  if (index < CONSTRUCTOR_NELTS (vect))
	    return CONSTRUCTOR_ELT (vect, index)->value;
	  return build_zero_cst (vect_elt_type);
	}
      else
	{
	  /* Otherwise read the element with a BIT_FIELD_REF at the
	     element's bit offset.  */
	  tree size = vector_element_bits_tree (vect_type);
	  tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
				  size);
	  return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
	}
    }

  /* Variable index: spill the vector to an addressable temporary
     (cached through PTMPVEC across calls) and index it as an array.  */
  if (!ptmpvec)
    tmpvec = create_tmp_var (vect_type, "vectmp");
  else if (!*ptmpvec)
    tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
  else
    {
      tmpvec = *ptmpvec;
      need_asgn = false;
    }

  if (need_asgn)
    {
      TREE_ADDRESSABLE (tmpvec) = 1;
      asgn = gimple_build_assign (tmpvec, vect);
      gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
    }

  arraytype = build_array_type_nelts (vect_elt_type, elements);
  return build4 (ARRAY_REF, vect_elt_type,
		 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
		 idx, NULL_TREE, NULL_TREE);
}
1522
/* Check if VEC_PERM_EXPR within the given setting is supported
   by hardware, or lower it piecewise.

   When VEC_PERM_EXPR has the same first and second operands:
   VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
   {v0[mask[0]], v0[mask[1]], ...}
   MASK and V0 must have the same number of elements.

   Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
   V0 and V1 must have the same type.  MASK, V0, V1 must have the
   same number of arguments.  */

static void
lower_vec_perm (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree mask = gimple_assign_rhs3 (stmt);
  tree vec0 = gimple_assign_rhs1 (stmt);
  tree vec1 = gimple_assign_rhs2 (stmt);
  tree vect_type = TREE_TYPE (vec0);
  tree mask_type = TREE_TYPE (mask);
  tree vect_elt_type = TREE_TYPE (vect_type);
  tree mask_elt_type = TREE_TYPE (mask_type);
  unsigned HOST_WIDE_INT elements;
  vec<constructor_elt, va_gc> *v;
  tree constr, t, si, i_val;
  tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
  bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
  location_t loc = gimple_location (gsi_stmt (*gsi));
  unsigned i;

  /* Piecewise lowering needs a compile-time constant element count.  */
  if (!TYPE_VECTOR_SUBPARTS (vect_type).is_constant (&elements))
    return;

  /* Look through an SSA mask to the constant vector that defines it.  */
  if (TREE_CODE (mask) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (mask);
      if (is_gimple_assign (def_stmt)
	  && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
	mask = gimple_assign_rhs1 (def_stmt);
    }

  vec_perm_builder sel_int;

  if (TREE_CODE (mask) == VECTOR_CST
      && tree_to_vec_perm_builder (&sel_int, mask))
    {
      vec_perm_indices indices (sel_int, 2, elements);
      /* Keep the statement as-is if the target handles this constant
	 permutation directly.  */
      if (can_vec_perm_const_p (TYPE_MODE (vect_type), indices))
	{
	  gimple_assign_set_rhs3 (stmt, mask);
	  update_stmt (stmt);
	  return;
	}
      /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
	 vector as VEC1 and a right element shift MASK.  */
      if (optab_handler (vec_shr_optab, TYPE_MODE (vect_type))
	  != CODE_FOR_nothing
	  && TREE_CODE (vec1) == VECTOR_CST
	  && initializer_zerop (vec1)
	  && maybe_ne (indices[0], 0)
	  && known_lt (poly_uint64 (indices[0]), elements))
	{
	  bool ok_p = indices.series_p (0, 1, indices[0], 1);
	  if (!ok_p)
	    {
	      for (i = 1; i < elements; ++i)
		{
		  poly_uint64 actual = indices[i];
		  poly_uint64 expected = i + indices[0];
		  /* Indices into the second vector are all equivalent.  */
		  if (maybe_lt (actual, elements)
		      ? maybe_ne (actual, expected)
		      : maybe_lt (expected, elements))
		    break;
		}
	      ok_p = i == elements;
	    }
	  if (ok_p)
	    {
	      gimple_assign_set_rhs3 (stmt, mask);
	      update_stmt (stmt);
	      return;
	    }
	}
      /* And similarly vec_shl pattern.  */
      if (optab_handler (vec_shl_optab, TYPE_MODE (vect_type))
	  != CODE_FOR_nothing
	  && TREE_CODE (vec0) == VECTOR_CST
	  && initializer_zerop (vec0))
	{
	  /* FIRST marks the index where elements start coming from the
	     (nonzero) second operand; they must form one contiguous
	     ascending run preceded only by zeros from VEC0.  */
	  unsigned int first = 0;
	  for (i = 0; i < elements; ++i)
	    if (known_eq (poly_uint64 (indices[i]), elements))
	      {
		if (i == 0 || first)
		  break;
		first = i;
	      }
	    else if (first
		     ? maybe_ne (poly_uint64 (indices[i]),
				 elements + i - first)
		     : maybe_ge (poly_uint64 (indices[i]), elements))
	      break;
	  if (first && i == elements)
	    {
	      gimple_assign_set_rhs3 (stmt, mask);
	      update_stmt (stmt);
	      return;
	    }
	}
    }
  else if (can_vec_perm_var_p (TYPE_MODE (vect_type)))
    return;

  if (!warning_suppressed_p (stmt, OPT_Wvector_operation_performance))
    warning_at (loc, OPT_Wvector_operation_performance,
		"vector shuffling operation will be expanded piecewise");

  /* Select each result element individually.  */
  vec_alloc (v, elements);
  bool constant_p = true;
  for (i = 0; i < elements; i++)
    {
      si = size_int (i);
      i_val = vector_element (gsi, mask, si, &masktmp);

      if (TREE_CODE (i_val) == INTEGER_CST)
	{
	  unsigned HOST_WIDE_INT index;

	  /* Out-of-range mask values wrap modulo 2*ELEMENTS.  */
	  index = TREE_INT_CST_LOW (i_val);
	  if (!tree_fits_uhwi_p (i_val) || index >= elements)
	    i_val = build_int_cst (mask_elt_type, index & (elements - 1));

	  /* Bit ELEMENTS of the index selects the second operand.  */
	  if (two_operand_p && (index & elements) != 0)
	    t = vector_element (gsi, vec1, i_val, &vec1tmp);
	  else
	    t = vector_element (gsi, vec0, i_val, &vec0tmp);

	  t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
					true, GSI_SAME_STMT);
	}
      else
	{
	  /* Variable index: emit a runtime select between the two
	     operands (if they differ) using bit ELEMENTS of the index.  */
	  tree cond = NULL_TREE, v0_val;

	  if (two_operand_p)
	    {
	      cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
				  build_int_cst (mask_elt_type, elements));
	      cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					       true, GSI_SAME_STMT);
	    }

	  i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
			       build_int_cst (mask_elt_type, elements - 1));
	  i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
					    true, GSI_SAME_STMT);

	  v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
	  v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
					     true, GSI_SAME_STMT);

	  if (two_operand_p)
	    {
	      tree v1_val;

	      v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
	      v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
						 true, GSI_SAME_STMT);

	      cond = fold_build2 (EQ_EXPR, boolean_type_node,
				  cond, build_zero_cst (mask_elt_type));
	      cond = fold_build3 (COND_EXPR, vect_elt_type,
				  cond, v0_val, v1_val);
	      t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					    true, GSI_SAME_STMT);
	    }
	  else
	    t = v0_val;
	}

      if (!CONSTANT_CLASS_P (t))
	constant_p = false;
      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
    }

  if (constant_p)
    constr = build_vector_from_ctor (vect_type, v);
  else
    constr = build_constructor (vect_type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}
1718
1719 /* If OP is a uniform vector return the element it is a splat from. */
1720
1721 static tree
1722 ssa_uniform_vector_p (tree op)
1723 {
1724 if (TREE_CODE (op) == VECTOR_CST
1725 || TREE_CODE (op) == VEC_DUPLICATE_EXPR
1726 || TREE_CODE (op) == CONSTRUCTOR)
1727 return uniform_vector_p (op);
1728 if (TREE_CODE (op) == SSA_NAME)
1729 {
1730 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
1731 if (gimple_assign_single_p (def_stmt))
1732 return uniform_vector_p (gimple_assign_rhs1 (def_stmt));
1733 }
1734 return NULL_TREE;
1735 }
1736
1737 /* Return type in which CODE operation with optab OP can be
1738 computed. */
1739
1740 static tree
1741 get_compute_type (enum tree_code code, optab op, tree type)
1742 {
1743 /* For very wide vectors, try using a smaller vector mode. */
1744 tree compute_type = type;
1745 if (op
1746 && (!VECTOR_MODE_P (TYPE_MODE (type))
1747 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
1748 {
1749 tree vector_compute_type
1750 = type_for_widest_vector_mode (type, op);
1751 if (vector_compute_type != NULL_TREE
1752 && maybe_ne (TYPE_VECTOR_SUBPARTS (vector_compute_type), 1U)
1753 && (optab_handler (op, TYPE_MODE (vector_compute_type))
1754 != CODE_FOR_nothing))
1755 compute_type = vector_compute_type;
1756 }
1757
1758 /* If we are breaking a BLKmode vector into smaller pieces,
1759 type_for_widest_vector_mode has already looked into the optab,
1760 so skip these checks. */
1761 if (compute_type == type)
1762 {
1763 machine_mode compute_mode = TYPE_MODE (compute_type);
1764 if (VECTOR_MODE_P (compute_mode))
1765 {
1766 if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
1767 return compute_type;
1768 if (code == MULT_HIGHPART_EXPR
1769 && can_mult_highpart_p (compute_mode,
1770 TYPE_UNSIGNED (compute_type)))
1771 return compute_type;
1772 }
1773 /* There is no operation in hardware, so fall back to scalars. */
1774 compute_type = TREE_TYPE (type);
1775 }
1776
1777 return compute_type;
1778 }
1779
1780 static tree
1781 do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
1782 tree bitpos, tree bitsize, enum tree_code code,
1783 tree type ATTRIBUTE_UNUSED)
1784 {
1785 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
1786 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1787 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
1788 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
1789 tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
1790 return gimplify_build3 (gsi, code, inner_type, unshare_expr (cond), a, b);
1791 }
1792
1793 /* Expand a vector COND_EXPR to scalars, piecewise. */
1794 static void
1795 expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
1796 {
1797 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1798 tree lhs = gimple_assign_lhs (stmt);
1799 tree type = TREE_TYPE (lhs);
1800 tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
1801 machine_mode compute_mode = TYPE_MODE (compute_type);
1802 gcc_assert (compute_mode != BLKmode);
1803 tree rhs2 = gimple_assign_rhs2 (stmt);
1804 tree rhs3 = gimple_assign_rhs3 (stmt);
1805 tree new_rhs;
1806
1807 /* If the compute mode is not a vector mode (hence we are not decomposing
1808 a BLKmode vector to smaller, hardware-supported vectors), we may want
1809 to expand the operations in parallel. */
1810 if (!VECTOR_MODE_P (compute_mode))
1811 new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
1812 COND_EXPR);
1813 else
1814 new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
1815 rhs2, rhs3, COND_EXPR, false);
1816 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1817 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1818 new_rhs);
1819
1820 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1821 way to do it is change expand_vector_operation and its callees to
1822 return a tree_code, RHS1 and RHS2 instead of a tree. */
1823 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1824 update_stmt (gsi_stmt (*gsi));
1825 }
1826
1827 /* Callback for expand_vector_piecewise to do VEC_CONVERT ifn call
1828 lowering. If INNER_TYPE is not a vector type, this is a scalar
1829 fallback. */
1830
1831 static tree
1832 do_vec_conversion (gimple_stmt_iterator *gsi, tree inner_type, tree a,
1833 tree decl, tree bitpos, tree bitsize,
1834 enum tree_code code, tree type)
1835 {
1836 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1837 if (!VECTOR_TYPE_P (inner_type))
1838 return gimplify_build1 (gsi, code, TREE_TYPE (type), a);
1839 if (code == CALL_EXPR)
1840 {
1841 gimple *g = gimple_build_call (decl, 1, a);
1842 tree lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (decl)));
1843 gimple_call_set_lhs (g, lhs);
1844 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1845 return lhs;
1846 }
1847 else
1848 {
1849 tree outer_type = build_vector_type (TREE_TYPE (type),
1850 TYPE_VECTOR_SUBPARTS (inner_type));
1851 return gimplify_build1 (gsi, code, outer_type, a);
1852 }
1853 }
1854
1855 /* Similarly, but for narrowing conversion. */
1856
1857 static tree
1858 do_vec_narrow_conversion (gimple_stmt_iterator *gsi, tree inner_type, tree a,
1859 tree, tree bitpos, tree, enum tree_code code,
1860 tree type)
1861 {
1862 tree itype = build_vector_type (TREE_TYPE (inner_type),
1863 exact_div (TYPE_VECTOR_SUBPARTS (inner_type),
1864 2));
1865 tree b = tree_vec_extract (gsi, itype, a, TYPE_SIZE (itype), bitpos);
1866 tree c = tree_vec_extract (gsi, itype, a, TYPE_SIZE (itype),
1867 int_const_binop (PLUS_EXPR, bitpos,
1868 TYPE_SIZE (itype)));
1869 tree outer_type = build_vector_type (TREE_TYPE (type),
1870 TYPE_VECTOR_SUBPARTS (inner_type));
1871 return gimplify_build2 (gsi, code, outer_type, b, c);
1872 }
1873
1874 /* Expand VEC_CONVERT ifn call. */
1875
1876 static void
1877 expand_vector_conversion (gimple_stmt_iterator *gsi)
1878 {
1879 gimple *stmt = gsi_stmt (*gsi);
1880 gimple *g;
1881 tree lhs = gimple_call_lhs (stmt);
1882 if (lhs == NULL_TREE)
1883 {
1884 g = gimple_build_nop ();
1885 gsi_replace (gsi, g, false);
1886 return;
1887 }
1888 tree arg = gimple_call_arg (stmt, 0);
1889 tree ret_type = TREE_TYPE (lhs);
1890 tree arg_type = TREE_TYPE (arg);
1891 tree new_rhs, compute_type = TREE_TYPE (arg_type);
1892 enum tree_code code = NOP_EXPR;
1893 enum tree_code code1 = ERROR_MARK;
1894 enum { NARROW, NONE, WIDEN } modifier = NONE;
1895 optab optab1 = unknown_optab;
1896
1897 gcc_checking_assert (VECTOR_TYPE_P (ret_type) && VECTOR_TYPE_P (arg_type));
1898 if (INTEGRAL_TYPE_P (TREE_TYPE (ret_type))
1899 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg_type)))
1900 code = FIX_TRUNC_EXPR;
1901 else if (INTEGRAL_TYPE_P (TREE_TYPE (arg_type))
1902 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (ret_type)))
1903 code = FLOAT_EXPR;
1904 unsigned int ret_elt_bits = vector_element_bits (ret_type);
1905 unsigned int arg_elt_bits = vector_element_bits (arg_type);
1906 if (ret_elt_bits < arg_elt_bits)
1907 modifier = NARROW;
1908 else if (ret_elt_bits > arg_elt_bits)
1909 modifier = WIDEN;
1910
1911 if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR))
1912 {
1913 if (supportable_convert_operation (code, ret_type, arg_type, &code1))
1914 {
1915 g = gimple_build_assign (lhs, code1, arg);
1916 gsi_replace (gsi, g, false);
1917 return;
1918 }
1919 /* Can't use get_compute_type here, as supportable_convert_operation
1920 doesn't necessarily use an optab and needs two arguments. */
1921 tree vec_compute_type
1922 = type_for_widest_vector_mode (arg_type, mov_optab);
1923 if (vec_compute_type
1924 && VECTOR_MODE_P (TYPE_MODE (vec_compute_type)))
1925 {
1926 unsigned HOST_WIDE_INT nelts
1927 = constant_lower_bound (TYPE_VECTOR_SUBPARTS (vec_compute_type));
1928 while (nelts > 1)
1929 {
1930 tree ret1_type = build_vector_type (TREE_TYPE (ret_type), nelts);
1931 tree arg1_type = build_vector_type (TREE_TYPE (arg_type), nelts);
1932 if (supportable_convert_operation (code, ret1_type, arg1_type,
1933 &code1))
1934 {
1935 new_rhs = expand_vector_piecewise (gsi, do_vec_conversion,
1936 ret_type, arg1_type, arg,
1937 NULL_TREE, code1, false);
1938 g = gimple_build_assign (lhs, new_rhs);
1939 gsi_replace (gsi, g, false);
1940 return;
1941 }
1942 nelts = nelts / 2;
1943 }
1944 }
1945 }
1946 else if (modifier == NARROW)
1947 {
1948 switch (code)
1949 {
1950 CASE_CONVERT:
1951 code1 = VEC_PACK_TRUNC_EXPR;
1952 optab1 = optab_for_tree_code (code1, arg_type, optab_default);
1953 break;
1954 case FIX_TRUNC_EXPR:
1955 code1 = VEC_PACK_FIX_TRUNC_EXPR;
1956 /* The signedness is determined from output operand. */
1957 optab1 = optab_for_tree_code (code1, ret_type, optab_default);
1958 break;
1959 case FLOAT_EXPR:
1960 code1 = VEC_PACK_FLOAT_EXPR;
1961 optab1 = optab_for_tree_code (code1, arg_type, optab_default);
1962 break;
1963 default:
1964 gcc_unreachable ();
1965 }
1966
1967 if (optab1)
1968 compute_type = get_compute_type (code1, optab1, arg_type);
1969 enum insn_code icode1;
1970 if (VECTOR_TYPE_P (compute_type)
1971 && ((icode1 = optab_handler (optab1, TYPE_MODE (compute_type)))
1972 != CODE_FOR_nothing)
1973 && VECTOR_MODE_P (insn_data[icode1].operand[0].mode))
1974 {
1975 tree cretd_type
1976 = build_vector_type (TREE_TYPE (ret_type),
1977 TYPE_VECTOR_SUBPARTS (compute_type) * 2);
1978 if (insn_data[icode1].operand[0].mode == TYPE_MODE (cretd_type))
1979 {
1980 if (compute_type == arg_type)
1981 {
1982 new_rhs = gimplify_build2 (gsi, code1, cretd_type,
1983 arg, build_zero_cst (arg_type));
1984 new_rhs = tree_vec_extract (gsi, ret_type, new_rhs,
1985 TYPE_SIZE (ret_type),
1986 bitsize_int (0));
1987 g = gimple_build_assign (lhs, new_rhs);
1988 gsi_replace (gsi, g, false);
1989 return;
1990 }
1991 tree dcompute_type
1992 = build_vector_type (TREE_TYPE (compute_type),
1993 TYPE_VECTOR_SUBPARTS (compute_type) * 2);
1994 if (TYPE_MAIN_VARIANT (dcompute_type)
1995 == TYPE_MAIN_VARIANT (arg_type))
1996 new_rhs = do_vec_narrow_conversion (gsi, dcompute_type, arg,
1997 NULL_TREE, bitsize_int (0),
1998 NULL_TREE, code1,
1999 ret_type);
2000 else
2001 new_rhs = expand_vector_piecewise (gsi,
2002 do_vec_narrow_conversion,
2003 arg_type, dcompute_type,
2004 arg, NULL_TREE, code1,
2005 false, ret_type);
2006 g = gimple_build_assign (lhs, new_rhs);
2007 gsi_replace (gsi, g, false);
2008 return;
2009 }
2010 }
2011 }
2012 else if (modifier == WIDEN)
2013 {
2014 enum tree_code code2 = ERROR_MARK;
2015 optab optab2 = unknown_optab;
2016 switch (code)
2017 {
2018 CASE_CONVERT:
2019 code1 = VEC_UNPACK_LO_EXPR;
2020 code2 = VEC_UNPACK_HI_EXPR;
2021 break;
2022 case FIX_TRUNC_EXPR:
2023 code1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
2024 code2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
2025 break;
2026 case FLOAT_EXPR:
2027 code1 = VEC_UNPACK_FLOAT_LO_EXPR;
2028 code2 = VEC_UNPACK_FLOAT_HI_EXPR;
2029 break;
2030 default:
2031 gcc_unreachable ();
2032 }
2033 if (BYTES_BIG_ENDIAN)
2034 std::swap (code1, code2);
2035
2036 if (code == FIX_TRUNC_EXPR)
2037 {
2038 /* The signedness is determined from output operand. */
2039 optab1 = optab_for_tree_code (code1, ret_type, optab_default);
2040 optab2 = optab_for_tree_code (code2, ret_type, optab_default);
2041 }
2042 else
2043 {
2044 optab1 = optab_for_tree_code (code1, arg_type, optab_default);
2045 optab2 = optab_for_tree_code (code2, arg_type, optab_default);
2046 }
2047
2048 if (optab1 && optab2)
2049 compute_type = get_compute_type (code1, optab1, arg_type);
2050
2051 enum insn_code icode1, icode2;
2052 if (VECTOR_TYPE_P (compute_type)
2053 && ((icode1 = optab_handler (optab1, TYPE_MODE (compute_type)))
2054 != CODE_FOR_nothing)
2055 && ((icode2 = optab_handler (optab2, TYPE_MODE (compute_type)))
2056 != CODE_FOR_nothing)
2057 && VECTOR_MODE_P (insn_data[icode1].operand[0].mode)
2058 && (insn_data[icode1].operand[0].mode
2059 == insn_data[icode2].operand[0].mode))
2060 {
2061 poly_uint64 nunits
2062 = exact_div (TYPE_VECTOR_SUBPARTS (compute_type), 2);
2063 tree cretd_type = build_vector_type (TREE_TYPE (ret_type), nunits);
2064 if (insn_data[icode1].operand[0].mode == TYPE_MODE (cretd_type))
2065 {
2066 vec<constructor_elt, va_gc> *v;
2067 tree part_width = TYPE_SIZE (compute_type);
2068 tree index = bitsize_int (0);
2069 int nunits = nunits_for_known_piecewise_op (arg_type);
2070 int delta = tree_to_uhwi (part_width) / arg_elt_bits;
2071 int i;
2072 location_t loc = gimple_location (gsi_stmt (*gsi));
2073
2074 if (compute_type != arg_type)
2075 {
2076 if (!warning_suppressed_p (gsi_stmt (*gsi),
2077 OPT_Wvector_operation_performance))
2078 warning_at (loc, OPT_Wvector_operation_performance,
2079 "vector operation will be expanded piecewise");
2080 }
2081 else
2082 {
2083 nunits = 1;
2084 delta = 1;
2085 }
2086
2087 vec_alloc (v, (nunits + delta - 1) / delta * 2);
2088 bool constant_p = true;
2089 for (i = 0; i < nunits;
2090 i += delta, index = int_const_binop (PLUS_EXPR, index,
2091 part_width))
2092 {
2093 tree a = arg;
2094 if (compute_type != arg_type)
2095 a = tree_vec_extract (gsi, compute_type, a, part_width,
2096 index);
2097 tree result = gimplify_build1 (gsi, code1, cretd_type, a);
2098 constructor_elt ce = { NULL_TREE, result };
2099 if (!CONSTANT_CLASS_P (ce.value))
2100 constant_p = false;
2101 v->quick_push (ce);
2102 ce.value = gimplify_build1 (gsi, code2, cretd_type, a);
2103 if (!CONSTANT_CLASS_P (ce.value))
2104 constant_p = false;
2105 v->quick_push (ce);
2106 }
2107
2108 if (constant_p)
2109 new_rhs = build_vector_from_ctor (ret_type, v);
2110 else
2111 new_rhs = build_constructor (ret_type, v);
2112 g = gimple_build_assign (lhs, new_rhs);
2113 gsi_replace (gsi, g, false);
2114 return;
2115 }
2116 }
2117 }
2118
2119 new_rhs = expand_vector_piecewise (gsi, do_vec_conversion, arg_type,
2120 TREE_TYPE (arg_type), arg,
2121 NULL_TREE, code, false, ret_type);
2122 g = gimple_build_assign (lhs, new_rhs);
2123 gsi_replace (gsi, g, false);
2124 }
2125
/* Process one statement.  If we identify a vector operation, expand it.
   GSI points at the statement to process.  DCE_SSA_NAMES accumulates
   SSA names made dead by the expansion so the caller can release them
   via simple_dce_from_worklist.  */

static void
expand_vector_operations_1 (gimple_stmt_iterator *gsi,
			    bitmap dce_ssa_names)
{
  tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
  enum tree_code code;
  optab op = unknown_optab;
  enum gimple_rhs_class rhs_class;
  tree new_rhs;

  /* Only consider code == GIMPLE_ASSIGN.  */
  gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
  if (!stmt)
    {
      /* The only call handled here is the IFN_VEC_CONVERT internal fn.  */
      if (gimple_call_internal_p (gsi_stmt (*gsi), IFN_VEC_CONVERT))
	expand_vector_conversion (gsi);
      return;
    }

  code = gimple_assign_rhs_code (stmt);
  rhs_class = get_gimple_rhs_class (code);
  lhs = gimple_assign_lhs (stmt);

  /* Permutes and conditions have dedicated lowering helpers.  */
  if (code == VEC_PERM_EXPR)
    {
      lower_vec_perm (gsi);
      return;
    }

  if (code == VEC_COND_EXPR)
    {
      expand_vector_condition (gsi, dce_ssa_names);
      return;
    }

  /* A COND_EXPR with a vector-typed result but no vector mode is a
     scalar condition selecting between two vectors.  */
  if (code == COND_EXPR
      && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
      && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
    {
      expand_vector_scalar_condition (gsi);
      return;
    }

  if (code == CONSTRUCTOR
      && TREE_CODE (lhs) == SSA_NAME
      && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
      && !gimple_clobber_p (stmt)
      && optimize)
    {
      optimize_vector_constructor (gsi);
      return;
    }

  /* Everything below handles plain unary and binary operations.  */
  if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
    return;

  rhs1 = gimple_assign_rhs1 (stmt);
  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  type = TREE_TYPE (lhs);
  if (!VECTOR_TYPE_P (type)
      || !VECTOR_TYPE_P (TREE_TYPE (rhs1)))
    return;

  /* A scalar operation pretending to be a vector one.  */
  if (VECTOR_BOOLEAN_TYPE_P (type)
      && !VECTOR_MODE_P (TYPE_MODE (type))
      && TYPE_MODE (type) != BLKmode
      && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) != tcc_comparison
	  || (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))
	      && !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs1)))
	      && TYPE_MODE (TREE_TYPE (rhs1)) != BLKmode)))
    return;

  /* If the vector operation is operating on all same vector elements
     implement it with a scalar operation and a splat if the target
     supports the scalar operation.  */
  tree srhs1, srhs2 = NULL_TREE;
  if ((srhs1 = ssa_uniform_vector_p (rhs1)) != NULL_TREE
      && (rhs2 == NULL_TREE
	  || (! VECTOR_TYPE_P (TREE_TYPE (rhs2))
	      && (srhs2 = rhs2))
	  || (srhs2 = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
      /* As we query direct optabs restrict to non-convert operations.  */
      && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1)))
    {
      op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar);
      if (op >= FIRST_NORM_OPTAB && op <= LAST_NORM_OPTAB
	  && optab_handler (op, TYPE_MODE (TREE_TYPE (type))) != CODE_FOR_nothing)
	{
	  tree stype = TREE_TYPE (TREE_TYPE (lhs));
	  tree slhs = (rhs2 != NULL_TREE)
	    ? gimplify_build2 (gsi, code, stype, srhs1, srhs2)
	    : gimplify_build1 (gsi, code, stype, srhs1);
	  gimple_assign_set_rhs_from_tree (gsi,
					   build_vector_from_val (type, slhs));
	  update_stmt (stmt);
	  return;
	}
    }

  /* Conversions are handled by expand_vector_conversion, not here.  */
  if (CONVERT_EXPR_CODE_P (code)
      || code == FLOAT_EXPR
      || code == FIX_TRUNC_EXPR
      || code == VIEW_CONVERT_EXPR)
    return;

  /* The signedness is determined from input argument.  */
  if (code == VEC_UNPACK_FLOAT_HI_EXPR
      || code == VEC_UNPACK_FLOAT_LO_EXPR
      || code == VEC_PACK_FLOAT_EXPR)
    {
      /* We do not know how to scalarize those.  */
      return;
    }

  /* For widening/narrowing vector operations, the relevant type is of the
     arguments, not the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
     calculated in the same way above.  */
  if (code == WIDEN_SUM_EXPR
      || code == VEC_WIDEN_PLUS_HI_EXPR
      || code == VEC_WIDEN_PLUS_LO_EXPR
      || code == VEC_WIDEN_MINUS_HI_EXPR
      || code == VEC_WIDEN_MINUS_LO_EXPR
      || code == VEC_WIDEN_MULT_HI_EXPR
      || code == VEC_WIDEN_MULT_LO_EXPR
      || code == VEC_WIDEN_MULT_EVEN_EXPR
      || code == VEC_WIDEN_MULT_ODD_EXPR
      || code == VEC_UNPACK_HI_EXPR
      || code == VEC_UNPACK_LO_EXPR
      || code == VEC_UNPACK_FIX_TRUNC_HI_EXPR
      || code == VEC_UNPACK_FIX_TRUNC_LO_EXPR
      || code == VEC_PACK_TRUNC_EXPR
      || code == VEC_PACK_SAT_EXPR
      || code == VEC_PACK_FIX_TRUNC_EXPR
      || code == VEC_WIDEN_LSHIFT_HI_EXPR
      || code == VEC_WIDEN_LSHIFT_LO_EXPR)
    {
      /* We do not know how to scalarize those.  */
      return;
    }

  /* Choose between vector shift/rotate by vector and vector shift/rotate by
     scalar */
  if (code == LSHIFT_EXPR
      || code == RSHIFT_EXPR
      || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    {
      optab opv;

      /* Check whether we have vector <op> {x,x,x,x} where x
         could be a scalar variable or a constant.  Transform
         vector <op> {x,x,x,x} ==> vector <op> scalar.  */
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	{
	  tree first;

	  if ((first = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
	    {
	      gimple_assign_set_rhs2 (stmt, first);
	      update_stmt (stmt);
	      rhs2 = first;
	    }
	}

      opv = optab_for_tree_code (code, type, optab_vector);
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	op = opv;
      else
	{
	  op = optab_for_tree_code (code, type, optab_scalar);

	  compute_type = get_compute_type (code, op, type);
	  if (compute_type == type)
	    return;
	  /* The rtl expander will expand vector/scalar as vector/vector
	     if necessary.  Pick one with wider vector type.  */
	  tree compute_vtype = get_compute_type (code, opv, type);
	  if (subparts_gt (compute_vtype, compute_type))
	    {
	      compute_type = compute_vtype;
	      op = opv;
	    }
	}

      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
	{
	  if (compute_type == NULL_TREE)
	    compute_type = get_compute_type (code, op, type);
	  if (compute_type == type)
	    return;
	  /* Before splitting vector rotates into scalar rotates,
	     see if we can't use vector shifts and BIT_IOR_EXPR
	     instead.  For vector by vector rotates we'd also
	     need to check BIT_AND_EXPR and NEGATE_EXPR, punt there
	     for now, fold doesn't seem to create such rotates anyway.  */
	  if (compute_type == TREE_TYPE (type)
	      && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	    {
	      optab oplv = vashl_optab, opl = ashl_optab;
	      optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
	      tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
	      tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
	      tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
	      tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
	      tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
	      /* The rtl expander will expand vector/scalar as vector/vector
		 if necessary.  Pick one with wider vector type.  */
	      if (subparts_gt (compute_lvtype, compute_ltype))
		{
		  compute_ltype = compute_lvtype;
		  opl = oplv;
		}
	      if (subparts_gt (compute_rvtype, compute_rtype))
		{
		  compute_rtype = compute_rvtype;
		  opr = oprv;
		}
	      /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
		 BIT_IOR_EXPR.  */
	      compute_type = compute_ltype;
	      if (subparts_gt (compute_type, compute_rtype))
		compute_type = compute_rtype;
	      if (subparts_gt (compute_type, compute_otype))
		compute_type = compute_otype;
	      /* Verify all 3 operations can be performed in that type.  */
	      if (compute_type != TREE_TYPE (type))
		{
		  if (optab_handler (opl, TYPE_MODE (compute_type))
		      == CODE_FOR_nothing
		      || optab_handler (opr, TYPE_MODE (compute_type))
			 == CODE_FOR_nothing
		      || optab_handler (opo, TYPE_MODE (compute_type))
			 == CODE_FOR_nothing)
		    compute_type = TREE_TYPE (type);
		}
	    }
	}
    }
  else
    op = optab_for_tree_code (code, type, optab_default);

  /* Optabs will try converting a negation into a subtraction, so
     look for it as well.  TODO: negation of floating-point vectors
     might be turned into an exclusive OR toggling the sign bit.  */
  if (op == unknown_optab
      && code == NEGATE_EXPR
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
    op = optab_for_tree_code (MINUS_EXPR, type, optab_default);

  if (compute_type == NULL_TREE)
    compute_type = get_compute_type (code, op, type);
  /* compute_type == type means the target handles the whole vector
     natively; nothing to lower.  */
  if (compute_type == type)
    return;

  new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code,
				     dce_ssa_names);

  /* Leave expression untouched for later expansion.  */
  if (new_rhs == NULL_TREE)
    return;

  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
			       new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree. One
     way to do it is change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree. */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}
2402
2403 /* Use this to lower vector operations introduced by the vectorizer,
2405 if it may need the bit-twiddling tricks implemented in this file. */
2406
2407 static unsigned int
2408 expand_vector_operations (void)
2409 {
2410 gimple_stmt_iterator gsi;
2411 basic_block bb;
2412 bool cfg_changed = false;
2413
2414 auto_bitmap dce_ssa_names;
2415
2416 FOR_EACH_BB_FN (bb, cfun)
2417 {
2418 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2419 {
2420 expand_vector_operations_1 (&gsi, dce_ssa_names);
2421 /* ??? If we do not cleanup EH then we will ICE in
2422 verification. But in reality we have created wrong-code
2423 as we did not properly transition EH info and edges to
2424 the piecewise computations. */
2425 if (maybe_clean_eh_stmt (gsi_stmt (gsi))
2426 && gimple_purge_dead_eh_edges (bb))
2427 cfg_changed = true;
2428 }
2429 }
2430
2431 simple_dce_from_worklist (dce_ssa_names);
2432
2433 return cfg_changed ? TODO_cleanup_cfg : 0;
2434 }
2435
2436 namespace {
2437
/* Pass descriptor for "veclower".  The pass is gated (see
   pass_lower_vector::gate) so it only runs on functions that do not
   yet have PROP_gimple_lvec.  */

const pass_data pass_data_lower_vector =
{
  GIMPLE_PASS, /* type */
  "veclower", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
2450
2451 class pass_lower_vector : public gimple_opt_pass
2452 {
2453 public:
2454 pass_lower_vector (gcc::context *ctxt)
2455 : gimple_opt_pass (pass_data_lower_vector, ctxt)
2456 {}
2457
2458 /* opt_pass methods: */
2459 virtual bool gate (function *fun)
2460 {
2461 return !(fun->curr_properties & PROP_gimple_lvec);
2462 }
2463
2464 virtual unsigned int execute (function *)
2465 {
2466 return expand_vector_operations ();
2467 }
2468
2469 }; // class pass_lower_vector
2470
2471 } // anon namespace
2472
/* Factory: build a new pass_lower_vector instance for context CTXT.  */

gimple_opt_pass *
make_pass_lower_vector (gcc::context *ctxt)
{
  return new pass_lower_vector (ctxt);
}
2478
2479 namespace {
2480
/* Pass descriptor for "veclower2".  Unlike pass_data_lower_vector this
   variant additionally requests TODO_cleanup_cfg on finish, and the
   corresponding pass class provides no gate override.  */

const pass_data pass_data_lower_vector_ssa =
{
  GIMPLE_PASS, /* type */
  "veclower2", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_update_ssa
    | TODO_cleanup_cfg ), /* todo_flags_finish */
};
2494
2495 class pass_lower_vector_ssa : public gimple_opt_pass
2496 {
2497 public:
2498 pass_lower_vector_ssa (gcc::context *ctxt)
2499 : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
2500 {}
2501
2502 /* opt_pass methods: */
2503 opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
2504 virtual unsigned int execute (function *)
2505 {
2506 return expand_vector_operations ();
2507 }
2508
2509 }; // class pass_lower_vector_ssa
2510
2511 } // anon namespace
2512
/* Factory: build a new pass_lower_vector_ssa instance for context CTXT.  */

gimple_opt_pass *
make_pass_lower_vector_ssa (gcc::context *ctxt)
{
  return new pass_lower_vector_ssa (ctxt);
}
2518
2519 #include "gt-tree-vect-generic.h"
2520