/* Vectorizer
   Copyright (C) 2003-2022 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit (at) il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Loop and basic block vectorizer.

  This file contains drivers for the three vectorizers:
  (1) loop vectorizer (inter-iteration parallelism),
  (2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop
      vectorizer)
  (3) BB vectorizer (out-of-loops), aka SLP

  The rest of the vectorizer's code is organized as follows:
  - tree-vect-loop.cc - loop specific parts such as reductions, etc.
    These are used by drivers (1) and (2).
  - tree-vect-loop-manip.cc - vectorizer's loop control-flow utilities,
    used by drivers (1) and (2).
  - tree-vect-slp.cc - BB vectorization specific analysis and
    transformation, used by drivers (2) and (3).
  - tree-vect-stmts.cc - statements analysis and transformation (used
    by all).
  - tree-vect-data-refs.cc - vectorizer specific data-refs analysis and
    manipulations (used by all).
  - tree-vect-patterns.cc - vectorizable code patterns detector (used by all)

  Here's a poor attempt at illustrating that:

     tree-vectorizer.cc:
     loop_vect()  loop_aware_slp()  slp_vect()
          |        /           \          /
          |       /             \        /
          tree-vect-loop.cc  tree-vect-slp.cc
                | \      \  /      / |
                |  \      \/      /  |
                |   \     /\     /   |
                |    \   /  \   /    |
         tree-vect-stmts.cc  tree-vect-data-refs.cc
                       \      /
                    tree-vect-patterns.cc
*/
56
57 #include "config.h"
58 #include "system.h"
59 #include "coretypes.h"
60 #include "backend.h"
61 #include "tree.h"
62 #include "gimple.h"
63 #include "predict.h"
64 #include "tree-pass.h"
65 #include "ssa.h"
66 #include "cgraph.h"
67 #include "fold-const.h"
68 #include "stor-layout.h"
69 #include "gimple-iterator.h"
70 #include "gimple-walk.h"
71 #include "tree-ssa-loop-manip.h"
72 #include "tree-ssa-loop-niter.h"
73 #include "tree-cfg.h"
74 #include "cfgloop.h"
75 #include "tree-vectorizer.h"
76 #include "tree-ssa-propagate.h"
77 #include "dbgcnt.h"
78 #include "tree-scalar-evolution.h"
79 #include "stringpool.h"
80 #include "attribs.h"
81 #include "gimple-pretty-print.h"
82 #include "opt-problem.h"
83 #include "internal-fn.h"
84 #include "tree-ssa-sccvn.h"
85
/* Loop or bb location, with hotness information.  Global state used by
   the dump machinery to attribute vectorizer diagnostics to the code
   region currently being analyzed or transformed.  */
dump_user_location_t vect_location;
88
/* auto_purge_vect_location's dtor: reset the vect_location
   global, to avoid stale location_t values that could reference
   GC-ed blocks.  Declaring an auto_purge_vect_location in a scope
   guarantees the reset happens on every exit path.  */

auto_purge_vect_location::~auto_purge_vect_location ()
{
  vect_location = dump_user_location_t ();
}
97
98 /* Dump a cost entry according to args to F. */
99
100 void
101 dump_stmt_cost (FILE *f, int count, enum vect_cost_for_stmt kind,
102 stmt_vec_info stmt_info, slp_tree node, tree,
103 int misalign, unsigned cost,
104 enum vect_cost_model_location where)
105 {
106 if (stmt_info)
107 {
108 print_gimple_expr (f, STMT_VINFO_STMT (stmt_info), 0, TDF_SLIM);
109 fprintf (f, " ");
110 }
111 else if (node)
112 fprintf (f, "node %p ", (void *)node);
113 else
114 fprintf (f, "<unknown> ");
115 fprintf (f, "%d times ", count);
116 const char *ks = "unknown";
117 switch (kind)
118 {
119 case scalar_stmt:
120 ks = "scalar_stmt";
121 break;
122 case scalar_load:
123 ks = "scalar_load";
124 break;
125 case scalar_store:
126 ks = "scalar_store";
127 break;
128 case vector_stmt:
129 ks = "vector_stmt";
130 break;
131 case vector_load:
132 ks = "vector_load";
133 break;
134 case vector_gather_load:
135 ks = "vector_gather_load";
136 break;
137 case unaligned_load:
138 ks = "unaligned_load";
139 break;
140 case unaligned_store:
141 ks = "unaligned_store";
142 break;
143 case vector_store:
144 ks = "vector_store";
145 break;
146 case vector_scatter_store:
147 ks = "vector_scatter_store";
148 break;
149 case vec_to_scalar:
150 ks = "vec_to_scalar";
151 break;
152 case scalar_to_vec:
153 ks = "scalar_to_vec";
154 break;
155 case cond_branch_not_taken:
156 ks = "cond_branch_not_taken";
157 break;
158 case cond_branch_taken:
159 ks = "cond_branch_taken";
160 break;
161 case vec_perm:
162 ks = "vec_perm";
163 break;
164 case vec_promote_demote:
165 ks = "vec_promote_demote";
166 break;
167 case vec_construct:
168 ks = "vec_construct";
169 break;
170 }
171 fprintf (f, "%s ", ks);
172 if (kind == unaligned_load || kind == unaligned_store)
173 fprintf (f, "(misalign %d) ", misalign);
174 fprintf (f, "costs %u ", cost);
175 const char *ws = "unknown";
176 switch (where)
177 {
178 case vect_prologue:
179 ws = "prologue";
180 break;
181 case vect_body:
182 ws = "body";
183 break;
184 case vect_epilogue:
185 ws = "epilogue";
186 break;
187 }
188 fprintf (f, "in %s\n", ws);
189 }
190
/* For mapping simduid to vectorization factor.  Entries are keyed on
   the simduid DECL_UID and record the VF chosen for the loop that used
   that simduid.  */

class simduid_to_vf : public free_ptr_hash<simduid_to_vf>
{
public:
  /* DECL_UID of the loop's simduid decl (the hash key).  */
  unsigned int simduid;
  /* Vectorization factor of the vectorized loop.  */
  poly_uint64 vf;

  /* hash_table support.  */
  static inline hashval_t hash (const simduid_to_vf *);
  static inline int equal (const simduid_to_vf *, const simduid_to_vf *);
};
204
205 inline hashval_t
206 simduid_to_vf::hash (const simduid_to_vf *p)
207 {
208 return p->simduid;
209 }
210
211 inline int
212 simduid_to_vf::equal (const simduid_to_vf *p1, const simduid_to_vf *p2)
213 {
214 return p1->simduid == p2->simduid;
215 }
216
/* This hash maps the OMP simd array to the corresponding simduid used
   to index into it.  Like thus,

        _7 = GOMP_SIMD_LANE (simduid.0)
        ...
        ...
        D.1737[_7] = stuff;


   This hash maps from the OMP simd array (D.1737[]) to DECL_UID of
   simduid.0.  */

struct simd_array_to_simduid : free_ptr_hash<simd_array_to_simduid>
{
  /* The "omp simd array" variable (the hash key).  */
  tree decl;
  /* DECL_UID of the simduid indexing the array, or -1U when the array
     is used with more than one simduid.  */
  unsigned int simduid;

  /* hash_table support.  */
  static inline hashval_t hash (const simd_array_to_simduid *);
  static inline int equal (const simd_array_to_simduid *,
			   const simd_array_to_simduid *);
};
239
240 inline hashval_t
241 simd_array_to_simduid::hash (const simd_array_to_simduid *p)
242 {
243 return DECL_UID (p->decl);
244 }
245
246 inline int
247 simd_array_to_simduid::equal (const simd_array_to_simduid *p1,
248 const simd_array_to_simduid *p2)
249 {
250 return p1->decl == p2->decl;
251 }
252
/* Fold IFN_GOMP_SIMD_LANE, IFN_GOMP_SIMD_VF, IFN_GOMP_SIMD_LAST_LANE,
   into their corresponding constants and remove
   IFN_GOMP_SIMD_ORDERED_{START,END}.  HTAB maps each simduid to the
   vectorization factor of the loop it belonged to; a NULL HTAB or a
   missing entry means the loop was not vectorized, so VF 1 is used.  */

static void
adjust_simduid_builtins (hash_table<simduid_to_vf> *htab, function *fun)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator i;

      /* No gsi_next in the loop header: removal/replacement of the
	 current statement advances the iterator implicitly, and the
	 non-interesting cases do an explicit gsi_next + continue.  */
      for (i = gsi_start_bb (bb); !gsi_end_p (i); )
	{
	  poly_uint64 vf = 1;
	  enum internal_fn ifn;
	  gimple *stmt = gsi_stmt (i);
	  tree t;
	  if (!is_gimple_call (stmt)
	      || !gimple_call_internal_p (stmt))
	    {
	      gsi_next (&i);
	      continue;
	    }
	  ifn = gimple_call_internal_fn (stmt);
	  switch (ifn)
	    {
	    case IFN_GOMP_SIMD_LANE:
	    case IFN_GOMP_SIMD_VF:
	    case IFN_GOMP_SIMD_LAST_LANE:
	      /* Handled by the constant-folding code below.  */
	      break;
	    case IFN_GOMP_SIMD_ORDERED_START:
	    case IFN_GOMP_SIMD_ORDERED_END:
	      /* A constant-one argument means the ordered construct is
		 still needed: replace the internal call with the real
		 GOMP builtin.  Otherwise simply drop the call.  */
	      if (integer_onep (gimple_call_arg (stmt, 0)))
		{
		  enum built_in_function bcode
		    = (ifn == IFN_GOMP_SIMD_ORDERED_START
		       ? BUILT_IN_GOMP_ORDERED_START
		       : BUILT_IN_GOMP_ORDERED_END);
		  gimple *g
		    = gimple_build_call (builtin_decl_explicit (bcode), 0);
		  gimple_move_vops (g, stmt);
		  gsi_replace (&i, g, true);
		  continue;
		}
	      gsi_remove (&i, true);
	      unlink_stmt_vdef (stmt);
	      continue;
	    default:
	      gsi_next (&i);
	      continue;
	    }
	  tree arg = gimple_call_arg (stmt, 0);
	  gcc_assert (arg != NULL_TREE);
	  gcc_assert (TREE_CODE (arg) == SSA_NAME);
	  simduid_to_vf *p = NULL, data;
	  data.simduid = DECL_UID (SSA_NAME_VAR (arg));
	  /* Need to nullify loop safelen field since it's value is not
	     valid after transformation.  */
	  if (bb->loop_father && bb->loop_father->safelen > 0)
	    bb->loop_father->safelen = 0;
	  if (htab)
	    {
	      p = htab->find (&data);
	      if (p)
		vf = p->vf;
	    }
	  /* Fold the call to the constant (or last-lane operand) it
	     represents after vectorization.  */
	  switch (ifn)
	    {
	    case IFN_GOMP_SIMD_VF:
	      t = build_int_cst (unsigned_type_node, vf);
	      break;
	    case IFN_GOMP_SIMD_LANE:
	      t = build_int_cst (unsigned_type_node, 0);
	      break;
	    case IFN_GOMP_SIMD_LAST_LANE:
	      t = gimple_call_arg (stmt, 1);
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  tree lhs = gimple_call_lhs (stmt);
	  if (lhs)
	    replace_uses_by (lhs, t);
	  release_defs (stmt);
	  gsi_remove (&i, true);
	}
    }
}
343
/* Helper structure for note_simd_array_uses: carries the (lazily
   created) decl-to-simduid hash table and the simduid of the
   GOMP_SIMD_* call whose uses are currently being walked.  */

struct note_simd_array_uses_struct
{
  hash_table<simd_array_to_simduid> **htab;
  unsigned int simduid;
};
351
/* Callback for note_simd_array_uses, called through walk_gimple_op.
   Records every "omp simd array" VAR_DECL of the current function that
   the walked operand references; an array already recorded under a
   different simduid gets its simduid invalidated to -1U.  */

static tree
note_simd_array_uses_cb (tree *tp, int *walk_subtrees, void *data)
{
  struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
  struct note_simd_array_uses_struct *ns
    = (struct note_simd_array_uses_struct *) wi->info;

  if (TYPE_P (*tp))
    *walk_subtrees = 0;
  else if (VAR_P (*tp)
	   && lookup_attribute ("omp simd array", DECL_ATTRIBUTES (*tp))
	   && DECL_CONTEXT (*tp) == current_function_decl)
    {
      simd_array_to_simduid data;
      /* Create the hash table lazily on the first hit.  */
      if (!*ns->htab)
	*ns->htab = new hash_table<simd_array_to_simduid> (15);
      data.decl = *tp;
      data.simduid = ns->simduid;
      simd_array_to_simduid **slot = (*ns->htab)->find_slot (&data, INSERT);
      if (*slot == NULL)
	{
	  simd_array_to_simduid *p = XNEW (simd_array_to_simduid);
	  *p = data;
	  *slot = p;
	}
      else if ((*slot)->simduid != ns->simduid)
	/* Seen with two different simduids; mark unusable so the
	   array is not shrunk later.  */
	(*slot)->simduid = -1U;
      *walk_subtrees = 0;
    }
  return NULL_TREE;
}
385
/* Find "omp simd array" temporaries and map them to corresponding
   simduid.  Scans FUN for GOMP_SIMD_{LANE,VF,LAST_LANE} internal calls
   and walks the non-debug uses of each call's result, populating *HTAB
   (created lazily by the callback) via note_simd_array_uses_cb.  */

static void
note_simd_array_uses (hash_table<simd_array_to_simduid> **htab, function *fun)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  struct walk_stmt_info wi;
  struct note_simd_array_uses_struct ns;

  memset (&wi, 0, sizeof (wi));
  wi.info = &ns;
  ns.htab = htab;

  FOR_EACH_BB_FN (bb, fun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *stmt = gsi_stmt (gsi);
	if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
	  continue;
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_SIMD_LANE:
	  case IFN_GOMP_SIMD_VF:
	  case IFN_GOMP_SIMD_LAST_LANE:
	    break;
	  default:
	    continue;
	  }
	tree lhs = gimple_call_lhs (stmt);
	if (lhs == NULL_TREE)
	  continue;
	imm_use_iterator use_iter;
	gimple *use_stmt;
	/* The first call argument is the simduid decl.  */
	ns.simduid = DECL_UID (SSA_NAME_VAR (gimple_call_arg (stmt, 0)));
	FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs)
	  if (!is_gimple_debug (use_stmt))
	    walk_gimple_op (use_stmt, note_simd_array_uses_cb, &wi);
      }
}
427
/* Shrink arrays with "omp simd array" attribute to the corresponding
   vectorization factor.  Entries whose simduid was invalidated (-1U,
   i.e. the array was used with several simduids) are left alone.
   Consumes and deletes SIMD_ARRAY_TO_SIMDUID_HTAB.  */

static void
shrink_simd_arrays
  (hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab,
   hash_table<simduid_to_vf> *simduid_to_vf_htab)
{
  for (hash_table<simd_array_to_simduid>::iterator iter
	 = simd_array_to_simduid_htab->begin ();
       iter != simd_array_to_simduid_htab->end (); ++iter)
    if ((*iter)->simduid != -1U)
      {
	tree decl = (*iter)->decl;
	/* Default to VF 1 when the loop was not vectorized.  */
	poly_uint64 vf = 1;
	if (simduid_to_vf_htab)
	  {
	    simduid_to_vf *p = NULL, data;
	    data.simduid = (*iter)->simduid;
	    p = simduid_to_vf_htab->find (&data);
	    if (p)
	      vf = p->vf;
	  }
	/* Re-type the array to hold exactly VF elements and lay it
	   out again.  */
	tree atype
	  = build_array_type_nelts (TREE_TYPE (TREE_TYPE (decl)), vf);
	TREE_TYPE (decl) = atype;
	relayout_decl (decl);
      }

  delete simd_array_to_simduid_htab;
}
459
/* Initialize the vec_info with kind KIND_IN and the analysis data
   shared across loop versions SHARED_.  */

vec_info::vec_info (vec_info::vec_kind kind_in, vec_info_shared *shared_)
  : kind (kind_in),
    shared (shared_),
    stmt_vec_info_ro (false)
{
  stmt_vec_infos.create (50);
}
471
472 vec_info::~vec_info ()
473 {
474 for (slp_instance &instance : slp_instances)
475 vect_free_slp_instance (instance);
476
477 free_stmt_vec_infos ();
478 }
479
/* Construct an empty shared-analysis container: no statements counted
   yet, no data references or dependence relations recorded.  */

vec_info_shared::vec_info_shared ()
  : n_stmts (0),
    datarefs (vNULL),
    datarefs_copy (vNULL),
    ddrs (vNULL)
{
}
487
/* Release the shared analysis data: the data references, the
   dependence relations computed from them, and the checking-only
   snapshot made by save_datarefs.  */

vec_info_shared::~vec_info_shared ()
{
  free_data_refs (datarefs);
  free_dependence_relations (ddrs);
  datarefs_copy.release ();
}
494
495 void
496 vec_info_shared::save_datarefs ()
497 {
498 if (!flag_checking)
499 return;
500 datarefs_copy.reserve_exact (datarefs.length ());
501 for (unsigned i = 0; i < datarefs.length (); ++i)
502 datarefs_copy.quick_push (*datarefs[i]);
503 }
504
/* Verify that the data references are unchanged since save_datarefs
   ran.  Only the leading part of each structure up to the alt_indices
   member is compared (alt_indices may legitimately differ).  Aborts on
   a mismatch; no-op unless checking is enabled.  */

void
vec_info_shared::check_datarefs ()
{
  if (!flag_checking)
    return;
  gcc_assert (datarefs.length () == datarefs_copy.length ());
  for (unsigned i = 0; i < datarefs.length (); ++i)
    if (memcmp (&datarefs_copy[i], datarefs[i],
		offsetof (data_reference, alt_indices)) != 0)
      gcc_unreachable ();
}
516
517 /* Record that STMT belongs to the vectorizable region. Create and return
518 an associated stmt_vec_info. */
519
520 stmt_vec_info
521 vec_info::add_stmt (gimple *stmt)
522 {
523 stmt_vec_info res = new_stmt_vec_info (stmt);
524 set_vinfo_for_stmt (stmt, res);
525 return res;
526 }
527
528 /* Record that STMT belongs to the vectorizable region. Create a new
529 stmt_vec_info and mark VECINFO as being related and return the new
530 stmt_vec_info. */
531
532 stmt_vec_info
533 vec_info::add_pattern_stmt (gimple *stmt, stmt_vec_info stmt_info)
534 {
535 stmt_vec_info res = new_stmt_vec_info (stmt);
536 set_vinfo_for_stmt (stmt, res, false);
537 STMT_VINFO_RELATED_STMT (res) = stmt_info;
538 return res;
539 }
540
/* If STMT has an associated stmt_vec_info, return that vec_info, otherwise
   return null.  It is safe to call this function on any statement, even if
   it might not be part of the vectorizable region.  Membership is keyed
   on the gimple UID: UID N maps to stmt_vec_infos[N-1], and the stored
   entry must still point back at STMT (stale UIDs yield NULL).  */

stmt_vec_info
vec_info::lookup_stmt (gimple *stmt)
{
  unsigned int uid = gimple_uid (stmt);
  if (uid > 0 && uid - 1 < stmt_vec_infos.length ())
    {
      stmt_vec_info res = stmt_vec_infos[uid - 1];
      if (res && res->stmt == stmt)
	return res;
    }
  return NULL;
}
557
558 /* If NAME is an SSA_NAME and its definition has an associated stmt_vec_info,
559 return that stmt_vec_info, otherwise return null. It is safe to call
560 this on arbitrary operands. */
561
562 stmt_vec_info
563 vec_info::lookup_def (tree name)
564 {
565 if (TREE_CODE (name) == SSA_NAME
566 && !SSA_NAME_IS_DEFAULT_DEF (name))
567 return lookup_stmt (SSA_NAME_DEF_STMT (name));
568 return NULL;
569 }
570
571 /* See whether there is a single non-debug statement that uses LHS and
572 whether that statement has an associated stmt_vec_info. Return the
573 stmt_vec_info if so, otherwise return null. */
574
575 stmt_vec_info
576 vec_info::lookup_single_use (tree lhs)
577 {
578 use_operand_p dummy;
579 gimple *use_stmt;
580 if (single_imm_use (lhs, &dummy, &use_stmt))
581 return lookup_stmt (use_stmt);
582 return NULL;
583 }
584
/* Return vectorization information about DR.  The result is looked up
   via the stmt_vec_info currently owning the dr_aux (which may have
   been moved to a pattern statement by move_dr).  */

dr_vec_info *
vec_info::lookup_dr (data_reference *dr)
{
  stmt_vec_info stmt_info = lookup_stmt (DR_STMT (dr));
  /* DR_STMT should never refer to a stmt in a pattern replacement.  */
  gcc_checking_assert (!is_pattern_stmt_p (stmt_info));
  return STMT_VINFO_DR_INFO (stmt_info->dr_aux.stmt);
}
595
/* Record that NEW_STMT_INFO now implements the same data reference
   as OLD_STMT_INFO.  Transfers the dr_aux plus the loop-relative DR
   and gather/scatter flags, and redirects the dr_info's back-pointer
   so lookup_dr finds the new statement.  */

void
vec_info::move_dr (stmt_vec_info new_stmt_info, stmt_vec_info old_stmt_info)
{
  gcc_assert (!is_pattern_stmt_p (old_stmt_info));
  STMT_VINFO_DR_INFO (old_stmt_info)->stmt = new_stmt_info;
  new_stmt_info->dr_aux = old_stmt_info->dr_aux;
  STMT_VINFO_DR_WRT_VEC_LOOP (new_stmt_info)
    = STMT_VINFO_DR_WRT_VEC_LOOP (old_stmt_info);
  STMT_VINFO_GATHER_SCATTER_P (new_stmt_info)
    = STMT_VINFO_GATHER_SCATTER_P (old_stmt_info);
}
610
/* Permanently remove the statement described by STMT_INFO from the
   function: drop its vec_info association, unlink its virtual operand
   chain, delete it from the IL, release its SSA definitions and free
   STMT_INFO itself.  Pattern statements must not be removed this
   way.  */

void
vec_info::remove_stmt (stmt_vec_info stmt_info)
{
  gcc_assert (!stmt_info->pattern_stmt_p);
  /* Clear the association first while the uid is still valid.  */
  set_vinfo_for_stmt (stmt_info->stmt, NULL);
  unlink_stmt_vdef (stmt_info->stmt);
  gimple_stmt_iterator si = gsi_for_stmt (stmt_info->stmt);
  gsi_remove (&si, true);
  release_defs (stmt_info->stmt);
  free_stmt_vec_info (stmt_info);
}
625
/* Replace the statement at GSI by NEW_STMT, both the vectorization
   information and the function itself.  STMT_INFO describes the statement
   at GSI.  NEW_STMT inherits the old statement's UID so existing
   lookup_stmt mappings stay valid.  */

void
vec_info::replace_stmt (gimple_stmt_iterator *gsi, stmt_vec_info stmt_info,
			gimple *new_stmt)
{
  gimple *old_stmt = stmt_info->stmt;
  gcc_assert (!stmt_info->pattern_stmt_p && old_stmt == gsi_stmt (*gsi));
  gimple_set_uid (new_stmt, gimple_uid (old_stmt));
  stmt_info->stmt = new_stmt;
  gsi_replace (gsi, new_stmt, true);
}
640
/* Insert stmts in SEQ on the VEC_INFO region entry.  If CONTEXT is
   not NULL it specifies whether to use the sub-region entry
   determined by it, currently used for loop vectorization to insert
   on the inner loop entry vs. the outer loop entry.  */

void
vec_info::insert_seq_on_entry (stmt_vec_info context, gimple_seq seq)
{
  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (this))
    {
      class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      basic_block new_bb;
      edge pe;

      /* For a statement of the inner loop of an outer-loop
	 vectorization, insert on the inner loop's preheader.  */
      if (context && nested_in_vect_loop_p (loop, context))
	loop = loop->inner;

      pe = loop_preheader_edge (loop);
      new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
      /* The preheader edge must not have required splitting.  */
      gcc_assert (!new_bb);
    }
  else
    {
      /* BB vectorization: insert right after the labels of the first
	 block of the region.  */
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (this);
      gimple_stmt_iterator gsi_region_begin
	= gsi_after_labels (bb_vinfo->bbs[0]);
      gsi_insert_seq_before (&gsi_region_begin, seq, GSI_SAME_STMT);
    }
}
670
/* Like insert_seq_on_entry but just inserts the single stmt NEW_STMT.  */

void
vec_info::insert_on_entry (stmt_vec_info context, gimple *new_stmt)
{
  /* Wrap NEW_STMT in a fresh one-element sequence; gsi_start on a
     NULL seq yields an end iterator the insert-before fills in.  */
  gimple_seq seq = NULL;
  gimple_stmt_iterator gsi = gsi_start (seq);
  gsi_insert_before_without_update (&gsi, new_stmt, GSI_SAME_STMT);
  insert_seq_on_entry (context, seq);
}
681
/* Create and initialize a new stmt_vec_info struct for STMT with
   conservative defaults: unused in scope, vectorizable, no reduction
   info, no vectorized copies yet.  */

stmt_vec_info
vec_info::new_stmt_vec_info (gimple *stmt)
{
  stmt_vec_info res = XCNEW (class _stmt_vec_info);
  res->stmt = stmt;

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_REDUC_TYPE (res) = TREE_CODE_REDUCTION;
  STMT_VINFO_REDUC_CODE (res) = ERROR_MARK;
  STMT_VINFO_REDUC_FN (res) = IFN_LAST;
  STMT_VINFO_REDUC_IDX (res) = -1;
  STMT_VINFO_SLP_VECT_ONLY (res) = false;
  STMT_VINFO_SLP_VECT_ONLY_PATTERN (res) = false;
  STMT_VINFO_VEC_STMTS (res) = vNULL;
  res->reduc_initial_values = vNULL;
  res->reduc_scalar_results = vNULL;

  /* Loop-header PHIs may turn out to be inductions/reductions, so
     their def type is not known yet; everything else is an ordinary
     internal def.  */
  if (is_a <loop_vec_info> (this)
      && gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_SLP_TYPE (res) = loop_vect;

  /* This is really "uninitialized" until vect_compute_data_ref_alignment.  */
  res->dr_aux.misalignment = DR_MISALIGNMENT_UNINITIALIZED;

  return res;
}
717
/* Associate STMT with INFO.  A statement with UID 0 is not yet known
   here: it gets the next free UID and INFO is appended (rejected when
   the region is read-only, unless CHECK_RO is false as for pattern
   stmts).  A statement that already has a UID may only have its
   association cleared (INFO == NULL).  */

void
vec_info::set_vinfo_for_stmt (gimple *stmt, stmt_vec_info info, bool check_ro)
{
  unsigned int uid = gimple_uid (stmt);
  if (uid == 0)
    {
      gcc_assert (!check_ro || !stmt_vec_info_ro);
      gcc_checking_assert (info);
      /* UID N maps to slot N-1; 0 means "no info".  */
      uid = stmt_vec_infos.length () + 1;
      gimple_set_uid (stmt, uid);
      stmt_vec_infos.safe_push (info);
    }
  else
    {
      gcc_checking_assert (info == NULL);
      stmt_vec_infos[uid - 1] = info;
    }
}
738
739 /* Free the contents of stmt_vec_infos. */
740
741 void
742 vec_info::free_stmt_vec_infos (void)
743 {
744 for (stmt_vec_info &info : stmt_vec_infos)
745 if (info != NULL)
746 free_stmt_vec_info (info);
747 stmt_vec_infos.release ();
748 }
749
/* Free STMT_INFO and the vectors it owns.  A pattern statement was
   never part of the IL proper, so also detach it from its (dangling)
   basic block and release the SSA name it defines.  */

void
vec_info::free_stmt_vec_info (stmt_vec_info stmt_info)
{
  if (stmt_info->pattern_stmt_p)
    {
      gimple_set_bb (stmt_info->stmt, NULL);
      tree lhs = gimple_get_lhs (stmt_info->stmt);
      if (lhs && TREE_CODE (lhs) == SSA_NAME)
	release_ssa_name (lhs);
    }

  stmt_info->reduc_initial_values.release ();
  stmt_info->reduc_scalar_results.release ();
  STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
  STMT_VINFO_VEC_STMTS (stmt_info).release ();
  free (stmt_info);
}
769
/* Returns true if S1 dominates S2.  Within a single basic block the
   decision is based on PHI-ness and the statement UIDs, walking past
   freshly inserted (UID 0) statements.  */

bool
vect_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
{
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);

  /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
     SSA_NAME.  Assume it lives at the beginning of function and
     thus dominates everything.  */
  if (!bb1 || s1 == s2)
    return true;

  /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
  if (!bb2)
    return false;

  if (bb1 != bb2)
    return dominated_by_p (CDI_DOMINATORS, bb2, bb1);

  /* PHIs in the same basic block are assumed to be
     executed all in parallel, if only one stmt is a PHI,
     it dominates the other stmt in the same basic block.  */
  if (gimple_code (s1) == GIMPLE_PHI)
    return true;

  if (gimple_code (s2) == GIMPLE_PHI)
    return false;

  /* Inserted vectorized stmts all have UID 0 while the original stmts
     in the IL have UID increasing within a BB.  Walk from both sides
     until we find the other stmt or a stmt with UID != 0.  */
  gimple_stmt_iterator gsi1 = gsi_for_stmt (s1);
  while (gimple_uid (gsi_stmt (gsi1)) == 0)
    {
      gsi_next (&gsi1);
      if (gsi_end_p (gsi1))
	return false;
      if (gsi_stmt (gsi1) == s2)
	return true;
    }
  /* UID -1 marks statements whose order is unknown; give up.  */
  if (gimple_uid (gsi_stmt (gsi1)) == -1u)
    return false;

  gimple_stmt_iterator gsi2 = gsi_for_stmt (s2);
  while (gimple_uid (gsi_stmt (gsi2)) == 0)
    {
      gsi_prev (&gsi2);
      if (gsi_end_p (gsi2))
	return false;
      if (gsi_stmt (gsi2) == s1)
	return true;
    }
  if (gimple_uid (gsi_stmt (gsi2)) == -1u)
    return false;

  /* Compare the UIDs of the nearest original statements at or after S1
     and at or before S2.  */
  if (gimple_uid (gsi_stmt (gsi1)) <= gimple_uid (gsi_stmt (gsi2)))
    return true;
  return false;
}
830
/* A helper function to free scev and LOOP niter information, as well as
   clear loop constraint LOOP_C_FINITE.  Used when the information was
   derived under assumptions that turned out not to hold.  */

void
vect_free_loop_info_assumptions (class loop *loop)
{
  scev_reset_htab ();
  /* We need to explicitly reset upper bound information since they are
     used even after free_numbers_of_iterations_estimates.  */
  loop->any_upper_bound = false;
  loop->any_likely_upper_bound = false;
  free_numbers_of_iterations_estimates (loop);
  loop_constraint_clear (loop, LOOP_C_FINITE);
}
845
/* If LOOP has been versioned during ifcvt, return the internal call
   guarding it, i.e. the IFN_LOOP_VECTORIZED call mentioning LOOP's
   number.  If COND is non-NULL, also store the GIMPLE_COND testing the
   call's result in *COND.  */

gimple *
vect_loop_vectorized_call (class loop *loop, gcond **cond)
{
  basic_block bb = loop_preheader_edge (loop)->src;
  gimple *g;
  /* Walk up through single-pred/single-succ blocks from the preheader
     until a conditional (or a CFG merge/branch point) is reached.  */
  do
    {
      g = last_stmt (bb);
      if ((g && gimple_code (g) == GIMPLE_COND)
	  || !single_succ_p (bb))
	break;
      if (!single_pred_p (bb))
	break;
      bb = single_pred (bb);
    }
  while (1);
  if (g && gimple_code (g) == GIMPLE_COND)
    {
      if (cond)
	*cond = as_a <gcond *> (g);
      /* The guarding internal call, if any, immediately precedes the
	 condition consuming its result.  */
      gimple_stmt_iterator gsi = gsi_for_stmt (g);
      gsi_prev (&gsi);
      if (!gsi_end_p (gsi))
	{
	  g = gsi_stmt (gsi);
	  if (gimple_call_internal_p (g, IFN_LOOP_VECTORIZED)
	      && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->num
		  || tree_to_shwi (gimple_call_arg (g, 1)) == loop->num))
	    return g;
	}
    }
  return NULL;
}
882
/* If LOOP has been versioned during loop distribution, return the
   guarding internal call, i.e. the IFN_LOOP_DIST_ALIAS call carrying
   LOOP's original loop number.  Return NULL if LOOP was not versioned
   or the original loop no longer exists.  */

static gimple *
vect_loop_dist_alias_call (class loop *loop, function *fun)
{
  basic_block bb;
  basic_block entry;
  class loop *outer, *orig;
  gimple_stmt_iterator gsi;
  gimple *g;

  if (loop->orig_loop_num == 0)
    return NULL;

  orig = get_loop (fun, loop->orig_loop_num);
  if (orig == NULL)
    {
      /* The original loop is somehow destroyed.  Clear the information.  */
      loop->orig_loop_num = 0;
      return NULL;
    }

  /* Start the search from a block dominating both loop copies (or from
     LOOP's preheader when LOOP is the original itself).  */
  if (loop != orig)
    bb = nearest_common_dominator (CDI_DOMINATORS, loop->header, orig->header);
  else
    bb = loop_preheader_edge (loop)->src;

  outer = bb->loop_father;
  entry = ENTRY_BLOCK_PTR_FOR_FN (fun);

  /* Look upward in dominance tree.  */
  for (; bb != entry && flow_bb_inside_loop_p (outer, bb);
       bb = get_immediate_dominator (CDI_DOMINATORS, bb))
    {
      g = last_stmt (bb);
      if (g == NULL || gimple_code (g) != GIMPLE_COND)
	continue;

      gsi = gsi_for_stmt (g);
      gsi_prev (&gsi);
      if (gsi_end_p (gsi))
	continue;

      g = gsi_stmt (gsi);
      /* The guarding internal function call must have the same distribution
	 alias id.  */
      if (gimple_call_internal_p (g, IFN_LOOP_DIST_ALIAS)
	  && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->orig_loop_num))
	return g;
    }
  return NULL;
}
936
/* Set the uids of all the statements in basic blocks inside loop
   represented by LOOP_VINFO.  LOOP_VECTORIZED_CALL is the internal
   call guarding the loop which has been if converted.  Clearing the
   UIDs (to 0) marks the scalar loop copy's statements as foreign to
   the vec_info.  */
static void
set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call,
		  function *fun)
{
  /* The second argument of the call identifies the scalar (non
     if-converted) loop copy.  */
  tree arg = gimple_call_arg (loop_vectorized_call, 1);
  basic_block *bbs;
  unsigned int i;
  class loop *scalar_loop = get_loop (fun, tree_to_shwi (arg));

  LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop;
  gcc_checking_assert (vect_loop_vectorized_call (scalar_loop)
		       == loop_vectorized_call);
  /* If we are going to vectorize outer loop, prevent vectorization
     of the inner loop in the scalar loop - either the scalar loop is
     thrown away, so it is a wasted work, or is used only for
     a few iterations.  */
  if (scalar_loop->inner)
    {
      gimple *g = vect_loop_vectorized_call (scalar_loop->inner);
      if (g)
	{
	  arg = gimple_call_arg (g, 0);
	  get_loop (fun, tree_to_shwi (arg))->dont_vectorize = true;
	  fold_loop_internal_call (g, boolean_false_node);
	}
    }
  bbs = get_loop_body (scalar_loop);
  for (i = 0; i < scalar_loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *phi = gsi_stmt (gsi);
	  gimple_set_uid (phi, 0);
	}
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  gimple_set_uid (stmt, 0);
	}
    }
  free (bbs);
}
984
/* Generate vectorized code for LOOP and its epilogues (recursively).
   LOOP_VECTORIZED_CALL, if non-NULL, is the guarding internal call of
   an if-converted LOOP; SIMDUID_TO_VF_HTAB records the chosen VF for
   OMP simd loops (created on demand).  */

static void
vect_transform_loops (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
		      loop_p loop, gimple *loop_vectorized_call,
		      function *fun)
{
  loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);

  if (loop_vectorized_call)
    set_uid_loop_bbs (loop_vinfo, loop_vectorized_call, fun);

  unsigned HOST_WIDE_INT bytes;
  if (dump_enabled_p ())
    {
      if (GET_MODE_SIZE (loop_vinfo->vector_mode).is_constant (&bytes))
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
			 "loop vectorized using %wu byte vectors\n", bytes);
      else
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
			 "loop vectorized using variable length vectors\n");
    }

  loop_p new_loop = vect_transform_loop (loop_vinfo,
					 loop_vectorized_call);
  /* Now that the loop has been vectorized, allow it to be unrolled
     etc.  */
  loop->force_vectorize = false;

  /* Record the chosen VF for the loop's simduid so the GOMP_SIMD_*
     builtins can be folded later.  */
  if (loop->simduid)
    {
      simduid_to_vf *simduid_to_vf_data = XNEW (simduid_to_vf);
      if (!simduid_to_vf_htab)
	simduid_to_vf_htab = new hash_table<simduid_to_vf> (15);
      simduid_to_vf_data->simduid = DECL_UID (loop->simduid);
      simduid_to_vf_data->vf = loop_vinfo->vectorization_factor;
      *simduid_to_vf_htab->find_slot (simduid_to_vf_data, INSERT)
	= simduid_to_vf_data;
    }

  /* Epilogue of vectorized loop must be vectorized too.  */
  if (new_loop)
    vect_transform_loops (simduid_to_vf_htab, new_loop, NULL, fun);
}
1029
/* Try to vectorize LOOP.  Returns a set of TODO_* flags for the caller.
   LOOP_VECTORIZED_CALL / LOOP_DIST_ALIAS_CALL are the internal calls
   guarding LOOP if it was versioned by if-conversion resp. loop
   distribution; on success they are folded so only the vectorized
   version survives.  On failure an if-converted loop body may still be
   subjected to BB (SLP) vectorization.  */

static unsigned
try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
		      unsigned *num_vectorized_loops, loop_p loop,
		      gimple *loop_vectorized_call,
		      gimple *loop_dist_alias_call,
		      function *fun)
{
  unsigned ret = 0;
  vec_info_shared shared;
  /* Reset vect_location when leaving, whatever the exit path.  */
  auto_purge_vect_location sentinel;
  vect_location = find_loop_location (loop);

  if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
      && dump_enabled_p ())
    dump_printf (MSG_NOTE | MSG_PRIORITY_INTERNALS,
		 "\nAnalyzing loop at %s:%d\n",
		 LOCATION_FILE (vect_location.get_location_t ()),
		 LOCATION_LINE (vect_location.get_location_t ()));

  /* Try to analyze the loop, retaining an opt_problem if dump_enabled_p.  */
  opt_loop_vec_info loop_vinfo = vect_analyze_loop (loop, &shared);
  loop->aux = loop_vinfo;

  if (!loop_vinfo)
    if (dump_enabled_p ())
      if (opt_problem *problem = loop_vinfo.get_problem ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "couldn't vectorize loop\n");
	  problem->emit_and_clear ();
	}

  if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
    {
      /* Free existing information if loop is analyzed with some
	 assumptions.  */
      if (loop_constraint_set_p (loop, LOOP_C_FINITE))
	vect_free_loop_info_assumptions (loop);

      /* If we applied if-conversion then try to vectorize the
	 BB of innermost loops.
	 ??? Ideally BB vectorization would learn to vectorize
	 control flow by applying if-conversion on-the-fly, the
	 following retains the if-converted loop body even when
	 only non-if-converted parts took part in BB vectorization.  */
      if (flag_tree_slp_vectorize != 0
	  && loop_vectorized_call
	  && ! loop->inner)
	{
	  basic_block bb = loop->header;
	  bool require_loop_vectorize = false;
	  for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
	       !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple *stmt = gsi_stmt (gsi);
	      gcall *call = dyn_cast <gcall *> (stmt);
	      if (call && gimple_call_internal_p (call))
		{
		  internal_fn ifn = gimple_call_internal_fn (call);
		  if (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE
		      /* Don't keep the if-converted parts when the ifn with
			 specifc type is not supported by the backend.  */
		      || (direct_internal_fn_p (ifn)
			  && !direct_internal_fn_supported_p
					(call, OPTIMIZE_FOR_SPEED)))
		    {
		      require_loop_vectorize = true;
		      break;
		    }
		}
	      /* Reset state for BB vectorization.  */
	      gimple_set_uid (stmt, -1);
	      gimple_set_visited (stmt, false);
	    }
	  if (!require_loop_vectorize)
	    {
	      tree arg = gimple_call_arg (loop_vectorized_call, 1);
	      class loop *scalar_loop = get_loop (fun, tree_to_shwi (arg));
	      if (vect_slp_if_converted_bb (bb, scalar_loop))
		{
		  /* Keep the if-converted body: redirect control flow
		     to it by folding the guard to true.  */
		  fold_loop_internal_call (loop_vectorized_call,
					   boolean_true_node);
		  loop_vectorized_call = NULL;
		  ret |= TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
		}
	    }
	}
      /* If outer loop vectorization fails for LOOP_VECTORIZED guarded
	 loop, don't vectorize its inner loop; we'll attempt to
	 vectorize LOOP_VECTORIZED guarded inner loop of the scalar
	 loop version.  */
      if (loop_vectorized_call && loop->inner)
	loop->inner->dont_vectorize = true;
      return ret;
    }

  if (!dbg_cnt (vect_loop))
    {
      /* Free existing information if loop is analyzed with some
	 assumptions.  */
      if (loop_constraint_set_p (loop, LOOP_C_FINITE))
	vect_free_loop_info_assumptions (loop);
      return ret;
    }

  (*num_vectorized_loops)++;
  /* Transform LOOP and its epilogues.  */
  vect_transform_loops (simduid_to_vf_htab, loop, loop_vectorized_call, fun);

  if (loop_vectorized_call)
    {
      fold_loop_internal_call (loop_vectorized_call, boolean_true_node);
      ret |= TODO_cleanup_cfg;
    }
  if (loop_dist_alias_call)
    {
      tree value = gimple_call_arg (loop_dist_alias_call, 1);
      fold_loop_internal_call (loop_dist_alias_call, value);
      ret |= TODO_cleanup_cfg;
    }

  return ret;
}
1154
1155 /* Try to vectorize LOOP. */
1156
1157 static unsigned
1158 try_vectorize_loop (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
1159 unsigned *num_vectorized_loops, loop_p loop,
1160 function *fun)
1161 {
1162 if (!((flag_tree_loop_vectorize
1163 && optimize_loop_nest_for_speed_p (loop))
1164 || loop->force_vectorize))
1165 return 0;
1166
1167 return try_vectorize_loop_1 (simduid_to_vf_htab, num_vectorized_loops, loop,
1168 vect_loop_vectorized_call (loop),
1169 vect_loop_dist_alias_call (loop, fun), fun);
1170 }
1171
1172
1173 /* Loop autovectorization. */
1174
1175 namespace {
1176
/* Static metadata for the "vect" loop vectorization GIMPLE pass,
   consumed by the pass manager.  Requires CFG and SSA form.  */
const pass_data pass_data_vectorize =
{
  GIMPLE_PASS, /* type */
  "vect", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_TREE_VECTORIZATION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
1189
/* The loop vectorization pass object.  */
class pass_vectorize : public gimple_opt_pass
{
public:
  pass_vectorize (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_vectorize, ctxt)
  {}

  /* opt_pass methods: */
  /* Run when -ftree-loop-vectorize is enabled, or when some loop in
     FUN carries the force_vectorize flag.  */
  virtual bool gate (function *fun)
    {
      return flag_tree_loop_vectorize || fun->has_force_vectorize_loops;
    }

  virtual unsigned int execute (function *);

}; // class pass_vectorize
1206
1207 /* Function vectorize_loops.
1208
1209 Entry point to loop vectorization phase. */
1210
1211 unsigned
1212 pass_vectorize::execute (function *fun)
1213 {
1214 unsigned int i;
1215 unsigned int num_vectorized_loops = 0;
1216 unsigned int vect_loops_num;
1217 hash_table<simduid_to_vf> *simduid_to_vf_htab = NULL;
1218 hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
1219 bool any_ifcvt_loops = false;
1220 unsigned ret = 0;
1221
1222 vect_loops_num = number_of_loops (fun);
1223
1224 /* Bail out if there are no loops. */
1225 if (vect_loops_num <= 1)
1226 return 0;
1227
1228 vect_slp_init ();
1229
1230 if (fun->has_simduid_loops)
1231 note_simd_array_uses (&simd_array_to_simduid_htab, fun);
1232
1233 /* ----------- Analyze loops. ----------- */
1234
1235 /* If some loop was duplicated, it gets bigger number
1236 than all previously defined loops. This fact allows us to run
1237 only over initial loops skipping newly generated ones. */
1238 for (auto loop : loops_list (fun, 0))
1239 if (loop->dont_vectorize)
1240 {
1241 any_ifcvt_loops = true;
1242 /* If-conversion sometimes versions both the outer loop
1243 (for the case when outer loop vectorization might be
1244 desirable) as well as the inner loop in the scalar version
1245 of the loop. So we have:
1246 if (LOOP_VECTORIZED (1, 3))
1247 {
1248 loop1
1249 loop2
1250 }
1251 else
1252 loop3 (copy of loop1)
1253 if (LOOP_VECTORIZED (4, 5))
1254 loop4 (copy of loop2)
1255 else
1256 loop5 (copy of loop4)
1257 If loops' iteration gives us loop3 first (which has
1258 dont_vectorize set), make sure to process loop1 before loop4;
1259 so that we can prevent vectorization of loop4 if loop1
1260 is successfully vectorized. */
1261 if (loop->inner)
1262 {
1263 gimple *loop_vectorized_call
1264 = vect_loop_vectorized_call (loop);
1265 if (loop_vectorized_call
1266 && vect_loop_vectorized_call (loop->inner))
1267 {
1268 tree arg = gimple_call_arg (loop_vectorized_call, 0);
1269 class loop *vector_loop
1270 = get_loop (fun, tree_to_shwi (arg));
1271 if (vector_loop && vector_loop != loop)
1272 {
1273 /* Make sure we don't vectorize it twice. */
1274 vector_loop->dont_vectorize = true;
1275 ret |= try_vectorize_loop (simduid_to_vf_htab,
1276 &num_vectorized_loops,
1277 vector_loop, fun);
1278 }
1279 }
1280 }
1281 }
1282 else
1283 ret |= try_vectorize_loop (simduid_to_vf_htab, &num_vectorized_loops,
1284 loop, fun);
1285
1286 vect_location = dump_user_location_t ();
1287
1288 statistics_counter_event (fun, "Vectorized loops", num_vectorized_loops);
1289 if (dump_enabled_p ()
1290 || (num_vectorized_loops > 0 && dump_enabled_p ()))
1291 dump_printf_loc (MSG_NOTE, vect_location,
1292 "vectorized %u loops in function.\n",
1293 num_vectorized_loops);
1294
1295 /* ----------- Finalize. ----------- */
1296
1297 if (any_ifcvt_loops)
1298 for (i = 1; i < number_of_loops (fun); i++)
1299 {
1300 class loop *loop = get_loop (fun, i);
1301 if (loop && loop->dont_vectorize)
1302 {
1303 gimple *g = vect_loop_vectorized_call (loop);
1304 if (g)
1305 {
1306 fold_loop_internal_call (g, boolean_false_node);
1307 ret |= TODO_cleanup_cfg;
1308 g = NULL;
1309 }
1310 else
1311 g = vect_loop_dist_alias_call (loop, fun);
1312
1313 if (g)
1314 {
1315 fold_loop_internal_call (g, boolean_false_node);
1316 ret |= TODO_cleanup_cfg;
1317 }
1318 }
1319 }
1320
1321 /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins. */
1322 if (fun->has_simduid_loops)
1323 {
1324 adjust_simduid_builtins (simduid_to_vf_htab, fun);
1325 /* Avoid stale SCEV cache entries for the SIMD_LANE defs. */
1326 scev_reset ();
1327 }
1328 /* Shrink any "omp array simd" temporary arrays to the
1329 actual vectorization factors. */
1330 if (simd_array_to_simduid_htab)
1331 shrink_simd_arrays (simd_array_to_simduid_htab, simduid_to_vf_htab);
1332 delete simduid_to_vf_htab;
1333 fun->has_simduid_loops = false;
1334
1335 if (num_vectorized_loops > 0)
1336 {
1337 /* If we vectorized any loop only virtual SSA form needs to be updated.
1338 ??? Also while we try hard to update loop-closed SSA form we fail
1339 to properly do this in some corner-cases (see PR56286). */
1340 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals);
1341 ret |= TODO_cleanup_cfg;
1342 }
1343
1344 for (i = 1; i < number_of_loops (fun); i++)
1345 {
1346 loop_vec_info loop_vinfo;
1347 bool has_mask_store;
1348
1349 class loop *loop = get_loop (fun, i);
1350 if (!loop || !loop->aux)
1351 continue;
1352 loop_vinfo = (loop_vec_info) loop->aux;
1353 has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo);
1354 delete loop_vinfo;
1355 if (has_mask_store
1356 && targetm.vectorize.empty_mask_is_expensive (IFN_MASK_STORE))
1357 optimize_mask_stores (loop);
1358
1359 auto_bitmap exit_bbs;
1360 /* Perform local CSE, this esp. helps because we emit code for
1361 predicates that need to be shared for optimal predicate usage.
1362 However reassoc will re-order them and prevent CSE from working
1363 as it should. CSE only the loop body, not the entry. */
1364 bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);
1365
1366 edge entry = EDGE_PRED (loop_preheader_edge (loop)->src, 0);
1367 do_rpo_vn (fun, entry, exit_bbs);
1368
1369 loop->aux = NULL;
1370 }
1371
1372 vect_slp_fini ();
1373
1374 return ret;
1375 }
1376
1377 } // anon namespace
1378
1379 gimple_opt_pass *
1380 make_pass_vectorize (gcc::context *ctxt)
1381 {
1382 return new pass_vectorize (ctxt);
1383 }
1384
1385 /* Entry point to the simduid cleanup pass. */
1386
1387 namespace {
1388
/* Static metadata for the standalone "simduid" cleanup pass, which
   runs when the vectorizer passes themselves do not.  */
const pass_data pass_data_simduid_cleanup =
{
  GIMPLE_PASS, /* type */
  "simduid", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  ( PROP_ssa | PROP_cfg ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
1401
/* The simduid cleanup pass object.  */
class pass_simduid_cleanup : public gimple_opt_pass
{
public:
  pass_simduid_cleanup (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_simduid_cleanup, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_simduid_cleanup (m_ctxt); }
  /* Only functions that still contain simduid loops need cleanup.  */
  virtual bool gate (function *fun) { return fun->has_simduid_loops; }
  virtual unsigned int execute (function *);

}; // class pass_simduid_cleanup
1415
1416 unsigned int
1417 pass_simduid_cleanup::execute (function *fun)
1418 {
1419 hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
1420
1421 note_simd_array_uses (&simd_array_to_simduid_htab, fun);
1422
1423 /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins. */
1424 adjust_simduid_builtins (NULL, fun);
1425
1426 /* Shrink any "omp array simd" temporary arrays to the
1427 actual vectorization factors. */
1428 if (simd_array_to_simduid_htab)
1429 shrink_simd_arrays (simd_array_to_simduid_htab, NULL);
1430 fun->has_simduid_loops = false;
1431 return 0;
1432 }
1433
1434 } // anon namespace
1435
1436 gimple_opt_pass *
1437 make_pass_simduid_cleanup (gcc::context *ctxt)
1438 {
1439 return new pass_simduid_cleanup (ctxt);
1440 }
1441
1442
1443 /* Entry point to basic block SLP phase. */
1444
1445 namespace {
1446
/* Static metadata for the "slp" basic-block vectorization pass.
   Note TODO_update_ssa in todo_flags_finish: SLP vectorization can
   require an SSA update afterwards.  */
const pass_data pass_data_slp_vectorize =
{
  GIMPLE_PASS, /* type */
  "slp", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_TREE_SLP_VECTORIZATION, /* tv_id */
  ( PROP_ssa | PROP_cfg ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
1459
/* The basic-block SLP vectorization pass object.  */
class pass_slp_vectorize : public gimple_opt_pass
{
public:
  pass_slp_vectorize (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_slp_vectorize, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_slp_vectorize (m_ctxt); }
  /* Run whenever -ftree-slp-vectorize is enabled.  */
  virtual bool gate (function *) { return flag_tree_slp_vectorize != 0; }
  virtual unsigned int execute (function *);

}; // class pass_slp_vectorize
1473
1474 unsigned int
1475 pass_slp_vectorize::execute (function *fun)
1476 {
1477 auto_purge_vect_location sentinel;
1478 basic_block bb;
1479
1480 bool in_loop_pipeline = scev_initialized_p ();
1481 if (!in_loop_pipeline)
1482 {
1483 loop_optimizer_init (LOOPS_NORMAL);
1484 scev_initialize ();
1485 }
1486
1487 /* Mark all stmts as not belonging to the current region and unvisited. */
1488 FOR_EACH_BB_FN (bb, fun)
1489 {
1490 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
1491 gsi_next (&gsi))
1492 {
1493 gphi *stmt = gsi.phi ();
1494 gimple_set_uid (stmt, -1);
1495 gimple_set_visited (stmt, false);
1496 }
1497 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
1498 gsi_next (&gsi))
1499 {
1500 gimple *stmt = gsi_stmt (gsi);
1501 gimple_set_uid (stmt, -1);
1502 gimple_set_visited (stmt, false);
1503 }
1504 }
1505
1506 vect_slp_init ();
1507
1508 vect_slp_function (fun);
1509
1510 vect_slp_fini ();
1511
1512 if (!in_loop_pipeline)
1513 {
1514 scev_finalize ();
1515 loop_optimizer_finalize ();
1516 }
1517
1518 return 0;
1519 }
1520
1521 } // anon namespace
1522
1523 gimple_opt_pass *
1524 make_pass_slp_vectorize (gcc::context *ctxt)
1525 {
1526 return new pass_slp_vectorize (ctxt);
1527 }
1528
1529
1530 /* Increase alignment of global arrays to improve vectorization potential.
1531 TODO:
1532 - Consider also structs that have an array field.
1533 - Use ipa analysis to prune arrays that can't be vectorized?
1534 This should involve global alignment analysis and in the future also
1535 array padding. */
1536
1537 static unsigned get_vec_alignment_for_type (tree);
1538 static hash_map<tree, unsigned> *type_align_map;
1539
1540 /* Return alignment of array's vector type corresponding to scalar type.
1541 0 if no vector type exists. */
1542 static unsigned
1543 get_vec_alignment_for_array_type (tree type)
1544 {
1545 gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
1546 poly_uint64 array_size, vector_size;
1547
1548 tree scalar_type = strip_array_types (type);
1549 tree vectype = get_related_vectype_for_scalar_type (VOIDmode, scalar_type);
1550 if (!vectype
1551 || !poly_int_tree_p (TYPE_SIZE (type), &array_size)
1552 || !poly_int_tree_p (TYPE_SIZE (vectype), &vector_size)
1553 || maybe_lt (array_size, vector_size))
1554 return 0;
1555
1556 return TYPE_ALIGN (vectype);
1557 }
1558
/* Return alignment of field having maximum alignment of vector type
   corresponding to its scalar type.  For now, we only consider fields whose
   offset is a multiple of its vector alignment.
   0 if no suitable field is found.  */
static unsigned
get_vec_alignment_for_record_type (tree type)
{
  gcc_assert (TREE_CODE (type) == RECORD_TYPE);

  unsigned max_align = 0, alignment;
  HOST_WIDE_INT offset;
  tree offset_tree;

  /* Packed records lay out fields without padding; don't touch them.  */
  if (TYPE_PACKED (type))
    return 0;

  /* Results are memoized in type_align_map.  */
  unsigned *slot = type_align_map->get (type);
  if (slot)
    return *slot;

  for (tree field = first_field (type);
       field != NULL_TREE;
       field = DECL_CHAIN (field))
    {
      /* Skip if not FIELD_DECL or if alignment is set by user.  */
      if (TREE_CODE (field) != FIELD_DECL
	  || DECL_USER_ALIGN (field)
	  || DECL_ARTIFICIAL (field))
	continue;

      /* We don't need to process the type further if offset is variable,
	 since the offsets of remaining members will also be variable.  */
      if (TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST
	  || TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST)
	break;

      /* Similarly stop processing the type if offset_tree
	 does not fit in unsigned HOST_WIDE_INT.  */
      offset_tree = bit_position (field);
      if (!tree_fits_uhwi_p (offset_tree))
	break;

      offset = tree_to_uhwi (offset_tree);
      /* NOTE: recurses via get_vec_alignment_for_type for nested
	 arrays/records.  */
      alignment = get_vec_alignment_for_type (TREE_TYPE (field));

      /* Get maximum alignment of vectorized field/array among those members
	 whose offset is multiple of the vector alignment.  */
      if (alignment
	  && (offset % alignment == 0)
	  && (alignment > max_align))
	max_align = alignment;
    }

  type_align_map->put (type, max_align);
  return max_align;
}
1615
1616 /* Return alignment of vector type corresponding to decl's scalar type
1617 or 0 if it doesn't exist or the vector alignment is lesser than
1618 decl's alignment. */
1619 static unsigned
1620 get_vec_alignment_for_type (tree type)
1621 {
1622 if (type == NULL_TREE)
1623 return 0;
1624
1625 gcc_assert (TYPE_P (type));
1626
1627 static unsigned alignment = 0;
1628 switch (TREE_CODE (type))
1629 {
1630 case ARRAY_TYPE:
1631 alignment = get_vec_alignment_for_array_type (type);
1632 break;
1633 case RECORD_TYPE:
1634 alignment = get_vec_alignment_for_record_type (type);
1635 break;
1636 default:
1637 alignment = 0;
1638 break;
1639 }
1640
1641 return (alignment > TYPE_ALIGN (type)) ? alignment : 0;
1642 }
1643
/* Entry point to increase_alignment pass.  Raises the alignment of
   defined global variables to that of the vector type matching their
   scalar element type, where the symbol table permits it.  */
static unsigned int
increase_alignment (void)
{
  varpool_node *vnode;

  vect_location = dump_user_location_t ();
  /* Memoization table used by get_vec_alignment_for_record_type;
     lives only for the duration of this pass.  */
  type_align_map = new hash_map<tree, unsigned>;

  /* Increase the alignment of all global arrays for vectorization.  */
  FOR_EACH_DEFINED_VARIABLE (vnode)
    {
      tree decl = vnode->decl;
      unsigned int alignment;

      /* Skip decls whose alignment must not be changed: symtab decls
	 that refuse it, user-aligned decls and compiler-generated
	 artificial ones.  */
      if ((decl_in_symtab_p (decl)
	  && !symtab_node::get (decl)->can_increase_alignment_p ())
	  || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl))
	continue;

      alignment = get_vec_alignment_for_type (TREE_TYPE (decl));
      if (alignment && vect_can_force_dr_alignment_p (decl, alignment))
        {
	  vnode->increase_alignment (alignment);
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "Increasing alignment of decl: %T\n", decl);
        }
    }

  delete type_align_map;
  return 0;
}
1676
1677
1678 namespace {
1679
/* Static metadata for the simple IPA "increase_alignment" pass.  */
const pass_data pass_data_ipa_increase_alignment =
{
  SIMPLE_IPA_PASS, /* type */
  "increase_alignment", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_IPA_OPT, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
1692
/* The IPA increase_alignment pass object.  */
class pass_ipa_increase_alignment : public simple_ipa_opt_pass
{
public:
  pass_ipa_increase_alignment (gcc::context *ctxt)
    : simple_ipa_opt_pass (pass_data_ipa_increase_alignment, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when both section anchors and loop vectorization are
     enabled.  */
  virtual bool gate (function *)
    {
      return flag_section_anchors && flag_tree_loop_vectorize;
    }

  virtual unsigned int execute (function *) { return increase_alignment (); }

}; // class pass_ipa_increase_alignment
1709
1710 } // anon namespace
1711
1712 simple_ipa_opt_pass *
1713 make_pass_ipa_increase_alignment (gcc::context *ctxt)
1714 {
1715 return new pass_ipa_increase_alignment (ctxt);
1716 }
1717
/* If the condition represented by T is a comparison or the SSA name
   result of a comparison, extract the comparison's operands.  Represent
   T as NE_EXPR <T, 0> otherwise.  */

void
scalar_cond_masked_key::get_cond_ops_from_tree (tree t)
{
  /* Case 1: T is itself a comparison tree.  */
  if (TREE_CODE_CLASS (TREE_CODE (t)) == tcc_comparison)
    {
      this->code = TREE_CODE (t);
      this->op0 = TREE_OPERAND (t, 0);
      this->op1 = TREE_OPERAND (t, 1);
      this->inverted_p = false;
      return;
    }

  /* Case 2: T is an SSA name defined by a comparison, or by a
     BIT_NOT_EXPR whose operand is defined by a comparison (in which
     case record the comparison with inverted_p set).  */
  if (TREE_CODE (t) == SSA_NAME)
    if (gassign *stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (t)))
      {
	tree_code code = gimple_assign_rhs_code (stmt);
	if (TREE_CODE_CLASS (code) == tcc_comparison)
	  {
	    this->code = code;
	    this->op0 = gimple_assign_rhs1 (stmt);
	    this->op1 = gimple_assign_rhs2 (stmt);
	    this->inverted_p = false;
	    return;
	  }
	else if (code == BIT_NOT_EXPR)
	  {
	    tree n_op = gimple_assign_rhs1 (stmt);
	    /* Note STMT is re-pointed at N_OP's defining statement.  */
	    if ((stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (n_op))))
	      {
		code = gimple_assign_rhs_code (stmt);
		if (TREE_CODE_CLASS (code) == tcc_comparison)
		  {
		    this->code = code;
		    this->op0 = gimple_assign_rhs1 (stmt);
		    this->op1 = gimple_assign_rhs2 (stmt);
		    this->inverted_p = true;
		    return;
		  }
	      }
	  }
      }

  /* Fallback: represent T as "T != 0".  */
  this->code = NE_EXPR;
  this->op0 = t;
  this->op1 = build_zero_cst (TREE_TYPE (t));
  this->inverted_p = false;
}
1769
1770 /* See the comment above the declaration for details. */
1771
1772 unsigned int
1773 vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
1774 stmt_vec_info stmt_info, slp_tree,
1775 tree vectype, int misalign,
1776 vect_cost_model_location where)
1777 {
1778 unsigned int cost
1779 = builtin_vectorization_cost (kind, vectype, misalign) * count;
1780 return record_stmt_cost (stmt_info, where, cost);
1781 }
1782
/* See the comment above the declaration for details.  */

void
vector_costs::finish_cost (const vector_costs *)
{
  /* Guard against finishing the same costing instance twice.  */
  gcc_assert (!m_finished);
  m_finished = true;
}
1791
1792 /* Record a base cost of COST units against WHERE. If STMT_INFO is
1793 nonnull, use it to adjust the cost based on execution frequency
1794 (where appropriate). */
1795
1796 unsigned int
1797 vector_costs::record_stmt_cost (stmt_vec_info stmt_info,
1798 vect_cost_model_location where,
1799 unsigned int cost)
1800 {
1801 cost = adjust_cost_for_freq (stmt_info, where, cost);
1802 m_costs[where] += cost;
1803 return cost;
1804 }
1805
1806 /* COST is the base cost we have calculated for an operation in location WHERE.
1807 If STMT_INFO is nonnull, use it to adjust the cost based on execution
1808 frequency (where appropriate). Return the adjusted cost. */
1809
1810 unsigned int
1811 vector_costs::adjust_cost_for_freq (stmt_vec_info stmt_info,
1812 vect_cost_model_location where,
1813 unsigned int cost)
1814 {
1815 /* Statements in an inner loop relative to the loop being
1816 vectorized are weighted more heavily. The value here is
1817 arbitrary and could potentially be improved with analysis. */
1818 if (where == vect_body
1819 && stmt_info
1820 && stmt_in_inner_loop_p (m_vinfo, stmt_info))
1821 {
1822 loop_vec_info loop_vinfo = as_a<loop_vec_info> (m_vinfo);
1823 cost *= LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo);
1824 }
1825 return cost;
1826 }
1827
1828 /* See the comment above the declaration for details. */
1829
1830 bool
1831 vector_costs::better_main_loop_than_p (const vector_costs *other) const
1832 {
1833 int diff = compare_inside_loop_cost (other);
1834 if (diff != 0)
1835 return diff < 0;
1836
1837 /* If there's nothing to choose between the loop bodies, see whether
1838 there's a difference in the prologue and epilogue costs. */
1839 diff = compare_outside_loop_cost (other);
1840 if (diff != 0)
1841 return diff < 0;
1842
1843 return false;
1844 }
1845
1846
/* See the comment above the declaration for details.  */

bool
vector_costs::better_epilogue_loop_than_p (const vector_costs *other,
					   loop_vec_info main_loop) const
{
  loop_vec_info this_loop_vinfo = as_a<loop_vec_info> (this->m_vinfo);
  loop_vec_info other_loop_vinfo = as_a<loop_vec_info> (other->m_vinfo);

  poly_int64 this_vf = LOOP_VINFO_VECT_FACTOR (this_loop_vinfo);
  poly_int64 other_vf = LOOP_VINFO_VECT_FACTOR (other_loop_vinfo);

  poly_uint64 main_poly_vf = LOOP_VINFO_VECT_FACTOR (main_loop);
  unsigned HOST_WIDE_INT main_vf;
  unsigned HOST_WIDE_INT other_factor, this_factor, other_cost, this_cost;
  /* If we can determine how many iterations are left for the epilogue
     loop, that is if both the main loop's vectorization factor and number
     of iterations are constant, then we use them to calculate the cost of
     the epilogue loop together with a 'likely value' for the epilogues
     vectorization factor.  Otherwise we use the main loop's vectorization
     factor and the maximum poly value for the epilogue's.  If the target
     has not provided with a sensible upper bound poly vectorization
     factors are likely to be favored over constant ones.  */
  if (main_poly_vf.is_constant (&main_vf)
      && LOOP_VINFO_NITERS_KNOWN_P (main_loop))
    {
      /* NITERS is the scalar iteration count left over for the
	 epilogue after the main loop's full vector iterations.  */
      unsigned HOST_WIDE_INT niters
	= LOOP_VINFO_INT_NITERS (main_loop) % main_vf;
      HOST_WIDE_INT other_likely_vf
	= estimated_poly_value (other_vf, POLY_VALUE_LIKELY);
      HOST_WIDE_INT this_likely_vf
	= estimated_poly_value (this_vf, POLY_VALUE_LIKELY);

      /* If the epilogue is using partial vectors we account for the
	 partial iteration here too.  */
      other_factor = niters / other_likely_vf;
      if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (other_loop_vinfo)
	  && niters % other_likely_vf != 0)
	other_factor++;

      this_factor = niters / this_likely_vf;
      if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (this_loop_vinfo)
	  && niters % this_likely_vf != 0)
	this_factor++;
    }
  else
    {
      unsigned HOST_WIDE_INT main_vf_max
	= estimated_poly_value (main_poly_vf, POLY_VALUE_MAX);
      unsigned HOST_WIDE_INT other_vf_max
	= estimated_poly_value (other_vf, POLY_VALUE_MAX);
      unsigned HOST_WIDE_INT this_vf_max
	= estimated_poly_value (this_vf, POLY_VALUE_MAX);

      other_factor = CEIL (main_vf_max, other_vf_max);
      this_factor = CEIL (main_vf_max, this_vf_max);

      /* If the loop is not using partial vectors then it will iterate one
	 time less than one that does.  It is safe to subtract one here,
	 because the main loop's vf is always at least 2x bigger than that
	 of an epilogue.  */
      if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (other_loop_vinfo))
	other_factor -= 1;
      if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (this_loop_vinfo))
	this_factor -= 1;
    }

  /* Compute the costs by multiplying the inside costs with the factor and
     add the outside costs for a more complete picture.  The factor is the
     amount of times we are expecting to iterate this epilogue.  */
  other_cost = other->body_cost () * other_factor;
  this_cost = this->body_cost () * this_factor;
  other_cost += other->outside_cost ();
  this_cost += this->outside_cost ();
  return this_cost < other_cost;
}
1923
/* A <=>-style subroutine of better_main_loop_than_p.  Check whether we can
   determine the return value of better_main_loop_than_p by comparing the
   inside (loop body) costs of THIS and OTHER.  Return:

   * -1 if better_main_loop_than_p should return true.
   * 1 if better_main_loop_than_p should return false.
   * 0 if we can't decide.  */

int
vector_costs::compare_inside_loop_cost (const vector_costs *other) const
{
  loop_vec_info this_loop_vinfo = as_a<loop_vec_info> (this->m_vinfo);
  loop_vec_info other_loop_vinfo = as_a<loop_vec_info> (other->m_vinfo);

  /* Both candidates must be vectorizing the same loop.  */
  struct loop *loop = LOOP_VINFO_LOOP (this_loop_vinfo);
  gcc_assert (LOOP_VINFO_LOOP (other_loop_vinfo) == loop);

  poly_int64 this_vf = LOOP_VINFO_VECT_FACTOR (this_loop_vinfo);
  poly_int64 other_vf = LOOP_VINFO_VECT_FACTOR (other_loop_vinfo);

  /* Limit the VFs to what is likely to be the maximum number of iterations,
     to handle cases in which at least one loop_vinfo is fully-masked.  */
  HOST_WIDE_INT estimated_max_niter = likely_max_stmt_executions_int (loop);
  if (estimated_max_niter != -1)
    {
      if (known_le (estimated_max_niter, this_vf))
	this_vf = estimated_max_niter;
      if (known_le (estimated_max_niter, other_vf))
	other_vf = estimated_max_niter;
    }

  /* Check whether the (fractional) cost per scalar iteration is lower or
     higher: this_inside_cost / this_vf vs. other_inside_cost / other_vf.
     (Cross-multiplied to avoid division.)  */
  poly_int64 rel_this = this_loop_vinfo->vector_costs->body_cost () * other_vf;
  poly_int64 rel_other
    = other_loop_vinfo->vector_costs->body_cost () * this_vf;

  HOST_WIDE_INT est_rel_this_min
    = estimated_poly_value (rel_this, POLY_VALUE_MIN);
  HOST_WIDE_INT est_rel_this_max
    = estimated_poly_value (rel_this, POLY_VALUE_MAX);

  HOST_WIDE_INT est_rel_other_min
    = estimated_poly_value (rel_other, POLY_VALUE_MIN);
  HOST_WIDE_INT est_rel_other_max
    = estimated_poly_value (rel_other, POLY_VALUE_MAX);

  /* Check first if we can make out an unambiguous total order from the minimum
     and maximum estimates.  */
  if (est_rel_this_min < est_rel_other_min
      && est_rel_this_max < est_rel_other_max)
    return -1;

  if (est_rel_other_min < est_rel_this_min
      && est_rel_other_max < est_rel_this_max)
    return 1;

  /* When other_loop_vinfo uses a variable vectorization factor,
     we know that it has a lower cost for at least one runtime VF.
     However, we don't know how likely that VF is.

     One option would be to compare the costs for the estimated VFs.
     The problem is that that can put too much pressure on the cost
     model.  E.g. if the estimated VF is also the lowest possible VF,
     and if other_loop_vinfo is 1 unit worse than this_loop_vinfo
     for the estimated VF, we'd then choose this_loop_vinfo even
     though (a) this_loop_vinfo might not actually be better than
     other_loop_vinfo for that VF and (b) it would be significantly
     worse at larger VFs.

     Here we go for a hacky compromise: pick this_loop_vinfo if it is
     no more expensive than other_loop_vinfo even after doubling the
     estimated other_loop_vinfo VF.  For all but trivial loops, this
     ensures that we only pick this_loop_vinfo if it is significantly
     better than other_loop_vinfo at the estimated VF.  */
  if (est_rel_other_min != est_rel_this_min
      || est_rel_other_max != est_rel_this_max)
    {
      HOST_WIDE_INT est_rel_this_likely
	= estimated_poly_value (rel_this, POLY_VALUE_LIKELY);
      HOST_WIDE_INT est_rel_other_likely
	= estimated_poly_value (rel_other, POLY_VALUE_LIKELY);

      return est_rel_this_likely * 2 <= est_rel_other_likely ? -1 : 1;
    }

  /* Identical estimates: cannot decide from inside costs.  */
  return 0;
}
2012
2013 /* A <=>-style subroutine of better_main_loop_than_p, used when there is
2014 nothing to choose between the inside (loop body) costs of THIS and OTHER.
2015 Check whether we can determine the return value of better_main_loop_than_p
2016 by comparing the outside (prologue and epilogue) costs of THIS and OTHER.
2017 Return:
2018
2019 * -1 if better_main_loop_than_p should return true.
2020 * 1 if better_main_loop_than_p should return false.
2021 * 0 if we can't decide. */
2022
2023 int
2024 vector_costs::compare_outside_loop_cost (const vector_costs *other) const
2025 {
2026 auto this_outside_cost = this->outside_cost ();
2027 auto other_outside_cost = other->outside_cost ();
2028 if (this_outside_cost != other_outside_cost)
2029 return this_outside_cost < other_outside_cost ? -1 : 1;
2030
2031 return 0;
2032 }
2033