/* Vectorizer
2 1.1 mrg Copyright (C) 2003-2022 Free Software Foundation, Inc.
3 1.1 mrg Contributed by Dorit Naishlos <dorit (at) il.ibm.com>
4 1.1 mrg
5 1.1 mrg This file is part of GCC.
6 1.1 mrg
7 1.1 mrg GCC is free software; you can redistribute it and/or modify it under
8 1.1 mrg the terms of the GNU General Public License as published by the Free
9 1.1 mrg Software Foundation; either version 3, or (at your option) any later
10 1.1 mrg version.
11 1.1 mrg
12 1.1 mrg GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 1.1 mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 1.1 mrg FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 1.1 mrg for more details.
16 1.1 mrg
17 1.1 mrg You should have received a copy of the GNU General Public License
18 1.1 mrg along with GCC; see the file COPYING3. If not see
19 1.1 mrg <http://www.gnu.org/licenses/>. */
20 1.1 mrg
21 1.1 mrg /* Loop and basic block vectorizer.
22 1.1 mrg
23 1.1 mrg This file contains drivers for the three vectorizers:
24 1.1 mrg (1) loop vectorizer (inter-iteration parallelism),
25 1.1 mrg (2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop
26 1.1 mrg vectorizer)
27 1.1 mrg (3) BB vectorizer (out-of-loops), aka SLP
28 1.1 mrg
29 1.1 mrg The rest of the vectorizer's code is organized as follows:
30 1.1 mrg - tree-vect-loop.cc - loop specific parts such as reductions, etc. These are
31 1.1 mrg used by drivers (1) and (2).
32 1.1 mrg - tree-vect-loop-manip.cc - vectorizer's loop control-flow utilities, used by
33 1.1 mrg drivers (1) and (2).
34 1.1 mrg - tree-vect-slp.cc - BB vectorization specific analysis and transformation,
35 1.1 mrg used by drivers (2) and (3).
36 1.1 mrg - tree-vect-stmts.cc - statements analysis and transformation (used by all).
37 1.1 mrg - tree-vect-data-refs.cc - vectorizer specific data-refs analysis and
38 1.1 mrg manipulations (used by all).
39 1.1 mrg - tree-vect-patterns.cc - vectorizable code patterns detector (used by all)
40 1.1 mrg
41 1.1 mrg Here's a poor attempt at illustrating that:
42 1.1 mrg
43 1.1 mrg tree-vectorizer.cc:
44 1.1 mrg loop_vect() loop_aware_slp() slp_vect()
45 1.1 mrg | / \ /
46 1.1 mrg | / \ /
47 1.1 mrg tree-vect-loop.cc tree-vect-slp.cc
48 1.1 mrg | \ \ / / |
49 1.1 mrg | \ \/ / |
50 1.1 mrg | \ /\ / |
51 1.1 mrg | \ / \ / |
52 1.1 mrg tree-vect-stmts.cc tree-vect-data-refs.cc
53 1.1 mrg \ /
54 1.1 mrg tree-vect-patterns.cc
55 1.1 mrg */
56 1.1 mrg
57 1.1 mrg #include "config.h"
58 1.1 mrg #include "system.h"
59 1.1 mrg #include "coretypes.h"
60 1.1 mrg #include "backend.h"
61 1.1 mrg #include "tree.h"
62 1.1 mrg #include "gimple.h"
63 1.1 mrg #include "predict.h"
64 1.1 mrg #include "tree-pass.h"
65 1.1 mrg #include "ssa.h"
66 1.1 mrg #include "cgraph.h"
67 1.1 mrg #include "fold-const.h"
68 1.1 mrg #include "stor-layout.h"
69 1.1 mrg #include "gimple-iterator.h"
70 1.1 mrg #include "gimple-walk.h"
71 1.1 mrg #include "tree-ssa-loop-manip.h"
72 1.1 mrg #include "tree-ssa-loop-niter.h"
73 1.1 mrg #include "tree-cfg.h"
74 1.1 mrg #include "cfgloop.h"
75 1.1 mrg #include "tree-vectorizer.h"
76 1.1 mrg #include "tree-ssa-propagate.h"
77 1.1 mrg #include "dbgcnt.h"
78 1.1 mrg #include "tree-scalar-evolution.h"
79 1.1 mrg #include "stringpool.h"
80 1.1 mrg #include "attribs.h"
81 1.1 mrg #include "gimple-pretty-print.h"
82 1.1 mrg #include "opt-problem.h"
83 1.1 mrg #include "internal-fn.h"
84 1.1 mrg #include "tree-ssa-sccvn.h"
85 1.1 mrg
/* Loop or bb location, with hotness information.  Set by the various
   vectorization drivers before dumping so diagnostics can be attributed
   to the loop/bb currently being processed.  */
dump_user_location_t vect_location;

/* auto_purge_vect_location's dtor: reset the vect_location
   global, to avoid stale location_t values that could reference
   GC-ed blocks.  Instantiating an auto_purge_vect_location on the
   stack guarantees the reset happens on every exit path.  */

auto_purge_vect_location::~auto_purge_vect_location ()
{
  vect_location = dump_user_location_t ();
}
97 1.1 mrg
98 1.1 mrg /* Dump a cost entry according to args to F. */
99 1.1 mrg
100 1.1 mrg void
101 1.1 mrg dump_stmt_cost (FILE *f, int count, enum vect_cost_for_stmt kind,
102 1.1 mrg stmt_vec_info stmt_info, slp_tree node, tree,
103 1.1 mrg int misalign, unsigned cost,
104 1.1 mrg enum vect_cost_model_location where)
105 1.1 mrg {
106 1.1 mrg if (stmt_info)
107 1.1 mrg {
108 1.1 mrg print_gimple_expr (f, STMT_VINFO_STMT (stmt_info), 0, TDF_SLIM);
109 1.1 mrg fprintf (f, " ");
110 1.1 mrg }
111 1.1 mrg else if (node)
112 1.1 mrg fprintf (f, "node %p ", (void *)node);
113 1.1 mrg else
114 1.1 mrg fprintf (f, "<unknown> ");
115 1.1 mrg fprintf (f, "%d times ", count);
116 1.1 mrg const char *ks = "unknown";
117 1.1 mrg switch (kind)
118 1.1 mrg {
119 1.1 mrg case scalar_stmt:
120 1.1 mrg ks = "scalar_stmt";
121 1.1 mrg break;
122 1.1 mrg case scalar_load:
123 1.1 mrg ks = "scalar_load";
124 1.1 mrg break;
125 1.1 mrg case scalar_store:
126 1.1 mrg ks = "scalar_store";
127 1.1 mrg break;
128 1.1 mrg case vector_stmt:
129 1.1 mrg ks = "vector_stmt";
130 1.1 mrg break;
131 1.1 mrg case vector_load:
132 1.1 mrg ks = "vector_load";
133 1.1 mrg break;
134 1.1 mrg case vector_gather_load:
135 1.1 mrg ks = "vector_gather_load";
136 1.1 mrg break;
137 1.1 mrg case unaligned_load:
138 1.1 mrg ks = "unaligned_load";
139 1.1 mrg break;
140 1.1 mrg case unaligned_store:
141 1.1 mrg ks = "unaligned_store";
142 1.1 mrg break;
143 1.1 mrg case vector_store:
144 1.1 mrg ks = "vector_store";
145 1.1 mrg break;
146 1.1 mrg case vector_scatter_store:
147 1.1 mrg ks = "vector_scatter_store";
148 1.1 mrg break;
149 1.1 mrg case vec_to_scalar:
150 1.1 mrg ks = "vec_to_scalar";
151 1.1 mrg break;
152 1.1 mrg case scalar_to_vec:
153 1.1 mrg ks = "scalar_to_vec";
154 1.1 mrg break;
155 1.1 mrg case cond_branch_not_taken:
156 1.1 mrg ks = "cond_branch_not_taken";
157 1.1 mrg break;
158 1.1 mrg case cond_branch_taken:
159 1.1 mrg ks = "cond_branch_taken";
160 1.1 mrg break;
161 1.1 mrg case vec_perm:
162 1.1 mrg ks = "vec_perm";
163 1.1 mrg break;
164 1.1 mrg case vec_promote_demote:
165 1.1 mrg ks = "vec_promote_demote";
166 1.1 mrg break;
167 1.1 mrg case vec_construct:
168 1.1 mrg ks = "vec_construct";
169 1.1 mrg break;
170 1.1 mrg }
171 1.1 mrg fprintf (f, "%s ", ks);
172 1.1 mrg if (kind == unaligned_load || kind == unaligned_store)
173 1.1 mrg fprintf (f, "(misalign %d) ", misalign);
174 1.1 mrg fprintf (f, "costs %u ", cost);
175 1.1 mrg const char *ws = "unknown";
176 1.1 mrg switch (where)
177 1.1 mrg {
178 1.1 mrg case vect_prologue:
179 1.1 mrg ws = "prologue";
180 1.1 mrg break;
181 1.1 mrg case vect_body:
182 1.1 mrg ws = "body";
183 1.1 mrg break;
184 1.1 mrg case vect_epilogue:
185 1.1 mrg ws = "epilogue";
186 1.1 mrg break;
187 1.1 mrg }
188 1.1 mrg fprintf (f, "in %s\n", ws);
189 1.1 mrg }
190 1.1 mrg
/* For mapping simduid to vectorization factor.  Hash table entries are
   allocated with XNEW and owned by the table (free_ptr_hash).  */

class simduid_to_vf : public free_ptr_hash<simduid_to_vf>
{
public:
  /* DECL_UID of the simduid decl; the hash key.  */
  unsigned int simduid;
  /* Vectorization factor chosen for the corresponding simd loop.  */
  poly_uint64 vf;

  /* hash_table support.  */
  static inline hashval_t hash (const simduid_to_vf *);
  static inline int equal (const simduid_to_vf *, const simduid_to_vf *);
};

/* Hash by the simduid DECL_UID, which is already unique.  */

inline hashval_t
simduid_to_vf::hash (const simduid_to_vf *p)
{
  return p->simduid;
}

/* Entries are equal iff they describe the same simduid.  */

inline int
simduid_to_vf::equal (const simduid_to_vf *p1, const simduid_to_vf *p2)
{
  return p1->simduid == p2->simduid;
}
216 1.1 mrg
/* This hash maps the OMP simd array to the corresponding simduid used
   to index into it.  Like thus,

        _7 = GOMP_SIMD_LANE (simduid.0)
        ...
        ...
        D.1737[_7] = stuff;


   This hash maps from the OMP simd array (D.1737[]) to DECL_UID of
   simduid.0.  Entries are owned by the table (free_ptr_hash).  */

struct simd_array_to_simduid : free_ptr_hash<simd_array_to_simduid>
{
  /* The "omp simd array" VAR_DECL; the hash key.  */
  tree decl;
  /* DECL_UID of the associated simduid, or -1U if the array is indexed
     by more than one simduid and hence cannot be shrunk.  */
  unsigned int simduid;

  /* hash_table support.  */
  static inline hashval_t hash (const simd_array_to_simduid *);
  static inline int equal (const simd_array_to_simduid *,
			   const simd_array_to_simduid *);
};

/* Hash by the DECL_UID of the array decl.  */

inline hashval_t
simd_array_to_simduid::hash (const simd_array_to_simduid *p)
{
  return DECL_UID (p->decl);
}

/* Entries are equal iff they refer to the same array decl.  */

inline int
simd_array_to_simduid::equal (const simd_array_to_simduid *p1,
			      const simd_array_to_simduid *p2)
{
  return p1->decl == p2->decl;
}
252 1.1 mrg
/* Fold IFN_GOMP_SIMD_LANE, IFN_GOMP_SIMD_VF, IFN_GOMP_SIMD_LAST_LANE,
   into their corresponding constants and remove
   IFN_GOMP_SIMD_ORDERED_{START,END}.  HTAB maps simduid to the chosen
   vectorization factor (may be NULL, in which case VF 1 is assumed).  */

static void
adjust_simduid_builtins (hash_table<simduid_to_vf> *htab, function *fun)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator i;

      /* Note: the iterator is only advanced explicitly on the paths
	 that do not remove/replace the current stmt.  */
      for (i = gsi_start_bb (bb); !gsi_end_p (i); )
	{
	  poly_uint64 vf = 1;
	  enum internal_fn ifn;
	  gimple *stmt = gsi_stmt (i);
	  tree t;
	  if (!is_gimple_call (stmt)
	      || !gimple_call_internal_p (stmt))
	    {
	      gsi_next (&i);
	      continue;
	    }
	  ifn = gimple_call_internal_fn (stmt);
	  switch (ifn)
	    {
	    case IFN_GOMP_SIMD_LANE:
	    case IFN_GOMP_SIMD_VF:
	    case IFN_GOMP_SIMD_LAST_LANE:
	      break;
	    case IFN_GOMP_SIMD_ORDERED_START:
	    case IFN_GOMP_SIMD_ORDERED_END:
	      /* Argument 0 being constant 1 means the simd region was
		 not vectorized: lower to the real GOMP_ordered_*
		 builtin call; otherwise just delete the marker.  */
	      if (integer_onep (gimple_call_arg (stmt, 0)))
		{
		  enum built_in_function bcode
		    = (ifn == IFN_GOMP_SIMD_ORDERED_START
		       ? BUILT_IN_GOMP_ORDERED_START
		       : BUILT_IN_GOMP_ORDERED_END);
		  gimple *g
		    = gimple_build_call (builtin_decl_explicit (bcode), 0);
		  gimple_move_vops (g, stmt);
		  gsi_replace (&i, g, true);
		  continue;
		}
	      gsi_remove (&i, true);
	      unlink_stmt_vdef (stmt);
	      continue;
	    default:
	      gsi_next (&i);
	      continue;
	    }
	  tree arg = gimple_call_arg (stmt, 0);
	  gcc_assert (arg != NULL_TREE);
	  gcc_assert (TREE_CODE (arg) == SSA_NAME);
	  simduid_to_vf *p = NULL, data;
	  data.simduid = DECL_UID (SSA_NAME_VAR (arg));
	  /* Need to nullify loop safelen field since its value is not
	     valid after transformation.  */
	  if (bb->loop_father && bb->loop_father->safelen > 0)
	    bb->loop_father->safelen = 0;
	  if (htab)
	    {
	      p = htab->find (&data);
	      if (p)
		vf = p->vf;
	    }
	  /* Fold the remaining GOMP_SIMD_* queries to constants/args.  */
	  switch (ifn)
	    {
	    case IFN_GOMP_SIMD_VF:
	      t = build_int_cst (unsigned_type_node, vf);
	      break;
	    case IFN_GOMP_SIMD_LANE:
	      t = build_int_cst (unsigned_type_node, 0);
	      break;
	    case IFN_GOMP_SIMD_LAST_LANE:
	      t = gimple_call_arg (stmt, 1);
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  tree lhs = gimple_call_lhs (stmt);
	  if (lhs)
	    replace_uses_by (lhs, t);
	  release_defs (stmt);
	  gsi_remove (&i, true);
	}
    }
}
343 1.1 mrg
/* Helper structure for note_simd_array_uses: carries the table to fill
   and the simduid of the GOMP_SIMD_LANE call being walked.  */

struct note_simd_array_uses_struct
{
  hash_table<simd_array_to_simduid> **htab;
  unsigned int simduid;
};

/* Callback for note_simd_array_uses, called through walk_gimple_op.
   Records every "omp simd array" VAR_DECL of the current function into
   *NS->htab, mapped to NS->simduid (or -1U on conflicting simduids).  */

static tree
note_simd_array_uses_cb (tree *tp, int *walk_subtrees, void *data)
{
  struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
  struct note_simd_array_uses_struct *ns
    = (struct note_simd_array_uses_struct *) wi->info;

  if (TYPE_P (*tp))
    *walk_subtrees = 0;
  else if (VAR_P (*tp)
	   && lookup_attribute ("omp simd array", DECL_ATTRIBUTES (*tp))
	   && DECL_CONTEXT (*tp) == current_function_decl)
    {
      simd_array_to_simduid data;
      /* Create the table lazily on first hit.  */
      if (!*ns->htab)
	*ns->htab = new hash_table<simd_array_to_simduid> (15);
      data.decl = *tp;
      data.simduid = ns->simduid;
      simd_array_to_simduid **slot = (*ns->htab)->find_slot (&data, INSERT);
      if (*slot == NULL)
	{
	  simd_array_to_simduid *p = XNEW (simd_array_to_simduid);
	  *p = data;
	  *slot = p;
	}
      else if ((*slot)->simduid != ns->simduid)
	/* The array is indexed by more than one simduid; mark it as
	   unusable for shrinking.  */
	(*slot)->simduid = -1U;
      *walk_subtrees = 0;
    }
  return NULL_TREE;
}
385 1.1 mrg
/* Find "omp simd array" temporaries and map them to corresponding
   simduid.  *HTAB is created lazily by the callback and may be left
   NULL when no such arrays exist.  */

static void
note_simd_array_uses (hash_table<simd_array_to_simduid> **htab, function *fun)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  struct walk_stmt_info wi;
  struct note_simd_array_uses_struct ns;

  memset (&wi, 0, sizeof (wi));
  wi.info = &ns;
  ns.htab = htab;

  FOR_EACH_BB_FN (bb, fun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *stmt = gsi_stmt (gsi);
	if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
	  continue;
	/* Only the GOMP_SIMD_* lane queries index simd arrays.  */
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_SIMD_LANE:
	  case IFN_GOMP_SIMD_VF:
	  case IFN_GOMP_SIMD_LAST_LANE:
	    break;
	  default:
	    continue;
	  }
	tree lhs = gimple_call_lhs (stmt);
	if (lhs == NULL_TREE)
	  continue;
	imm_use_iterator use_iter;
	gimple *use_stmt;
	ns.simduid = DECL_UID (SSA_NAME_VAR (gimple_call_arg (stmt, 0)));
	/* Walk the non-debug uses of the lane index; each array it
	   indexes is recorded by the callback.  */
	FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs)
	  if (!is_gimple_debug (use_stmt))
	    walk_gimple_op (use_stmt, note_simd_array_uses_cb, &wi);
      }
}
427 1.1 mrg
/* Shrink arrays with "omp simd array" attribute to the corresponding
   vectorization factor.  Entries with simduid == -1U (conflicting
   users) are left alone.  Consumes and deletes
   SIMD_ARRAY_TO_SIMDUID_HTAB.  */

static void
shrink_simd_arrays
  (hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab,
   hash_table<simduid_to_vf> *simduid_to_vf_htab)
{
  for (hash_table<simd_array_to_simduid>::iterator iter
	 = simd_array_to_simduid_htab->begin ();
       iter != simd_array_to_simduid_htab->end (); ++iter)
    if ((*iter)->simduid != -1U)
      {
	tree decl = (*iter)->decl;
	/* VF 1 (no vectorization) unless the simduid has a recorded
	   vectorization factor.  */
	poly_uint64 vf = 1;
	if (simduid_to_vf_htab)
	  {
	    simduid_to_vf *p = NULL, data;
	    data.simduid = (*iter)->simduid;
	    p = simduid_to_vf_htab->find (&data);
	    if (p)
	      vf = p->vf;
	  }
	tree atype
	  = build_array_type_nelts (TREE_TYPE (TREE_TYPE (decl)), vf);
	TREE_TYPE (decl) = atype;
	relayout_decl (decl);
      }

  delete simd_array_to_simduid_htab;
}
459 1.1 mrg
/* Initialize the vec_info with kind KIND_IN and analysis data shared
   across loop versions SHARED_.  */

vec_info::vec_info (vec_info::vec_kind kind_in, vec_info_shared *shared_)
  : kind (kind_in),
    shared (shared_),
    stmt_vec_info_ro (false)
{
  stmt_vec_infos.create (50);
}
471 1.1 mrg
472 1.1 mrg vec_info::~vec_info ()
473 1.1 mrg {
474 1.1 mrg for (slp_instance &instance : slp_instances)
475 1.1 mrg vect_free_slp_instance (instance);
476 1.1 mrg
477 1.1 mrg free_stmt_vec_infos ();
478 1.1 mrg }
479 1.1 mrg
/* Construct empty shared analysis state: no statements counted yet,
   no data references or dependence relations recorded.  */

vec_info_shared::vec_info_shared ()
  : n_stmts (0),
    datarefs (vNULL),
    datarefs_copy (vNULL),
    ddrs (vNULL)
{
}
487 1.1 mrg
/* Free the owned data references and dependence relations; the
   checking-only snapshot only needs its vector storage released.  */

vec_info_shared::~vec_info_shared ()
{
  free_data_refs (datarefs);
  free_dependence_relations (ddrs);
  datarefs_copy.release ();
}
494 1.1 mrg
495 1.1 mrg void
496 1.1 mrg vec_info_shared::save_datarefs ()
497 1.1 mrg {
498 1.1 mrg if (!flag_checking)
499 1.1 mrg return;
500 1.1 mrg datarefs_copy.reserve_exact (datarefs.length ());
501 1.1 mrg for (unsigned i = 0; i < datarefs.length (); ++i)
502 1.1 mrg datarefs_copy.quick_push (*datarefs[i]);
503 1.1 mrg }
504 1.1 mrg
/* Verify the data references are unchanged since save_datarefs.
   Only active with checking enabled.  */

void
vec_info_shared::check_datarefs ()
{
  if (!flag_checking)
    return;
  gcc_assert (datarefs.length () == datarefs_copy.length ());
  for (unsigned i = 0; i < datarefs.length (); ++i)
    /* Compare only up to alt_indices, which is allowed to change.  */
    if (memcmp (&datarefs_copy[i], datarefs[i],
		offsetof (data_reference, alt_indices)) != 0)
      gcc_unreachable ();
}
516 1.1 mrg
517 1.1 mrg /* Record that STMT belongs to the vectorizable region. Create and return
518 1.1 mrg an associated stmt_vec_info. */
519 1.1 mrg
520 1.1 mrg stmt_vec_info
521 1.1 mrg vec_info::add_stmt (gimple *stmt)
522 1.1 mrg {
523 1.1 mrg stmt_vec_info res = new_stmt_vec_info (stmt);
524 1.1 mrg set_vinfo_for_stmt (stmt, res);
525 1.1 mrg return res;
526 1.1 mrg }
527 1.1 mrg
528 1.1 mrg /* Record that STMT belongs to the vectorizable region. Create a new
529 1.1 mrg stmt_vec_info and mark VECINFO as being related and return the new
530 1.1 mrg stmt_vec_info. */
531 1.1 mrg
532 1.1 mrg stmt_vec_info
533 1.1 mrg vec_info::add_pattern_stmt (gimple *stmt, stmt_vec_info stmt_info)
534 1.1 mrg {
535 1.1 mrg stmt_vec_info res = new_stmt_vec_info (stmt);
536 1.1 mrg set_vinfo_for_stmt (stmt, res, false);
537 1.1 mrg STMT_VINFO_RELATED_STMT (res) = stmt_info;
538 1.1 mrg return res;
539 1.1 mrg }
540 1.1 mrg
541 1.1 mrg /* If STMT has an associated stmt_vec_info, return that vec_info, otherwise
542 1.1 mrg return null. It is safe to call this function on any statement, even if
543 1.1 mrg it might not be part of the vectorizable region. */
544 1.1 mrg
545 1.1 mrg stmt_vec_info
546 1.1 mrg vec_info::lookup_stmt (gimple *stmt)
547 1.1 mrg {
548 1.1 mrg unsigned int uid = gimple_uid (stmt);
549 1.1 mrg if (uid > 0 && uid - 1 < stmt_vec_infos.length ())
550 1.1 mrg {
551 1.1 mrg stmt_vec_info res = stmt_vec_infos[uid - 1];
552 1.1 mrg if (res && res->stmt == stmt)
553 1.1 mrg return res;
554 1.1 mrg }
555 1.1 mrg return NULL;
556 1.1 mrg }
557 1.1 mrg
558 1.1 mrg /* If NAME is an SSA_NAME and its definition has an associated stmt_vec_info,
559 1.1 mrg return that stmt_vec_info, otherwise return null. It is safe to call
560 1.1 mrg this on arbitrary operands. */
561 1.1 mrg
562 1.1 mrg stmt_vec_info
563 1.1 mrg vec_info::lookup_def (tree name)
564 1.1 mrg {
565 1.1 mrg if (TREE_CODE (name) == SSA_NAME
566 1.1 mrg && !SSA_NAME_IS_DEFAULT_DEF (name))
567 1.1 mrg return lookup_stmt (SSA_NAME_DEF_STMT (name));
568 1.1 mrg return NULL;
569 1.1 mrg }
570 1.1 mrg
571 1.1 mrg /* See whether there is a single non-debug statement that uses LHS and
572 1.1 mrg whether that statement has an associated stmt_vec_info. Return the
573 1.1 mrg stmt_vec_info if so, otherwise return null. */
574 1.1 mrg
575 1.1 mrg stmt_vec_info
576 1.1 mrg vec_info::lookup_single_use (tree lhs)
577 1.1 mrg {
578 1.1 mrg use_operand_p dummy;
579 1.1 mrg gimple *use_stmt;
580 1.1 mrg if (single_imm_use (lhs, &dummy, &use_stmt))
581 1.1 mrg return lookup_stmt (use_stmt);
582 1.1 mrg return NULL;
583 1.1 mrg }
584 1.1 mrg
/* Return vectorization information about DR.  */

dr_vec_info *
vec_info::lookup_dr (data_reference *dr)
{
  stmt_vec_info stmt_info = lookup_stmt (DR_STMT (dr));
  /* DR_STMT should never refer to a stmt in a pattern replacement.  */
  gcc_checking_assert (!is_pattern_stmt_p (stmt_info));
  /* dr_aux.stmt may point to a different stmt_info after move_dr, so
     go through it rather than using STMT_INFO directly.  */
  return STMT_VINFO_DR_INFO (stmt_info->dr_aux.stmt);
}
595 1.1 mrg
/* Record that NEW_STMT_INFO now implements the same data reference
   as OLD_STMT_INFO.  Used when a pattern stmt takes over the data
   reference of the stmt it replaces.  */

void
vec_info::move_dr (stmt_vec_info new_stmt_info, stmt_vec_info old_stmt_info)
{
  gcc_assert (!is_pattern_stmt_p (old_stmt_info));
  /* Point the dr_vec_info back at its new owner before copying the
     aux data over.  */
  STMT_VINFO_DR_INFO (old_stmt_info)->stmt = new_stmt_info;
  new_stmt_info->dr_aux = old_stmt_info->dr_aux;
  STMT_VINFO_DR_WRT_VEC_LOOP (new_stmt_info)
    = STMT_VINFO_DR_WRT_VEC_LOOP (old_stmt_info);
  STMT_VINFO_GATHER_SCATTER_P (new_stmt_info)
    = STMT_VINFO_GATHER_SCATTER_P (old_stmt_info);
}
610 1.1 mrg
/* Permanently remove the statement described by STMT_INFO from the
   function: drop the vinfo association, unlink it from the IL,
   release its SSA defs and free STMT_INFO itself.  Must not be
   called on pattern stmts (they are not in the IL).  */

void
vec_info::remove_stmt (stmt_vec_info stmt_info)
{
  gcc_assert (!stmt_info->pattern_stmt_p);
  set_vinfo_for_stmt (stmt_info->stmt, NULL);
  unlink_stmt_vdef (stmt_info->stmt);
  /* The iterator must be obtained while the stmt is still in the IL.  */
  gimple_stmt_iterator si = gsi_for_stmt (stmt_info->stmt);
  gsi_remove (&si, true);
  release_defs (stmt_info->stmt);
  free_stmt_vec_info (stmt_info);
}
625 1.1 mrg
/* Replace the statement at GSI by NEW_STMT, both the vectorization
   information and the function itself.  STMT_INFO describes the statement
   at GSI.  NEW_STMT inherits the old stmt's uid, keeping the
   stmt_vec_infos index valid.  */

void
vec_info::replace_stmt (gimple_stmt_iterator *gsi, stmt_vec_info stmt_info,
			gimple *new_stmt)
{
  gimple *old_stmt = stmt_info->stmt;
  gcc_assert (!stmt_info->pattern_stmt_p && old_stmt == gsi_stmt (*gsi));
  gimple_set_uid (new_stmt, gimple_uid (old_stmt));
  stmt_info->stmt = new_stmt;
  gsi_replace (gsi, new_stmt, true);
}
640 1.1 mrg
/* Insert stmts in SEQ on the VEC_INFO region entry.  If CONTEXT is
   not NULL it specifies whether to use the sub-region entry
   determined by it, currently used for loop vectorization to insert
   on the inner loop entry vs. the outer loop entry.  */

void
vec_info::insert_seq_on_entry (stmt_vec_info context, gimple_seq seq)
{
  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (this))
    {
      class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      basic_block new_bb;
      edge pe;

      if (context && nested_in_vect_loop_p (loop, context))
	loop = loop->inner;

      /* Insertion on the preheader edge must not split it; assert
	 that no new block was created.  */
      pe = loop_preheader_edge (loop);
      new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
      gcc_assert (!new_bb);
    }
  else
    {
      /* For BB vectorization insert at the start of the region,
	 after any labels.  */
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (this);
      gimple_stmt_iterator gsi_region_begin
	= gsi_after_labels (bb_vinfo->bbs[0]);
      gsi_insert_seq_before (&gsi_region_begin, seq, GSI_SAME_STMT);
    }
}
670 1.1 mrg
/* Like insert_seq_on_entry but just inserts the single stmt NEW_STMT,
   wrapping it in a one-element sequence first.  */

void
vec_info::insert_on_entry (stmt_vec_info context, gimple *new_stmt)
{
  gimple_seq seq = NULL;
  gimple_stmt_iterator gsi = gsi_start (seq);
  gsi_insert_before_without_update (&gsi, new_stmt, GSI_SAME_STMT);
  insert_seq_on_entry (context, seq);
}
681 1.1 mrg
/* Create and initialize a new stmt_vec_info struct for STMT with
   conservative defaults; the caller is responsible for registering it
   via set_vinfo_for_stmt.  */

stmt_vec_info
vec_info::new_stmt_vec_info (gimple *stmt)
{
  stmt_vec_info res = XCNEW (class _stmt_vec_info);
  res->stmt = stmt;

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_REDUC_TYPE (res) = TREE_CODE_REDUCTION;
  STMT_VINFO_REDUC_CODE (res) = ERROR_MARK;
  STMT_VINFO_REDUC_FN (res) = IFN_LAST;
  STMT_VINFO_REDUC_IDX (res) = -1;
  STMT_VINFO_SLP_VECT_ONLY (res) = false;
  STMT_VINFO_SLP_VECT_ONLY_PATTERN (res) = false;
  STMT_VINFO_VEC_STMTS (res) = vNULL;
  res->reduc_initial_values = vNULL;
  res->reduc_scalar_results = vNULL;

  /* Loop-header PHIs might turn out to be inductions/reductions; their
     def type is only known after analysis.  */
  if (is_a <loop_vec_info> (this)
      && gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_SLP_TYPE (res) = loop_vect;

  /* This is really "uninitialized" until vect_compute_data_ref_alignment.  */
  res->dr_aux.misalignment = DR_MISALIGNMENT_UNINITIALIZED;

  return res;
}
717 1.1 mrg
/* Associate STMT with INFO.  A stmt uid of 0 means "not yet
   registered"; otherwise uid - 1 indexes stmt_vec_infos.  When
   CHECK_RO, adding a new association is rejected once
   stmt_vec_info_ro has been set.  */

void
vec_info::set_vinfo_for_stmt (gimple *stmt, stmt_vec_info info, bool check_ro)
{
  unsigned int uid = gimple_uid (stmt);
  if (uid == 0)
    {
      gcc_assert (!check_ro || !stmt_vec_info_ro);
      gcc_checking_assert (info);
      uid = stmt_vec_infos.length () + 1;
      gimple_set_uid (stmt, uid);
      stmt_vec_infos.safe_push (info);
    }
  else
    {
      /* An existing slot may only be cleared, never replaced.  */
      gcc_checking_assert (info == NULL);
      stmt_vec_infos[uid - 1] = info;
    }
}
738 1.1 mrg
739 1.1 mrg /* Free the contents of stmt_vec_infos. */
740 1.1 mrg
741 1.1 mrg void
742 1.1 mrg vec_info::free_stmt_vec_infos (void)
743 1.1 mrg {
744 1.1 mrg for (stmt_vec_info &info : stmt_vec_infos)
745 1.1 mrg if (info != NULL)
746 1.1 mrg free_stmt_vec_info (info);
747 1.1 mrg stmt_vec_infos.release ();
748 1.1 mrg }
749 1.1 mrg
/* Free STMT_INFO and the vectors it owns.  For pattern stmts, which
   never made it into the IL, also detach them from their bb and
   release their SSA lhs.  */

void
vec_info::free_stmt_vec_info (stmt_vec_info stmt_info)
{
  if (stmt_info->pattern_stmt_p)
    {
      gimple_set_bb (stmt_info->stmt, NULL);
      tree lhs = gimple_get_lhs (stmt_info->stmt);
      if (lhs && TREE_CODE (lhs) == SSA_NAME)
	release_ssa_name (lhs);
    }

  stmt_info->reduc_initial_values.release ();
  stmt_info->reduc_scalar_results.release ();
  STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
  STMT_VINFO_VEC_STMTS (stmt_info).release ();
  free (stmt_info);
}
769 1.1 mrg
/* Returns true if S1 dominates S2.  Unlike plain dominance queries this
   also handles stmts inserted by the vectorizer (uid 0) which are not
   covered by the stmt uid ordering.  */

bool
vect_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
{
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);

  /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
     SSA_NAME.  Assume it lives at the beginning of function and
     thus dominates everything.  */
  if (!bb1 || s1 == s2)
    return true;

  /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
  if (!bb2)
    return false;

  if (bb1 != bb2)
    return dominated_by_p (CDI_DOMINATORS, bb2, bb1);

  /* PHIs in the same basic block are assumed to be
     executed all in parallel, if only one stmt is a PHI,
     it dominates the other stmt in the same basic block.  */
  if (gimple_code (s1) == GIMPLE_PHI)
    return true;

  if (gimple_code (s2) == GIMPLE_PHI)
    return false;

  /* Inserted vectorized stmts all have UID 0 while the original stmts
     in the IL have UID increasing within a BB.  Walk from both sides
     until we find the other stmt or a stmt with UID != 0.  */
  gimple_stmt_iterator gsi1 = gsi_for_stmt (s1);
  while (gimple_uid (gsi_stmt (gsi1)) == 0)
    {
      gsi_next (&gsi1);
      if (gsi_end_p (gsi1))
	return false;
      if (gsi_stmt (gsi1) == s2)
	return true;
    }
  /* UID -1 marks stmts with no meaningful ordering; give up.  */
  if (gimple_uid (gsi_stmt (gsi1)) == -1u)
    return false;

  gimple_stmt_iterator gsi2 = gsi_for_stmt (s2);
  while (gimple_uid (gsi_stmt (gsi2)) == 0)
    {
      gsi_prev (&gsi2);
      if (gsi_end_p (gsi2))
	return false;
      if (gsi_stmt (gsi2) == s1)
	return true;
    }
  if (gimple_uid (gsi_stmt (gsi2)) == -1u)
    return false;

  /* Both anchors have valid uids; compare their order in the BB.  */
  if (gimple_uid (gsi_stmt (gsi1)) <= gimple_uid (gsi_stmt (gsi2)))
    return true;
  return false;
}
830 1.1 mrg
/* A helper function to free scev and LOOP niter information, as well as
   clear loop constraint LOOP_C_FINITE.  Called when vectorization
   invalidates previously computed loop properties.  */

void
vect_free_loop_info_assumptions (class loop *loop)
{
  scev_reset_htab ();
  /* We need to explicitly reset upper bound information since they are
     used even after free_numbers_of_iterations_estimates.  */
  loop->any_upper_bound = false;
  loop->any_likely_upper_bound = false;
  free_numbers_of_iterations_estimates (loop);
  loop_constraint_clear (loop, LOOP_C_FINITE);
}
845 1.1 mrg
/* If LOOP has been versioned during ifcvt, return the internal call
   guarding it, i.e. the IFN_LOOP_VECTORIZED call whose arguments name
   this loop.  If COND is non-NULL also return the guarding GIMPLE_COND
   through it.  */

gimple *
vect_loop_vectorized_call (class loop *loop, gcond **cond)
{
  basic_block bb = loop_preheader_edge (loop)->src;
  gimple *g;
  /* Walk single-pred/single-succ blocks upwards until a conditional
     (the version guard candidate) or a CFG merge is found.  */
  do
    {
      g = last_stmt (bb);
      if ((g && gimple_code (g) == GIMPLE_COND)
	  || !single_succ_p (bb))
	break;
      if (!single_pred_p (bb))
	break;
      bb = single_pred (bb);
    }
  while (1);
  if (g && gimple_code (g) == GIMPLE_COND)
    {
      if (cond)
	*cond = as_a <gcond *> (g);
      /* The IFN_LOOP_VECTORIZED call immediately precedes the cond;
	 either of its two arguments (if-converted/scalar loop num)
	 may name LOOP.  */
      gimple_stmt_iterator gsi = gsi_for_stmt (g);
      gsi_prev (&gsi);
      if (!gsi_end_p (gsi))
	{
	  g = gsi_stmt (gsi);
	  if (gimple_call_internal_p (g, IFN_LOOP_VECTORIZED)
	      && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->num
		  || tree_to_shwi (gimple_call_arg (g, 1)) == loop->num))
	    return g;
	}
    }
  return NULL;
}
882 1.1 mrg
/* If LOOP has been versioned during loop distribution, return the guarding
   internal call (IFN_LOOP_DIST_ALIAS with LOOP's original loop number).  */

static gimple *
vect_loop_dist_alias_call (class loop *loop, function *fun)
{
  basic_block bb;
  basic_block entry;
  class loop *outer, *orig;
  gimple_stmt_iterator gsi;
  gimple *g;

  if (loop->orig_loop_num == 0)
    return NULL;

  orig = get_loop (fun, loop->orig_loop_num);
  if (orig == NULL)
    {
      /* The original loop is somehow destroyed.  Clear the information.  */
      loop->orig_loop_num = 0;
      return NULL;
    }

  /* Start the search from the block dominating both loop versions, or
     from the preheader if LOOP is its own original.  */
  if (loop != orig)
    bb = nearest_common_dominator (CDI_DOMINATORS, loop->header, orig->header);
  else
    bb = loop_preheader_edge (loop)->src;

  outer = bb->loop_father;
  entry = ENTRY_BLOCK_PTR_FOR_FN (fun);

  /* Look upward in dominance tree.  */
  for (; bb != entry && flow_bb_inside_loop_p (outer, bb);
       bb = get_immediate_dominator (CDI_DOMINATORS, bb))
    {
      g = last_stmt (bb);
      if (g == NULL || gimple_code (g) != GIMPLE_COND)
	continue;

      gsi = gsi_for_stmt (g);
      gsi_prev (&gsi);
      if (gsi_end_p (gsi))
	continue;

      g = gsi_stmt (gsi);
      /* The guarding internal function call must have the same distribution
	 alias id.  */
      if (gimple_call_internal_p (g, IFN_LOOP_DIST_ALIAS)
	  && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->orig_loop_num))
	return g;
    }
  return NULL;
}
936 1.1 mrg
937 1.1 mrg /* Set the uids of all the statements in basic blocks inside loop
938 1.1 mrg represented by LOOP_VINFO. LOOP_VECTORIZED_CALL is the internal
939 1.1 mrg call guarding the loop which has been if converted. */
940 1.1 mrg static void
941 1.1 mrg set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call,
942 1.1 mrg function *fun)
943 1.1 mrg {
944 1.1 mrg tree arg = gimple_call_arg (loop_vectorized_call, 1);
945 1.1 mrg basic_block *bbs;
946 1.1 mrg unsigned int i;
947 1.1 mrg class loop *scalar_loop = get_loop (fun, tree_to_shwi (arg));
948 1.1 mrg
949 1.1 mrg LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop;
950 1.1 mrg gcc_checking_assert (vect_loop_vectorized_call (scalar_loop)
951 1.1 mrg == loop_vectorized_call);
952 1.1 mrg /* If we are going to vectorize outer loop, prevent vectorization
953 1.1 mrg of the inner loop in the scalar loop - either the scalar loop is
954 1.1 mrg thrown away, so it is a wasted work, or is used only for
955 1.1 mrg a few iterations. */
956 1.1 mrg if (scalar_loop->inner)
957 1.1 mrg {
958 1.1 mrg gimple *g = vect_loop_vectorized_call (scalar_loop->inner);
959 1.1 mrg if (g)
960 1.1 mrg {
961 1.1 mrg arg = gimple_call_arg (g, 0);
962 1.1 mrg get_loop (fun, tree_to_shwi (arg))->dont_vectorize = true;
963 1.1 mrg fold_loop_internal_call (g, boolean_false_node);
964 1.1 mrg }
965 1.1 mrg }
966 1.1 mrg bbs = get_loop_body (scalar_loop);
967 1.1 mrg for (i = 0; i < scalar_loop->num_nodes; i++)
968 1.1 mrg {
969 1.1 mrg basic_block bb = bbs[i];
970 1.1 mrg gimple_stmt_iterator gsi;
971 1.1 mrg for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
972 1.1 mrg {
973 1.1 mrg gimple *phi = gsi_stmt (gsi);
974 1.1 mrg gimple_set_uid (phi, 0);
975 1.1 mrg }
976 1.1 mrg for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
977 1.1 mrg {
978 1.1 mrg gimple *stmt = gsi_stmt (gsi);
979 1.1 mrg gimple_set_uid (stmt, 0);
980 1.1 mrg }
981 1.1 mrg }
982 1.1 mrg free (bbs);
983 1.1 mrg }
984 1.1 mrg
/* Generate vectorized code for LOOP and its epilogues.  */

static void
vect_transform_loops (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
		      loop_p loop, gimple *loop_vectorized_call,
		      function *fun)
{
  loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);

  /* For an if-converted versioned loop, mark the scalar copy and
     disable vectorization of its inner loop.  */
  if (loop_vectorized_call)
    set_uid_loop_bbs (loop_vinfo, loop_vectorized_call, fun);

  unsigned HOST_WIDE_INT bytes;
  if (dump_enabled_p ())
    {
      if (GET_MODE_SIZE (loop_vinfo->vector_mode).is_constant (&bytes))
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
			 "loop vectorized using %wu byte vectors\n", bytes);
      else
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
			 "loop vectorized using variable length vectors\n");
    }

  /* Transform; a non-NULL result is an epilogue loop that needs
     vectorizing too (handled by the recursion at the end).  */
  loop_p new_loop = vect_transform_loop (loop_vinfo,
					 loop_vectorized_call);
  /* Now that the loop has been vectorized, allow it to be unrolled
     etc.  */
  loop->force_vectorize = false;

  /* Record the vectorization factor for this simduid so the later
     IFN_GOMP_SIMD_* folding can look it up.  */
  if (loop->simduid)
    {
      simduid_to_vf *simduid_to_vf_data = XNEW (simduid_to_vf);
      if (!simduid_to_vf_htab)
	simduid_to_vf_htab = new hash_table<simduid_to_vf> (15);
      simduid_to_vf_data->simduid = DECL_UID (loop->simduid);
      simduid_to_vf_data->vf = loop_vinfo->vectorization_factor;
      *simduid_to_vf_htab->find_slot (simduid_to_vf_data, INSERT)
	= simduid_to_vf_data;
    }

  /* Epilogue of vectorized loop must be vectorized too.  */
  if (new_loop)
    vect_transform_loops (simduid_to_vf_htab, new_loop, NULL, fun);
}
1029 1.1 mrg
/* Try to vectorize LOOP.  Returns a TODO_* flag set describing required
   cleanups; updates *NUM_VECTORIZED_LOOPS on success.
   LOOP_VECTORIZED_CALL / LOOP_DIST_ALIAS_CALL are the guarding internal
   calls from if-conversion resp. loop distribution, or NULL.  */

static unsigned
try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
		      unsigned *num_vectorized_loops, loop_p loop,
		      gimple *loop_vectorized_call,
		      gimple *loop_dist_alias_call,
		      function *fun)
{
  unsigned ret = 0;
  vec_info_shared shared;
  auto_purge_vect_location sentinel;
  vect_location = find_loop_location (loop);

  if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
      && dump_enabled_p ())
    dump_printf (MSG_NOTE | MSG_PRIORITY_INTERNALS,
		 "\nAnalyzing loop at %s:%d\n",
		 LOCATION_FILE (vect_location.get_location_t ()),
		 LOCATION_LINE (vect_location.get_location_t ()));

  /* Try to analyze the loop, retaining an opt_problem if dump_enabled_p.  */
  opt_loop_vec_info loop_vinfo = vect_analyze_loop (loop, &shared);
  /* Stash the analysis result; the pass's execute () deletes it later.  */
  loop->aux = loop_vinfo;

  if (!loop_vinfo)
    if (dump_enabled_p ())
      if (opt_problem *problem = loop_vinfo.get_problem ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "couldn't vectorize loop\n");
	  problem->emit_and_clear ();
	}

  if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
    {
      /* Free existing information if loop is analyzed with some
	 assumptions.  */
      if (loop_constraint_set_p (loop, LOOP_C_FINITE))
	vect_free_loop_info_assumptions (loop);

      /* If we applied if-conversion then try to vectorize the
	 BB of innermost loops.
	 ??? Ideally BB vectorization would learn to vectorize
	 control flow by applying if-conversion on-the-fly, the
	 following retains the if-converted loop body even when
	 only non-if-converted parts took part in BB vectorization.  */
      if (flag_tree_slp_vectorize != 0
	  && loop_vectorized_call
	  && ! loop->inner)
	{
	  basic_block bb = loop->header;
	  bool require_loop_vectorize = false;
	  for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
	       !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple *stmt = gsi_stmt (gsi);
	      gcall *call = dyn_cast <gcall *> (stmt);
	      if (call && gimple_call_internal_p (call))
		{
		  internal_fn ifn = gimple_call_internal_fn (call);
		  if (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE
		      /* Don't keep the if-converted parts when the ifn with
			 specific type is not supported by the backend.  */
		      || (direct_internal_fn_p (ifn)
			  && !direct_internal_fn_supported_p
					(call, OPTIMIZE_FOR_SPEED)))
		    {
		      require_loop_vectorize = true;
		      break;
		    }
		}
	      /* Reset state for the upcoming BB-SLP attempt.  */
	      gimple_set_uid (stmt, -1);
	      gimple_set_visited (stmt, false);
	    }
	  if (!require_loop_vectorize)
	    {
	      tree arg = gimple_call_arg (loop_vectorized_call, 1);
	      class loop *scalar_loop = get_loop (fun, tree_to_shwi (arg));
	      if (vect_slp_if_converted_bb (bb, scalar_loop))
		{
		  /* BB vectorization succeeded on the if-converted body;
		     commit to it by resolving the guard to true.  */
		  fold_loop_internal_call (loop_vectorized_call,
					   boolean_true_node);
		  loop_vectorized_call = NULL;
		  ret |= TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
		}
	    }
	}
      /* If outer loop vectorization fails for LOOP_VECTORIZED guarded
	 loop, don't vectorize its inner loop; we'll attempt to
	 vectorize LOOP_VECTORIZED guarded inner loop of the scalar
	 loop version.  */
      if (loop_vectorized_call && loop->inner)
	loop->inner->dont_vectorize = true;
      return ret;
    }

  if (!dbg_cnt (vect_loop))
    {
      /* Free existing information if loop is analyzed with some
	 assumptions.  */
      if (loop_constraint_set_p (loop, LOOP_C_FINITE))
	vect_free_loop_info_assumptions (loop);
      return ret;
    }

  (*num_vectorized_loops)++;
  /* Transform LOOP and its epilogues.  */
  vect_transform_loops (simduid_to_vf_htab, loop, loop_vectorized_call, fun);

  if (loop_vectorized_call)
    {
      fold_loop_internal_call (loop_vectorized_call, boolean_true_node);
      ret |= TODO_cleanup_cfg;
    }
  if (loop_dist_alias_call)
    {
      tree value = gimple_call_arg (loop_dist_alias_call, 1);
      fold_loop_internal_call (loop_dist_alias_call, value);
      ret |= TODO_cleanup_cfg;
    }

  return ret;
}
1154 1.1 mrg
1155 1.1 mrg /* Try to vectorize LOOP. */
1156 1.1 mrg
1157 1.1 mrg static unsigned
1158 1.1 mrg try_vectorize_loop (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
1159 1.1 mrg unsigned *num_vectorized_loops, loop_p loop,
1160 1.1 mrg function *fun)
1161 1.1 mrg {
1162 1.1 mrg if (!((flag_tree_loop_vectorize
1163 1.1 mrg && optimize_loop_nest_for_speed_p (loop))
1164 1.1 mrg || loop->force_vectorize))
1165 1.1 mrg return 0;
1166 1.1 mrg
1167 1.1 mrg return try_vectorize_loop_1 (simduid_to_vf_htab, num_vectorized_loops, loop,
1168 1.1 mrg vect_loop_vectorized_call (loop),
1169 1.1 mrg vect_loop_dist_alias_call (loop, fun), fun);
1170 1.1 mrg }
1171 1.1 mrg
1172 1.1 mrg
1173 1.1 mrg /* Loop autovectorization. */
1174 1.1 mrg
1175 1.1 mrg namespace {
1176 1.1 mrg
/* Pass descriptor for the loop vectorization pass.  */

const pass_data pass_data_vectorize =
{
  GIMPLE_PASS, /* type */
  "vect", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_TREE_VECTORIZATION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
1189 1.1 mrg
/* Pass wrapper driving the loop vectorizer; the work happens in
   execute () below.  */

class pass_vectorize : public gimple_opt_pass
{
public:
  pass_vectorize (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_vectorize, ctxt)
  {}

  /* opt_pass methods: */
  /* Run when loop vectorization is enabled, or when some loop in FUN
     carries the force_vectorize flag.  */
  virtual bool gate (function *fun)
    {
      return flag_tree_loop_vectorize || fun->has_force_vectorize_loops;
    }

  virtual unsigned int execute (function *);

}; // class pass_vectorize
1206 1.1 mrg
1207 1.1 mrg /* Function vectorize_loops.
1208 1.1 mrg
1209 1.1 mrg Entry point to loop vectorization phase. */
1210 1.1 mrg
1211 1.1 mrg unsigned
1212 1.1 mrg pass_vectorize::execute (function *fun)
1213 1.1 mrg {
1214 1.1 mrg unsigned int i;
1215 1.1 mrg unsigned int num_vectorized_loops = 0;
1216 1.1 mrg unsigned int vect_loops_num;
1217 1.1 mrg hash_table<simduid_to_vf> *simduid_to_vf_htab = NULL;
1218 1.1 mrg hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
1219 1.1 mrg bool any_ifcvt_loops = false;
1220 1.1 mrg unsigned ret = 0;
1221 1.1 mrg
1222 1.1 mrg vect_loops_num = number_of_loops (fun);
1223 1.1 mrg
1224 1.1 mrg /* Bail out if there are no loops. */
1225 1.1 mrg if (vect_loops_num <= 1)
1226 1.1 mrg return 0;
1227 1.1 mrg
1228 1.1 mrg vect_slp_init ();
1229 1.1 mrg
1230 1.1 mrg if (fun->has_simduid_loops)
1231 1.1 mrg note_simd_array_uses (&simd_array_to_simduid_htab, fun);
1232 1.1 mrg
1233 1.1 mrg /* ----------- Analyze loops. ----------- */
1234 1.1 mrg
1235 1.1 mrg /* If some loop was duplicated, it gets bigger number
1236 1.1 mrg than all previously defined loops. This fact allows us to run
1237 1.1 mrg only over initial loops skipping newly generated ones. */
1238 1.1 mrg for (auto loop : loops_list (fun, 0))
1239 1.1 mrg if (loop->dont_vectorize)
1240 1.1 mrg {
1241 1.1 mrg any_ifcvt_loops = true;
1242 1.1 mrg /* If-conversion sometimes versions both the outer loop
1243 1.1 mrg (for the case when outer loop vectorization might be
1244 1.1 mrg desirable) as well as the inner loop in the scalar version
1245 1.1 mrg of the loop. So we have:
1246 1.1 mrg if (LOOP_VECTORIZED (1, 3))
1247 1.1 mrg {
1248 1.1 mrg loop1
1249 1.1 mrg loop2
1250 1.1 mrg }
1251 1.1 mrg else
1252 1.1 mrg loop3 (copy of loop1)
1253 1.1 mrg if (LOOP_VECTORIZED (4, 5))
1254 1.1 mrg loop4 (copy of loop2)
1255 1.1 mrg else
1256 1.1 mrg loop5 (copy of loop4)
1257 1.1 mrg If loops' iteration gives us loop3 first (which has
1258 1.1 mrg dont_vectorize set), make sure to process loop1 before loop4;
1259 1.1 mrg so that we can prevent vectorization of loop4 if loop1
1260 1.1 mrg is successfully vectorized. */
1261 1.1 mrg if (loop->inner)
1262 1.1 mrg {
1263 1.1 mrg gimple *loop_vectorized_call
1264 1.1 mrg = vect_loop_vectorized_call (loop);
1265 1.1 mrg if (loop_vectorized_call
1266 1.1 mrg && vect_loop_vectorized_call (loop->inner))
1267 1.1 mrg {
1268 1.1 mrg tree arg = gimple_call_arg (loop_vectorized_call, 0);
1269 1.1 mrg class loop *vector_loop
1270 1.1 mrg = get_loop (fun, tree_to_shwi (arg));
1271 1.1 mrg if (vector_loop && vector_loop != loop)
1272 1.1 mrg {
1273 1.1 mrg /* Make sure we don't vectorize it twice. */
1274 1.1 mrg vector_loop->dont_vectorize = true;
1275 1.1 mrg ret |= try_vectorize_loop (simduid_to_vf_htab,
1276 1.1 mrg &num_vectorized_loops,
1277 1.1 mrg vector_loop, fun);
1278 1.1 mrg }
1279 1.1 mrg }
1280 1.1 mrg }
1281 1.1 mrg }
1282 1.1 mrg else
1283 1.1 mrg ret |= try_vectorize_loop (simduid_to_vf_htab, &num_vectorized_loops,
1284 1.1 mrg loop, fun);
1285 1.1 mrg
1286 1.1 mrg vect_location = dump_user_location_t ();
1287 1.1 mrg
1288 1.1 mrg statistics_counter_event (fun, "Vectorized loops", num_vectorized_loops);
1289 1.1 mrg if (dump_enabled_p ()
1290 1.1 mrg || (num_vectorized_loops > 0 && dump_enabled_p ()))
1291 1.1 mrg dump_printf_loc (MSG_NOTE, vect_location,
1292 1.1 mrg "vectorized %u loops in function.\n",
1293 1.1 mrg num_vectorized_loops);
1294 1.1 mrg
1295 1.1 mrg /* ----------- Finalize. ----------- */
1296 1.1 mrg
1297 1.1 mrg if (any_ifcvt_loops)
1298 1.1 mrg for (i = 1; i < number_of_loops (fun); i++)
1299 1.1 mrg {
1300 1.1 mrg class loop *loop = get_loop (fun, i);
1301 1.1 mrg if (loop && loop->dont_vectorize)
1302 1.1 mrg {
1303 1.1 mrg gimple *g = vect_loop_vectorized_call (loop);
1304 1.1 mrg if (g)
1305 1.1 mrg {
1306 1.1 mrg fold_loop_internal_call (g, boolean_false_node);
1307 1.1 mrg ret |= TODO_cleanup_cfg;
1308 1.1 mrg g = NULL;
1309 1.1 mrg }
1310 1.1 mrg else
1311 1.1 mrg g = vect_loop_dist_alias_call (loop, fun);
1312 1.1 mrg
1313 1.1 mrg if (g)
1314 1.1 mrg {
1315 1.1 mrg fold_loop_internal_call (g, boolean_false_node);
1316 1.1 mrg ret |= TODO_cleanup_cfg;
1317 1.1 mrg }
1318 1.1 mrg }
1319 1.1 mrg }
1320 1.1 mrg
1321 1.1 mrg /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins. */
1322 1.1 mrg if (fun->has_simduid_loops)
1323 1.1 mrg {
1324 1.1 mrg adjust_simduid_builtins (simduid_to_vf_htab, fun);
1325 1.1 mrg /* Avoid stale SCEV cache entries for the SIMD_LANE defs. */
1326 1.1 mrg scev_reset ();
1327 1.1 mrg }
1328 1.1 mrg /* Shrink any "omp array simd" temporary arrays to the
1329 1.1 mrg actual vectorization factors. */
1330 1.1 mrg if (simd_array_to_simduid_htab)
1331 1.1 mrg shrink_simd_arrays (simd_array_to_simduid_htab, simduid_to_vf_htab);
1332 1.1 mrg delete simduid_to_vf_htab;
1333 1.1 mrg fun->has_simduid_loops = false;
1334 1.1 mrg
1335 1.1 mrg if (num_vectorized_loops > 0)
1336 1.1 mrg {
1337 1.1 mrg /* If we vectorized any loop only virtual SSA form needs to be updated.
1338 1.1 mrg ??? Also while we try hard to update loop-closed SSA form we fail
1339 1.1 mrg to properly do this in some corner-cases (see PR56286). */
1340 1.1 mrg rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals);
1341 1.1 mrg ret |= TODO_cleanup_cfg;
1342 1.1 mrg }
1343 1.1 mrg
1344 1.1 mrg for (i = 1; i < number_of_loops (fun); i++)
1345 1.1 mrg {
1346 1.1 mrg loop_vec_info loop_vinfo;
1347 1.1 mrg bool has_mask_store;
1348 1.1 mrg
1349 1.1 mrg class loop *loop = get_loop (fun, i);
1350 1.1 mrg if (!loop || !loop->aux)
1351 1.1 mrg continue;
1352 1.1 mrg loop_vinfo = (loop_vec_info) loop->aux;
1353 1.1 mrg has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo);
1354 1.1 mrg delete loop_vinfo;
1355 1.1 mrg if (has_mask_store
1356 1.1 mrg && targetm.vectorize.empty_mask_is_expensive (IFN_MASK_STORE))
1357 1.1 mrg optimize_mask_stores (loop);
1358 1.1 mrg
1359 1.1 mrg auto_bitmap exit_bbs;
1360 1.1 mrg /* Perform local CSE, this esp. helps because we emit code for
1361 1.1 mrg predicates that need to be shared for optimal predicate usage.
1362 1.1 mrg However reassoc will re-order them and prevent CSE from working
1363 1.1 mrg as it should. CSE only the loop body, not the entry. */
1364 1.1 mrg bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);
1365 1.1 mrg
1366 1.1 mrg edge entry = EDGE_PRED (loop_preheader_edge (loop)->src, 0);
1367 1.1 mrg do_rpo_vn (fun, entry, exit_bbs);
1368 1.1 mrg
1369 1.1 mrg loop->aux = NULL;
1370 1.1 mrg }
1371 1.1 mrg
1372 1.1 mrg vect_slp_fini ();
1373 1.1 mrg
1374 1.1 mrg return ret;
1375 1.1 mrg }
1376 1.1 mrg
1377 1.1 mrg } // anon namespace
1378 1.1 mrg
/* Create an instance of the loop vectorization pass for the pass
   manager.  */

gimple_opt_pass *
make_pass_vectorize (gcc::context *ctxt)
{
  return new pass_vectorize (ctxt);
}
1384 1.1 mrg
1385 1.1 mrg /* Entry point to the simduid cleanup pass. */
1386 1.1 mrg
1387 1.1 mrg namespace {
1388 1.1 mrg
/* Pass descriptor for the simduid cleanup pass.  */

const pass_data pass_data_simduid_cleanup =
{
  GIMPLE_PASS, /* type */
  "simduid", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  ( PROP_ssa | PROP_cfg ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
1401 1.1 mrg
/* Pass wrapper that folds leftover IFN_GOMP_SIMD_* builtins when the
   vectorizer itself did not run; see execute () below.  */

class pass_simduid_cleanup : public gimple_opt_pass
{
public:
  pass_simduid_cleanup (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_simduid_cleanup, ctxt)
  {}

  /* opt_pass methods: */
  /* The pass is instantiated at multiple points in the pipeline.  */
  opt_pass * clone () { return new pass_simduid_cleanup (m_ctxt); }
  virtual bool gate (function *fun) { return fun->has_simduid_loops; }
  virtual unsigned int execute (function *);

}; // class pass_simduid_cleanup
1415 1.1 mrg
1416 1.1 mrg unsigned int
1417 1.1 mrg pass_simduid_cleanup::execute (function *fun)
1418 1.1 mrg {
1419 1.1 mrg hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
1420 1.1 mrg
1421 1.1 mrg note_simd_array_uses (&simd_array_to_simduid_htab, fun);
1422 1.1 mrg
1423 1.1 mrg /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins. */
1424 1.1 mrg adjust_simduid_builtins (NULL, fun);
1425 1.1 mrg
1426 1.1 mrg /* Shrink any "omp array simd" temporary arrays to the
1427 1.1 mrg actual vectorization factors. */
1428 1.1 mrg if (simd_array_to_simduid_htab)
1429 1.1 mrg shrink_simd_arrays (simd_array_to_simduid_htab, NULL);
1430 1.1 mrg fun->has_simduid_loops = false;
1431 1.1 mrg return 0;
1432 1.1 mrg }
1433 1.1 mrg
1434 1.1 mrg } // anon namespace
1435 1.1 mrg
/* Create an instance of the simduid cleanup pass for the pass
   manager.  */

gimple_opt_pass *
make_pass_simduid_cleanup (gcc::context *ctxt)
{
  return new pass_simduid_cleanup (ctxt);
}
1441 1.1 mrg
1442 1.1 mrg
1443 1.1 mrg /* Entry point to basic block SLP phase. */
1444 1.1 mrg
1445 1.1 mrg namespace {
1446 1.1 mrg
/* Pass descriptor for the basic block (SLP) vectorization pass.  */

const pass_data pass_data_slp_vectorize =
{
  GIMPLE_PASS, /* type */
  "slp", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_TREE_SLP_VECTORIZATION, /* tv_id */
  ( PROP_ssa | PROP_cfg ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
1459 1.1 mrg
/* Pass wrapper driving basic block SLP vectorization; see
   execute () below.  */

class pass_slp_vectorize : public gimple_opt_pass
{
public:
  pass_slp_vectorize (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_slp_vectorize, ctxt)
  {}

  /* opt_pass methods: */
  /* The pass is instantiated at multiple points in the pipeline.  */
  opt_pass * clone () { return new pass_slp_vectorize (m_ctxt); }
  virtual bool gate (function *) { return flag_tree_slp_vectorize != 0; }
  virtual unsigned int execute (function *);

}; // class pass_slp_vectorize
1473 1.1 mrg
1474 1.1 mrg unsigned int
1475 1.1 mrg pass_slp_vectorize::execute (function *fun)
1476 1.1 mrg {
1477 1.1 mrg auto_purge_vect_location sentinel;
1478 1.1 mrg basic_block bb;
1479 1.1 mrg
1480 1.1 mrg bool in_loop_pipeline = scev_initialized_p ();
1481 1.1 mrg if (!in_loop_pipeline)
1482 1.1 mrg {
1483 1.1 mrg loop_optimizer_init (LOOPS_NORMAL);
1484 1.1 mrg scev_initialize ();
1485 1.1 mrg }
1486 1.1 mrg
1487 1.1 mrg /* Mark all stmts as not belonging to the current region and unvisited. */
1488 1.1 mrg FOR_EACH_BB_FN (bb, fun)
1489 1.1 mrg {
1490 1.1 mrg for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
1491 1.1 mrg gsi_next (&gsi))
1492 1.1 mrg {
1493 1.1 mrg gphi *stmt = gsi.phi ();
1494 1.1 mrg gimple_set_uid (stmt, -1);
1495 1.1 mrg gimple_set_visited (stmt, false);
1496 1.1 mrg }
1497 1.1 mrg for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
1498 1.1 mrg gsi_next (&gsi))
1499 1.1 mrg {
1500 1.1 mrg gimple *stmt = gsi_stmt (gsi);
1501 1.1 mrg gimple_set_uid (stmt, -1);
1502 1.1 mrg gimple_set_visited (stmt, false);
1503 1.1 mrg }
1504 1.1 mrg }
1505 1.1 mrg
1506 1.1 mrg vect_slp_init ();
1507 1.1 mrg
1508 1.1 mrg vect_slp_function (fun);
1509 1.1 mrg
1510 1.1 mrg vect_slp_fini ();
1511 1.1 mrg
1512 1.1 mrg if (!in_loop_pipeline)
1513 1.1 mrg {
1514 1.1 mrg scev_finalize ();
1515 1.1 mrg loop_optimizer_finalize ();
1516 1.1 mrg }
1517 1.1 mrg
1518 1.1 mrg return 0;
1519 1.1 mrg }
1520 1.1 mrg
1521 1.1 mrg } // anon namespace
1522 1.1 mrg
/* Create an instance of the BB SLP vectorization pass for the pass
   manager.  */

gimple_opt_pass *
make_pass_slp_vectorize (gcc::context *ctxt)
{
  return new pass_slp_vectorize (ctxt);
}
1528 1.1 mrg
1529 1.1 mrg
1530 1.1 mrg /* Increase alignment of global arrays to improve vectorization potential.
1531 1.1 mrg TODO:
1532 1.1 mrg - Consider also structs that have an array field.
1533 1.1 mrg - Use ipa analysis to prune arrays that can't be vectorized?
1534 1.1 mrg This should involve global alignment analysis and in the future also
1535 1.1 mrg array padding. */
1536 1.1 mrg
1537 1.1 mrg static unsigned get_vec_alignment_for_type (tree);
1538 1.1 mrg static hash_map<tree, unsigned> *type_align_map;
1539 1.1 mrg
1540 1.1 mrg /* Return alignment of array's vector type corresponding to scalar type.
1541 1.1 mrg 0 if no vector type exists. */
1542 1.1 mrg static unsigned
1543 1.1 mrg get_vec_alignment_for_array_type (tree type)
1544 1.1 mrg {
1545 1.1 mrg gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
1546 1.1 mrg poly_uint64 array_size, vector_size;
1547 1.1 mrg
1548 1.1 mrg tree scalar_type = strip_array_types (type);
1549 1.1 mrg tree vectype = get_related_vectype_for_scalar_type (VOIDmode, scalar_type);
1550 1.1 mrg if (!vectype
1551 1.1 mrg || !poly_int_tree_p (TYPE_SIZE (type), &array_size)
1552 1.1 mrg || !poly_int_tree_p (TYPE_SIZE (vectype), &vector_size)
1553 1.1 mrg || maybe_lt (array_size, vector_size))
1554 1.1 mrg return 0;
1555 1.1 mrg
1556 1.1 mrg return TYPE_ALIGN (vectype);
1557 1.1 mrg }
1558 1.1 mrg
/* Return alignment of field having maximum alignment of vector type
   corresponding to it's scalar type.  For now, we only consider fields whose
   offset is a multiple of it's vector alignment.
   0 if no suitable field is found.  */
static unsigned
get_vec_alignment_for_record_type (tree type)
{
  gcc_assert (TREE_CODE (type) == RECORD_TYPE);

  unsigned max_align = 0, alignment;
  HOST_WIDE_INT offset;
  tree offset_tree;

  if (TYPE_PACKED (type))
    return 0;

  /* Memoized result from a previous visit of this record type.  */
  unsigned *slot = type_align_map->get (type);
  if (slot)
    return *slot;

  for (tree field = first_field (type);
       field != NULL_TREE;
       field = DECL_CHAIN (field))
    {
      /* Skip if not FIELD_DECL or if alignment is set by user.  */
      if (TREE_CODE (field) != FIELD_DECL
	  || DECL_USER_ALIGN (field)
	  || DECL_ARTIFICIAL (field))
	continue;

      /* We don't need to process the type further if offset is variable,
	 since the offsets of remaining members will also be variable.  */
      if (TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST
	  || TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST)
	break;

      /* Similarly stop processing the type if offset_tree
	 does not fit in unsigned HOST_WIDE_INT.  */
      offset_tree = bit_position (field);
      if (!tree_fits_uhwi_p (offset_tree))
	break;

      offset = tree_to_uhwi (offset_tree);
      /* Recurses for array/record field types via
	 get_vec_alignment_for_type.  */
      alignment = get_vec_alignment_for_type (TREE_TYPE (field));

      /* Get maximum alignment of vectorized field/array among those members
	 whose offset is multiple of the vector alignment.  */
      if (alignment
	  && (offset % alignment == 0)
	  && (alignment > max_align))
	max_align = alignment;
    }

  type_align_map->put (type, max_align);
  return max_align;
}
1615 1.1 mrg
1616 1.1 mrg /* Return alignment of vector type corresponding to decl's scalar type
1617 1.1 mrg or 0 if it doesn't exist or the vector alignment is lesser than
1618 1.1 mrg decl's alignment. */
1619 1.1 mrg static unsigned
1620 1.1 mrg get_vec_alignment_for_type (tree type)
1621 1.1 mrg {
1622 1.1 mrg if (type == NULL_TREE)
1623 1.1 mrg return 0;
1624 1.1 mrg
1625 1.1 mrg gcc_assert (TYPE_P (type));
1626 1.1 mrg
1627 1.1 mrg static unsigned alignment = 0;
1628 1.1 mrg switch (TREE_CODE (type))
1629 1.1 mrg {
1630 1.1 mrg case ARRAY_TYPE:
1631 1.1 mrg alignment = get_vec_alignment_for_array_type (type);
1632 1.1 mrg break;
1633 1.1 mrg case RECORD_TYPE:
1634 1.1 mrg alignment = get_vec_alignment_for_record_type (type);
1635 1.1 mrg break;
1636 1.1 mrg default:
1637 1.1 mrg alignment = 0;
1638 1.1 mrg break;
1639 1.1 mrg }
1640 1.1 mrg
1641 1.1 mrg return (alignment > TYPE_ALIGN (type)) ? alignment : 0;
1642 1.1 mrg }
1643 1.1 mrg
/* Entry point to increase_alignment pass.  Raise the alignment of
   suitable global variables so their arrays can be accessed with
   aligned vector loads/stores.  */
static unsigned int
increase_alignment (void)
{
  varpool_node *vnode;

  vect_location = dump_user_location_t ();
  /* Memoization cache used by get_vec_alignment_for_record_type.  */
  type_align_map = new hash_map<tree, unsigned>;

  /* Increase the alignment of all global arrays for vectorization.  */
  FOR_EACH_DEFINED_VARIABLE (vnode)
    {
      tree decl = vnode->decl;
      unsigned int alignment;

      /* Respect user-specified alignment and skip symbols whose
	 alignment cannot be changed, as well as artificial decls.  */
      if ((decl_in_symtab_p (decl)
	  && !symtab_node::get (decl)->can_increase_alignment_p ())
	  || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl))
	continue;

      alignment = get_vec_alignment_for_type (TREE_TYPE (decl));
      if (alignment && vect_can_force_dr_alignment_p (decl, alignment))
	{
	  vnode->increase_alignment (alignment);
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "Increasing alignment of decl: %T\n", decl);
	}
    }

  delete type_align_map;
  return 0;
}
1676 1.1 mrg
1677 1.1 mrg
1678 1.1 mrg namespace {
1679 1.1 mrg
/* Pass descriptor for the IPA increase_alignment pass.  */

const pass_data pass_data_ipa_increase_alignment =
{
  SIMPLE_IPA_PASS, /* type */
  "increase_alignment", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_IPA_OPT, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
1692 1.1 mrg
/* Simple IPA pass wrapper around increase_alignment () above.  */

class pass_ipa_increase_alignment : public simple_ipa_opt_pass
{
public:
  pass_ipa_increase_alignment (gcc::context *ctxt)
    : simple_ipa_opt_pass (pass_data_ipa_increase_alignment, ctxt)
  {}

  /* opt_pass methods: */
  /* Only useful with section anchors and loop vectorization enabled.  */
  virtual bool gate (function *)
    {
      return flag_section_anchors && flag_tree_loop_vectorize;
    }

  virtual unsigned int execute (function *) { return increase_alignment (); }

}; // class pass_ipa_increase_alignment
1709 1.1 mrg
1710 1.1 mrg } // anon namespace
1711 1.1 mrg
/* Create an instance of the IPA increase_alignment pass for the pass
   manager.  */

simple_ipa_opt_pass *
make_pass_ipa_increase_alignment (gcc::context *ctxt)
{
  return new pass_ipa_increase_alignment (ctxt);
}
1717 1.1 mrg
/* If the condition represented by T is a comparison or the SSA name
   result of a comparison, extract the comparison's operands.  Represent
   T as NE_EXPR <T, 0> otherwise.  */

void
scalar_cond_masked_key::get_cond_ops_from_tree (tree t)
{
  /* Case 1: T itself is a comparison tree.  */
  if (TREE_CODE_CLASS (TREE_CODE (t)) == tcc_comparison)
    {
      this->code = TREE_CODE (t);
      this->op0 = TREE_OPERAND (t, 0);
      this->op1 = TREE_OPERAND (t, 1);
      this->inverted_p = false;
      return;
    }

  /* Case 2: T is an SSA name defined by an assignment.  */
  if (TREE_CODE (t) == SSA_NAME)
    if (gassign *stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (t)))
      {
	tree_code code = gimple_assign_rhs_code (stmt);
	if (TREE_CODE_CLASS (code) == tcc_comparison)
	  {
	    /* T = a <cmp> b.  */
	    this->code = code;
	    this->op0 = gimple_assign_rhs1 (stmt);
	    this->op1 = gimple_assign_rhs2 (stmt);
	    this->inverted_p = false;
	    return;
	  }
	else if (code == BIT_NOT_EXPR)
	  {
	    /* T = ~x where x is itself defined by a comparison: record
	       the underlying comparison with inverted_p set.  */
	    tree n_op = gimple_assign_rhs1 (stmt);
	    if ((stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (n_op))))
	      {
		code = gimple_assign_rhs_code (stmt);
		if (TREE_CODE_CLASS (code) == tcc_comparison)
		  {
		    this->code = code;
		    this->op0 = gimple_assign_rhs1 (stmt);
		    this->op1 = gimple_assign_rhs2 (stmt);
		    this->inverted_p = true;
		    return;
		  }
	      }
	  }
      }

  /* Fallback: canonicalize T as "T != 0".  */
  this->code = NE_EXPR;
  this->op0 = t;
  this->op1 = build_zero_cst (TREE_TYPE (t));
  this->inverted_p = false;
}
1769 1.1 mrg
1770 1.1 mrg /* See the comment above the declaration for details. */
1771 1.1 mrg
1772 1.1 mrg unsigned int
1773 1.1 mrg vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
1774 1.1 mrg stmt_vec_info stmt_info, slp_tree,
1775 1.1 mrg tree vectype, int misalign,
1776 1.1 mrg vect_cost_model_location where)
1777 1.1 mrg {
1778 1.1 mrg unsigned int cost
1779 1.1 mrg = builtin_vectorization_cost (kind, vectype, misalign) * count;
1780 1.1 mrg return record_stmt_cost (stmt_info, where, cost);
1781 1.1 mrg }
1782 1.1 mrg
/* See the comment above the declaration for details.  */

void
vector_costs::finish_cost (const vector_costs *)
{
  /* finish_cost must be called at most once per costing object.  */
  gcc_assert (!m_finished);
  m_finished = true;
}
1791 1.1 mrg
1792 1.1 mrg /* Record a base cost of COST units against WHERE. If STMT_INFO is
1793 1.1 mrg nonnull, use it to adjust the cost based on execution frequency
1794 1.1 mrg (where appropriate). */
1795 1.1 mrg
1796 1.1 mrg unsigned int
1797 1.1 mrg vector_costs::record_stmt_cost (stmt_vec_info stmt_info,
1798 1.1 mrg vect_cost_model_location where,
1799 1.1 mrg unsigned int cost)
1800 1.1 mrg {
1801 1.1 mrg cost = adjust_cost_for_freq (stmt_info, where, cost);
1802 1.1 mrg m_costs[where] += cost;
1803 1.1 mrg return cost;
1804 1.1 mrg }
1805 1.1 mrg
1806 1.1 mrg /* COST is the base cost we have calculated for an operation in location WHERE.
1807 1.1 mrg If STMT_INFO is nonnull, use it to adjust the cost based on execution
1808 1.1 mrg frequency (where appropriate). Return the adjusted cost. */
1809 1.1 mrg
1810 1.1 mrg unsigned int
1811 1.1 mrg vector_costs::adjust_cost_for_freq (stmt_vec_info stmt_info,
1812 1.1 mrg vect_cost_model_location where,
1813 1.1 mrg unsigned int cost)
1814 1.1 mrg {
1815 1.1 mrg /* Statements in an inner loop relative to the loop being
1816 1.1 mrg vectorized are weighted more heavily. The value here is
1817 1.1 mrg arbitrary and could potentially be improved with analysis. */
1818 1.1 mrg if (where == vect_body
1819 1.1 mrg && stmt_info
1820 1.1 mrg && stmt_in_inner_loop_p (m_vinfo, stmt_info))
1821 1.1 mrg {
1822 1.1 mrg loop_vec_info loop_vinfo = as_a<loop_vec_info> (m_vinfo);
1823 1.1 mrg cost *= LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo);
1824 1.1 mrg }
1825 1.1 mrg return cost;
1826 1.1 mrg }
1827 1.1 mrg
1828 1.1 mrg /* See the comment above the declaration for details. */
1829 1.1 mrg
1830 1.1 mrg bool
1831 1.1 mrg vector_costs::better_main_loop_than_p (const vector_costs *other) const
1832 1.1 mrg {
1833 1.1 mrg int diff = compare_inside_loop_cost (other);
1834 1.1 mrg if (diff != 0)
1835 1.1 mrg return diff < 0;
1836 1.1 mrg
1837 1.1 mrg /* If there's nothing to choose between the loop bodies, see whether
1838 1.1 mrg there's a difference in the prologue and epilogue costs. */
1839 1.1 mrg diff = compare_outside_loop_cost (other);
1840 1.1 mrg if (diff != 0)
1841 1.1 mrg return diff < 0;
1842 1.1 mrg
1843 1.1 mrg return false;
1844 1.1 mrg }
1845 1.1 mrg
1846 1.1 mrg
/* See the comment above the declaration for details.  */

bool
vector_costs::better_epilogue_loop_than_p (const vector_costs *other,
					   loop_vec_info main_loop) const
{
  loop_vec_info this_loop_vinfo = as_a<loop_vec_info> (this->m_vinfo);
  loop_vec_info other_loop_vinfo = as_a<loop_vec_info> (other->m_vinfo);

  poly_int64 this_vf = LOOP_VINFO_VECT_FACTOR (this_loop_vinfo);
  poly_int64 other_vf = LOOP_VINFO_VECT_FACTOR (other_loop_vinfo);

  poly_uint64 main_poly_vf = LOOP_VINFO_VECT_FACTOR (main_loop);
  unsigned HOST_WIDE_INT main_vf;
  unsigned HOST_WIDE_INT other_factor, this_factor, other_cost, this_cost;
  /* If we can determine how many iterations are left for the epilogue
     loop, that is if both the main loop's vectorization factor and number
     of iterations are constant, then we use them to calculate the cost of
     the epilogue loop together with a 'likely value' for the epilogues
     vectorization factor.  Otherwise we use the main loop's vectorization
     factor and the maximum poly value for the epilogue's.  If the target
     has not provided with a sensible upper bound poly vectorization
     factors are likely to be favored over constant ones.  */
  if (main_poly_vf.is_constant (&main_vf)
      && LOOP_VINFO_NITERS_KNOWN_P (main_loop))
    {
      /* NITERS is the number of scalar iterations left over for the
	 epilogue after the main loop has consumed multiples of MAIN_VF.  */
      unsigned HOST_WIDE_INT niters
	= LOOP_VINFO_INT_NITERS (main_loop) % main_vf;
      HOST_WIDE_INT other_likely_vf
	= estimated_poly_value (other_vf, POLY_VALUE_LIKELY);
      HOST_WIDE_INT this_likely_vf
	= estimated_poly_value (this_vf, POLY_VALUE_LIKELY);

      /* If the epilogue is using partial vectors we account for the
	 partial iteration here too.  */
      other_factor = niters / other_likely_vf;
      if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (other_loop_vinfo)
	  && niters % other_likely_vf != 0)
	other_factor++;

      this_factor = niters / this_likely_vf;
      if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (this_loop_vinfo)
	  && niters % this_likely_vf != 0)
	this_factor++;
    }
  else
    {
      /* Iteration counts are unknown: compare using the maximum
	 estimates of the poly vectorization factors instead.  */
      unsigned HOST_WIDE_INT main_vf_max
	= estimated_poly_value (main_poly_vf, POLY_VALUE_MAX);
      unsigned HOST_WIDE_INT other_vf_max
	= estimated_poly_value (other_vf, POLY_VALUE_MAX);
      unsigned HOST_WIDE_INT this_vf_max
	= estimated_poly_value (this_vf, POLY_VALUE_MAX);

      other_factor = CEIL (main_vf_max, other_vf_max);
      this_factor = CEIL (main_vf_max, this_vf_max);

      /* If the loop is not using partial vectors then it will iterate one
	 time less than one that does.  It is safe to subtract one here,
	 because the main loop's vf is always at least 2x bigger than that
	 of an epilogue.  */
      if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (other_loop_vinfo))
	other_factor -= 1;
      if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (this_loop_vinfo))
	this_factor -= 1;
    }

  /* Compute the costs by multiplying the inside costs with the factor and
     add the outside costs for a more complete picture.  The factor is the
     amount of times we are expecting to iterate this epilogue.  */
  other_cost = other->body_cost () * other_factor;
  this_cost = this->body_cost () * this_factor;
  other_cost += other->outside_cost ();
  this_cost += this->outside_cost ();
  return this_cost < other_cost;
}
1923 1.1 mrg
/* A <=>-style subroutine of better_main_loop_than_p.  Check whether we can
   determine the return value of better_main_loop_than_p by comparing the
   inside (loop body) costs of THIS and OTHER.  Return:

   * -1 if better_main_loop_than_p should return true.
   * 1 if better_main_loop_than_p should return false.
   * 0 if we can't decide.  */

int
vector_costs::compare_inside_loop_cost (const vector_costs *other) const
{
  loop_vec_info this_loop_vinfo = as_a<loop_vec_info> (this->m_vinfo);
  loop_vec_info other_loop_vinfo = as_a<loop_vec_info> (other->m_vinfo);

  /* Both candidates must be vectorizing the same scalar loop.  */
  struct loop *loop = LOOP_VINFO_LOOP (this_loop_vinfo);
  gcc_assert (LOOP_VINFO_LOOP (other_loop_vinfo) == loop);

  poly_int64 this_vf = LOOP_VINFO_VECT_FACTOR (this_loop_vinfo);
  poly_int64 other_vf = LOOP_VINFO_VECT_FACTOR (other_loop_vinfo);

  /* Limit the VFs to what is likely to be the maximum number of iterations,
     to handle cases in which at least one loop_vinfo is fully-masked.  */
  HOST_WIDE_INT estimated_max_niter = likely_max_stmt_executions_int (loop);
  if (estimated_max_niter != -1)
    {
      if (known_le (estimated_max_niter, this_vf))
	this_vf = estimated_max_niter;
      if (known_le (estimated_max_niter, other_vf))
	other_vf = estimated_max_niter;
    }

  /* Check whether the (fractional) cost per scalar iteration is lower or
     higher: this_inside_cost / this_vf vs.  other_inside_cost / other_vf.
     Cross-multiplying avoids the division.  */
  poly_int64 rel_this = this_loop_vinfo->vector_costs->body_cost () * other_vf;
  poly_int64 rel_other
    = other_loop_vinfo->vector_costs->body_cost () * this_vf;

  HOST_WIDE_INT est_rel_this_min
    = estimated_poly_value (rel_this, POLY_VALUE_MIN);
  HOST_WIDE_INT est_rel_this_max
    = estimated_poly_value (rel_this, POLY_VALUE_MAX);

  HOST_WIDE_INT est_rel_other_min
    = estimated_poly_value (rel_other, POLY_VALUE_MIN);
  HOST_WIDE_INT est_rel_other_max
    = estimated_poly_value (rel_other, POLY_VALUE_MAX);

  /* Check first if we can make out an unambigous total order from the minimum
     and maximum estimates.  */
  if (est_rel_this_min < est_rel_other_min
      && est_rel_this_max < est_rel_other_max)
    return -1;

  if (est_rel_other_min < est_rel_this_min
      && est_rel_other_max < est_rel_this_max)
    return 1;

  /* When other_loop_vinfo uses a variable vectorization factor,
     we know that it has a lower cost for at least one runtime VF.
     However, we don't know how likely that VF is.

     One option would be to compare the costs for the estimated VFs.
     The problem is that that can put too much pressure on the cost
     model.  E.g. if the estimated VF is also the lowest possible VF,
     and if other_loop_vinfo is 1 unit worse than this_loop_vinfo
     for the estimated VF, we'd then choose this_loop_vinfo even
     though (a) this_loop_vinfo might not actually be better than
     other_loop_vinfo for that VF and (b) it would be significantly
     worse at larger VFs.

     Here we go for a hacky compromise: pick this_loop_vinfo if it is
     no more expensive than other_loop_vinfo even after doubling the
     estimated other_loop_vinfo VF.  For all but trivial loops, this
     ensures that we only pick this_loop_vinfo if it is significantly
     better than other_loop_vinfo at the estimated VF.  */
  if (est_rel_other_min != est_rel_this_min
      || est_rel_other_max != est_rel_this_max)
    {
      HOST_WIDE_INT est_rel_this_likely
	= estimated_poly_value (rel_this, POLY_VALUE_LIKELY);
      HOST_WIDE_INT est_rel_other_likely
	= estimated_poly_value (rel_other, POLY_VALUE_LIKELY);

      return est_rel_this_likely * 2 <= est_rel_other_likely ? -1 : 1;
    }

  /* The estimates are identical; we can't decide here.  */
  return 0;
}
2012 1.1 mrg
2013 1.1 mrg /* A <=>-style subroutine of better_main_loop_than_p, used when there is
2014 1.1 mrg nothing to choose between the inside (loop body) costs of THIS and OTHER.
2015 1.1 mrg Check whether we can determine the return value of better_main_loop_than_p
2016 1.1 mrg by comparing the outside (prologue and epilogue) costs of THIS and OTHER.
2017 1.1 mrg Return:
2018 1.1 mrg
2019 1.1 mrg * -1 if better_main_loop_than_p should return true.
2020 1.1 mrg * 1 if better_main_loop_than_p should return false.
2021 1.1 mrg * 0 if we can't decide. */
2022 1.1 mrg
2023 1.1 mrg int
2024 1.1 mrg vector_costs::compare_outside_loop_cost (const vector_costs *other) const
2025 1.1 mrg {
2026 1.1 mrg auto this_outside_cost = this->outside_cost ();
2027 1.1 mrg auto other_outside_cost = other->outside_cost ();
2028 1.1 mrg if (this_outside_cost != other_outside_cost)
2029 1.1 mrg return this_outside_cost < other_outside_cost ? -1 : 1;
2030 1.1 mrg
2031 return 0;
2032 }
2033