tree-ssa-loop-manip.cc revision 1.1 1 1.1 mrg /* High-level loop manipulation functions.
2 1.1 mrg Copyright (C) 2004-2022 Free Software Foundation, Inc.
3 1.1 mrg
4 1.1 mrg This file is part of GCC.
5 1.1 mrg
6 1.1 mrg GCC is free software; you can redistribute it and/or modify it
7 1.1 mrg under the terms of the GNU General Public License as published by the
8 1.1 mrg Free Software Foundation; either version 3, or (at your option) any
9 1.1 mrg later version.
10 1.1 mrg
11 1.1 mrg GCC is distributed in the hope that it will be useful, but WITHOUT
12 1.1 mrg ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 1.1 mrg FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 1.1 mrg for more details.
15 1.1 mrg
16 1.1 mrg You should have received a copy of the GNU General Public License
17 1.1 mrg along with GCC; see the file COPYING3. If not see
18 1.1 mrg <http://www.gnu.org/licenses/>. */
19 1.1 mrg
20 1.1 mrg #include "config.h"
21 1.1 mrg #include "system.h"
22 1.1 mrg #include "coretypes.h"
23 1.1 mrg #include "backend.h"
24 1.1 mrg #include "tree.h"
25 1.1 mrg #include "gimple.h"
26 1.1 mrg #include "cfghooks.h"
27 1.1 mrg #include "tree-pass.h" /* ??? for TODO_update_ssa but this isn't a pass. */
28 1.1 mrg #include "ssa.h"
29 1.1 mrg #include "gimple-pretty-print.h"
30 1.1 mrg #include "fold-const.h"
31 1.1 mrg #include "cfganal.h"
32 1.1 mrg #include "gimplify.h"
33 1.1 mrg #include "gimple-iterator.h"
34 1.1 mrg #include "gimplify-me.h"
35 1.1 mrg #include "tree-cfg.h"
36 1.1 mrg #include "tree-ssa-loop-ivopts.h"
37 1.1 mrg #include "tree-ssa-loop-manip.h"
38 1.1 mrg #include "tree-ssa-loop-niter.h"
39 1.1 mrg #include "tree-ssa-loop.h"
40 1.1 mrg #include "tree-into-ssa.h"
41 1.1 mrg #include "tree-ssa.h"
42 1.1 mrg #include "cfgloop.h"
43 1.1 mrg #include "tree-scalar-evolution.h"
44 1.1 mrg #include "tree-inline.h"
45 1.1 mrg
46 1.1 mrg /* All bitmaps for rewriting into loop-closed SSA go on this obstack,
47 1.1 mrg so that we can free them all at once. */
48 1.1 mrg static bitmap_obstack loop_renamer_obstack;
49 1.1 mrg
50 1.1 mrg /* Creates an induction variable with value BASE + STEP * iteration in LOOP.
51 1.1 mrg It is expected that neither BASE nor STEP are shared with other expressions
52 1.1 mrg (unless the sharing rules allow this). Use VAR as a base var_decl for it
53 1.1 mrg (if NULL, a new temporary will be created). The increment will occur at
54 1.1 mrg INCR_POS (after it if AFTER is true, before it otherwise). INCR_POS and
55 1.1 mrg AFTER can be computed using standard_iv_increment_position. The ssa versions
56 1.1 mrg of the variable before and after increment will be stored in VAR_BEFORE and
57 1.1 mrg VAR_AFTER (unless they are NULL). */
58 1.1 mrg
59 1.1 mrg void
60 1.1 mrg create_iv (tree base, tree step, tree var, class loop *loop,
61 1.1 mrg gimple_stmt_iterator *incr_pos, bool after,
62 1.1 mrg tree *var_before, tree *var_after)
63 1.1 mrg {
64 1.1 mrg gassign *stmt;
65 1.1 mrg gphi *phi;
66 1.1 mrg tree initial, step1;
67 1.1 mrg gimple_seq stmts;
68 1.1 mrg tree vb, va;
69 1.1 mrg enum tree_code incr_op = PLUS_EXPR;
70 1.1 mrg edge pe = loop_preheader_edge (loop);
71 1.1 mrg
72 1.1 mrg if (var != NULL_TREE)
73 1.1 mrg {
74 1.1 mrg vb = make_ssa_name (var);
75 1.1 mrg va = make_ssa_name (var);
76 1.1 mrg }
77 1.1 mrg else
78 1.1 mrg {
79 1.1 mrg vb = make_temp_ssa_name (TREE_TYPE (base), NULL, "ivtmp");
80 1.1 mrg va = make_temp_ssa_name (TREE_TYPE (base), NULL, "ivtmp");
81 1.1 mrg }
82 1.1 mrg if (var_before)
83 1.1 mrg *var_before = vb;
84 1.1 mrg if (var_after)
85 1.1 mrg *var_after = va;
86 1.1 mrg
87 1.1 mrg /* For easier readability of the created code, produce MINUS_EXPRs
88 1.1 mrg when suitable. */
89 1.1 mrg if (TREE_CODE (step) == INTEGER_CST)
90 1.1 mrg {
91 1.1 mrg if (TYPE_UNSIGNED (TREE_TYPE (step)))
92 1.1 mrg {
93 1.1 mrg step1 = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
94 1.1 mrg if (tree_int_cst_lt (step1, step))
95 1.1 mrg {
96 1.1 mrg incr_op = MINUS_EXPR;
97 1.1 mrg step = step1;
98 1.1 mrg }
99 1.1 mrg }
100 1.1 mrg else
101 1.1 mrg {
102 1.1 mrg bool ovf;
103 1.1 mrg
104 1.1 mrg if (!tree_expr_nonnegative_warnv_p (step, &ovf)
105 1.1 mrg && may_negate_without_overflow_p (step))
106 1.1 mrg {
107 1.1 mrg incr_op = MINUS_EXPR;
108 1.1 mrg step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
109 1.1 mrg }
110 1.1 mrg }
111 1.1 mrg }
112 1.1 mrg if (POINTER_TYPE_P (TREE_TYPE (base)))
113 1.1 mrg {
114 1.1 mrg if (TREE_CODE (base) == ADDR_EXPR)
115 1.1 mrg mark_addressable (TREE_OPERAND (base, 0));
116 1.1 mrg step = convert_to_ptrofftype (step);
117 1.1 mrg if (incr_op == MINUS_EXPR)
118 1.1 mrg step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
119 1.1 mrg incr_op = POINTER_PLUS_EXPR;
120 1.1 mrg }
121 1.1 mrg /* Gimplify the step if necessary. We put the computations in front of the
122 1.1 mrg loop (i.e. the step should be loop invariant). */
123 1.1 mrg step = force_gimple_operand (step, &stmts, true, NULL_TREE);
124 1.1 mrg if (stmts)
125 1.1 mrg gsi_insert_seq_on_edge_immediate (pe, stmts);
126 1.1 mrg
127 1.1 mrg stmt = gimple_build_assign (va, incr_op, vb, step);
128 1.1 mrg /* Prevent the increment from inheriting a bogus location if it is not put
129 1.1 mrg immediately after a statement whose location is known. */
130 1.1 mrg if (after)
131 1.1 mrg {
132 1.1 mrg if (gsi_end_p (*incr_pos)
133 1.1 mrg || (is_gimple_debug (gsi_stmt (*incr_pos))
134 1.1 mrg && gsi_bb (*incr_pos)
135 1.1 mrg && gsi_end_p (gsi_last_nondebug_bb (gsi_bb (*incr_pos)))))
136 1.1 mrg {
137 1.1 mrg edge e = single_succ_edge (gsi_bb (*incr_pos));
138 1.1 mrg gimple_set_location (stmt, e->goto_locus);
139 1.1 mrg }
140 1.1 mrg gsi_insert_after (incr_pos, stmt, GSI_NEW_STMT);
141 1.1 mrg }
142 1.1 mrg else
143 1.1 mrg {
144 1.1 mrg gimple_stmt_iterator gsi = *incr_pos;
145 1.1 mrg if (!gsi_end_p (gsi) && is_gimple_debug (gsi_stmt (gsi)))
146 1.1 mrg gsi_next_nondebug (&gsi);
147 1.1 mrg if (!gsi_end_p (gsi))
148 1.1 mrg gimple_set_location (stmt, gimple_location (gsi_stmt (gsi)));
149 1.1 mrg gsi_insert_before (incr_pos, stmt, GSI_NEW_STMT);
150 1.1 mrg }
151 1.1 mrg
152 1.1 mrg initial = force_gimple_operand (base, &stmts, true, var);
153 1.1 mrg if (stmts)
154 1.1 mrg gsi_insert_seq_on_edge_immediate (pe, stmts);
155 1.1 mrg
156 1.1 mrg phi = create_phi_node (vb, loop->header);
157 1.1 mrg add_phi_arg (phi, initial, loop_preheader_edge (loop), UNKNOWN_LOCATION);
158 1.1 mrg add_phi_arg (phi, va, loop_latch_edge (loop), UNKNOWN_LOCATION);
159 1.1 mrg }
160 1.1 mrg
161 1.1 mrg /* Return the innermost superloop LOOP of USE_LOOP that is a superloop of
162 1.1 mrg both DEF_LOOP and USE_LOOP. */
163 1.1 mrg
164 1.1 mrg static inline class loop *
165 1.1 mrg find_sibling_superloop (class loop *use_loop, class loop *def_loop)
166 1.1 mrg {
167 1.1 mrg unsigned ud = loop_depth (use_loop);
168 1.1 mrg unsigned dd = loop_depth (def_loop);
169 1.1 mrg gcc_assert (ud > 0 && dd > 0);
170 1.1 mrg if (ud > dd)
171 1.1 mrg use_loop = superloop_at_depth (use_loop, dd);
172 1.1 mrg if (ud < dd)
173 1.1 mrg def_loop = superloop_at_depth (def_loop, ud);
174 1.1 mrg while (loop_outer (use_loop) != loop_outer (def_loop))
175 1.1 mrg {
176 1.1 mrg use_loop = loop_outer (use_loop);
177 1.1 mrg def_loop = loop_outer (def_loop);
178 1.1 mrg gcc_assert (use_loop && def_loop);
179 1.1 mrg }
180 1.1 mrg return use_loop;
181 1.1 mrg }
182 1.1 mrg
183 1.1 mrg /* DEF_BB is a basic block containing a DEF that needs rewriting into
184 1.1 mrg loop-closed SSA form. USE_BLOCKS is the set of basic blocks containing
185 1.1 mrg uses of DEF that "escape" from the loop containing DEF_BB (i.e. blocks in
186 1.1 mrg USE_BLOCKS are dominated by DEF_BB but not in the loop father of DEF_B).
187 1.1 mrg ALL_EXITS[I] is the set of all basic blocks that exit loop I.
188 1.1 mrg
189 1.1 mrg Compute the subset of LOOP_EXITS that exit the loop containing DEF_BB
190 1.1 mrg or one of its loop fathers, in which DEF is live. This set is returned
191 1.1 mrg in the bitmap LIVE_EXITS.
192 1.1 mrg
193 1.1 mrg Instead of computing the complete livein set of the def, we use the loop
194 1.1 mrg nesting tree as a form of poor man's structure analysis. This greatly
195 1.1 mrg speeds up the analysis, which is important because this function may be
196 1.1 mrg called on all SSA names that need rewriting, one at a time. */
197 1.1 mrg
198 1.1 mrg static void
199 1.1 mrg compute_live_loop_exits (bitmap live_exits, bitmap use_blocks,
200 1.1 mrg bitmap *loop_exits, basic_block def_bb)
201 1.1 mrg {
202 1.1 mrg unsigned i;
203 1.1 mrg bitmap_iterator bi;
204 1.1 mrg class loop *def_loop = def_bb->loop_father;
205 1.1 mrg unsigned def_loop_depth = loop_depth (def_loop);
206 1.1 mrg bitmap def_loop_exits;
207 1.1 mrg
208 1.1 mrg /* Normally the work list size is bounded by the number of basic
209 1.1 mrg blocks in the largest loop. We don't know this number, but we
210 1.1 mrg can be fairly sure that it will be relatively small. */
211 1.1 mrg auto_vec<basic_block> worklist (MAX (8, n_basic_blocks_for_fn (cfun) / 128));
212 1.1 mrg
213 1.1 mrg EXECUTE_IF_SET_IN_BITMAP (use_blocks, 0, i, bi)
214 1.1 mrg {
215 1.1 mrg basic_block use_bb = BASIC_BLOCK_FOR_FN (cfun, i);
216 1.1 mrg class loop *use_loop = use_bb->loop_father;
217 1.1 mrg gcc_checking_assert (def_loop != use_loop
218 1.1 mrg && ! flow_loop_nested_p (def_loop, use_loop));
219 1.1 mrg if (! flow_loop_nested_p (use_loop, def_loop))
220 1.1 mrg use_bb = find_sibling_superloop (use_loop, def_loop)->header;
221 1.1 mrg if (bitmap_set_bit (live_exits, use_bb->index))
222 1.1 mrg worklist.safe_push (use_bb);
223 1.1 mrg }
224 1.1 mrg
225 1.1 mrg /* Iterate until the worklist is empty. */
226 1.1 mrg while (! worklist.is_empty ())
227 1.1 mrg {
228 1.1 mrg edge e;
229 1.1 mrg edge_iterator ei;
230 1.1 mrg
231 1.1 mrg /* Pull a block off the worklist. */
232 1.1 mrg basic_block bb = worklist.pop ();
233 1.1 mrg
234 1.1 mrg /* Make sure we have at least enough room in the work list
235 1.1 mrg for all predecessors of this block. */
236 1.1 mrg worklist.reserve (EDGE_COUNT (bb->preds));
237 1.1 mrg
238 1.1 mrg /* For each predecessor block. */
239 1.1 mrg FOR_EACH_EDGE (e, ei, bb->preds)
240 1.1 mrg {
241 1.1 mrg basic_block pred = e->src;
242 1.1 mrg class loop *pred_loop = pred->loop_father;
243 1.1 mrg unsigned pred_loop_depth = loop_depth (pred_loop);
244 1.1 mrg bool pred_visited;
245 1.1 mrg
246 1.1 mrg /* We should have met DEF_BB along the way. */
247 1.1 mrg gcc_assert (pred != ENTRY_BLOCK_PTR_FOR_FN (cfun));
248 1.1 mrg
249 1.1 mrg if (pred_loop_depth >= def_loop_depth)
250 1.1 mrg {
251 1.1 mrg if (pred_loop_depth > def_loop_depth)
252 1.1 mrg pred_loop = superloop_at_depth (pred_loop, def_loop_depth);
253 1.1 mrg /* If we've reached DEF_LOOP, our train ends here. */
254 1.1 mrg if (pred_loop == def_loop)
255 1.1 mrg continue;
256 1.1 mrg }
257 1.1 mrg else if (! flow_loop_nested_p (pred_loop, def_loop))
258 1.1 mrg pred = find_sibling_superloop (pred_loop, def_loop)->header;
259 1.1 mrg
260 1.1 mrg /* Add PRED to the LIVEIN set. PRED_VISITED is true if
261 1.1 mrg we had already added PRED to LIVEIN before. */
262 1.1 mrg pred_visited = !bitmap_set_bit (live_exits, pred->index);
263 1.1 mrg
264 1.1 mrg /* If we have visited PRED before, don't add it to the worklist.
265 1.1 mrg If BB dominates PRED, then we're probably looking at a loop.
266 1.1 mrg We're only interested in looking up in the dominance tree
267 1.1 mrg because DEF_BB dominates all the uses. */
268 1.1 mrg if (pred_visited || dominated_by_p (CDI_DOMINATORS, pred, bb))
269 1.1 mrg continue;
270 1.1 mrg
271 1.1 mrg worklist.quick_push (pred);
272 1.1 mrg }
273 1.1 mrg }
274 1.1 mrg
275 1.1 mrg def_loop_exits = BITMAP_ALLOC (&loop_renamer_obstack);
276 1.1 mrg for (class loop *loop = def_loop;
277 1.1 mrg loop != current_loops->tree_root;
278 1.1 mrg loop = loop_outer (loop))
279 1.1 mrg bitmap_ior_into (def_loop_exits, loop_exits[loop->num]);
280 1.1 mrg bitmap_and_into (live_exits, def_loop_exits);
281 1.1 mrg BITMAP_FREE (def_loop_exits);
282 1.1 mrg }
283 1.1 mrg
284 1.1 mrg /* Add a loop-closing PHI for VAR in basic block EXIT. */
285 1.1 mrg
286 1.1 mrg static void
287 1.1 mrg add_exit_phi (basic_block exit, tree var)
288 1.1 mrg {
289 1.1 mrg gphi *phi;
290 1.1 mrg edge e;
291 1.1 mrg edge_iterator ei;
292 1.1 mrg
293 1.1 mrg /* Check that at least one of the edges entering the EXIT block exits
294 1.1 mrg the loop, or a superloop of that loop, that VAR is defined in. */
295 1.1 mrg if (flag_checking)
296 1.1 mrg {
297 1.1 mrg gimple *def_stmt = SSA_NAME_DEF_STMT (var);
298 1.1 mrg basic_block def_bb = gimple_bb (def_stmt);
299 1.1 mrg FOR_EACH_EDGE (e, ei, exit->preds)
300 1.1 mrg {
301 1.1 mrg class loop *aloop = find_common_loop (def_bb->loop_father,
302 1.1 mrg e->src->loop_father);
303 1.1 mrg if (!flow_bb_inside_loop_p (aloop, e->dest))
304 1.1 mrg break;
305 1.1 mrg }
306 1.1 mrg gcc_assert (e);
307 1.1 mrg }
308 1.1 mrg
309 1.1 mrg phi = create_phi_node (NULL_TREE, exit);
310 1.1 mrg create_new_def_for (var, phi, gimple_phi_result_ptr (phi));
311 1.1 mrg FOR_EACH_EDGE (e, ei, exit->preds)
312 1.1 mrg add_phi_arg (phi, var, e, UNKNOWN_LOCATION);
313 1.1 mrg
314 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
315 1.1 mrg {
316 1.1 mrg fprintf (dump_file, ";; Created LCSSA PHI: ");
317 1.1 mrg print_gimple_stmt (dump_file, phi, 0, dump_flags);
318 1.1 mrg }
319 1.1 mrg }
320 1.1 mrg
321 1.1 mrg /* Add exit phis for VAR that is used in LIVEIN.
322 1.1 mrg Exits of the loops are stored in LOOP_EXITS. */
323 1.1 mrg
324 1.1 mrg static void
325 1.1 mrg add_exit_phis_var (tree var, bitmap use_blocks, bitmap *loop_exits)
326 1.1 mrg {
327 1.1 mrg unsigned index;
328 1.1 mrg bitmap_iterator bi;
329 1.1 mrg basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
330 1.1 mrg bitmap live_exits = BITMAP_ALLOC (&loop_renamer_obstack);
331 1.1 mrg
332 1.1 mrg gcc_checking_assert (! bitmap_bit_p (use_blocks, def_bb->index));
333 1.1 mrg
334 1.1 mrg compute_live_loop_exits (live_exits, use_blocks, loop_exits, def_bb);
335 1.1 mrg
336 1.1 mrg EXECUTE_IF_SET_IN_BITMAP (live_exits, 0, index, bi)
337 1.1 mrg {
338 1.1 mrg add_exit_phi (BASIC_BLOCK_FOR_FN (cfun, index), var);
339 1.1 mrg }
340 1.1 mrg
341 1.1 mrg BITMAP_FREE (live_exits);
342 1.1 mrg }
343 1.1 mrg
344 1.1 mrg /* Add exit phis for the names marked in NAMES_TO_RENAME.
345 1.1 mrg Exits of the loops are stored in EXITS. Sets of blocks where the ssa
346 1.1 mrg names are used are stored in USE_BLOCKS. */
347 1.1 mrg
348 1.1 mrg static void
349 1.1 mrg add_exit_phis (bitmap names_to_rename, bitmap *use_blocks, bitmap *loop_exits)
350 1.1 mrg {
351 1.1 mrg unsigned i;
352 1.1 mrg bitmap_iterator bi;
353 1.1 mrg
354 1.1 mrg EXECUTE_IF_SET_IN_BITMAP (names_to_rename, 0, i, bi)
355 1.1 mrg {
356 1.1 mrg add_exit_phis_var (ssa_name (i), use_blocks[i], loop_exits);
357 1.1 mrg }
358 1.1 mrg }
359 1.1 mrg
360 1.1 mrg /* Fill the array of bitmaps LOOP_EXITS with all loop exit edge targets. */
361 1.1 mrg
362 1.1 mrg static void
363 1.1 mrg get_loops_exits (bitmap *loop_exits)
364 1.1 mrg {
365 1.1 mrg unsigned j;
366 1.1 mrg edge e;
367 1.1 mrg
368 1.1 mrg for (auto loop : loops_list (cfun, 0))
369 1.1 mrg {
370 1.1 mrg auto_vec<edge> exit_edges = get_loop_exit_edges (loop);
371 1.1 mrg loop_exits[loop->num] = BITMAP_ALLOC (&loop_renamer_obstack);
372 1.1 mrg FOR_EACH_VEC_ELT (exit_edges, j, e)
373 1.1 mrg bitmap_set_bit (loop_exits[loop->num], e->dest->index);
374 1.1 mrg }
375 1.1 mrg }
376 1.1 mrg
377 1.1 mrg /* For USE in BB, if it is used outside of the loop it is defined in,
378 1.1 mrg mark it for rewrite. Record basic block BB where it is used
379 1.1 mrg to USE_BLOCKS. Record the ssa name index to NEED_PHIS bitmap.
380 1.1 mrg Note that for USEs in phis, BB should be the src of the edge corresponding to
381 1.1 mrg the use, rather than the bb containing the phi. */
382 1.1 mrg
383 1.1 mrg static void
384 1.1 mrg find_uses_to_rename_use (basic_block bb, tree use, bitmap *use_blocks,
385 1.1 mrg bitmap need_phis)
386 1.1 mrg {
387 1.1 mrg unsigned ver;
388 1.1 mrg basic_block def_bb;
389 1.1 mrg class loop *def_loop;
390 1.1 mrg
391 1.1 mrg if (TREE_CODE (use) != SSA_NAME)
392 1.1 mrg return;
393 1.1 mrg
394 1.1 mrg ver = SSA_NAME_VERSION (use);
395 1.1 mrg def_bb = gimple_bb (SSA_NAME_DEF_STMT (use));
396 1.1 mrg if (!def_bb)
397 1.1 mrg return;
398 1.1 mrg def_loop = def_bb->loop_father;
399 1.1 mrg
400 1.1 mrg /* If the definition is not inside a loop, it is not interesting. */
401 1.1 mrg if (!loop_outer (def_loop))
402 1.1 mrg return;
403 1.1 mrg
404 1.1 mrg /* If the use is not outside of the loop it is defined in, it is not
405 1.1 mrg interesting. */
406 1.1 mrg if (flow_bb_inside_loop_p (def_loop, bb))
407 1.1 mrg return;
408 1.1 mrg
409 1.1 mrg /* If we're seeing VER for the first time, we still have to allocate
410 1.1 mrg a bitmap for its uses. */
411 1.1 mrg if (bitmap_set_bit (need_phis, ver))
412 1.1 mrg use_blocks[ver] = BITMAP_ALLOC (&loop_renamer_obstack);
413 1.1 mrg bitmap_set_bit (use_blocks[ver], bb->index);
414 1.1 mrg }
415 1.1 mrg
416 1.1 mrg /* For uses matching USE_FLAGS in STMT, mark names that are used outside of the
417 1.1 mrg loop they are defined to rewrite. Record the set of blocks in which the ssa
418 1.1 mrg names are used to USE_BLOCKS, and the ssa names themselves to NEED_PHIS. */
419 1.1 mrg
420 1.1 mrg static void
421 1.1 mrg find_uses_to_rename_stmt (gimple *stmt, bitmap *use_blocks, bitmap need_phis,
422 1.1 mrg int use_flags)
423 1.1 mrg {
424 1.1 mrg ssa_op_iter iter;
425 1.1 mrg tree var;
426 1.1 mrg basic_block bb = gimple_bb (stmt);
427 1.1 mrg
428 1.1 mrg if (is_gimple_debug (stmt))
429 1.1 mrg return;
430 1.1 mrg
431 1.1 mrg /* FOR_EACH_SSA_TREE_OPERAND iterator does not allows SSA_OP_VIRTUAL_USES
432 1.1 mrg only. */
433 1.1 mrg if (use_flags == SSA_OP_VIRTUAL_USES)
434 1.1 mrg {
435 1.1 mrg tree vuse = gimple_vuse (stmt);
436 1.1 mrg if (vuse != NULL_TREE)
437 1.1 mrg find_uses_to_rename_use (bb, gimple_vuse (stmt), use_blocks, need_phis);
438 1.1 mrg }
439 1.1 mrg else
440 1.1 mrg FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, use_flags)
441 1.1 mrg find_uses_to_rename_use (bb, var, use_blocks, need_phis);
442 1.1 mrg }
443 1.1 mrg
444 1.1 mrg /* Marks names matching USE_FLAGS that are used in BB and outside of the loop
445 1.1 mrg they are defined in for rewrite. Records the set of blocks in which the ssa
446 1.1 mrg names are used to USE_BLOCKS. Record the SSA names that will
447 1.1 mrg need exit PHIs in NEED_PHIS. */
448 1.1 mrg
449 1.1 mrg static void
450 1.1 mrg find_uses_to_rename_bb (basic_block bb, bitmap *use_blocks, bitmap need_phis,
451 1.1 mrg int use_flags)
452 1.1 mrg {
453 1.1 mrg edge e;
454 1.1 mrg edge_iterator ei;
455 1.1 mrg bool do_virtuals = (use_flags & SSA_OP_VIRTUAL_USES) != 0;
456 1.1 mrg bool do_nonvirtuals = (use_flags & SSA_OP_USE) != 0;
457 1.1 mrg
458 1.1 mrg FOR_EACH_EDGE (e, ei, bb->succs)
459 1.1 mrg for (gphi_iterator bsi = gsi_start_phis (e->dest); !gsi_end_p (bsi);
460 1.1 mrg gsi_next (&bsi))
461 1.1 mrg {
462 1.1 mrg gphi *phi = bsi.phi ();
463 1.1 mrg bool virtual_p = virtual_operand_p (gimple_phi_result (phi));
464 1.1 mrg if ((virtual_p && do_virtuals)
465 1.1 mrg || (!virtual_p && do_nonvirtuals))
466 1.1 mrg find_uses_to_rename_use (bb, PHI_ARG_DEF_FROM_EDGE (phi, e),
467 1.1 mrg use_blocks, need_phis);
468 1.1 mrg }
469 1.1 mrg
470 1.1 mrg for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
471 1.1 mrg gsi_next (&bsi))
472 1.1 mrg find_uses_to_rename_stmt (gsi_stmt (bsi), use_blocks, need_phis,
473 1.1 mrg use_flags);
474 1.1 mrg }
475 1.1 mrg
476 1.1 mrg /* Marks names matching USE_FLAGS that are used outside of the loop they are
477 1.1 mrg defined in for rewrite. Records the set of blocks in which the ssa names are
478 1.1 mrg used to USE_BLOCKS. Record the SSA names that will need exit PHIs in
479 1.1 mrg NEED_PHIS. If CHANGED_BBS is not NULL, scan only blocks in this set. */
480 1.1 mrg
481 1.1 mrg static void
482 1.1 mrg find_uses_to_rename (bitmap changed_bbs, bitmap *use_blocks, bitmap need_phis,
483 1.1 mrg int use_flags)
484 1.1 mrg {
485 1.1 mrg basic_block bb;
486 1.1 mrg unsigned index;
487 1.1 mrg bitmap_iterator bi;
488 1.1 mrg
489 1.1 mrg if (changed_bbs)
490 1.1 mrg EXECUTE_IF_SET_IN_BITMAP (changed_bbs, 0, index, bi)
491 1.1 mrg {
492 1.1 mrg bb = BASIC_BLOCK_FOR_FN (cfun, index);
493 1.1 mrg if (bb)
494 1.1 mrg find_uses_to_rename_bb (bb, use_blocks, need_phis, use_flags);
495 1.1 mrg }
496 1.1 mrg else
497 1.1 mrg FOR_EACH_BB_FN (bb, cfun)
498 1.1 mrg find_uses_to_rename_bb (bb, use_blocks, need_phis, use_flags);
499 1.1 mrg }
500 1.1 mrg
501 1.1 mrg /* Mark uses of DEF that are used outside of the loop they are defined in for
502 1.1 mrg rewrite. Record the set of blocks in which the ssa names are used to
503 1.1 mrg USE_BLOCKS. Record the SSA names that will need exit PHIs in NEED_PHIS. */
504 1.1 mrg
505 1.1 mrg static void
506 1.1 mrg find_uses_to_rename_def (tree def, bitmap *use_blocks, bitmap need_phis)
507 1.1 mrg {
508 1.1 mrg gimple *use_stmt;
509 1.1 mrg imm_use_iterator imm_iter;
510 1.1 mrg
511 1.1 mrg FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
512 1.1 mrg {
513 1.1 mrg if (is_gimple_debug (use_stmt))
514 1.1 mrg continue;
515 1.1 mrg
516 1.1 mrg basic_block use_bb = gimple_bb (use_stmt);
517 1.1 mrg
518 1.1 mrg use_operand_p use_p;
519 1.1 mrg FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
520 1.1 mrg {
521 1.1 mrg if (gimple_code (use_stmt) == GIMPLE_PHI)
522 1.1 mrg {
523 1.1 mrg edge e = gimple_phi_arg_edge (as_a <gphi *> (use_stmt),
524 1.1 mrg PHI_ARG_INDEX_FROM_USE (use_p));
525 1.1 mrg use_bb = e->src;
526 1.1 mrg }
527 1.1 mrg find_uses_to_rename_use (use_bb, USE_FROM_PTR (use_p), use_blocks,
528 1.1 mrg need_phis);
529 1.1 mrg }
530 1.1 mrg }
531 1.1 mrg }
532 1.1 mrg
533 1.1 mrg /* Marks names matching USE_FLAGS that are defined in LOOP and used outside of
534 1.1 mrg it for rewrite. Records the set of blocks in which the ssa names are used to
535 1.1 mrg USE_BLOCKS. Record the SSA names that will need exit PHIs in NEED_PHIS. */
536 1.1 mrg
537 1.1 mrg static void
538 1.1 mrg find_uses_to_rename_in_loop (class loop *loop, bitmap *use_blocks,
539 1.1 mrg bitmap need_phis, int use_flags)
540 1.1 mrg {
541 1.1 mrg bool do_virtuals = (use_flags & SSA_OP_VIRTUAL_USES) != 0;
542 1.1 mrg bool do_nonvirtuals = (use_flags & SSA_OP_USE) != 0;
543 1.1 mrg int def_flags = ((do_virtuals ? SSA_OP_VIRTUAL_DEFS : 0)
544 1.1 mrg | (do_nonvirtuals ? SSA_OP_DEF : 0));
545 1.1 mrg
546 1.1 mrg
547 1.1 mrg basic_block *bbs = get_loop_body (loop);
548 1.1 mrg
549 1.1 mrg for (unsigned int i = 0; i < loop->num_nodes; i++)
550 1.1 mrg {
551 1.1 mrg basic_block bb = bbs[i];
552 1.1 mrg
553 1.1 mrg for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
554 1.1 mrg gsi_next (&bsi))
555 1.1 mrg {
556 1.1 mrg gphi *phi = bsi.phi ();
557 1.1 mrg tree res = gimple_phi_result (phi);
558 1.1 mrg bool virtual_p = virtual_operand_p (res);
559 1.1 mrg if ((virtual_p && do_virtuals)
560 1.1 mrg || (!virtual_p && do_nonvirtuals))
561 1.1 mrg find_uses_to_rename_def (res, use_blocks, need_phis);
562 1.1 mrg }
563 1.1 mrg
564 1.1 mrg for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
565 1.1 mrg gsi_next (&bsi))
566 1.1 mrg {
567 1.1 mrg gimple *stmt = gsi_stmt (bsi);
568 1.1 mrg /* FOR_EACH_SSA_TREE_OPERAND iterator does not allows
569 1.1 mrg SSA_OP_VIRTUAL_DEFS only. */
570 1.1 mrg if (def_flags == SSA_OP_VIRTUAL_DEFS)
571 1.1 mrg {
572 1.1 mrg tree vdef = gimple_vdef (stmt);
573 1.1 mrg if (vdef != NULL)
574 1.1 mrg find_uses_to_rename_def (vdef, use_blocks, need_phis);
575 1.1 mrg }
576 1.1 mrg else
577 1.1 mrg {
578 1.1 mrg tree var;
579 1.1 mrg ssa_op_iter iter;
580 1.1 mrg FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, def_flags)
581 1.1 mrg find_uses_to_rename_def (var, use_blocks, need_phis);
582 1.1 mrg }
583 1.1 mrg }
584 1.1 mrg }
585 1.1 mrg
586 1.1 mrg XDELETEVEC (bbs);
587 1.1 mrg }
588 1.1 mrg
589 1.1 mrg /* Rewrites the program into a loop closed ssa form -- i.e. inserts extra
590 1.1 mrg phi nodes to ensure that no variable is used outside the loop it is
591 1.1 mrg defined in.
592 1.1 mrg
593 1.1 mrg This strengthening of the basic ssa form has several advantages:
594 1.1 mrg
595 1.1 mrg 1) Updating it during unrolling/peeling/versioning is trivial, since
596 1.1 mrg we do not need to care about the uses outside of the loop.
597 1.1 mrg The same applies to virtual operands which are also rewritten into
598 1.1 mrg loop closed SSA form. Note that virtual operands are always live
599 1.1 mrg until function exit.
600 1.1 mrg 2) The behavior of all uses of an induction variable is the same.
601 1.1 mrg Without this, you need to distinguish the case when the variable
602 1.1 mrg is used outside of the loop it is defined in, for example
603 1.1 mrg
604 1.1 mrg for (i = 0; i < 100; i++)
605 1.1 mrg {
606 1.1 mrg for (j = 0; j < 100; j++)
607 1.1 mrg {
608 1.1 mrg k = i + j;
609 1.1 mrg use1 (k);
610 1.1 mrg }
611 1.1 mrg use2 (k);
612 1.1 mrg }
613 1.1 mrg
614 1.1 mrg Looking from the outer loop with the normal SSA form, the first use of k
615 1.1 mrg is not well-behaved, while the second one is an induction variable with
616 1.1 mrg base 99 and step 1.
617 1.1 mrg
618 1.1 mrg If LOOP is non-null, only rewrite uses that have defs in LOOP. Otherwise,
619 1.1 mrg if CHANGED_BBS is not NULL, we look for uses outside loops only in the
620 1.1 mrg basic blocks in this set.
621 1.1 mrg
622 1.1 mrg USE_FLAGS allows us to specify whether we want virtual, non-virtual or
623 1.1 mrg both variables rewritten.
624 1.1 mrg
625 1.1 mrg UPDATE_FLAG is used in the call to update_ssa. See
626 1.1 mrg TODO_update_ssa* for documentation. */
627 1.1 mrg
628 1.1 mrg void
629 1.1 mrg rewrite_into_loop_closed_ssa_1 (bitmap changed_bbs, unsigned update_flag,
630 1.1 mrg int use_flags, class loop *loop)
631 1.1 mrg {
632 1.1 mrg bitmap *use_blocks;
633 1.1 mrg bitmap names_to_rename;
634 1.1 mrg
635 1.1 mrg loops_state_set (LOOP_CLOSED_SSA);
636 1.1 mrg if (number_of_loops (cfun) <= 1)
637 1.1 mrg return;
638 1.1 mrg
639 1.1 mrg /* If the pass has caused the SSA form to be out-of-date, update it
640 1.1 mrg now. */
641 1.1 mrg if (update_flag != 0)
642 1.1 mrg update_ssa (update_flag);
643 1.1 mrg else if (flag_checking)
644 1.1 mrg verify_ssa (true, true);
645 1.1 mrg
646 1.1 mrg bitmap_obstack_initialize (&loop_renamer_obstack);
647 1.1 mrg
648 1.1 mrg names_to_rename = BITMAP_ALLOC (&loop_renamer_obstack);
649 1.1 mrg
650 1.1 mrg /* Uses of names to rename. We don't have to initialize this array,
651 1.1 mrg because we know that we will only have entries for the SSA names
652 1.1 mrg in NAMES_TO_RENAME. */
653 1.1 mrg use_blocks = XNEWVEC (bitmap, num_ssa_names);
654 1.1 mrg
655 1.1 mrg if (loop != NULL)
656 1.1 mrg {
657 1.1 mrg gcc_assert (changed_bbs == NULL);
658 1.1 mrg find_uses_to_rename_in_loop (loop, use_blocks, names_to_rename,
659 1.1 mrg use_flags);
660 1.1 mrg }
661 1.1 mrg else
662 1.1 mrg {
663 1.1 mrg gcc_assert (loop == NULL);
664 1.1 mrg find_uses_to_rename (changed_bbs, use_blocks, names_to_rename, use_flags);
665 1.1 mrg }
666 1.1 mrg
667 1.1 mrg if (!bitmap_empty_p (names_to_rename))
668 1.1 mrg {
669 1.1 mrg /* An array of bitmaps where LOOP_EXITS[I] is the set of basic blocks
670 1.1 mrg that are the destination of an edge exiting loop number I. */
671 1.1 mrg bitmap *loop_exits = XNEWVEC (bitmap, number_of_loops (cfun));
672 1.1 mrg get_loops_exits (loop_exits);
673 1.1 mrg
674 1.1 mrg /* Add the PHI nodes on exits of the loops for the names we need to
675 1.1 mrg rewrite. */
676 1.1 mrg add_exit_phis (names_to_rename, use_blocks, loop_exits);
677 1.1 mrg
678 1.1 mrg free (loop_exits);
679 1.1 mrg
680 1.1 mrg /* Fix up all the names found to be used outside their original
681 1.1 mrg loops. */
682 1.1 mrg update_ssa (TODO_update_ssa);
683 1.1 mrg }
684 1.1 mrg
685 1.1 mrg bitmap_obstack_release (&loop_renamer_obstack);
686 1.1 mrg free (use_blocks);
687 1.1 mrg }
688 1.1 mrg
689 1.1 mrg /* Rewrites the non-virtual defs and uses into a loop closed ssa form. If
690 1.1 mrg CHANGED_BBS is not NULL, we look for uses outside loops only in the basic
691 1.1 mrg blocks in this set. UPDATE_FLAG is used in the call to update_ssa. See
692 1.1 mrg TODO_update_ssa* for documentation. */
693 1.1 mrg
694 1.1 mrg void
695 1.1 mrg rewrite_into_loop_closed_ssa (bitmap changed_bbs, unsigned update_flag)
696 1.1 mrg {
697 1.1 mrg rewrite_into_loop_closed_ssa_1 (changed_bbs, update_flag, SSA_OP_USE, NULL);
698 1.1 mrg }
699 1.1 mrg
700 1.1 mrg /* Rewrites virtual defs and uses with def in LOOP into loop closed ssa
701 1.1 mrg form. */
702 1.1 mrg
703 1.1 mrg void
704 1.1 mrg rewrite_virtuals_into_loop_closed_ssa (class loop *loop)
705 1.1 mrg {
706 1.1 mrg rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_VIRTUAL_USES, loop);
707 1.1 mrg }
708 1.1 mrg
709 1.1 mrg /* Check invariants of the loop closed ssa form for the def in DEF_BB. */
710 1.1 mrg
711 1.1 mrg static void
712 1.1 mrg check_loop_closed_ssa_def (basic_block def_bb, tree def)
713 1.1 mrg {
714 1.1 mrg use_operand_p use_p;
715 1.1 mrg imm_use_iterator iterator;
716 1.1 mrg FOR_EACH_IMM_USE_FAST (use_p, iterator, def)
717 1.1 mrg {
718 1.1 mrg if (is_gimple_debug (USE_STMT (use_p)))
719 1.1 mrg continue;
720 1.1 mrg
721 1.1 mrg basic_block use_bb = gimple_bb (USE_STMT (use_p));
722 1.1 mrg if (is_a <gphi *> (USE_STMT (use_p)))
723 1.1 mrg use_bb = EDGE_PRED (use_bb, PHI_ARG_INDEX_FROM_USE (use_p))->src;
724 1.1 mrg
725 1.1 mrg gcc_assert (flow_bb_inside_loop_p (def_bb->loop_father, use_bb));
726 1.1 mrg }
727 1.1 mrg }
728 1.1 mrg
729 1.1 mrg /* Checks invariants of loop closed ssa form in BB. */
730 1.1 mrg
731 1.1 mrg static void
732 1.1 mrg check_loop_closed_ssa_bb (basic_block bb)
733 1.1 mrg {
734 1.1 mrg for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
735 1.1 mrg gsi_next (&bsi))
736 1.1 mrg {
737 1.1 mrg gphi *phi = bsi.phi ();
738 1.1 mrg
739 1.1 mrg if (!virtual_operand_p (PHI_RESULT (phi)))
740 1.1 mrg check_loop_closed_ssa_def (bb, PHI_RESULT (phi));
741 1.1 mrg }
742 1.1 mrg
743 1.1 mrg for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb); !gsi_end_p (bsi);
744 1.1 mrg gsi_next_nondebug (&bsi))
745 1.1 mrg {
746 1.1 mrg ssa_op_iter iter;
747 1.1 mrg tree var;
748 1.1 mrg gimple *stmt = gsi_stmt (bsi);
749 1.1 mrg
750 1.1 mrg FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_DEF)
751 1.1 mrg check_loop_closed_ssa_def (bb, var);
752 1.1 mrg }
753 1.1 mrg }
754 1.1 mrg
755 1.1 mrg /* Checks that invariants of the loop closed ssa form are preserved.
756 1.1 mrg Call verify_ssa when VERIFY_SSA_P is true. Note all loops are checked
757 1.1 mrg if LOOP is NULL, otherwise, only LOOP is checked. */
758 1.1 mrg
759 1.1 mrg DEBUG_FUNCTION void
760 1.1 mrg verify_loop_closed_ssa (bool verify_ssa_p, class loop *loop)
761 1.1 mrg {
762 1.1 mrg if (number_of_loops (cfun) <= 1)
763 1.1 mrg return;
764 1.1 mrg
765 1.1 mrg if (verify_ssa_p)
766 1.1 mrg verify_ssa (false, true);
767 1.1 mrg
768 1.1 mrg timevar_push (TV_VERIFY_LOOP_CLOSED);
769 1.1 mrg
770 1.1 mrg if (loop == NULL)
771 1.1 mrg {
772 1.1 mrg basic_block bb;
773 1.1 mrg
774 1.1 mrg FOR_EACH_BB_FN (bb, cfun)
775 1.1 mrg if (bb->loop_father && bb->loop_father->num > 0)
776 1.1 mrg check_loop_closed_ssa_bb (bb);
777 1.1 mrg }
778 1.1 mrg else
779 1.1 mrg {
780 1.1 mrg basic_block *bbs = get_loop_body (loop);
781 1.1 mrg
782 1.1 mrg for (unsigned i = 0; i < loop->num_nodes; ++i)
783 1.1 mrg check_loop_closed_ssa_bb (bbs[i]);
784 1.1 mrg
785 1.1 mrg free (bbs);
786 1.1 mrg }
787 1.1 mrg
788 1.1 mrg timevar_pop (TV_VERIFY_LOOP_CLOSED);
789 1.1 mrg }
790 1.1 mrg
791 1.1 mrg /* Split loop exit edge EXIT. The things are a bit complicated by a need to
792 1.1 mrg preserve the loop closed ssa form. If COPY_CONSTANTS_P is true then
793 1.1 mrg forwarder PHIs are also created for constant arguments.
794 1.1 mrg The newly created block is returned. */
795 1.1 mrg
796 1.1 mrg basic_block
797 1.1 mrg split_loop_exit_edge (edge exit, bool copy_constants_p)
798 1.1 mrg {
799 1.1 mrg basic_block dest = exit->dest;
800 1.1 mrg basic_block bb = split_edge (exit);
801 1.1 mrg gphi *phi, *new_phi;
802 1.1 mrg tree new_name, name;
803 1.1 mrg use_operand_p op_p;
804 1.1 mrg gphi_iterator psi;
805 1.1 mrg location_t locus;
806 1.1 mrg
807 1.1 mrg for (psi = gsi_start_phis (dest); !gsi_end_p (psi); gsi_next (&psi))
808 1.1 mrg {
809 1.1 mrg phi = psi.phi ();
810 1.1 mrg op_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (bb));
811 1.1 mrg locus = gimple_phi_arg_location_from_edge (phi, single_succ_edge (bb));
812 1.1 mrg
813 1.1 mrg name = USE_FROM_PTR (op_p);
814 1.1 mrg
815 1.1 mrg /* If the argument of the PHI node is a constant, we do not need
816 1.1 mrg to keep it inside loop. */
817 1.1 mrg if (TREE_CODE (name) != SSA_NAME
818 1.1 mrg && !copy_constants_p)
819 1.1 mrg continue;
820 1.1 mrg
821 1.1 mrg /* Otherwise create an auxiliary phi node that will copy the value
822 1.1 mrg of the SSA name out of the loop. */
823 1.1 mrg new_name = duplicate_ssa_name (PHI_RESULT (phi), NULL);
824 1.1 mrg new_phi = create_phi_node (new_name, bb);
825 1.1 mrg add_phi_arg (new_phi, name, exit, locus);
826 1.1 mrg SET_USE (op_p, new_name);
827 1.1 mrg }
828 1.1 mrg
829 1.1 mrg return bb;
830 1.1 mrg }
831 1.1 mrg
832 1.1 mrg /* Returns the basic block in that statements should be emitted for induction
833 1.1 mrg variables incremented at the end of the LOOP. */
834 1.1 mrg
835 1.1 mrg basic_block
836 1.1 mrg ip_end_pos (class loop *loop)
837 1.1 mrg {
838 1.1 mrg return loop->latch;
839 1.1 mrg }
840 1.1 mrg
841 1.1 mrg /* Returns the basic block in that statements should be emitted for induction
842 1.1 mrg variables incremented just before exit condition of a LOOP. */
843 1.1 mrg
844 1.1 mrg basic_block
845 1.1 mrg ip_normal_pos (class loop *loop)
846 1.1 mrg {
847 1.1 mrg gimple *last;
848 1.1 mrg basic_block bb;
849 1.1 mrg edge exit;
850 1.1 mrg
851 1.1 mrg if (!single_pred_p (loop->latch))
852 1.1 mrg return NULL;
853 1.1 mrg
854 1.1 mrg bb = single_pred (loop->latch);
855 1.1 mrg last = last_stmt (bb);
856 1.1 mrg if (!last
857 1.1 mrg || gimple_code (last) != GIMPLE_COND)
858 1.1 mrg return NULL;
859 1.1 mrg
860 1.1 mrg exit = EDGE_SUCC (bb, 0);
861 1.1 mrg if (exit->dest == loop->latch)
862 1.1 mrg exit = EDGE_SUCC (bb, 1);
863 1.1 mrg
864 1.1 mrg if (flow_bb_inside_loop_p (loop, exit->dest))
865 1.1 mrg return NULL;
866 1.1 mrg
867 1.1 mrg return bb;
868 1.1 mrg }
869 1.1 mrg
870 1.1 mrg /* Stores the standard position for induction variable increment in LOOP
871 1.1 mrg (just before the exit condition if it is available and latch block is empty,
872 1.1 mrg end of the latch block otherwise) to BSI. INSERT_AFTER is set to true if
873 1.1 mrg the increment should be inserted after *BSI. */
874 1.1 mrg
875 1.1 mrg void
876 1.1 mrg standard_iv_increment_position (class loop *loop, gimple_stmt_iterator *bsi,
877 1.1 mrg bool *insert_after)
878 1.1 mrg {
879 1.1 mrg basic_block bb = ip_normal_pos (loop), latch = ip_end_pos (loop);
880 1.1 mrg gimple *last = last_stmt (latch);
881 1.1 mrg
882 1.1 mrg if (!bb
883 1.1 mrg || (last && gimple_code (last) != GIMPLE_LABEL))
884 1.1 mrg {
885 1.1 mrg *bsi = gsi_last_bb (latch);
886 1.1 mrg *insert_after = true;
887 1.1 mrg }
888 1.1 mrg else
889 1.1 mrg {
890 1.1 mrg *bsi = gsi_last_bb (bb);
891 1.1 mrg *insert_after = false;
892 1.1 mrg }
893 1.1 mrg }
894 1.1 mrg
895 1.1 mrg /* Copies phi node arguments for duplicated blocks. The index of the first
896 1.1 mrg duplicated block is FIRST_NEW_BLOCK. */
897 1.1 mrg
898 1.1 mrg static void
899 1.1 mrg copy_phi_node_args (unsigned first_new_block)
900 1.1 mrg {
901 1.1 mrg unsigned i;
902 1.1 mrg
903 1.1 mrg for (i = first_new_block; i < (unsigned) last_basic_block_for_fn (cfun); i++)
904 1.1 mrg BASIC_BLOCK_FOR_FN (cfun, i)->flags |= BB_DUPLICATED;
905 1.1 mrg
906 1.1 mrg for (i = first_new_block; i < (unsigned) last_basic_block_for_fn (cfun); i++)
907 1.1 mrg add_phi_args_after_copy_bb (BASIC_BLOCK_FOR_FN (cfun, i));
908 1.1 mrg
909 1.1 mrg for (i = first_new_block; i < (unsigned) last_basic_block_for_fn (cfun); i++)
910 1.1 mrg BASIC_BLOCK_FOR_FN (cfun, i)->flags &= ~BB_DUPLICATED;
911 1.1 mrg }
912 1.1 mrg
913 1.1 mrg
914 1.1 mrg /* The same as cfgloopmanip.cc:duplicate_loop_body_to_header_edge, but also
915 1.1 mrg updates the PHI nodes at start of the copied region. In order to
916 1.1 mrg achieve this, only loops whose exits all lead to the same location
917 1.1 mrg are handled.
918 1.1 mrg
919 1.1 mrg Notice that we do not completely update the SSA web after
920 1.1 mrg duplication. The caller is responsible for calling update_ssa
921 1.1 mrg after the loop has been duplicated. */
922 1.1 mrg
923 1.1 mrg bool
924 1.1 mrg gimple_duplicate_loop_body_to_header_edge (class loop *loop, edge e,
925 1.1 mrg unsigned int ndupl,
926 1.1 mrg sbitmap wont_exit, edge orig,
927 1.1 mrg vec<edge> *to_remove, int flags)
928 1.1 mrg {
929 1.1 mrg unsigned first_new_block;
930 1.1 mrg
931 1.1 mrg if (!loops_state_satisfies_p (LOOPS_HAVE_SIMPLE_LATCHES))
932 1.1 mrg return false;
933 1.1 mrg if (!loops_state_satisfies_p (LOOPS_HAVE_PREHEADERS))
934 1.1 mrg return false;
935 1.1 mrg
936 1.1 mrg first_new_block = last_basic_block_for_fn (cfun);
937 1.1 mrg if (!duplicate_loop_body_to_header_edge (loop, e, ndupl, wont_exit, orig,
938 1.1 mrg to_remove, flags))
939 1.1 mrg return false;
940 1.1 mrg
941 1.1 mrg /* Readd the removed phi args for e. */
942 1.1 mrg flush_pending_stmts (e);
943 1.1 mrg
944 1.1 mrg /* Copy the phi node arguments. */
945 1.1 mrg copy_phi_node_args (first_new_block);
946 1.1 mrg
947 1.1 mrg scev_reset ();
948 1.1 mrg
949 1.1 mrg return true;
950 1.1 mrg }
951 1.1 mrg
952 1.1 mrg /* Returns true if we can unroll LOOP FACTOR times. Number
953 1.1 mrg of iterations of the loop is returned in NITER. */
954 1.1 mrg
955 1.1 mrg bool
956 1.1 mrg can_unroll_loop_p (class loop *loop, unsigned factor,
957 1.1 mrg class tree_niter_desc *niter)
958 1.1 mrg {
959 1.1 mrg edge exit;
960 1.1 mrg
961 1.1 mrg /* Check whether unrolling is possible. We only want to unroll loops
962 1.1 mrg for that we are able to determine number of iterations. We also
963 1.1 mrg want to split the extra iterations of the loop from its end,
964 1.1 mrg therefore we require that the loop has precisely one
965 1.1 mrg exit. */
966 1.1 mrg
967 1.1 mrg exit = single_dom_exit (loop);
968 1.1 mrg if (!exit)
969 1.1 mrg return false;
970 1.1 mrg
971 1.1 mrg if (!number_of_iterations_exit (loop, exit, niter, false)
972 1.1 mrg || niter->cmp == ERROR_MARK
973 1.1 mrg /* Scalar evolutions analysis might have copy propagated
974 1.1 mrg the abnormal ssa names into these expressions, hence
975 1.1 mrg emitting the computations based on them during loop
976 1.1 mrg unrolling might create overlapping life ranges for
977 1.1 mrg them, and failures in out-of-ssa. */
978 1.1 mrg || contains_abnormal_ssa_name_p (niter->may_be_zero)
979 1.1 mrg || contains_abnormal_ssa_name_p (niter->control.base)
980 1.1 mrg || contains_abnormal_ssa_name_p (niter->control.step)
981 1.1 mrg || contains_abnormal_ssa_name_p (niter->bound))
982 1.1 mrg return false;
983 1.1 mrg
984 1.1 mrg /* And of course, we must be able to duplicate the loop. */
985 1.1 mrg if (!can_duplicate_loop_p (loop))
986 1.1 mrg return false;
987 1.1 mrg
988 1.1 mrg /* The final loop should be small enough. */
989 1.1 mrg if (tree_num_loop_insns (loop, &eni_size_weights) * factor
990 1.1 mrg > (unsigned) param_max_unrolled_insns)
991 1.1 mrg return false;
992 1.1 mrg
993 1.1 mrg return true;
994 1.1 mrg }
995 1.1 mrg
996 1.1 mrg /* Determines the conditions that control execution of LOOP unrolled FACTOR
997 1.1 mrg times. DESC is number of iterations of LOOP. ENTER_COND is set to
998 1.1 mrg condition that must be true if the main loop can be entered.
999 1.1 mrg If the loop does not always iterate an exact multiple of FACTOR times,
1000 1.1 mrg EXIT_BASE, EXIT_STEP, EXIT_CMP and EXIT_BOUND are set to values describing
1001 1.1 mrg how the exit from the unrolled loop should be controlled. Otherwise,
1002 1.1 mrg the trees are set to null and EXIT_CMP is set to ERROR_MARK. */
1003 1.1 mrg
1004 1.1 mrg static void
1005 1.1 mrg determine_exit_conditions (class loop *loop, class tree_niter_desc *desc,
1006 1.1 mrg unsigned factor, tree *enter_cond,
1007 1.1 mrg tree *exit_base, tree *exit_step,
1008 1.1 mrg enum tree_code *exit_cmp, tree *exit_bound)
1009 1.1 mrg {
1010 1.1 mrg gimple_seq stmts;
1011 1.1 mrg tree base = desc->control.base;
1012 1.1 mrg tree step = desc->control.step;
1013 1.1 mrg tree bound = desc->bound;
1014 1.1 mrg tree type = TREE_TYPE (step);
1015 1.1 mrg tree bigstep, delta;
1016 1.1 mrg tree min = lower_bound_in_type (type, type);
1017 1.1 mrg tree max = upper_bound_in_type (type, type);
1018 1.1 mrg enum tree_code cmp = desc->cmp;
1019 1.1 mrg tree cond = boolean_true_node, assum;
1020 1.1 mrg
1021 1.1 mrg /* For pointers, do the arithmetics in the type of step. */
1022 1.1 mrg base = fold_convert (type, base);
1023 1.1 mrg bound = fold_convert (type, bound);
1024 1.1 mrg
1025 1.1 mrg *enter_cond = boolean_false_node;
1026 1.1 mrg *exit_base = NULL_TREE;
1027 1.1 mrg *exit_step = NULL_TREE;
1028 1.1 mrg *exit_cmp = ERROR_MARK;
1029 1.1 mrg *exit_bound = NULL_TREE;
1030 1.1 mrg gcc_assert (cmp != ERROR_MARK);
1031 1.1 mrg
1032 1.1 mrg /* We only need to be correct when we answer question
1033 1.1 mrg "Do at least FACTOR more iterations remain?" in the unrolled loop.
1034 1.1 mrg Thus, transforming BASE + STEP * i <> BOUND to
1035 1.1 mrg BASE + STEP * i < BOUND is ok. */
1036 1.1 mrg if (cmp == NE_EXPR)
1037 1.1 mrg {
1038 1.1 mrg if (tree_int_cst_sign_bit (step))
1039 1.1 mrg cmp = GT_EXPR;
1040 1.1 mrg else
1041 1.1 mrg cmp = LT_EXPR;
1042 1.1 mrg }
1043 1.1 mrg else if (cmp == LT_EXPR)
1044 1.1 mrg {
1045 1.1 mrg gcc_assert (!tree_int_cst_sign_bit (step));
1046 1.1 mrg }
1047 1.1 mrg else if (cmp == GT_EXPR)
1048 1.1 mrg {
1049 1.1 mrg gcc_assert (tree_int_cst_sign_bit (step));
1050 1.1 mrg }
1051 1.1 mrg else
1052 1.1 mrg gcc_unreachable ();
1053 1.1 mrg
1054 1.1 mrg /* The main body of the loop may be entered iff:
1055 1.1 mrg
1056 1.1 mrg 1) desc->may_be_zero is false.
1057 1.1 mrg 2) it is possible to check that there are at least FACTOR iterations
1058 1.1 mrg of the loop, i.e., BOUND - step * FACTOR does not overflow.
1059 1.1 mrg 3) # of iterations is at least FACTOR */
1060 1.1 mrg
1061 1.1 mrg if (!integer_zerop (desc->may_be_zero))
1062 1.1 mrg cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1063 1.1 mrg invert_truthvalue (desc->may_be_zero),
1064 1.1 mrg cond);
1065 1.1 mrg
1066 1.1 mrg bigstep = fold_build2 (MULT_EXPR, type, step,
1067 1.1 mrg build_int_cst_type (type, factor));
1068 1.1 mrg delta = fold_build2 (MINUS_EXPR, type, bigstep, step);
1069 1.1 mrg if (cmp == LT_EXPR)
1070 1.1 mrg assum = fold_build2 (GE_EXPR, boolean_type_node,
1071 1.1 mrg bound,
1072 1.1 mrg fold_build2 (PLUS_EXPR, type, min, delta));
1073 1.1 mrg else
1074 1.1 mrg assum = fold_build2 (LE_EXPR, boolean_type_node,
1075 1.1 mrg bound,
1076 1.1 mrg fold_build2 (PLUS_EXPR, type, max, delta));
1077 1.1 mrg cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond);
1078 1.1 mrg
1079 1.1 mrg bound = fold_build2 (MINUS_EXPR, type, bound, delta);
1080 1.1 mrg assum = fold_build2 (cmp, boolean_type_node, base, bound);
1081 1.1 mrg cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond);
1082 1.1 mrg
1083 1.1 mrg if (integer_nonzerop (cond)
1084 1.1 mrg && integer_zerop (desc->may_be_zero))
1085 1.1 mrg {
1086 1.1 mrg /* Convert the latch count to an iteration count. */
1087 1.1 mrg tree niter = fold_build2 (PLUS_EXPR, type, desc->niter,
1088 1.1 mrg build_one_cst (type));
1089 1.1 mrg if (multiple_of_p (type, niter, bigstep))
1090 1.1 mrg return;
1091 1.1 mrg }
1092 1.1 mrg
1093 1.1 mrg cond = force_gimple_operand (unshare_expr (cond), &stmts, false, NULL_TREE);
1094 1.1 mrg if (stmts)
1095 1.1 mrg gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1096 1.1 mrg /* cond now may be a gimple comparison, which would be OK, but also any
1097 1.1 mrg other gimple rhs (say a && b). In this case we need to force it to
1098 1.1 mrg operand. */
1099 1.1 mrg if (!is_gimple_condexpr (cond))
1100 1.1 mrg {
1101 1.1 mrg cond = force_gimple_operand (cond, &stmts, true, NULL_TREE);
1102 1.1 mrg if (stmts)
1103 1.1 mrg gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1104 1.1 mrg }
1105 1.1 mrg *enter_cond = cond;
1106 1.1 mrg
1107 1.1 mrg base = force_gimple_operand (unshare_expr (base), &stmts, true, NULL_TREE);
1108 1.1 mrg if (stmts)
1109 1.1 mrg gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1110 1.1 mrg bound = force_gimple_operand (unshare_expr (bound), &stmts, true, NULL_TREE);
1111 1.1 mrg if (stmts)
1112 1.1 mrg gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1113 1.1 mrg
1114 1.1 mrg *exit_base = base;
1115 1.1 mrg *exit_step = bigstep;
1116 1.1 mrg *exit_cmp = cmp;
1117 1.1 mrg *exit_bound = bound;
1118 1.1 mrg }
1119 1.1 mrg
1120 1.1 mrg /* Scales the frequencies of all basic blocks in LOOP that are strictly
1121 1.1 mrg dominated by BB by NUM/DEN. */
1122 1.1 mrg
1123 1.1 mrg static void
1124 1.1 mrg scale_dominated_blocks_in_loop (class loop *loop, basic_block bb,
1125 1.1 mrg profile_count num, profile_count den)
1126 1.1 mrg {
1127 1.1 mrg basic_block son;
1128 1.1 mrg
1129 1.1 mrg if (!den.nonzero_p () && !(num == profile_count::zero ()))
1130 1.1 mrg return;
1131 1.1 mrg
1132 1.1 mrg for (son = first_dom_son (CDI_DOMINATORS, bb);
1133 1.1 mrg son;
1134 1.1 mrg son = next_dom_son (CDI_DOMINATORS, son))
1135 1.1 mrg {
1136 1.1 mrg if (!flow_bb_inside_loop_p (loop, son))
1137 1.1 mrg continue;
1138 1.1 mrg scale_bbs_frequencies_profile_count (&son, 1, num, den);
1139 1.1 mrg scale_dominated_blocks_in_loop (loop, son, num, den);
1140 1.1 mrg }
1141 1.1 mrg }
1142 1.1 mrg
1143 1.1 mrg /* Return estimated niter for LOOP after unrolling by FACTOR times. */
1144 1.1 mrg
1145 1.1 mrg gcov_type
1146 1.1 mrg niter_for_unrolled_loop (class loop *loop, unsigned factor)
1147 1.1 mrg {
1148 1.1 mrg gcc_assert (factor != 0);
1149 1.1 mrg bool profile_p = false;
1150 1.1 mrg gcov_type est_niter = expected_loop_iterations_unbounded (loop, &profile_p);
1151 1.1 mrg /* Note that this is really CEIL (est_niter + 1, factor) - 1, where the
1152 1.1 mrg "+ 1" converts latch iterations to loop iterations and the "- 1"
1153 1.1 mrg converts back. */
1154 1.1 mrg gcov_type new_est_niter = est_niter / factor;
1155 1.1 mrg
1156 1.1 mrg if (est_niter == -1)
1157 1.1 mrg return -1;
1158 1.1 mrg
1159 1.1 mrg /* Without profile feedback, loops for which we do not know a better estimate
1160 1.1 mrg are assumed to roll 10 times. When we unroll such loop, it appears to
1161 1.1 mrg roll too little, and it may even seem to be cold. To avoid this, we
1162 1.1 mrg ensure that the created loop appears to roll at least 5 times (but at
1163 1.1 mrg most as many times as before unrolling). Don't do adjustment if profile
1164 1.1 mrg feedback is present. */
1165 1.1 mrg if (new_est_niter < 5 && !profile_p)
1166 1.1 mrg {
1167 1.1 mrg if (est_niter < 5)
1168 1.1 mrg new_est_niter = est_niter;
1169 1.1 mrg else
1170 1.1 mrg new_est_niter = 5;
1171 1.1 mrg }
1172 1.1 mrg
1173 1.1 mrg if (loop->any_upper_bound)
1174 1.1 mrg {
1175 1.1 mrg /* As above, this is really CEIL (upper_bound + 1, factor) - 1. */
1176 1.1 mrg widest_int bound = wi::udiv_floor (loop->nb_iterations_upper_bound,
1177 1.1 mrg factor);
1178 1.1 mrg if (wi::ltu_p (bound, new_est_niter))
1179 1.1 mrg new_est_niter = bound.to_uhwi ();
1180 1.1 mrg }
1181 1.1 mrg
1182 1.1 mrg return new_est_niter;
1183 1.1 mrg }
1184 1.1 mrg
1185 1.1 mrg /* Unroll LOOP FACTOR times. LOOP is known to have a single exit edge
1186 1.1 mrg whose source block dominates the latch. DESC describes the number of
1187 1.1 mrg iterations of LOOP.
1188 1.1 mrg
1189 1.1 mrg If N is number of iterations of the loop and MAY_BE_ZERO is the condition
1190 1.1 mrg under that loop exits in the first iteration even if N != 0,
1191 1.1 mrg
1192 1.1 mrg while (1)
1193 1.1 mrg {
1194 1.1 mrg x = phi (init, next);
1195 1.1 mrg
1196 1.1 mrg pre;
1197 1.1 mrg if (st)
1198 1.1 mrg break;
1199 1.1 mrg post;
1200 1.1 mrg }
1201 1.1 mrg
1202 1.1 mrg becomes (with possibly the exit conditions formulated a bit differently,
1203 1.1 mrg avoiding the need to create a new iv):
1204 1.1 mrg
1205 1.1 mrg if (MAY_BE_ZERO || N < FACTOR)
1206 1.1 mrg goto rest;
1207 1.1 mrg
1208 1.1 mrg do
1209 1.1 mrg {
1210 1.1 mrg x = phi (init, next);
1211 1.1 mrg
1212 1.1 mrg pre;
1213 1.1 mrg post;
1214 1.1 mrg pre;
1215 1.1 mrg post;
1216 1.1 mrg ...
1217 1.1 mrg pre;
1218 1.1 mrg post;
1219 1.1 mrg N -= FACTOR;
1220 1.1 mrg
1221 1.1 mrg } while (N >= FACTOR);
1222 1.1 mrg
1223 1.1 mrg rest:
1224 1.1 mrg init' = phi (init, x);
1225 1.1 mrg
1226 1.1 mrg while (1)
1227 1.1 mrg {
1228 1.1 mrg x = phi (init', next);
1229 1.1 mrg
1230 1.1 mrg pre;
1231 1.1 mrg if (st)
1232 1.1 mrg break;
1233 1.1 mrg post;
1234 1.1 mrg }
1235 1.1 mrg
1236 1.1 mrg Before the loop is unrolled, TRANSFORM is called for it (only for the
1237 1.1 mrg unrolled loop, but not for its versioned copy). DATA is passed to
1238 1.1 mrg TRANSFORM. */
1239 1.1 mrg
1240 1.1 mrg /* Probability in % that the unrolled loop is entered. Just a guess. */
1241 1.1 mrg #define PROB_UNROLLED_LOOP_ENTERED 90
1242 1.1 mrg
1243 1.1 mrg void
1244 1.1 mrg tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
1245 1.1 mrg class tree_niter_desc *desc,
1246 1.1 mrg transform_callback transform,
1247 1.1 mrg void *data)
1248 1.1 mrg {
1249 1.1 mrg gcov_type new_est_niter = niter_for_unrolled_loop (loop, factor);
1250 1.1 mrg unsigned irr = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
1251 1.1 mrg
1252 1.1 mrg enum tree_code exit_cmp;
1253 1.1 mrg tree enter_main_cond, exit_base, exit_step, exit_bound;
1254 1.1 mrg determine_exit_conditions (loop, desc, factor,
1255 1.1 mrg &enter_main_cond, &exit_base, &exit_step,
1256 1.1 mrg &exit_cmp, &exit_bound);
1257 1.1 mrg bool single_loop_p = !exit_base;
1258 1.1 mrg
1259 1.1 mrg /* Let us assume that the unrolled loop is quite likely to be entered. */
1260 1.1 mrg profile_probability prob_entry;
1261 1.1 mrg if (integer_nonzerop (enter_main_cond))
1262 1.1 mrg prob_entry = profile_probability::always ();
1263 1.1 mrg else
1264 1.1 mrg prob_entry = profile_probability::guessed_always ()
1265 1.1 mrg .apply_scale (PROB_UNROLLED_LOOP_ENTERED, 100);
1266 1.1 mrg
1267 1.1 mrg gcond *exit_if = nullptr;
1268 1.1 mrg class loop *new_loop = nullptr;
1269 1.1 mrg edge new_exit;
1270 1.1 mrg if (!single_loop_p)
1271 1.1 mrg {
1272 1.1 mrg edge exit = single_dom_exit (loop);
1273 1.1 mrg
1274 1.1 mrg /* The values for scales should keep profile consistent, and somewhat
1275 1.1 mrg close to correct.
1276 1.1 mrg
1277 1.1 mrg TODO: The current value of SCALE_REST makes it appear that the loop
1278 1.1 mrg that is created by splitting the remaining iterations of the unrolled
1279 1.1 mrg loop is executed the same number of times as the original loop, and
1280 1.1 mrg with the same frequencies, which is obviously wrong. This does not
1281 1.1 mrg appear to cause problems, so we do not bother with fixing it for now.
1282 1.1 mrg To make the profile correct, we would need to change the probability
1283 1.1 mrg of the exit edge of the loop, and recompute the distribution of
1284 1.1 mrg frequencies in its body because of this change (scale the frequencies
1285 1.1 mrg of blocks before and after the exit by appropriate factors). */
1286 1.1 mrg profile_probability scale_unrolled = prob_entry;
1287 1.1 mrg new_loop = loop_version (loop, enter_main_cond, NULL, prob_entry,
1288 1.1 mrg prob_entry.invert (), scale_unrolled,
1289 1.1 mrg profile_probability::guessed_always (),
1290 1.1 mrg true);
1291 1.1 mrg gcc_assert (new_loop != NULL);
1292 1.1 mrg update_ssa (TODO_update_ssa);
1293 1.1 mrg
1294 1.1 mrg /* Prepare the cfg and update the phi nodes. Move the loop exit to the
1295 1.1 mrg loop latch (and make its condition dummy, for the moment). */
1296 1.1 mrg basic_block rest = loop_preheader_edge (new_loop)->src;
1297 1.1 mrg edge precond_edge = single_pred_edge (rest);
1298 1.1 mrg split_edge (loop_latch_edge (loop));
1299 1.1 mrg basic_block exit_bb = single_pred (loop->latch);
1300 1.1 mrg
1301 1.1 mrg /* Since the exit edge will be removed, the frequency of all the blocks
1302 1.1 mrg in the loop that are dominated by it must be scaled by
1303 1.1 mrg 1 / (1 - exit->probability). */
1304 1.1 mrg if (exit->probability.initialized_p ())
1305 1.1 mrg scale_dominated_blocks_in_loop (loop, exit->src,
1306 1.1 mrg /* We are scaling up here so
1307 1.1 mrg probability does not fit. */
1308 1.1 mrg loop->header->count,
1309 1.1 mrg loop->header->count
1310 1.1 mrg - loop->header->count.apply_probability
1311 1.1 mrg (exit->probability));
1312 1.1 mrg
1313 1.1 mrg gimple_stmt_iterator bsi = gsi_last_bb (exit_bb);
1314 1.1 mrg exit_if = gimple_build_cond (EQ_EXPR, integer_zero_node,
1315 1.1 mrg integer_zero_node,
1316 1.1 mrg NULL_TREE, NULL_TREE);
1317 1.1 mrg
1318 1.1 mrg gsi_insert_after (&bsi, exit_if, GSI_NEW_STMT);
1319 1.1 mrg new_exit = make_edge (exit_bb, rest, EDGE_FALSE_VALUE | irr);
1320 1.1 mrg rescan_loop_exit (new_exit, true, false);
1321 1.1 mrg
1322 1.1 mrg /* Set the probability of new exit to the same of the old one. Fix
1323 1.1 mrg the frequency of the latch block, by scaling it back by
1324 1.1 mrg 1 - exit->probability. */
1325 1.1 mrg new_exit->probability = exit->probability;
1326 1.1 mrg edge new_nonexit = single_pred_edge (loop->latch);
1327 1.1 mrg new_nonexit->probability = exit->probability.invert ();
1328 1.1 mrg new_nonexit->flags = EDGE_TRUE_VALUE;
1329 1.1 mrg if (new_nonexit->probability.initialized_p ())
1330 1.1 mrg scale_bbs_frequencies (&loop->latch, 1, new_nonexit->probability);
1331 1.1 mrg
1332 1.1 mrg edge old_entry = loop_preheader_edge (loop);
1333 1.1 mrg edge new_entry = loop_preheader_edge (new_loop);
1334 1.1 mrg edge old_latch = loop_latch_edge (loop);
1335 1.1 mrg for (gphi_iterator psi_old_loop = gsi_start_phis (loop->header),
1336 1.1 mrg psi_new_loop = gsi_start_phis (new_loop->header);
1337 1.1 mrg !gsi_end_p (psi_old_loop);
1338 1.1 mrg gsi_next (&psi_old_loop), gsi_next (&psi_new_loop))
1339 1.1 mrg {
1340 1.1 mrg gphi *phi_old_loop = psi_old_loop.phi ();
1341 1.1 mrg gphi *phi_new_loop = psi_new_loop.phi ();
1342 1.1 mrg
1343 1.1 mrg tree init = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_entry);
1344 1.1 mrg use_operand_p op
1345 1.1 mrg = PHI_ARG_DEF_PTR_FROM_EDGE (phi_new_loop, new_entry);
1346 1.1 mrg gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op)));
1347 1.1 mrg tree next = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_latch);
1348 1.1 mrg
1349 1.1 mrg /* Prefer using original variable as a base for the new ssa name.
1350 1.1 mrg This is necessary for virtual ops, and useful in order to avoid
1351 1.1 mrg losing debug info for real ops. */
1352 1.1 mrg tree new_init;
1353 1.1 mrg if (TREE_CODE (next) == SSA_NAME
1354 1.1 mrg && useless_type_conversion_p (TREE_TYPE (next),
1355 1.1 mrg TREE_TYPE (init)))
1356 1.1 mrg new_init = copy_ssa_name (next);
1357 1.1 mrg else if (TREE_CODE (init) == SSA_NAME
1358 1.1 mrg && useless_type_conversion_p (TREE_TYPE (init),
1359 1.1 mrg TREE_TYPE (next)))
1360 1.1 mrg new_init = copy_ssa_name (init);
1361 1.1 mrg else if (useless_type_conversion_p (TREE_TYPE (next),
1362 1.1 mrg TREE_TYPE (init)))
1363 1.1 mrg new_init = make_temp_ssa_name (TREE_TYPE (next), NULL,
1364 1.1 mrg "unrinittmp");
1365 1.1 mrg else
1366 1.1 mrg new_init = make_temp_ssa_name (TREE_TYPE (init), NULL,
1367 1.1 mrg "unrinittmp");
1368 1.1 mrg
1369 1.1 mrg gphi *phi_rest = create_phi_node (new_init, rest);
1370 1.1 mrg add_phi_arg (phi_rest, init, precond_edge, UNKNOWN_LOCATION);
1371 1.1 mrg add_phi_arg (phi_rest, next, new_exit, UNKNOWN_LOCATION);
1372 1.1 mrg SET_USE (op, new_init);
1373 1.1 mrg }
1374 1.1 mrg
1375 1.1 mrg remove_path (exit);
1376 1.1 mrg }
1377 1.1 mrg else
1378 1.1 mrg new_exit = single_dom_exit (loop);
1379 1.1 mrg
1380 1.1 mrg /* Transform the loop. */
1381 1.1 mrg if (transform)
1382 1.1 mrg (*transform) (loop, data);
1383 1.1 mrg
1384 1.1 mrg /* Unroll the loop and remove the exits in all iterations except for the
1385 1.1 mrg last one. */
1386 1.1 mrg auto_sbitmap wont_exit (factor);
1387 1.1 mrg bitmap_ones (wont_exit);
1388 1.1 mrg bitmap_clear_bit (wont_exit, factor - 1);
1389 1.1 mrg
1390 1.1 mrg auto_vec<edge> to_remove;
1391 1.1 mrg bool ok
1392 1.1 mrg = gimple_duplicate_loop_body_to_header_edge (loop, loop_latch_edge (loop),
1393 1.1 mrg factor - 1, wont_exit,
1394 1.1 mrg new_exit, &to_remove,
1395 1.1 mrg DLTHE_FLAG_UPDATE_FREQ);
1396 1.1 mrg gcc_assert (ok);
1397 1.1 mrg
1398 1.1 mrg for (edge e : to_remove)
1399 1.1 mrg {
1400 1.1 mrg ok = remove_path (e);
1401 1.1 mrg gcc_assert (ok);
1402 1.1 mrg }
1403 1.1 mrg update_ssa (TODO_update_ssa);
1404 1.1 mrg
1405 1.1 mrg new_exit = single_dom_exit (loop);
1406 1.1 mrg if (!single_loop_p)
1407 1.1 mrg {
1408 1.1 mrg /* Ensure that the frequencies in the loop match the new estimated
1409 1.1 mrg number of iterations, and change the probability of the new
1410 1.1 mrg exit edge. */
1411 1.1 mrg
1412 1.1 mrg profile_count freq_h = loop->header->count;
1413 1.1 mrg profile_count freq_e = (loop_preheader_edge (loop))->count ();
1414 1.1 mrg if (freq_h.nonzero_p ())
1415 1.1 mrg {
1416 1.1 mrg /* Avoid dropping loop body profile counter to 0 because of zero
1417 1.1 mrg count in loop's preheader. */
1418 1.1 mrg if (freq_h.nonzero_p () && !(freq_e == profile_count::zero ()))
1419 1.1 mrg freq_e = freq_e.force_nonzero ();
1420 1.1 mrg scale_loop_frequencies (loop, freq_e.probability_in (freq_h));
1421 1.1 mrg }
1422 1.1 mrg
1423 1.1 mrg basic_block rest = new_exit->dest;
1424 1.1 mrg new_exit->probability = profile_probability::always ()
1425 1.1 mrg .apply_scale (1, new_est_niter + 1);
1426 1.1 mrg
1427 1.1 mrg rest->count += new_exit->count ();
1428 1.1 mrg
1429 1.1 mrg edge new_nonexit = single_pred_edge (loop->latch);
1430 1.1 mrg profile_probability prob = new_nonexit->probability;
1431 1.1 mrg new_nonexit->probability = new_exit->probability.invert ();
1432 1.1 mrg prob = new_nonexit->probability / prob;
1433 1.1 mrg if (prob.initialized_p ())
1434 1.1 mrg scale_bbs_frequencies (&loop->latch, 1, prob);
1435 1.1 mrg
1436 1.1 mrg /* Finally create the new counter for number of iterations and add
1437 1.1 mrg the new exit instruction. */
1438 1.1 mrg tree ctr_before, ctr_after;
1439 1.1 mrg gimple_stmt_iterator bsi = gsi_last_nondebug_bb (new_exit->src);
1440 1.1 mrg exit_if = as_a <gcond *> (gsi_stmt (bsi));
1441 1.1 mrg create_iv (exit_base, exit_step, NULL_TREE, loop,
1442 1.1 mrg &bsi, false, &ctr_before, &ctr_after);
1443 1.1 mrg gimple_cond_set_code (exit_if, exit_cmp);
1444 1.1 mrg gimple_cond_set_lhs (exit_if, ctr_after);
1445 1.1 mrg gimple_cond_set_rhs (exit_if, exit_bound);
1446 1.1 mrg update_stmt (exit_if);
1447 1.1 mrg }
1448 1.1 mrg else
1449 1.1 mrg {
1450 1.1 mrg /* gimple_duplicate_loop_to_header_edge has adjusted the loop body's
1451 1.1 mrg original profile counts in line with the unroll factor. However,
1452 1.1 mrg the old counts might not have been consistent with the old
1453 1.1 mrg iteration count.
1454 1.1 mrg
1455 1.1 mrg Therefore, if the iteration count is known exactly, make sure that the
1456 1.1 mrg profile counts of the loop header (and any other blocks that might be
1457 1.1 mrg executed in the final iteration) are consistent with the combination
1458 1.1 mrg of (a) the incoming profile count and (b) the new iteration count. */
1459 1.1 mrg profile_count in_count = loop_preheader_edge (loop)->count ();
1460 1.1 mrg profile_count old_header_count = loop->header->count;
1461 1.1 mrg if (in_count.nonzero_p ()
1462 1.1 mrg && old_header_count.nonzero_p ()
1463 1.1 mrg && TREE_CODE (desc->niter) == INTEGER_CST)
1464 1.1 mrg {
1465 1.1 mrg /* The + 1 converts latch counts to iteration counts. */
1466 1.1 mrg profile_count new_header_count
1467 1.1 mrg = (in_count.apply_scale (new_est_niter + 1, 1));
1468 1.1 mrg basic_block *body = get_loop_body (loop);
1469 1.1 mrg scale_bbs_frequencies_profile_count (body, loop->num_nodes,
1470 1.1 mrg new_header_count,
1471 1.1 mrg old_header_count);
1472 1.1 mrg free (body);
1473 1.1 mrg }
1474 1.1 mrg
1475 1.1 mrg /* gimple_duplicate_loop_to_header_edge discarded FACTOR - 1
1476 1.1 mrg exit edges and adjusted the loop body's profile counts for the
1477 1.1 mrg new probabilities of the remaining non-exit edges. However,
1478 1.1 mrg the remaining exit edge still has the same probability as it
1479 1.1 mrg did before, even though it is now more likely.
1480 1.1 mrg
1481 1.1 mrg Therefore, all blocks executed after a failed exit test now have
1482 1.1 mrg a profile count that is too high, and the sum of the profile counts
1483 1.1 mrg for the header's incoming edges is greater than the profile count
1484 1.1 mrg of the header itself.
1485 1.1 mrg
1486 1.1 mrg Adjust the profile counts of all code in the loop body after
1487 1.1 mrg the exit test so that the sum of the counts on entry to the
1488 1.1 mrg header agree. */
1489 1.1 mrg profile_count old_latch_count = loop_latch_edge (loop)->count ();
1490 1.1 mrg profile_count new_latch_count = loop->header->count - in_count;
1491 1.1 mrg if (old_latch_count.nonzero_p () && new_latch_count.nonzero_p ())
1492 1.1 mrg scale_dominated_blocks_in_loop (loop, new_exit->src, new_latch_count,
1493 1.1 mrg old_latch_count);
1494 1.1 mrg
1495 1.1 mrg /* Set the probability of the exit edge based on NEW_EST_NITER
1496 1.1 mrg (which estimates latch counts rather than iteration counts).
1497 1.1 mrg Update the probabilities of other edges to match.
1498 1.1 mrg
1499 1.1 mrg If the profile counts are large enough to give the required
1500 1.1 mrg precision, the updates above will have made
1501 1.1 mrg
1502 1.1 mrg e->dest->count / e->src->count ~= new e->probability
1503 1.1 mrg
1504 1.1 mrg for every outgoing edge e of NEW_EXIT->src. */
1505 1.1 mrg profile_probability new_exit_prob = profile_probability::always ()
1506 1.1 mrg .apply_scale (1, new_est_niter + 1);
1507 1.1 mrg change_edge_frequency (new_exit, new_exit_prob);
1508 1.1 mrg }
1509 1.1 mrg
1510 1.1 mrg checking_verify_flow_info ();
1511 1.1 mrg checking_verify_loop_structure ();
1512 1.1 mrg checking_verify_loop_closed_ssa (true, loop);
1513 1.1 mrg checking_verify_loop_closed_ssa (true, new_loop);
1514 1.1 mrg }
1515 1.1 mrg
1516 1.1 mrg /* Wrapper over tree_transform_and_unroll_loop for case we do not
1517 1.1 mrg want to transform the loop before unrolling. The meaning
1518 1.1 mrg of the arguments is the same as for tree_transform_and_unroll_loop. */
1519 1.1 mrg
1520 1.1 mrg void
1521 1.1 mrg tree_unroll_loop (class loop *loop, unsigned factor,
1522 1.1 mrg class tree_niter_desc *desc)
1523 1.1 mrg {
1524 1.1 mrg tree_transform_and_unroll_loop (loop, factor, desc, NULL, NULL);
1525 1.1 mrg }
1526 1.1 mrg
1527 1.1 mrg /* Rewrite the phi node at position PSI in function of the main
1528 1.1 mrg induction variable MAIN_IV and insert the generated code at GSI. */
1529 1.1 mrg
1530 1.1 mrg static void
1531 1.1 mrg rewrite_phi_with_iv (loop_p loop,
1532 1.1 mrg gphi_iterator *psi,
1533 1.1 mrg gimple_stmt_iterator *gsi,
1534 1.1 mrg tree main_iv)
1535 1.1 mrg {
1536 1.1 mrg affine_iv iv;
1537 1.1 mrg gassign *stmt;
1538 1.1 mrg gphi *phi = psi->phi ();
1539 1.1 mrg tree atype, mtype, val, res = PHI_RESULT (phi);
1540 1.1 mrg
1541 1.1 mrg if (virtual_operand_p (res) || res == main_iv)
1542 1.1 mrg {
1543 1.1 mrg gsi_next (psi);
1544 1.1 mrg return;
1545 1.1 mrg }
1546 1.1 mrg
1547 1.1 mrg if (!simple_iv (loop, loop, res, &iv, true))
1548 1.1 mrg {
1549 1.1 mrg gsi_next (psi);
1550 1.1 mrg return;
1551 1.1 mrg }
1552 1.1 mrg
1553 1.1 mrg remove_phi_node (psi, false);
1554 1.1 mrg
1555 1.1 mrg atype = TREE_TYPE (res);
1556 1.1 mrg mtype = POINTER_TYPE_P (atype) ? sizetype : atype;
1557 1.1 mrg val = fold_build2 (MULT_EXPR, mtype, unshare_expr (iv.step),
1558 1.1 mrg fold_convert (mtype, main_iv));
1559 1.1 mrg val = fold_build2 (POINTER_TYPE_P (atype)
1560 1.1 mrg ? POINTER_PLUS_EXPR : PLUS_EXPR,
1561 1.1 mrg atype, unshare_expr (iv.base), val);
1562 1.1 mrg val = force_gimple_operand_gsi (gsi, val, false, NULL_TREE, true,
1563 1.1 mrg GSI_SAME_STMT);
1564 1.1 mrg stmt = gimple_build_assign (res, val);
1565 1.1 mrg gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1566 1.1 mrg }
1567 1.1 mrg
1568 1.1 mrg /* Rewrite all the phi nodes of LOOP in function of the main induction
1569 1.1 mrg variable MAIN_IV. */
1570 1.1 mrg
1571 1.1 mrg static void
1572 1.1 mrg rewrite_all_phi_nodes_with_iv (loop_p loop, tree main_iv)
1573 1.1 mrg {
1574 1.1 mrg unsigned i;
1575 1.1 mrg basic_block *bbs = get_loop_body_in_dom_order (loop);
1576 1.1 mrg gphi_iterator psi;
1577 1.1 mrg
1578 1.1 mrg for (i = 0; i < loop->num_nodes; i++)
1579 1.1 mrg {
1580 1.1 mrg basic_block bb = bbs[i];
1581 1.1 mrg gimple_stmt_iterator gsi = gsi_after_labels (bb);
1582 1.1 mrg
1583 1.1 mrg if (bb->loop_father != loop)
1584 1.1 mrg continue;
1585 1.1 mrg
1586 1.1 mrg for (psi = gsi_start_phis (bb); !gsi_end_p (psi); )
1587 1.1 mrg rewrite_phi_with_iv (loop, &psi, &gsi, main_iv);
1588 1.1 mrg }
1589 1.1 mrg
1590 1.1 mrg free (bbs);
1591 1.1 mrg }
1592 1.1 mrg
1593 1.1 mrg /* Bases all the induction variables in LOOP on a single induction variable
1594 1.1 mrg (with base 0 and step 1), whose final value is compared with *NIT. When the
1595 1.1 mrg IV type precision has to be larger than *NIT type precision, *NIT is
1596 1.1 mrg converted to the larger type, the conversion code is inserted before the
1597 1.1 mrg loop, and *NIT is updated to the new definition. When BUMP_IN_LATCH is true,
1598 1.1 mrg the induction variable is incremented in the loop latch, otherwise it is
1599 1.1 mrg incremented in the loop header. Return the induction variable that was
1600 1.1 mrg created. */
1601 1.1 mrg
1602 1.1 mrg tree
1603 1.1 mrg canonicalize_loop_ivs (class loop *loop, tree *nit, bool bump_in_latch)
1604 1.1 mrg {
1605 1.1 mrg unsigned precision = TYPE_PRECISION (TREE_TYPE (*nit));
1606 1.1 mrg unsigned original_precision = precision;
1607 1.1 mrg tree type, var_before;
1608 1.1 mrg gimple_stmt_iterator gsi;
1609 1.1 mrg gphi_iterator psi;
1610 1.1 mrg gcond *stmt;
1611 1.1 mrg edge exit = single_dom_exit (loop);
1612 1.1 mrg gimple_seq stmts;
1613 1.1 mrg bool unsigned_p = false;
1614 1.1 mrg
1615 1.1 mrg for (psi = gsi_start_phis (loop->header);
1616 1.1 mrg !gsi_end_p (psi); gsi_next (&psi))
1617 1.1 mrg {
1618 1.1 mrg gphi *phi = psi.phi ();
1619 1.1 mrg tree res = PHI_RESULT (phi);
1620 1.1 mrg bool uns;
1621 1.1 mrg
1622 1.1 mrg type = TREE_TYPE (res);
1623 1.1 mrg if (virtual_operand_p (res)
1624 1.1 mrg || (!INTEGRAL_TYPE_P (type)
1625 1.1 mrg && !POINTER_TYPE_P (type))
1626 1.1 mrg || TYPE_PRECISION (type) < precision)
1627 1.1 mrg continue;
1628 1.1 mrg
1629 1.1 mrg uns = POINTER_TYPE_P (type) | TYPE_UNSIGNED (type);
1630 1.1 mrg
1631 1.1 mrg if (TYPE_PRECISION (type) > precision)
1632 1.1 mrg unsigned_p = uns;
1633 1.1 mrg else
1634 1.1 mrg unsigned_p |= uns;
1635 1.1 mrg
1636 1.1 mrg precision = TYPE_PRECISION (type);
1637 1.1 mrg }
1638 1.1 mrg
1639 1.1 mrg scalar_int_mode mode = smallest_int_mode_for_size (precision);
1640 1.1 mrg precision = GET_MODE_PRECISION (mode);
1641 1.1 mrg type = build_nonstandard_integer_type (precision, unsigned_p);
1642 1.1 mrg
1643 1.1 mrg if (original_precision != precision
1644 1.1 mrg || TYPE_UNSIGNED (TREE_TYPE (*nit)) != unsigned_p)
1645 1.1 mrg {
1646 1.1 mrg *nit = fold_convert (type, *nit);
1647 1.1 mrg *nit = force_gimple_operand (*nit, &stmts, true, NULL_TREE);
1648 1.1 mrg if (stmts)
1649 1.1 mrg gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1650 1.1 mrg }
1651 1.1 mrg
1652 1.1 mrg if (bump_in_latch)
1653 1.1 mrg gsi = gsi_last_bb (loop->latch);
1654 1.1 mrg else
1655 1.1 mrg gsi = gsi_last_nondebug_bb (loop->header);
1656 1.1 mrg create_iv (build_int_cst_type (type, 0), build_int_cst (type, 1), NULL_TREE,
1657 1.1 mrg loop, &gsi, bump_in_latch, &var_before, NULL);
1658 1.1 mrg
1659 1.1 mrg rewrite_all_phi_nodes_with_iv (loop, var_before);
1660 1.1 mrg
1661 1.1 mrg stmt = as_a <gcond *> (last_stmt (exit->src));
1662 1.1 mrg /* Make the loop exit if the control condition is not satisfied. */
1663 1.1 mrg if (exit->flags & EDGE_TRUE_VALUE)
1664 1.1 mrg {
1665 1.1 mrg edge te, fe;
1666 1.1 mrg
1667 1.1 mrg extract_true_false_edges_from_block (exit->src, &te, &fe);
1668 1.1 mrg te->flags = EDGE_FALSE_VALUE;
1669 1.1 mrg fe->flags = EDGE_TRUE_VALUE;
1670 1.1 mrg }
1671 1.1 mrg gimple_cond_set_code (stmt, LT_EXPR);
1672 1.1 mrg gimple_cond_set_lhs (stmt, var_before);
1673 1.1 mrg gimple_cond_set_rhs (stmt, *nit);
1674 1.1 mrg update_stmt (stmt);
1675 1.1 mrg
1676 1.1 mrg return var_before;
1677 1.1 mrg }
1678