gimple-loop-interchange.cc revision 1.1 1 1.1 mrg /* Loop interchange.
2 1.1 mrg Copyright (C) 2017-2018 Free Software Foundation, Inc.
3 1.1 mrg Contributed by ARM Ltd.
4 1.1 mrg
5 1.1 mrg This file is part of GCC.
6 1.1 mrg
7 1.1 mrg GCC is free software; you can redistribute it and/or modify it
8 1.1 mrg under the terms of the GNU General Public License as published by the
9 1.1 mrg Free Software Foundation; either version 3, or (at your option) any
10 1.1 mrg later version.
11 1.1 mrg
12 1.1 mrg GCC is distributed in the hope that it will be useful, but WITHOUT
13 1.1 mrg ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 1.1 mrg FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 1.1 mrg for more details.
16 1.1 mrg
17 1.1 mrg You should have received a copy of the GNU General Public License
18 1.1 mrg along with GCC; see the file COPYING3. If not see
19 1.1 mrg <http://www.gnu.org/licenses/>. */
20 1.1 mrg
21 1.1 mrg #include "config.h"
22 1.1 mrg #include "system.h"
23 1.1 mrg #include "coretypes.h"
24 1.1 mrg #include "backend.h"
25 1.1 mrg #include "is-a.h"
26 1.1 mrg #include "tree.h"
27 1.1 mrg #include "gimple.h"
28 1.1 mrg #include "tree-pass.h"
29 1.1 mrg #include "ssa.h"
30 1.1 mrg #include "gimple-pretty-print.h"
31 1.1 mrg #include "fold-const.h"
32 1.1 mrg #include "gimplify.h"
33 1.1 mrg #include "gimple-iterator.h"
34 1.1 mrg #include "gimplify-me.h"
35 1.1 mrg #include "cfgloop.h"
36 1.1 mrg #include "params.h"
37 1.1 mrg #include "tree-ssa.h"
38 1.1 mrg #include "tree-scalar-evolution.h"
39 1.1 mrg #include "tree-ssa-loop-manip.h"
40 1.1 mrg #include "tree-ssa-loop-niter.h"
41 1.1 mrg #include "tree-ssa-loop-ivopts.h"
42 1.1 mrg #include "tree-ssa-dce.h"
43 1.1 mrg #include "tree-data-ref.h"
44 1.1 mrg #include "tree-vectorizer.h"
45 1.1 mrg
46 1.1 mrg /* This pass performs loop interchange: for example, the loop nest
47 1.1 mrg
48 1.1 mrg for (int j = 0; j < N; j++)
49 1.1 mrg for (int k = 0; k < N; k++)
50 1.1 mrg for (int i = 0; i < N; i++)
51 1.1 mrg c[i][j] = c[i][j] + a[i][k]*b[k][j];
52 1.1 mrg
53 1.1 mrg is transformed to
54 1.1 mrg
55 1.1 mrg for (int i = 0; i < N; i++)
56 1.1 mrg for (int j = 0; j < N; j++)
57 1.1 mrg for (int k = 0; k < N; k++)
58 1.1 mrg c[i][j] = c[i][j] + a[i][k]*b[k][j];
59 1.1 mrg
60 1.1 mrg This pass implements loop interchange in the following steps:
61 1.1 mrg
62 1.1 mrg 1) Find perfect loop nest for each innermost loop and compute data
63 1.1 mrg dependence relations for it. For above example, loop nest is
64 1.1 mrg <loop_j, loop_k, loop_i>.
65 1.1 mrg 2) From innermost to outermost loop, this pass tries to interchange
66 1.1 mrg each loop pair. For above case, it firstly tries to interchange
67 1.1 mrg <loop_k, loop_i> and loop nest becomes <loop_j, loop_i, loop_k>.
68 1.1 mrg Then it tries to interchange <loop_j, loop_i> and loop nest becomes
69 1.1 mrg <loop_i, loop_j, loop_k>. The overall effect is to move innermost
70 1.1 mrg loop to the outermost position. For loop pair <loop_i, loop_j>
71 1.1 mrg to be interchanged, we:
72 1.1 mrg 3) Check if data dependence relations are valid for loop interchange.
73 1.1 mrg 4) Check if both loops can be interchanged in terms of transformation.
74 1.1 mrg 5) Check if interchanging the two loops is profitable.
75 1.1 mrg 6) Interchange the two loops by mapping induction variables.
76 1.1 mrg
77 1.1 mrg This pass also handles reductions in loop nest. So far we only support
78 1.1 mrg simple reduction of inner loop and double reduction of the loop nest. */
79 1.1 mrg
80 1.1 mrg /* Maximum number of stmts in each loop that should be interchanged. */
81 1.1 mrg #define MAX_NUM_STMT (PARAM_VALUE (PARAM_LOOP_INTERCHANGE_MAX_NUM_STMTS))
82 1.1 mrg /* Maximum number of data references in loop nest. */
83 1.1 mrg #define MAX_DATAREFS (PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
84 1.1 mrg
85 1.1 mrg /* Comparison ratio of access stride between inner/outer loops to be
86 1.1 mrg interchanged. This is the minimum stride ratio for loop interchange
87 1.1 mrg to be profitable. */
88 1.1 mrg #define OUTER_STRIDE_RATIO (PARAM_VALUE (PARAM_LOOP_INTERCHANGE_STRIDE_RATIO))
89 1.1 mrg /* The same as above, but we require higher ratio for interchanging the
90 1.1 mrg innermost two loops. */
91 1.1 mrg #define INNER_STRIDE_RATIO ((OUTER_STRIDE_RATIO) + 1)
92 1.1 mrg
93 1.1 mrg /* Comparison ratio of stmt cost between inner/outer loops. Loops won't
94 1.1 mrg be interchanged if outer loop has too many stmts. */
95 1.1 mrg #define STMT_COST_RATIO (3)
96 1.1 mrg
97 1.1 mrg /* Vector of strides that DR accesses in each level loop of a loop nest. */
98 1.1 mrg #define DR_ACCESS_STRIDE(dr) ((vec<tree> *) dr->aux)
99 1.1 mrg
/* Structure recording loop induction variable.  Objects are allocated
   with XCNEW and released with plain free () by loop_cand's destructor,
   so all fields must remain POD.  */
typedef struct induction
{
  /* IV itself.  */
  tree var;
  /* IV's initializing value, which is the init arg of the IV PHI node.  */
  tree init_val;
  /* IV's initializing expr, which is (the expanded result of) init_val.  */
  tree init_expr;
  /* IV's step.  */
  tree step;
} *induction_p;
112 1.1 mrg
/* Enum type for loop reduction variable.  */
enum reduction_type
{
  /* Not (yet) classified as a supported reduction.  */
  UNKNOWN_RTYPE = 0,
  /* Simple reduction of the inner loop, e.g. one created by store motion.  */
  SIMPLE_RTYPE,
  /* Double reduction: the outer loop's reduction is updated by the inner
     loop's simple reduction.  */
  DOUBLE_RTYPE
};
120 1.1 mrg
/* Structure recording loop reduction variable.  Objects are allocated
   with XCNEW and released with plain free () by loop_cand's destructor,
   so all fields must remain POD.  */
typedef struct reduction
{
  /* Reduction itself.  */
  tree var;
  /* PHI node defining reduction variable.  */
  gphi *phi;
  /* Init and next variables of the reduction.  */
  tree init;
  tree next;
  /* Lcssa PHI node if reduction is used outside of its definition loop.  */
  gphi *lcssa_phi;
  /* Stmts defining init and next.  */
  gimple *producer;
  gimple *consumer;
  /* If init is loaded from memory, this is the loading memory reference.  */
  tree init_ref;
  /* If reduction is finally stored to memory, this is the stored memory
     reference.  */
  tree fini_ref;
  /* Classification; stays UNKNOWN_RTYPE until analysis succeeds.  */
  enum reduction_type type;
} *reduction_p;
143 1.1 mrg
144 1.1 mrg
145 1.1 mrg /* Dump reduction RE. */
146 1.1 mrg
147 1.1 mrg static void
148 1.1 mrg dump_reduction (reduction_p re)
149 1.1 mrg {
150 1.1 mrg if (re->type == SIMPLE_RTYPE)
151 1.1 mrg fprintf (dump_file, " Simple reduction: ");
152 1.1 mrg else if (re->type == DOUBLE_RTYPE)
153 1.1 mrg fprintf (dump_file, " Double reduction: ");
154 1.1 mrg else
155 1.1 mrg fprintf (dump_file, " Unknown reduction: ");
156 1.1 mrg
157 1.1 mrg print_gimple_stmt (dump_file, re->phi, 0);
158 1.1 mrg }
159 1.1 mrg
160 1.1 mrg /* Dump LOOP's induction IV. */
161 1.1 mrg static void
162 1.1 mrg dump_induction (struct loop *loop, induction_p iv)
163 1.1 mrg {
164 1.1 mrg fprintf (dump_file, " Induction: ");
165 1.1 mrg print_generic_expr (dump_file, iv->var, TDF_SLIM);
166 1.1 mrg fprintf (dump_file, " = {");
167 1.1 mrg print_generic_expr (dump_file, iv->init_expr, TDF_SLIM);
168 1.1 mrg fprintf (dump_file, ", ");
169 1.1 mrg print_generic_expr (dump_file, iv->step, TDF_SLIM);
170 1.1 mrg fprintf (dump_file, "}_%d\n", loop->num);
171 1.1 mrg }
172 1.1 mrg
/* Loop candidate for interchange.  Bundles one loop of the nest together
   with the analysis results (inductions, reductions, lcssa PHIs) needed
   to decide and perform interchange.  */

struct loop_cand
{
  loop_cand (struct loop *, struct loop *);
  ~loop_cand ();

  /* Return the reduction that STMT is part of (lcssa PHI, producer or
     consumer), or NULL.  */
  reduction_p find_reduction_by_stmt (gimple *);
  /* Detect store-motion style simple reduction and record how to undo it.  */
  void classify_simple_reduction (reduction_p);
  /* Analyze a reduction candidate variable of the inner loop.  */
  bool analyze_iloop_reduction_var (tree);
  /* Analyze a double-reduction candidate variable of the outer loop.  */
  bool analyze_oloop_reduction_var (loop_cand *, tree);
  /* Analyze an induction variable given its instantiated scev.  */
  bool analyze_induction_var (tree, tree);
  /* Analyze all loop-carried variables defined in the loop header.  */
  bool analyze_carried_vars (loop_cand *);
  /* Check that loop-closed SSA PHI nodes are only for reductions.  */
  bool analyze_lcssa_phis (void);
  /* Check whether this loop candidate can be interchanged.  */
  bool can_interchange_p (loop_cand *);
  /* Undo store motion of simple reduction during interchange.  */
  void undo_simple_reduction (reduction_p, bitmap);

  /* The loop itself.  */
  struct loop *m_loop;
  /* The outer loop for interchange.  It equals loop if this loop cand
     itself represents the outer loop.  */
  struct loop *m_outer;
  /* Vector of induction variables in loop.  */
  vec<induction_p> m_inductions;
  /* Vector of reduction variables in loop.  */
  vec<reduction_p> m_reductions;
  /* Lcssa PHI nodes of this loop.  */
  vec<gphi *> m_lcssa_nodes;
  /* Single exit edge of this loop.  */
  edge m_exit;
  /* Basic blocks of this loop.  */
  basic_block *m_bbs;
  /* Number of stmts of this loop.  Inner loops' stmts are not included.  */
  int m_num_stmts;
  /* Number of constant initialized simple reduction.  */
  int m_const_init_reduc;
};
210 1.1 mrg
/* Constructor.  LOOP is the loop this candidate represents; OUTER is the
   outer loop of the interchange pair (equal to LOOP for the outer cand).  */

loop_cand::loop_cand (struct loop *loop, struct loop *outer)
  : m_loop (loop), m_outer (outer), m_exit (single_exit (loop)),
    m_bbs (get_loop_body (loop)), m_num_stmts (0), m_const_init_reduc (0)
{
  /* Reserve a small initial capacity for the analysis vectors.  */
  m_inductions.create (3);
  m_reductions.create (3);
  m_lcssa_nodes.create (3);
}
221 1.1 mrg
222 1.1 mrg /* Destructor. */
223 1.1 mrg
224 1.1 mrg loop_cand::~loop_cand ()
225 1.1 mrg {
226 1.1 mrg induction_p iv;
227 1.1 mrg for (unsigned i = 0; m_inductions.iterate (i, &iv); ++i)
228 1.1 mrg free (iv);
229 1.1 mrg
230 1.1 mrg reduction_p re;
231 1.1 mrg for (unsigned i = 0; m_reductions.iterate (i, &re); ++i)
232 1.1 mrg free (re);
233 1.1 mrg
234 1.1 mrg m_inductions.release ();
235 1.1 mrg m_reductions.release ();
236 1.1 mrg m_lcssa_nodes.release ();
237 1.1 mrg free (m_bbs);
238 1.1 mrg }
239 1.1 mrg
240 1.1 mrg /* Return single use stmt of VAR in LOOP, otherwise return NULL. */
241 1.1 mrg
242 1.1 mrg static gimple *
243 1.1 mrg single_use_in_loop (tree var, struct loop *loop)
244 1.1 mrg {
245 1.1 mrg gimple *stmt, *res = NULL;
246 1.1 mrg use_operand_p use_p;
247 1.1 mrg imm_use_iterator iterator;
248 1.1 mrg
249 1.1 mrg FOR_EACH_IMM_USE_FAST (use_p, iterator, var)
250 1.1 mrg {
251 1.1 mrg stmt = USE_STMT (use_p);
252 1.1 mrg if (is_gimple_debug (stmt))
253 1.1 mrg continue;
254 1.1 mrg
255 1.1 mrg if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
256 1.1 mrg continue;
257 1.1 mrg
258 1.1 mrg if (res)
259 1.1 mrg return NULL;
260 1.1 mrg
261 1.1 mrg res = stmt;
262 1.1 mrg }
263 1.1 mrg return res;
264 1.1 mrg }
265 1.1 mrg
266 1.1 mrg /* Return true if E is unsupported in loop interchange, i.e, E is a complex
267 1.1 mrg edge or part of irreducible loop. */
268 1.1 mrg
269 1.1 mrg static inline bool
270 1.1 mrg unsupported_edge (edge e)
271 1.1 mrg {
272 1.1 mrg return (e->flags & (EDGE_COMPLEX | EDGE_IRREDUCIBLE_LOOP));
273 1.1 mrg }
274 1.1 mrg
275 1.1 mrg /* Return the reduction if STMT is one of its lcssa PHI, producer or consumer
276 1.1 mrg stmt. */
277 1.1 mrg
278 1.1 mrg reduction_p
279 1.1 mrg loop_cand::find_reduction_by_stmt (gimple *stmt)
280 1.1 mrg {
281 1.1 mrg gphi *phi = dyn_cast <gphi *> (stmt);
282 1.1 mrg reduction_p re;
283 1.1 mrg
284 1.1 mrg for (unsigned i = 0; m_reductions.iterate (i, &re); ++i)
285 1.1 mrg if ((phi != NULL && phi == re->lcssa_phi)
286 1.1 mrg || (stmt == re->producer || stmt == re->consumer))
287 1.1 mrg return re;
288 1.1 mrg
289 1.1 mrg return NULL;
290 1.1 mrg }
291 1.1 mrg
/* Return true if current loop_cand can be interchanged.  ILOOP is not NULL
   if current loop_cand is outer loop in loop nest.  As a side effect this
   counts the loop's own stmts into m_num_stmts.  */

bool
loop_cand::can_interchange_p (loop_cand *iloop)
{
  /* For now we only support at most one reduction.  */
  unsigned allowed_reduction_num = 1;

  /* Only support reduction if the loop nest to be interchanged is the
     innermost two loops.  */
  if ((iloop == NULL && m_loop->inner != NULL)
      || (iloop != NULL && iloop->m_loop->inner != NULL))
    allowed_reduction_num = 0;

  if (m_reductions.length () > allowed_reduction_num
      || (m_reductions.length () == 1
	  && m_reductions[0]->type == UNKNOWN_RTYPE))
    return false;

  /* Only support lcssa PHI node which is for reduction.  */
  if (m_lcssa_nodes.length () > allowed_reduction_num)
    return false;

  /* Check if basic block has any unsupported operation.  Note basic blocks
     of inner loops are not checked here.  */
  for (unsigned i = 0; i < m_loop->num_nodes; i++)
    {
      basic_block bb = m_bbs[i];
      gphi_iterator psi;
      gimple_stmt_iterator gsi;

      /* Skip basic blocks of inner loops.  */
      if (bb->loop_father != m_loop)
	continue;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  if (is_gimple_debug (stmt))
	    continue;

	  /* Side effects (e.g. volatile accesses) can't be reordered.  */
	  if (gimple_has_side_effects (stmt))
	    return false;

	  m_num_stmts++;
	  if (gcall *call = dyn_cast <gcall *> (stmt))
	    {
	      /* In basic block of outer loop, the call should be cheap since
		 it will be moved to inner loop.  */
	      if (iloop != NULL
		  && !gimple_inexpensive_call_p (call))
		return false;
	      continue;
	    }

	  /* Remaining checks only constrain memory accesses in the outer
	     loop's own blocks.  */
	  if (!iloop || !gimple_vuse (stmt))
	    continue;

	  /* Support stmt accessing memory in outer loop only if it is for
	     inner loop's reduction.  */
	  if (iloop->find_reduction_by_stmt (stmt))
	    continue;

	  tree lhs;
	  /* Support loop invariant memory reference if it's only used once by
	     inner loop.  */
	  /* ??? How's this checking for invariantness?  */
	  if (gimple_assign_single_p (stmt)
	      && (lhs = gimple_assign_lhs (stmt)) != NULL_TREE
	      && TREE_CODE (lhs) == SSA_NAME
	      && single_use_in_loop (lhs, iloop->m_loop))
	    continue;

	  return false;
	}
      /* Check if loop has too many stmts.  */
      if (m_num_stmts > MAX_NUM_STMT)
	return false;

      /* Allow PHI nodes in any basic block of inner loop, PHI nodes in outer
	 loop's header, or PHI nodes in dest bb of inner loop's exit edge.  */
      if (!iloop || bb == m_loop->header
	  || bb == iloop->m_exit->dest)
	continue;

      /* Don't allow any other PHI nodes.  */
      for (psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
	if (!virtual_operand_p (PHI_RESULT (psi.phi ())))
	  return false;
    }

  return true;
}
386 1.1 mrg
/* Programmers and optimizers (like loop store motion) may optimize code:

     for (int i = 0; i < N; i++)
       for (int j = 0; j < N; j++)
	 a[i] += b[j][i] * c[j][i];

   into reduction:

     for (int i = 0; i < N; i++)
       {
	 // producer.  Note sum can be initialized to a constant.
	 int sum = a[i];
	 for (int j = 0; j < N; j++)
	   {
	     sum += b[j][i] * c[j][i];
	   }
	 // consumer.
	 a[i] = sum;
       }

   The resulting code can't be interchanged without undoing the optimization.
   This function classifies this kind of reduction and records information so
   that we can undo the store motion during interchange.  On success RE->type
   becomes SIMPLE_RTYPE; on any early return it stays UNKNOWN_RTYPE.  */

void
loop_cand::classify_simple_reduction (reduction_p re)
{
  gimple *producer, *consumer;

  /* Check init variable of reduction and how it is initialized.  */
  if (TREE_CODE (re->init) == SSA_NAME)
    {
      producer = SSA_NAME_DEF_STMT (re->init);
      re->producer = producer;
      basic_block bb = gimple_bb (producer);
      /* The producer must be a load in the immediate outer loop.  */
      if (!bb || bb->loop_father != m_outer)
	return;

      if (!gimple_assign_load_p (producer))
	return;

      re->init_ref = gimple_assign_rhs1 (producer);
    }
  else if (CONSTANT_CLASS_P (re->init))
    m_const_init_reduc++;
  else
    return;

  /* Check how reduction variable is used.  */
  consumer = single_use_in_loop (PHI_RESULT (re->lcssa_phi), m_outer);
  if (!consumer
      || !gimple_store_p (consumer))
    return;

  re->fini_ref = gimple_get_lhs (consumer);
  re->consumer = consumer;

  /* Simple reduction with constant initializer.  */
  if (!re->init_ref)
    {
      gcc_assert (CONSTANT_CLASS_P (re->init));
      re->init_ref = unshare_expr (re->fini_ref);
    }

  /* Require memory references in producer and consumer are the same so
     that we can undo reduction during interchange.  */
  if (re->init_ref && !operand_equal_p (re->init_ref, re->fini_ref, 0))
    return;

  re->type = SIMPLE_RTYPE;
}
458 1.1 mrg
/* Analyze reduction variable VAR for inner loop of the loop nest to be
   interchanged.  On success the reduction is recorded in m_reductions.
   Return true if analysis succeeds.  */

bool
loop_cand::analyze_iloop_reduction_var (tree var)
{
  gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
  gphi *lcssa_phi = NULL, *use_phi;
  tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
  tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
  reduction_p re;
  gimple *stmt, *next_def, *single_use = NULL;
  use_operand_p use_p;
  imm_use_iterator iterator;

  if (TREE_CODE (next) != SSA_NAME)
    return false;

  /* The updated value must be defined inside this loop.  */
  next_def = SSA_NAME_DEF_STMT (next);
  basic_block bb = gimple_bb (next_def);
  if (!bb || !flow_bb_inside_loop_p (m_loop, bb))
    return false;

  /* In restricted reduction, the var is (and must be) used in defining
     the updated var.  The process can be depicted as below:

		  var ;; = PHI<init, next>
		   |
		   |
		   v
      +---------------------+
      | reduction operators | <-- other operands
      +---------------------+
		   |
		   |
		   v
		  next

     In terms of loop interchange, we don't change how NEXT is computed based
     on VAR and OTHER OPERANDS.  In case of double reduction in loop nest
     to be interchanged, we don't change it at all.  In the case of simple
     reduction in inner loop, we only change how VAR/NEXT is loaded or
     stored.  With these conditions, we can relax restrictions on reduction
     in a way that reduction operation is seen as black box.  In general,
     we can ignore reassociation of reduction operator; we can handle fake
     reductions in which VAR is not even used to compute NEXT.  */
  if (! single_imm_use (var, &use_p, &single_use)
      || ! flow_bb_inside_loop_p (m_loop, gimple_bb (single_use)))
    return false;

  /* Check the reduction operation.  We require a left-associative operation.
     For FP math we also need to be allowed to associate operations.  */
  if (gassign *ass = dyn_cast <gassign *> (single_use))
    {
      enum tree_code code = gimple_assign_rhs_code (ass);
      /* MINUS_EXPR is allowed only when VAR is the minuend (rhs1).  */
      if (! (associative_tree_code (code)
	     || (code == MINUS_EXPR
		 && use_p->use == gimple_assign_rhs1_ptr (ass)))
	  || (FLOAT_TYPE_P (TREE_TYPE (var))
	      && ! flag_associative_math))
	return false;
    }
  else
    return false;

  /* Handle and verify a series of stmts feeding the reduction op.  */
  if (single_use != next_def
      && !check_reduction_path (UNKNOWN_LOCATION, m_loop, phi, next,
				gimple_assign_rhs_code (single_use)))
    return false;

  /* Only support cases in which INIT is used in inner loop.  */
  if (TREE_CODE (init) == SSA_NAME)
    FOR_EACH_IMM_USE_FAST (use_p, iterator, init)
      {
	stmt = USE_STMT (use_p);
	if (is_gimple_debug (stmt))
	  continue;

	if (!flow_bb_inside_loop_p (m_loop, gimple_bb (stmt)))
	  return false;
      }

  /* NEXT may be used only by PHI itself and by a single lcssa PHI in the
     destination of the exit edge.  */
  FOR_EACH_IMM_USE_FAST (use_p, iterator, next)
    {
      stmt = USE_STMT (use_p);
      if (is_gimple_debug (stmt))
	continue;

      /* Or else it's used in PHI itself.  */
      use_phi = dyn_cast <gphi *> (stmt);
      if (use_phi == phi)
	continue;

      if (use_phi != NULL
	  && lcssa_phi == NULL
	  && gimple_bb (stmt) == m_exit->dest
	  && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
	lcssa_phi = use_phi;
      else
	return false;
    }
  if (!lcssa_phi)
    return false;

  re = XCNEW (struct reduction);
  re->var = var;
  re->init = init;
  re->next = next;
  re->phi = phi;
  re->lcssa_phi = lcssa_phi;

  classify_simple_reduction (re);

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_reduction (re);

  m_reductions.safe_push (re);
  return true;
}
579 1.1 mrg
/* Analyze reduction variable VAR for outer loop of the loop nest to be
   interchanged.  ILOOP is not NULL and points to inner loop.  For the
   moment, we only support double reduction for outer loop, like:

     for (int i = 0; i < n; i++)
       {
	 int sum = 0;

	 for (int j = 0; j < n; j++)    // outer loop
	   for (int k = 0; k < n; k++)  // inner loop
	     sum += a[i][k]*b[k][j];

	 s[i] = sum;
       }

   Note the innermost two loops are the loop nest to be interchanged.
   On success both this reduction and the matching inner one are marked
   DOUBLE_RTYPE.  Return true if analysis succeeds.  */

bool
loop_cand::analyze_oloop_reduction_var (loop_cand *iloop, tree var)
{
  gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
  gphi *lcssa_phi = NULL, *use_phi;
  tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
  tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
  reduction_p re;
  gimple *stmt, *next_def;
  use_operand_p use_p;
  imm_use_iterator iterator;

  if (TREE_CODE (next) != SSA_NAME)
    return false;

  next_def = SSA_NAME_DEF_STMT (next);
  basic_block bb = gimple_bb (next_def);
  if (!bb || !flow_bb_inside_loop_p (m_loop, bb))
    return false;

  /* Find inner loop's simple reduction that uses var as initializer.
     Note vec::iterate stores NULL into INNER_RE when the search runs off
     the end, so a failed search is detected below.  */
  reduction_p inner_re = NULL;
  for (unsigned i = 0; iloop->m_reductions.iterate (i, &inner_re); ++i)
    if (inner_re->init == var || operand_equal_p (inner_re->init, var, 0))
      break;

  if (inner_re == NULL
      || inner_re->type != UNKNOWN_RTYPE
      || inner_re->producer != phi)
    return false;

  /* In case of double reduction, outer loop's reduction should be updated
     by inner loop's simple reduction.  */
  if (next_def != inner_re->lcssa_phi)
    return false;

  /* Outer loop's reduction should only be used to initialize inner loop's
     simple reduction.  */
  if (! single_imm_use (var, &use_p, &stmt)
      || stmt != inner_re->phi)
    return false;

  /* Check this reduction is correctly used outside of loop via lcssa phi.  */
  FOR_EACH_IMM_USE_FAST (use_p, iterator, next)
    {
      stmt = USE_STMT (use_p);
      if (is_gimple_debug (stmt))
	continue;

      /* Or else it's used in PHI itself.  */
      use_phi = dyn_cast <gphi *> (stmt);
      if (use_phi == phi)
	continue;

      if (lcssa_phi == NULL
	  && use_phi != NULL
	  && gimple_bb (stmt) == m_exit->dest
	  && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
	lcssa_phi = use_phi;
      else
	return false;
    }
  if (!lcssa_phi)
    return false;

  re = XCNEW (struct reduction);
  re->var = var;
  re->init = init;
  re->next = next;
  re->phi = phi;
  re->lcssa_phi = lcssa_phi;
  re->type = DOUBLE_RTYPE;
  inner_re->type = DOUBLE_RTYPE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_reduction (re);

  m_reductions.safe_push (re);
  return true;
}
678 1.1 mrg
/* Return true if VAR is induction variable of current loop whose scev is
   specified by CHREC.  On success the IV is recorded in m_inductions.  */

bool
loop_cand::analyze_induction_var (tree var, tree chrec)
{
  gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
  tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));

  /* Var is loop invariant, though it's unlikely to happen.  */
  if (tree_does_not_contain_chrecs (chrec))
    {
      /* Punt on floating point invariants if honoring signed zeros,
	 representing that as + 0.0 would change the result if init
	 is -0.0.  Similarly for SNaNs it can raise exception.  */
      if (HONOR_SIGNED_ZEROS (chrec) || HONOR_SNANS (chrec))
	return false;
      /* Record the invariant as an IV with step 0.  Note this path does
	 not dump the IV.  */
      struct induction *iv = XCNEW (struct induction);
      iv->var = var;
      iv->init_val = init;
      iv->init_expr = chrec;
      iv->step = build_zero_cst (TREE_TYPE (chrec));
      m_inductions.safe_push (iv);
      return true;
    }

  /* Only simple affine IVs of this very loop, with invariant init and
     step, are supported.  */
  if (TREE_CODE (chrec) != POLYNOMIAL_CHREC
      || CHREC_VARIABLE (chrec) != (unsigned) m_loop->num
      || tree_contains_chrecs (CHREC_LEFT (chrec), NULL)
      || tree_contains_chrecs (CHREC_RIGHT (chrec), NULL))
    return false;

  struct induction *iv = XCNEW (struct induction);
  iv->var = var;
  iv->init_val = init;
  iv->init_expr = CHREC_LEFT (chrec);
  iv->step = CHREC_RIGHT (chrec);

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_induction (m_loop, iv);

  m_inductions.safe_push (iv);
  return true;
}
723 1.1 mrg
724 1.1 mrg /* Return true if all loop carried variables defined in loop header can
725 1.1 mrg be successfully analyzed. */
726 1.1 mrg
727 1.1 mrg bool
728 1.1 mrg loop_cand::analyze_carried_vars (loop_cand *iloop)
729 1.1 mrg {
730 1.1 mrg edge e = loop_preheader_edge (m_outer);
731 1.1 mrg gphi_iterator gsi;
732 1.1 mrg
733 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
734 1.1 mrg fprintf (dump_file, "\nLoop(%d) carried vars:\n", m_loop->num);
735 1.1 mrg
736 1.1 mrg for (gsi = gsi_start_phis (m_loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
737 1.1 mrg {
738 1.1 mrg gphi *phi = gsi.phi ();
739 1.1 mrg
740 1.1 mrg tree var = PHI_RESULT (phi);
741 1.1 mrg if (virtual_operand_p (var))
742 1.1 mrg continue;
743 1.1 mrg
744 1.1 mrg tree chrec = analyze_scalar_evolution (m_loop, var);
745 1.1 mrg chrec = instantiate_scev (e, m_loop, chrec);
746 1.1 mrg
747 1.1 mrg /* Analyze var as reduction variable. */
748 1.1 mrg if (chrec_contains_undetermined (chrec)
749 1.1 mrg || chrec_contains_symbols_defined_in_loop (chrec, m_outer->num))
750 1.1 mrg {
751 1.1 mrg if (iloop && !analyze_oloop_reduction_var (iloop, var))
752 1.1 mrg return false;
753 1.1 mrg if (!iloop && !analyze_iloop_reduction_var (var))
754 1.1 mrg return false;
755 1.1 mrg }
756 1.1 mrg /* Analyze var as induction variable. */
757 1.1 mrg else if (!analyze_induction_var (var, chrec))
758 1.1 mrg return false;
759 1.1 mrg }
760 1.1 mrg
761 1.1 mrg return true;
762 1.1 mrg }
763 1.1 mrg
764 1.1 mrg /* Return TRUE if loop closed PHI nodes can be analyzed successfully. */
765 1.1 mrg
766 1.1 mrg bool
767 1.1 mrg loop_cand::analyze_lcssa_phis (void)
768 1.1 mrg {
769 1.1 mrg gphi_iterator gsi;
770 1.1 mrg for (gsi = gsi_start_phis (m_exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
771 1.1 mrg {
772 1.1 mrg gphi *phi = gsi.phi ();
773 1.1 mrg
774 1.1 mrg if (virtual_operand_p (PHI_RESULT (phi)))
775 1.1 mrg continue;
776 1.1 mrg
777 1.1 mrg /* TODO: We only support lcssa phi for reduction for now. */
778 1.1 mrg if (!find_reduction_by_stmt (phi))
779 1.1 mrg return false;
780 1.1 mrg }
781 1.1 mrg
782 1.1 mrg return true;
783 1.1 mrg }
784 1.1 mrg
/* CONSUMER is a stmt in BB storing reduction result into memory object.
   When the reduction is initialized from constant value, we need to add
   a stmt loading from the memory object to target basic block in inner
   loop during undoing the reduction.  Problem is that memory reference
   may use ssa variables not dominating the target basic block.  This
   function finds all stmts on which CONSUMER depends in basic block BB,
   records and returns them via STMTS.  The moved stmts keep their original
   relative order.  */

static void
find_deps_in_bb_for_stmt (gimple_seq *stmts, basic_block bb, gimple *consumer)
{
  auto_vec<gimple *, 4> worklist;
  use_operand_p use_p;
  ssa_op_iter iter;
  gimple *stmt, *def_stmt;
  gimple_stmt_iterator gsi;

  /* First clear flag for stmts in bb.  GF_PLF_1 is (re)used here as a
     per-stmt "is a dependence of CONSUMER" mark.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    gimple_set_plf (gsi_stmt (gsi), GF_PLF_1, false);

  /* DFS search all depended stmts in bb and mark flag for these stmts.  */
  worklist.safe_push (consumer);
  while (!worklist.is_empty ())
    {
      stmt = worklist.pop ();
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
	{
	  def_stmt = SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p));

	  /* Stop at PHIs, defs outside BB, and already-visited stmts.  */
	  if (is_a <gphi *> (def_stmt)
	      || gimple_bb (def_stmt) != bb
	      || gimple_plf (def_stmt, GF_PLF_1))
	    continue;

	  worklist.safe_push (def_stmt);
	}
      gimple_set_plf (stmt, GF_PLF_1, true);
    }
  /* Walk BB up to (but not including) CONSUMER, transferring each marked
     stmt into STMTS.  gsi_next is only called when the stmt stays, since
     gsi_remove already advances the iterator.  */
  for (gsi = gsi_start_nondebug_bb (bb);
       !gsi_end_p (gsi) && (stmt = gsi_stmt (gsi)) != consumer;)
    {
      /* Move dep stmts to sequence STMTS.  */
      if (gimple_plf (stmt, GF_PLF_1))
	{
	  gsi_remove (&gsi, false);
	  gimple_seq_add_stmt_without_update (stmts, stmt);
	}
      else
	gsi_next_nondebug (&gsi);
    }
}
837 1.1 mrg
838 1.1 mrg /* User can write, optimizers can generate simple reduction RE for inner
839 1.1 mrg loop. In order to make interchange valid, we have to undo reduction by
840 1.1 mrg moving producer and consumer stmts into the inner loop. For example,
841 1.1 mrg below code:
842 1.1 mrg
843 1.1 mrg init = MEM_REF[idx]; //producer
844 1.1 mrg loop:
845 1.1 mrg var = phi<init, next>
846 1.1 mrg next = var op ...
847 1.1 mrg reduc_sum = phi<next>
848 1.1 mrg MEM_REF[idx] = reduc_sum //consumer
849 1.1 mrg
850 1.1 mrg is transformed into:
851 1.1 mrg
852 1.1 mrg loop:
853 1.1 mrg new_var = MEM_REF[idx]; //producer after moving
854 1.1 mrg next = new_var op ...
855 1.1 mrg MEM_REF[idx] = next; //consumer after moving
856 1.1 mrg
857 1.1 mrg Note if the reduction variable is initialized to constant, like:
858 1.1 mrg
859 1.1 mrg var = phi<0.0, next>
860 1.1 mrg
861 1.1 mrg we compute new_var as below:
862 1.1 mrg
863 1.1 mrg loop:
864 1.1 mrg tmp = MEM_REF[idx];
865 1.1 mrg new_var = !first_iteration ? tmp : 0.0;
866 1.1 mrg
867 1.1 mrg so that the initial const is used in the first iteration of loop. Also
868 1.1 mrg record ssa variables for dead code elimination in DCE_SEEDS. */
869 1.1 mrg
void
loop_cand::undo_simple_reduction (reduction_p re, bitmap dce_seeds)
{
  gimple *stmt;
  gimple_stmt_iterator from, to = gsi_after_labels (m_loop->header);
  gimple_seq stmts = NULL;
  tree new_var;

  /* Prepare the initialization stmts and insert it to inner loop.  */
  if (re->producer != NULL)
    {
      /* The reduction is initialized by a producer load; reuse that load
	 by moving it into the inner loop header.  Clear its virtual use
	 operand first, as the stmt is detached from its current block.  */
      gimple_set_vuse (re->producer, NULL_TREE);
      from = gsi_for_stmt (re->producer);
      gsi_remove (&from, false);
      gimple_seq_add_stmt_without_update (&stmts, re->producer);
      new_var = re->init;
    }
  else
    {
      /* Find all stmts on which expression "MEM_REF[idx]" depends.  */
      find_deps_in_bb_for_stmt (&stmts, gimple_bb (re->consumer), re->consumer);
      /* Because we generate new stmt loading from the MEM_REF to TMP.  */
      tree cond, tmp = copy_ssa_name (re->var);
      stmt = gimple_build_assign (tmp, re->init_ref);
      gimple_seq_add_stmt_without_update (&stmts, stmt);

      /* Init new_var to MEM_REF or CONST depending on if it is the first
	 iteration.  The first iteration is detected by comparing the
	 outermost induction variable against its initial value.  */
      induction_p iv = m_inductions[0];
      cond = fold_build2 (NE_EXPR, boolean_type_node, iv->var, iv->init_val);
      new_var = copy_ssa_name (re->var);
      stmt = gimple_build_assign (new_var, COND_EXPR, cond, tmp, re->init);
      gimple_seq_add_stmt_without_update (&stmts, stmt);
    }
  /* Emit the prepared initialization at the top of the inner loop.  */
  gsi_insert_seq_before (&to, stmts, GSI_SAME_STMT);

  /* Replace all uses of reduction var with new variable.  */
  use_operand_p use_p;
  imm_use_iterator iterator;
  FOR_EACH_IMM_USE_STMT (stmt, iterator, re->var)
    {
      FOR_EACH_IMM_USE_ON_STMT (use_p, iterator)
	SET_USE (use_p, new_var);

      update_stmt (stmt);
    }

  /* Move consumer stmt into inner loop, just after reduction next's def.
     Its virtual operands are dropped, and it now stores the per-iteration
     value RE->next rather than the final lcssa result.  */
  unlink_stmt_vdef (re->consumer);
  release_ssa_name (gimple_vdef (re->consumer));
  gimple_set_vdef (re->consumer, NULL_TREE);
  gimple_set_vuse (re->consumer, NULL_TREE);
  gimple_assign_set_rhs1 (re->consumer, re->next);
  from = gsi_for_stmt (re->consumer);
  to = gsi_for_stmt (SSA_NAME_DEF_STMT (re->next));
  gsi_move_after (&from, &to);

  /* Mark the reduction variables for DCE.  */
  bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (re->var));
  bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (PHI_RESULT (re->lcssa_phi)));
}
931 1.1 mrg
932 1.1 mrg /* Free DATAREFS and its auxiliary memory. */
933 1.1 mrg
934 1.1 mrg static void
935 1.1 mrg free_data_refs_with_aux (vec<data_reference_p> datarefs)
936 1.1 mrg {
937 1.1 mrg data_reference_p dr;
938 1.1 mrg for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
939 1.1 mrg if (dr->aux != NULL)
940 1.1 mrg {
941 1.1 mrg DR_ACCESS_STRIDE (dr)->release ();
942 1.1 mrg delete (vec<tree> *) dr->aux;
943 1.1 mrg }
944 1.1 mrg
945 1.1 mrg free_data_refs (datarefs);
946 1.1 mrg }
947 1.1 mrg
948 1.1 mrg /* Class for loop interchange transformation. */
949 1.1 mrg
class tree_loop_interchange
{
public:
  tree_loop_interchange (vec<struct loop *> loop_nest)
    : m_loop_nest (loop_nest), m_niters_iv_var (NULL_TREE),
      m_dce_seeds (BITMAP_ALLOC (NULL)) { }
  ~tree_loop_interchange () { BITMAP_FREE (m_dce_seeds); }
  /* Drive interchange over the whole nest; returns true on change.  */
  bool interchange (vec<data_reference_p>, vec<ddr_p>);

private:
  /* Swap stride and dependence info of the two interchanged levels.  */
  void update_data_info (unsigned, unsigned, vec<data_reference_p>, vec<ddr_p>);
  /* Check data dependences permit interchanging the two levels.  */
  bool valid_data_dependences (unsigned, unsigned, vec<ddr_p>);
  /* Perform the actual interchange of two loop candidates.  */
  void interchange_loops (loop_cand &, loop_cand &);
  /* Re-create one loop's induction variables inside the other loop.  */
  void map_inductions_to_loop (loop_cand &, loop_cand &);
  /* Move the outer loop's statements into the inner loop.  */
  void move_code_to_inner_loop (struct loop *, struct loop *, basic_block *);

  /* The whole loop nest in which interchange is ongoing.  */
  vec<struct loop *> m_loop_nest;
  /* We create new IV which is only used in loop's exit condition check.
     In case of 3-level loop nest interchange, when we interchange the
     innermost two loops, new IV created in the middle level loop does
     not need to be preserved in interchanging the outermost two loops
     later.  We record the IV so that it can be skipped.  */
  tree m_niters_iv_var;
  /* Bitmap of seed variables for dead code elimination after interchange.  */
  bitmap m_dce_seeds;
};
977 1.1 mrg
978 1.1 mrg /* Update data refs' access stride and dependence information after loop
979 1.1 mrg interchange. I_IDX/O_IDX gives indices of interchanged loops in loop
980 1.1 mrg nest. DATAREFS are data references. DDRS are data dependences. */
981 1.1 mrg
982 1.1 mrg void
983 1.1 mrg tree_loop_interchange::update_data_info (unsigned i_idx, unsigned o_idx,
984 1.1 mrg vec<data_reference_p> datarefs,
985 1.1 mrg vec<ddr_p> ddrs)
986 1.1 mrg {
987 1.1 mrg struct data_reference *dr;
988 1.1 mrg struct data_dependence_relation *ddr;
989 1.1 mrg
990 1.1 mrg /* Update strides of data references. */
991 1.1 mrg for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
992 1.1 mrg {
993 1.1 mrg vec<tree> *stride = DR_ACCESS_STRIDE (dr);
994 1.1 mrg gcc_assert (stride->length () > i_idx);
995 1.1 mrg std::swap ((*stride)[i_idx], (*stride)[o_idx]);
996 1.1 mrg }
997 1.1 mrg /* Update data dependences. */
998 1.1 mrg for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i)
999 1.1 mrg if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
1000 1.1 mrg {
1001 1.1 mrg for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j)
1002 1.1 mrg {
1003 1.1 mrg lambda_vector dist_vect = DDR_DIST_VECT (ddr, j);
1004 1.1 mrg std::swap (dist_vect[i_idx], dist_vect[o_idx]);
1005 1.1 mrg }
1006 1.1 mrg }
1007 1.1 mrg }
1008 1.1 mrg
1009 1.1 mrg /* Check data dependence relations, return TRUE if it's valid to interchange
1010 1.1 mrg two loops specified by I_IDX/O_IDX. Theoretically, interchanging the two
1011 1.1 mrg loops is valid only if dist vector, after interchanging, doesn't have '>'
   as the leftmost non-'=' direction.  Practically, this function has been
   conservative here by not checking some valid cases.  */
1014 1.1 mrg
1015 1.1 mrg bool
1016 1.1 mrg tree_loop_interchange::valid_data_dependences (unsigned i_idx, unsigned o_idx,
1017 1.1 mrg vec<ddr_p> ddrs)
1018 1.1 mrg {
1019 1.1 mrg struct data_dependence_relation *ddr;
1020 1.1 mrg
1021 1.1 mrg for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i)
1022 1.1 mrg {
1023 1.1 mrg /* Skip no-dependence case. */
1024 1.1 mrg if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1025 1.1 mrg continue;
1026 1.1 mrg
1027 1.1 mrg for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j)
1028 1.1 mrg {
1029 1.1 mrg lambda_vector dist_vect = DDR_DIST_VECT (ddr, j);
1030 1.1 mrg unsigned level = dependence_level (dist_vect, m_loop_nest.length ());
1031 1.1 mrg
1032 1.1 mrg /* If there is no carried dependence. */
1033 1.1 mrg if (level == 0)
1034 1.1 mrg continue;
1035 1.1 mrg
1036 1.1 mrg level --;
1037 1.1 mrg
1038 1.1 mrg /* If dependence is not carried by any loop in between the two
1039 1.1 mrg loops [oloop, iloop] to interchange. */
1040 1.1 mrg if (level < o_idx || level > i_idx)
1041 1.1 mrg continue;
1042 1.1 mrg
1043 1.1 mrg /* Be conservative, skip case if either direction at i_idx/o_idx
1044 1.1 mrg levels is not '=' or '<'. */
1045 1.1 mrg if (dist_vect[i_idx] < 0 || dist_vect[o_idx] < 0)
1046 1.1 mrg return false;
1047 1.1 mrg }
1048 1.1 mrg }
1049 1.1 mrg
1050 1.1 mrg return true;
1051 1.1 mrg }
1052 1.1 mrg
1053 1.1 mrg /* Interchange two loops specified by ILOOP and OLOOP. */
1054 1.1 mrg
void
tree_loop_interchange::interchange_loops (loop_cand &iloop, loop_cand &oloop)
{
  reduction_p re;
  gimple_stmt_iterator gsi;
  tree i_niters, o_niters, var_after;

  /* Undo inner loop's simple reduction.  */
  for (unsigned i = 0; iloop.m_reductions.iterate (i, &re); ++i)
    if (re->type != DOUBLE_RTYPE)
      {
	if (re->producer)
	  reset_debug_uses (re->producer);

	iloop.undo_simple_reduction (re, m_dce_seeds);
      }

  /* Only need to reset debug uses for double reduction.  */
  for (unsigned i = 0; oloop.m_reductions.iterate (i, &re); ++i)
    {
      gcc_assert (re->type == DOUBLE_RTYPE);
      reset_debug_uses (SSA_NAME_DEF_STMT (re->var));
      reset_debug_uses (SSA_NAME_DEF_STMT (re->next));
    }

  /* Prepare niters for both loops.  Both expressions are instantiated
     and gimplified at the end of the outermost loop's preheader block,
     where they are available for the canonical IVs created below.  */
  struct loop *loop_nest = m_loop_nest[0];
  edge instantiate_below = loop_preheader_edge (loop_nest);
  gsi = gsi_last_bb (loop_preheader_edge (loop_nest)->src);
  i_niters = number_of_latch_executions (iloop.m_loop);
  i_niters = analyze_scalar_evolution (loop_outer (iloop.m_loop), i_niters);
  i_niters = instantiate_scev (instantiate_below, loop_outer (iloop.m_loop),
			       i_niters);
  i_niters = force_gimple_operand_gsi (&gsi, unshare_expr (i_niters), true,
				       NULL_TREE, false, GSI_CONTINUE_LINKING);
  o_niters = number_of_latch_executions (oloop.m_loop);
  if (oloop.m_loop != loop_nest)
    {
      /* Unless OLOOP is the outermost loop itself, its niters may refer
	 to outer-loop names; instantiate them relative to the nest's
	 preheader as well.  */
      o_niters = analyze_scalar_evolution (loop_outer (oloop.m_loop), o_niters);
      o_niters = instantiate_scev (instantiate_below, loop_outer (oloop.m_loop),
				   o_niters);
    }
  o_niters = force_gimple_operand_gsi (&gsi, unshare_expr (o_niters), true,
				       NULL_TREE, false, GSI_CONTINUE_LINKING);

  /* Move src's code to tgt loop.  This is necessary when src is the outer
     loop and tgt is the inner loop.  */
  move_code_to_inner_loop (oloop.m_loop, iloop.m_loop, oloop.m_bbs);

  /* Map outer loop's IV to inner loop, and vice versa.  */
  map_inductions_to_loop (oloop, iloop);
  map_inductions_to_loop (iloop, oloop);

  /* Create canonical IV for both loops.  Note canonical IV for outer/inner
     loop is actually from inner/outer loop.  Also we record the new IV
     created for the outer loop so that it can be skipped in later loop
     interchange.  */
  create_canonical_iv (oloop.m_loop, oloop.m_exit,
		       i_niters, &m_niters_iv_var, &var_after);
  bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
  create_canonical_iv (iloop.m_loop, iloop.m_exit,
		       o_niters, NULL, &var_after);
  bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));

  /* Scrap niters estimation of interchanged loops.  */
  iloop.m_loop->any_upper_bound = false;
  iloop.m_loop->any_likely_upper_bound = false;
  free_numbers_of_iterations_estimates (iloop.m_loop);
  oloop.m_loop->any_upper_bound = false;
  oloop.m_loop->any_likely_upper_bound = false;
  free_numbers_of_iterations_estimates (oloop.m_loop);

  /* Clear all cached scev information.  This is expensive but shouldn't be
     a problem given we interchange in very limited times.  */
  scev_reset_htab ();

  /* ??? The association between the loop data structure and the
     CFG changed, so what was loop N at the source level is now
     loop M.  We should think of retaining the association or breaking
     it fully by creating a new loop instead of re-using the "wrong" one.  */
}
1136 1.1 mrg
1137 1.1 mrg /* Map induction variables of SRC loop to TGT loop. The function firstly
1138 1.1 mrg creates the same IV of SRC loop in TGT loop, then deletes the original
1139 1.1 mrg IV and re-initialize it using the newly created IV. For example, loop
1140 1.1 mrg nest:
1141 1.1 mrg
1142 1.1 mrg for (i = 0; i < N; i++)
1143 1.1 mrg for (j = 0; j < M; j++)
1144 1.1 mrg {
1145 1.1 mrg //use of i;
1146 1.1 mrg //use of j;
1147 1.1 mrg }
1148 1.1 mrg
1149 1.1 mrg will be transformed into:
1150 1.1 mrg
1151 1.1 mrg for (jj = 0; jj < M; jj++)
1152 1.1 mrg for (ii = 0; ii < N; ii++)
1153 1.1 mrg {
1154 1.1 mrg //use of ii;
1155 1.1 mrg //use of jj;
1156 1.1 mrg }
1157 1.1 mrg
1158 1.1 mrg after loop interchange. */
1159 1.1 mrg
void
tree_loop_interchange::map_inductions_to_loop (loop_cand &src, loop_cand &tgt)
{
  induction_p iv;
  edge e = tgt.m_exit;
  gimple_stmt_iterator incr_pos = gsi_last_bb (e->src), gsi;

  /* Map source loop's IV to target loop.  */
  for (unsigned i = 0; src.m_inductions.iterate (i, &iv); ++i)
    {
      gimple *use_stmt, *stmt = SSA_NAME_DEF_STMT (iv->var);
      /* An induction variable is always defined by a PHI node.  */
      gcc_assert (is_a <gphi *> (stmt));

      use_operand_p use_p;
      /* Only map original IV to target loop.  The IV recorded in
	 m_niters_iv_var was created by an earlier interchange purely
	 for the exit test, so it need not be preserved.  */
      if (m_niters_iv_var != iv->var)
	{
	  /* Map the IV by creating the same one in target loop.  */
	  tree var_before, var_after;
	  tree base = unshare_expr (iv->init_expr);
	  tree step = unshare_expr (iv->step);
	  create_iv (base, step, SSA_NAME_VAR (iv->var),
		     tgt.m_loop, &incr_pos, false, &var_before, &var_after);
	  bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_before));
	  bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));

	  /* Replace uses of the original IV var with newly created IV var.  */
	  imm_use_iterator imm_iter;
	  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, iv->var)
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		SET_USE (use_p, var_before);

	      update_stmt (use_stmt);
	    }
	}

      /* Mark all uses for DCE.  The names feeding the old PHI become
	 dead once the PHI is removed below.  */
      ssa_op_iter op_iter;
      FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, op_iter, SSA_OP_USE)
	{
	  tree use = USE_FROM_PTR (use_p);
	  if (TREE_CODE (use) == SSA_NAME
	      && ! SSA_NAME_IS_DEFAULT_DEF (use))
	    bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (use));
	}

      /* Delete definition of the original IV in the source loop.  */
      gsi = gsi_for_stmt (stmt);
      remove_phi_node (&gsi, true);
    }
}
1212 1.1 mrg
1213 1.1 mrg /* Move stmts of outer loop to inner loop. */
1214 1.1 mrg
void
tree_loop_interchange::move_code_to_inner_loop (struct loop *outer,
						struct loop *inner,
						basic_block *outer_bbs)
{
  basic_block oloop_exit_bb = single_exit (outer)->src;
  gimple_stmt_iterator gsi, to;

  for (unsigned i = 0; i < outer->num_nodes; i++)
    {
      basic_block bb = outer_bbs[i];

      /* Skip basic blocks of inner loop.  */
      if (flow_bb_inside_loop_p (inner, bb))
	continue;

      /* Move code from header/latch to header/latch.  */
      if (bb == outer->header)
	to = gsi_after_labels (inner->header);
      else if (bb == outer->latch)
	to = gsi_after_labels (inner->latch);
      else
	/* Otherwise, simply move to exit->src.  */
	to = gsi_last_bb (single_exit (inner)->src);

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
	{
	  gimple *stmt = gsi_stmt (gsi);

	  /* Leave the last stmt of the outer loop's exit block in place:
	     it is the stmt ending the exit block and must keep
	     controlling the outer loop's exit.  */
	  if (oloop_exit_bb == bb
	      && stmt == gsi_stmt (gsi_last_bb (oloop_exit_bb)))
	    {
	      gsi_next (&gsi);
	      continue;
	    }

	  /* Strip virtual use/def operands from the moved stmt; the
	     vdef's SSA name is released since the stmt changes blocks.  */
	  if (gimple_vuse (stmt))
	    gimple_set_vuse (stmt, NULL_TREE);
	  if (gimple_vdef (stmt))
	    {
	      unlink_stmt_vdef (stmt);
	      release_ssa_name (gimple_vdef (stmt));
	      gimple_set_vdef (stmt, NULL_TREE);
	    }

	  /* Debug info referring to the moved stmt would become wrong.
	     Note gsi_move_before advances GSI, so no explicit step.  */
	  reset_debug_uses (stmt);
	  gsi_move_before (&gsi, &to);
	}
    }
}
1265 1.1 mrg
1266 1.1 mrg /* Given data reference DR in LOOP_NEST, the function computes DR's access
1267 1.1 mrg stride at each level of loop from innermost LOOP to outer. On success,
1268 1.1 mrg it saves access stride at each level loop in a vector which is pointed
1269 1.1 mrg by DR->aux. For example:
1270 1.1 mrg
1271 1.1 mrg int arr[100][100][100];
1272 1.1 mrg for (i = 0; i < 100; i++) ;(DR->aux)strides[0] = 40000
1273 1.1 mrg for (j = 100; j > 0; j--) ;(DR->aux)strides[1] = 400
1274 1.1 mrg for (k = 0; k < 100; k++) ;(DR->aux)strides[2] = 4
1275 1.1 mrg arr[i][j - 1][k] = 0; */
1276 1.1 mrg
static void
compute_access_stride (struct loop *loop_nest, struct loop *loop,
		       data_reference_p dr)
{
  vec<tree> *strides = new vec<tree> ();
  basic_block bb = gimple_bb (DR_STMT (dr));

  /* Strides are pushed innermost level first.  If DR's stmt lives in an
     outer loop of the nest, its address is invariant in the levels below
     it, so record zero strides for those levels.  */
  while (!flow_bb_inside_loop_p (loop, bb))
    {
      strides->safe_push (build_int_cst (sizetype, 0));
      loop = loop_outer (loop);
    }
  gcc_assert (loop == bb->loop_father);

  tree ref = DR_REF (dr);
  if (TREE_CODE (ref) == COMPONENT_REF
      && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
    {
      /* We can't take address of bitfields.  If the bitfield is at constant
	 offset from the start of the struct, just use address of the
	 struct, for analysis of the strides that shouldn't matter.  */
      if (!TREE_OPERAND (ref, 2)
	  || TREE_CODE (TREE_OPERAND (ref, 2)) == INTEGER_CST)
	ref = TREE_OPERAND (ref, 0);
      /* Otherwise, if we have a bit field representative, use that.  */
      else if (DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (ref, 1))
	       != NULL_TREE)
	{
	  tree repr = DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (ref, 1));
	  ref = build3 (COMPONENT_REF, TREE_TYPE (repr), TREE_OPERAND (ref, 0),
			repr, TREE_OPERAND (ref, 2));
	}
      /* Otherwise punt.  The stride vector is left short; callers treat
	 too-short vectors as "could not analyze" (see
	 compute_access_strides).  */
      else
	{
	  dr->aux = strides;
	  return;
	}
    }
  /* Analyze the evolution of the reference's address over the nest.  */
  tree scev_base = build_fold_addr_expr (ref);
  tree scev = analyze_scalar_evolution (loop, scev_base);
  scev = instantiate_scev (loop_preheader_edge (loop_nest), loop, scev);
  if (! chrec_contains_undetermined (scev))
    {
      /* Peel the polynomial chrec level by level: CHREC_RIGHT of each
	 level is the access stride for the corresponding loop.  Loops
	 the chrec skips contribute zero stride (address invariant).  */
      tree sl = scev;
      struct loop *expected = loop;
      while (TREE_CODE (sl) == POLYNOMIAL_CHREC)
	{
	  struct loop *sl_loop = get_chrec_loop (sl);
	  while (sl_loop != expected)
	    {
	      strides->safe_push (size_int (0));
	      expected = loop_outer (expected);
	    }
	  strides->safe_push (CHREC_RIGHT (sl));
	  sl = CHREC_LEFT (sl);
	  expected = loop_outer (expected);
	}
      /* If the remaining base contains no chrec, the address is invariant
	 in any remaining outer levels of the nest: pad with zeros.  */
      if (! tree_contains_chrecs (sl, NULL))
	while (expected != loop_outer (loop_nest))
	  {
	    strides->safe_push (size_int (0));
	    expected = loop_outer (expected);
	  }
    }

  dr->aux = strides;
}
1345 1.1 mrg
1346 1.1 mrg /* Given loop nest LOOP_NEST with innermost LOOP, the function computes
1347 1.1 mrg access strides with respect to each level loop for all data refs in
1348 1.1 mrg DATAREFS from inner loop to outer loop. On success, it returns the
1349 1.1 mrg outermost loop that access strides can be computed successfully for
1350 1.1 mrg all data references. If access strides cannot be computed at least
1351 1.1 mrg for two levels of loop for any data reference, it returns NULL. */
1352 1.1 mrg
static struct loop *
compute_access_strides (struct loop *loop_nest, struct loop *loop,
			vec<data_reference_p> datarefs)
{
  /* NUM_LOOPS tracks the minimum number of loop levels whose stride
     could be computed across all data references.  */
  unsigned i, j, num_loops = (unsigned) -1;
  data_reference_p dr;
  vec<tree> *stride;

  for (i = 0; datarefs.iterate (i, &dr); ++i)
    {
      compute_access_stride (loop_nest, loop, dr);
      stride = DR_ACCESS_STRIDE (dr);
      if (stride->length () < num_loops)
	{
	  num_loops = stride->length ();
	  /* Interchange needs at least two analyzable levels.  */
	  if (num_loops < 2)
	    return NULL;
	}
    }

  for (i = 0; datarefs.iterate (i, &dr); ++i)
    {
      /* Keep only the NUM_LOOPS levels common to all refs...  */
      stride = DR_ACCESS_STRIDE (dr);
      if (stride->length () > num_loops)
	stride->truncate (num_loops);

      /* ... and reverse each vector: compute_access_stride pushes the
	 innermost level first, while users index outermost-first.  */
      for (j = 0; j < (num_loops >> 1); ++j)
	std::swap ((*stride)[j], (*stride)[num_loops - j - 1]);
    }

  /* Return the outermost loop level covered by all stride vectors.  */
  loop = superloop_at_depth (loop, loop_depth (loop) + 1 - num_loops);
  gcc_assert (loop_nest == loop || flow_loop_nested_p (loop_nest, loop));
  return loop;
}
1387 1.1 mrg
1388 1.1 mrg /* Prune access strides for data references in DATAREFS by removing strides
   of loops that aren't in current LOOP_NEST.  */
1390 1.1 mrg
1391 1.1 mrg static void
1392 1.1 mrg prune_access_strides_not_in_loop (struct loop *loop_nest,
1393 1.1 mrg struct loop *innermost,
1394 1.1 mrg vec<data_reference_p> datarefs)
1395 1.1 mrg {
1396 1.1 mrg data_reference_p dr;
1397 1.1 mrg unsigned num_loops = loop_depth (innermost) - loop_depth (loop_nest) + 1;
1398 1.1 mrg gcc_assert (num_loops > 1);
1399 1.1 mrg
1400 1.1 mrg /* Block remove strides of loops that is not in current loop nest. */
1401 1.1 mrg for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
1402 1.1 mrg {
1403 1.1 mrg vec<tree> *stride = DR_ACCESS_STRIDE (dr);
1404 1.1 mrg if (stride->length () > num_loops)
1405 1.1 mrg stride->block_remove (0, stride->length () - num_loops);
1406 1.1 mrg }
1407 1.1 mrg }
1408 1.1 mrg
1409 1.1 mrg /* Dump access strides for all DATAREFS. */
1410 1.1 mrg
1411 1.1 mrg static void
1412 1.1 mrg dump_access_strides (vec<data_reference_p> datarefs)
1413 1.1 mrg {
1414 1.1 mrg data_reference_p dr;
1415 1.1 mrg fprintf (dump_file, "Access Strides for DRs:\n");
1416 1.1 mrg for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
1417 1.1 mrg {
1418 1.1 mrg fprintf (dump_file, " ");
1419 1.1 mrg print_generic_expr (dump_file, DR_REF (dr), TDF_SLIM);
1420 1.1 mrg fprintf (dump_file, ":\t\t<");
1421 1.1 mrg
1422 1.1 mrg vec<tree> *stride = DR_ACCESS_STRIDE (dr);
1423 1.1 mrg unsigned num_loops = stride->length ();
1424 1.1 mrg for (unsigned j = 0; j < num_loops; ++j)
1425 1.1 mrg {
1426 1.1 mrg print_generic_expr (dump_file, (*stride)[j], TDF_SLIM);
1427 1.1 mrg fprintf (dump_file, "%s", (j < num_loops - 1) ? ",\t" : ">\n");
1428 1.1 mrg }
1429 1.1 mrg }
1430 1.1 mrg }
1431 1.1 mrg
1432 1.1 mrg /* Return true if it's profitable to interchange two loops whose index
1433 1.1 mrg in whole loop nest vector are I_IDX/O_IDX respectively. The function
1434 1.1 mrg computes and compares three types information from all DATAREFS:
1435 1.1 mrg 1) Access stride for loop I_IDX and O_IDX.
1436 1.1 mrg 2) Number of invariant memory references with respect to I_IDX before
1437 1.1 mrg and after loop interchange.
1438 1.1 mrg 3) Flags indicating if all memory references access sequential memory
1439 1.1 mrg in ILOOP, before and after loop interchange.
1440 1.1 mrg If INNMOST_LOOP_P is true, the two loops for interchanging are the two
1441 1.1 mrg innermost loops in loop nest. This function also dumps information if
1442 1.1 mrg DUMP_INFO_P is true. */
1443 1.1 mrg
1444 1.1 mrg static bool
1445 1.1 mrg should_interchange_loops (unsigned i_idx, unsigned o_idx,
1446 1.1 mrg vec<data_reference_p> datarefs,
1447 1.1 mrg unsigned i_stmt_cost, unsigned o_stmt_cost,
1448 1.1 mrg bool innermost_loops_p, bool dump_info_p = true)
1449 1.1 mrg {
1450 1.1 mrg unsigned HOST_WIDE_INT ratio;
1451 1.1 mrg unsigned i, j, num_old_inv_drs = 0, num_new_inv_drs = 0;
1452 1.1 mrg struct data_reference *dr;
1453 1.1 mrg bool all_seq_dr_before_p = true, all_seq_dr_after_p = true;
1454 1.1 mrg widest_int iloop_strides = 0, oloop_strides = 0;
1455 1.1 mrg unsigned num_unresolved_drs = 0;
1456 1.1 mrg unsigned num_resolved_ok_drs = 0;
1457 1.1 mrg unsigned num_resolved_not_ok_drs = 0;
1458 1.1 mrg
1459 1.1 mrg if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
1460 1.1 mrg fprintf (dump_file, "\nData ref strides:\n\tmem_ref:\t\tiloop\toloop\n");
1461 1.1 mrg
1462 1.1 mrg for (i = 0; datarefs.iterate (i, &dr); ++i)
1463 1.1 mrg {
1464 1.1 mrg vec<tree> *stride = DR_ACCESS_STRIDE (dr);
1465 1.1 mrg tree iloop_stride = (*stride)[i_idx], oloop_stride = (*stride)[o_idx];
1466 1.1 mrg
1467 1.1 mrg bool subloop_stride_p = false;
1468 1.1 mrg /* Data ref can't be invariant or sequential access at current loop if
1469 1.1 mrg its address changes with respect to any subloops. */
1470 1.1 mrg for (j = i_idx + 1; j < stride->length (); ++j)
1471 1.1 mrg if (!integer_zerop ((*stride)[j]))
1472 1.1 mrg {
1473 1.1 mrg subloop_stride_p = true;
1474 1.1 mrg break;
1475 1.1 mrg }
1476 1.1 mrg
1477 1.1 mrg if (integer_zerop (iloop_stride))
1478 1.1 mrg {
1479 1.1 mrg if (!subloop_stride_p)
1480 1.1 mrg num_old_inv_drs++;
1481 1.1 mrg }
1482 1.1 mrg if (integer_zerop (oloop_stride))
1483 1.1 mrg {
1484 1.1 mrg if (!subloop_stride_p)
1485 1.1 mrg num_new_inv_drs++;
1486 1.1 mrg }
1487 1.1 mrg
1488 1.1 mrg if (TREE_CODE (iloop_stride) == INTEGER_CST
1489 1.1 mrg && TREE_CODE (oloop_stride) == INTEGER_CST)
1490 1.1 mrg {
1491 1.1 mrg iloop_strides = wi::add (iloop_strides, wi::to_widest (iloop_stride));
1492 1.1 mrg oloop_strides = wi::add (oloop_strides, wi::to_widest (oloop_stride));
1493 1.1 mrg }
1494 1.1 mrg else if (multiple_of_p (TREE_TYPE (iloop_stride),
1495 1.1 mrg iloop_stride, oloop_stride))
1496 1.1 mrg num_resolved_ok_drs++;
1497 1.1 mrg else if (multiple_of_p (TREE_TYPE (iloop_stride),
1498 1.1 mrg oloop_stride, iloop_stride))
1499 1.1 mrg num_resolved_not_ok_drs++;
1500 1.1 mrg else
1501 1.1 mrg num_unresolved_drs++;
1502 1.1 mrg
1503 1.1 mrg /* Data ref can't be sequential access if its address changes in sub
1504 1.1 mrg loop. */
1505 1.1 mrg if (subloop_stride_p)
1506 1.1 mrg {
1507 1.1 mrg all_seq_dr_before_p = false;
1508 1.1 mrg all_seq_dr_after_p = false;
1509 1.1 mrg continue;
1510 1.1 mrg }
1511 1.1 mrg /* Track if all data references are sequential accesses before/after loop
1512 1.1 mrg interchange. Note invariant is considered sequential here. */
1513 1.1 mrg tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
1514 1.1 mrg if (all_seq_dr_before_p
1515 1.1 mrg && ! (integer_zerop (iloop_stride)
1516 1.1 mrg || operand_equal_p (access_size, iloop_stride, 0)))
1517 1.1 mrg all_seq_dr_before_p = false;
1518 1.1 mrg if (all_seq_dr_after_p
1519 1.1 mrg && ! (integer_zerop (oloop_stride)
1520 1.1 mrg || operand_equal_p (access_size, oloop_stride, 0)))
1521 1.1 mrg all_seq_dr_after_p = false;
1522 1.1 mrg }
1523 1.1 mrg
1524 1.1 mrg if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
1525 1.1 mrg {
1526 1.1 mrg fprintf (dump_file, "\toverall:\t\t");
1527 1.1 mrg print_decu (iloop_strides, dump_file);
1528 1.1 mrg fprintf (dump_file, "\t");
1529 1.1 mrg print_decu (oloop_strides, dump_file);
1530 1.1 mrg fprintf (dump_file, "\n");
1531 1.1 mrg
1532 1.1 mrg fprintf (dump_file, "Invariant data ref: before(%d), after(%d)\n",
1533 1.1 mrg num_old_inv_drs, num_new_inv_drs);
1534 1.1 mrg fprintf (dump_file, "All consecutive stride: before(%s), after(%s)\n",
1535 1.1 mrg all_seq_dr_before_p ? "true" : "false",
1536 1.1 mrg all_seq_dr_after_p ? "true" : "false");
1537 1.1 mrg fprintf (dump_file, "OK to interchage with variable strides: %d\n",
1538 1.1 mrg num_resolved_ok_drs);
1539 1.1 mrg fprintf (dump_file, "Not OK to interchage with variable strides: %d\n",
1540 1.1 mrg num_resolved_not_ok_drs);
1541 1.1 mrg fprintf (dump_file, "Variable strides we cannot decide: %d\n",
1542 1.1 mrg num_unresolved_drs);
1543 1.1 mrg fprintf (dump_file, "Stmt cost of inner loop: %d\n", i_stmt_cost);
1544 1.1 mrg fprintf (dump_file, "Stmt cost of outer loop: %d\n", o_stmt_cost);
1545 1.1 mrg }
1546 1.1 mrg
1547 1.1 mrg if (num_unresolved_drs != 0 || num_resolved_not_ok_drs != 0)
1548 1.1 mrg return false;
1549 1.1 mrg
1550 1.1 mrg /* Stmts of outer loop will be moved to inner loop. If there are two many
1551 1.1 mrg such stmts, it could make inner loop costly. Here we compare stmt cost
1552 1.1 mrg between outer and inner loops. */
1553 1.1 mrg if (i_stmt_cost && o_stmt_cost
1554 1.1 mrg && num_old_inv_drs + o_stmt_cost > num_new_inv_drs
1555 1.1 mrg && o_stmt_cost * STMT_COST_RATIO > i_stmt_cost)
1556 1.1 mrg return false;
1557 1.1 mrg
1558 1.1 mrg /* We use different stride comparison ratio for interchanging innermost
1559 1.1 mrg two loops or not. The idea is to be conservative in interchange for
1560 1.1 mrg the innermost loops. */
1561 1.1 mrg ratio = innermost_loops_p ? INNER_STRIDE_RATIO : OUTER_STRIDE_RATIO;
1562 1.1 mrg /* Do interchange if it gives better data locality behavior. */
1563 1.1 mrg if (wi::gtu_p (iloop_strides, wi::mul (oloop_strides, ratio)))
1564 1.1 mrg return true;
1565 1.1 mrg if (wi::gtu_p (iloop_strides, oloop_strides))
1566 1.1 mrg {
1567 1.1 mrg /* Or it creates more invariant memory references. */
1568 1.1 mrg if ((!all_seq_dr_before_p || all_seq_dr_after_p)
1569 1.1 mrg && num_new_inv_drs > num_old_inv_drs)
1570 1.1 mrg return true;
1571 1.1 mrg /* Or it makes all memory references sequential. */
1572 1.1 mrg if (num_new_inv_drs >= num_old_inv_drs
1573 1.1 mrg && !all_seq_dr_before_p && all_seq_dr_after_p)
1574 1.1 mrg return true;
1575 1.1 mrg }
1576 1.1 mrg
1577 1.1 mrg return false;
1578 1.1 mrg }
1579 1.1 mrg
1580 1.1 mrg /* Try to interchange inner loop of a loop nest to outer level. */
1581 1.1 mrg
bool
tree_loop_interchange::interchange (vec<data_reference_p> datarefs,
				    vec<ddr_p> ddrs)
{
  location_t loc = find_loop_location (m_loop_nest[0]);
  bool changed_p = false;
  /* In each iteration we try to interchange I-th loop with (I+1)-th loop.
     The overall effect is to push inner loop to outermost level in whole
     loop nest.  */
  for (unsigned i = m_loop_nest.length (); i > 1; --i)
    {
      unsigned i_idx = i - 1, o_idx = i - 2;

      /* Check validity for loop interchange.  */
      if (!valid_data_dependences (i_idx, o_idx, ddrs))
	break;

      /* Candidate pair: the inner loop is analyzed against its immediate
	 outer loop; the outer loop is analyzed against itself.  */
      loop_cand iloop (m_loop_nest[i_idx], m_loop_nest[o_idx]);
      loop_cand oloop (m_loop_nest[o_idx], m_loop_nest[o_idx]);

      /* Check if we can do transformation for loop interchange.  */
      if (!iloop.analyze_carried_vars (NULL)
	  || !iloop.analyze_lcssa_phis ()
	  || !oloop.analyze_carried_vars (&iloop)
	  || !oloop.analyze_lcssa_phis ()
	  || !iloop.can_interchange_p (NULL)
	  || !oloop.can_interchange_p (&iloop))
	break;

      /* Outer loop's stmts will be moved to inner loop during interchange.
	 If there are many of them, it may make inner loop very costly.  We
	 need to check number of outer loop's stmts in profit cost model of
	 interchange.  */
      int stmt_cost = oloop.m_num_stmts;
      /* Count out the exit checking stmt of outer loop.  */
      stmt_cost --;
      /* Count out IV's increasing stmt, IVOPTs takes care of it.  */
      stmt_cost -= oloop.m_inductions.length ();
      /* Count in the additional load and cond_expr stmts caused by inner
	 loop's constant initialized reduction.  */
      stmt_cost += iloop.m_const_init_reduc * 2;
      if (stmt_cost < 0)
	stmt_cost = 0;

      /* Check profitability for loop interchange.  */
      if (should_interchange_loops (i_idx, o_idx, datarefs,
				    (unsigned) iloop.m_num_stmts,
				    (unsigned) stmt_cost,
				    iloop.m_loop->inner == NULL))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Loop_pair<outer:%d, inner:%d> is interchanged\n\n",
		     oloop.m_loop->num, iloop.m_loop->num);

	  changed_p = true;
	  interchange_loops (iloop, oloop);
	  /* No need to update if there is no further loop interchange.  */
	  if (o_idx > 0)
	    update_data_info (i_idx, o_idx, datarefs, ddrs);
	}
      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Loop_pair<outer:%d, inner:%d> is not interchanged\n\n",
		     oloop.m_loop->num, iloop.m_loop->num);
	}
    }
  /* Remove stmts collected in M_DCE_SEEDS that became dead during the
     transformations above.  */
  simple_dce_from_worklist (m_dce_seeds);

  if (changed_p)
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
		     "loops interchanged in loop nest\n");

  return changed_p;
}
1659 1.1 mrg
1660 1.1 mrg
1661 1.1 mrg /* Loop interchange pass. */
1662 1.1 mrg
1663 1.1 mrg namespace {
1664 1.1 mrg
/* Static metadata describing the loop interchange pass to the pass
   manager; the CFG is the only required property.  */
const pass_data pass_data_linterchange =
{
  GIMPLE_PASS, /* type */
  "linterchange", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_LINTERCHANGE, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
1677 1.1 mrg
/* The loop interchange pass object.  The actual work is done in the
   execute method defined below.  */
class pass_linterchange : public gimple_opt_pass
{
public:
  pass_linterchange (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_linterchange, ctxt)
  {}

  /* opt_pass methods: */
  /* Allow the pass to be instantiated more than once in a pipeline.  */
  opt_pass * clone () { return new pass_linterchange (m_ctxt); }
  /* Run only when loop interchange is enabled (flag_loop_interchange).  */
  virtual bool gate (function *) { return flag_loop_interchange; }
  virtual unsigned int execute (function *);

}; // class pass_linterchange
1691 1.1 mrg
1692 1.1 mrg
1693 1.1 mrg /* Return true if LOOP has proper form for interchange. We check three
1694 1.1 mrg conditions in the function:
1695 1.1 mrg 1) In general, a loop can be interchanged only if it doesn't have
1696 1.1 mrg basic blocks other than header, exit and latch besides possible
1697 1.1 mrg inner loop nest. This basically restricts loop interchange to
1698 1.1 mrg below form loop nests:
1699 1.1 mrg
1700 1.1 mrg header<---+
1701 1.1 mrg | |
1702 1.1 mrg v |
1703 1.1 mrg INNER_LOOP |
1704 1.1 mrg | |
1705 1.1 mrg v |
1706 1.1 mrg exit--->latch
1707 1.1 mrg
1708 1.1 mrg 2) Data reference in basic block that executes in different times
1709 1.1 mrg than loop head/exit is not allowed.
1710 1.1 mrg 3) Record the innermost outer loop that doesn't form rectangle loop
1711 1.1 mrg nest with LOOP. */
1712 1.1 mrg
static bool
proper_loop_form_for_interchange (struct loop *loop, struct loop **min_outer)
{
  edge e0, e1, exit;

  /* Don't interchange if loop has unsupported information for the moment.  */
  if (loop->safelen > 0
      || loop->constraints != 0
      || loop->can_be_parallel
      || loop->dont_vectorize
      || loop->force_vectorize
      || loop->in_oacc_kernels_region
      || loop->orig_loop_num != 0
      || loop->simduid != NULL_TREE)
    return false;

  /* Don't interchange if outer loop has basic block other than header, exit
     and latch.  */
  if (loop->inner != NULL
      && loop->num_nodes != loop->inner->num_nodes + 3)
    return false;

  /* Need a single exit whose source dominates the latch.  */
  if ((exit = single_dom_exit (loop)) == NULL)
    return false;

  /* Check control flow on loop header/exit blocks.  When the header is
     also the exit block it must have exactly two preds and two succs;
     otherwise the header and the exit block are checked separately and
     must be connected by simple (supported) edges.  */
  if (loop->header == exit->src
      && (EDGE_COUNT (loop->header->preds) != 2
	  || EDGE_COUNT (loop->header->succs) != 2))
    return false;
  else if (loop->header != exit->src
	   && (EDGE_COUNT (loop->header->preds) != 2
	       || !single_succ_p (loop->header)
	       || unsupported_edge (single_succ_edge (loop->header))
	       || EDGE_COUNT (exit->src->succs) != 2
	       || !single_pred_p (exit->src)
	       || unsupported_edge (single_pred_edge (exit->src))))
    return false;

  /* Of the header's two predecessors, one must be the latch and at least
     one must come from outside the loop.  */
  e0 = EDGE_PRED (loop->header, 0);
  e1 = EDGE_PRED (loop->header, 1);
  if (unsupported_edge (e0) || unsupported_edge (e1)
      || (e0->src != loop->latch && e1->src != loop->latch)
      || (e0->src->loop_father == loop && e1->src->loop_father == loop))
    return false;

  /* Likewise for the exit block's two successors: one must continue to
     the latch and at least one must leave the loop.  */
  e0 = EDGE_SUCC (exit->src, 0);
  e1 = EDGE_SUCC (exit->src, 1);
  if (unsupported_edge (e0) || unsupported_edge (e1)
      || (e0->dest != loop->latch && e1->dest != loop->latch)
      || (e0->dest->loop_father == loop && e1->dest->loop_father == loop))
    return false;

  /* Don't interchange if any reference is in basic block that doesn't
     dominate exit block.  */
  basic_block *bbs = get_loop_body (loop);
  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      /* Only blocks of LOOP itself that execute conditionally (i.e. do
	 not dominate the exit) need checking.  */
      if (bb->loop_father != loop
	  || bb == loop->header || bb == exit->src
	  || dominated_by_p (CDI_DOMINATORS, exit->src, bb))
	continue;

      /* A stmt with a virtual use in such a block is a memory reference
	 executing a different number of times than head/exit; reject.  */
      for (gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb);
	   !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
	if (gimple_vuse (gsi_stmt (gsi)))
	  {
	    free (bbs);
	    return false;
	  }
    }
  free (bbs);

  /* The number of latch executions must be analyzable in the enclosing
     loop.  */
  tree niters = number_of_latch_executions (loop);
  niters = analyze_scalar_evolution (loop_outer (loop), niters);
  if (!niters || chrec_contains_undetermined (niters))
    return false;

  /* Record the innermost outer loop that doesn't form rectangle loop nest.  */
  for (loop_p loop2 = loop_outer (loop);
       loop2 && flow_loop_nested_p (*min_outer, loop2);
       loop2 = loop_outer (loop2))
    {
      /* The nest is rectangular w.r.t. LOOP2 only if LOOP's niter is
	 invariant in LOOP2.  */
      niters = instantiate_scev (loop_preheader_edge (loop2),
				 loop_outer (loop), niters);
      if (!evolution_function_is_invariant_p (niters, loop2->num))
	{
	  *min_outer = loop2;
	  break;
	}
    }
  return true;
}
1808 1.1 mrg
1809 1.1 mrg /* Return true if any two adjacent loops in loop nest [INNERMOST, LOOP_NEST]
1810 1.1 mrg should be interchanged by looking into all DATAREFS. */
1811 1.1 mrg
1812 1.1 mrg static bool
1813 1.1 mrg should_interchange_loop_nest (struct loop *loop_nest, struct loop *innermost,
1814 1.1 mrg vec<data_reference_p> datarefs)
1815 1.1 mrg {
1816 1.1 mrg unsigned idx = loop_depth (innermost) - loop_depth (loop_nest);
1817 1.1 mrg gcc_assert (idx > 0);
1818 1.1 mrg
1819 1.1 mrg /* Check if any two adjacent loops should be interchanged. */
1820 1.1 mrg for (struct loop *loop = innermost;
1821 1.1 mrg loop != loop_nest; loop = loop_outer (loop), idx--)
1822 1.1 mrg if (should_interchange_loops (idx, idx - 1, datarefs, 0, 0,
1823 1.1 mrg loop == innermost, false))
1824 1.1 mrg return true;
1825 1.1 mrg
1826 1.1 mrg return false;
1827 1.1 mrg }
1828 1.1 mrg
1829 1.1 mrg /* Given loop nest LOOP_NEST and data references DATAREFS, compute data
1830 1.1 mrg dependences for loop interchange and store it in DDRS. Note we compute
1831 1.1 mrg dependences directly rather than call generic interface so that we can
1832 1.1 mrg return on unknown dependence instantly. */
1833 1.1 mrg
static bool
tree_loop_interchange_compute_ddrs (vec<loop_p> loop_nest,
				    vec<data_reference_p> datarefs,
				    vec<ddr_p> *ddrs)
{
  struct data_reference *a, *b;
  struct loop *innermost = loop_nest.last ();

  for (unsigned i = 0; datarefs.iterate (i, &a); ++i)
    {
      bool a_outer_p = gimple_bb (DR_STMT (a))->loop_father != innermost;
      /* Read-read pairs carry no dependence; only consider pairs with at
	 least one write.  */
      for (unsigned j = i + 1; datarefs.iterate (j, &b); ++j)
	if (DR_IS_WRITE (a) || DR_IS_WRITE (b))
	  {
	    bool b_outer_p = gimple_bb (DR_STMT (b))->loop_father != innermost;
	    /* Don't support multiple write references in outer loop.  */
	    if (a_outer_p && b_outer_p && DR_IS_WRITE (a) && DR_IS_WRITE (b))
	      return false;

	    ddr_p ddr = initialize_data_dependence_relation (a, b, loop_nest);
	    ddrs->safe_push (ddr);
	    compute_affine_dependence (ddr, loop_nest[0]);

	    /* Give up if ddr is unknown dependence or classic direct vector
	       is not available.  */
	    if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
		|| (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
		    && DDR_NUM_DIR_VECTS (ddr) == 0))
	      return false;

	    /* If either data references is in outer loop of nest, we require
	       no dependence here because the data reference need to be moved
	       into inner loop during interchange.  Two outer-loop refs to
	       syntactically the same location are tolerated.  */
	    if (a_outer_p && b_outer_p
		&& operand_equal_p (DR_REF (a), DR_REF (b), 0))
	      continue;
	    if (DDR_ARE_DEPENDENT (ddr) != chrec_known
		&& (a_outer_p || b_outer_p))
	      return false;
	  }
    }

  return true;
}
1878 1.1 mrg
1879 1.1 mrg /* Prune DATAREFS by removing any data reference not inside of LOOP. */
1880 1.1 mrg
1881 1.1 mrg static inline void
1882 1.1 mrg prune_datarefs_not_in_loop (struct loop *loop, vec<data_reference_p> datarefs)
1883 1.1 mrg {
1884 1.1 mrg unsigned i, j;
1885 1.1 mrg struct data_reference *dr;
1886 1.1 mrg
1887 1.1 mrg for (i = 0, j = 0; datarefs.iterate (i, &dr); ++i)
1888 1.1 mrg {
1889 1.1 mrg if (flow_bb_inside_loop_p (loop, gimple_bb (DR_STMT (dr))))
1890 1.1 mrg datarefs[j++] = dr;
1891 1.1 mrg else
1892 1.1 mrg {
1893 1.1 mrg if (dr->aux)
1894 1.1 mrg {
1895 1.1 mrg DR_ACCESS_STRIDE (dr)->release ();
1896 1.1 mrg delete (vec<tree> *) dr->aux;
1897 1.1 mrg }
1898 1.1 mrg free_data_ref (dr);
1899 1.1 mrg }
1900 1.1 mrg }
1901 1.1 mrg datarefs.truncate (j);
1902 1.1 mrg }
1903 1.1 mrg
1904 1.1 mrg /* Find and store data references in DATAREFS for LOOP nest. If there's
1905 1.1 mrg difficult data reference in a basic block, we shrink the loop nest to
1906 1.1 mrg inner loop of that basic block's father loop. On success, return the
1907 1.1 mrg outer loop of the result loop nest. */
1908 1.1 mrg
static struct loop *
prepare_data_references (struct loop *loop, vec<data_reference_p> *datarefs)
{
  struct loop *loop_nest = loop;
  vec<data_reference_p> *bb_refs;
  basic_block bb, *bbs = get_loop_body_in_dom_order (loop);

  /* BB->aux is used below to hold each block's ref vector; clear it first.  */
  for (unsigned i = 0; i < loop->num_nodes; i++)
    bbs[i]->aux = NULL;

  /* Find data references for all basic blocks.  Shrink loop nest on difficult
     data reference.  */
  for (unsigned i = 0; loop_nest && i < loop->num_nodes; ++i)
    {
      bb = bbs[i];
      if (!flow_bb_inside_loop_p (loop_nest, bb))
	continue;

      bb_refs = new vec<data_reference_p> ();
      if (find_data_references_in_bb (loop, bb, bb_refs) == chrec_dont_know)
	{
	  /* Shrink the nest to below the offending block's loop; if what
	     remains is not itself a nest of two or more loops, fail.  */
	  loop_nest = bb->loop_father->inner;
	  if (loop_nest && !loop_nest->inner)
	    loop_nest = NULL;

	  free_data_refs (*bb_refs);
	  delete bb_refs;
	}
      else if (bb_refs->is_empty ())
	delete bb_refs;
      else
	bb->aux = bb_refs;
    }

  /* Collect all data references in loop nest.  */
  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];
      if (!bb->aux)
	continue;

      bb_refs = (vec<data_reference_p> *) bb->aux;
      if (loop_nest && flow_bb_inside_loop_p (loop_nest, bb))
	datarefs->safe_splice (*bb_refs);
      else
	/* The block fell outside the (possibly shrunk) nest; drop refs.  */
	free_data_refs (*bb_refs);

      delete bb_refs;
      bb->aux = NULL;
    }
  free (bbs);

  return loop_nest;
}
1963 1.1 mrg
1964 1.1 mrg /* Given innermost LOOP, return true if perfect loop nest can be found and
1965 1.1 mrg data dependences can be computed. If succeed, record the perfect loop
1966 1.1 mrg nest in LOOP_NEST; record all data references in DATAREFS and record all
1967 1.1 mrg data dependence relations in DDRS.
1968 1.1 mrg
1969 1.1 mrg We do support a restricted form of imperfect loop nest, i.e, loop nest
1970 1.1 mrg with load/store in outer loop initializing/finalizing simple reduction
1971 1.1 mrg of the innermost loop. For such outer loop reference, we require that
1972 1.1 mrg    it has no dependence with others since it will be moved to inner loop
1973 1.1 mrg in interchange. */
1974 1.1 mrg
static bool
prepare_perfect_loop_nest (struct loop *loop, vec<loop_p> *loop_nest,
			   vec<data_reference_p> *datarefs, vec<ddr_p> *ddrs)
{
  struct loop *start_loop = NULL, *innermost = loop;
  struct loop *outermost = loops_for_fn (cfun)->tree_root;

  /* Find loop nest from the innermost loop outward.  OUTERMOST is updated
     by proper_loop_form_for_interchange to the innermost enclosing loop
     that does not form a rectangle loop nest with LOOP, so the walk stops
     before reaching it.  */
  while (loop->num != 0 && loop->inner == start_loop
	 && flow_loop_nested_p (outermost, loop))
    {
      if (!proper_loop_form_for_interchange (loop, &outermost))
	break;

      start_loop = loop;
      /* If this loop has sibling loop, the father loop won't be in perfect
	 loop nest.  */
      if (loop->next != NULL)
	break;

      loop = loop_outer (loop);
    }
  /* A nest needs at least two loops.  */
  if (!start_loop || !start_loop->inner)
    return false;

  /* Prepare the data reference vector for the loop nest, pruning outer
     loops we cannot handle.  */
  start_loop = prepare_data_references (start_loop, datarefs);
  if (!start_loop
      /* Check if there is no data reference.  */
      || datarefs->is_empty ()
      /* Check if there are too many of data references.  */
      || (int) datarefs->length () > MAX_DATAREFS)
    return false;

  /* Compute access strides for all data references, pruning outer
     loops we cannot analyze refs in.  */
  start_loop = compute_access_strides (start_loop, innermost, *datarefs);
  if (!start_loop)
    return false;

  /* Check if any interchange is profitable in the loop nest.  */
  if (!should_interchange_loop_nest (start_loop, innermost, *datarefs))
    return false;

  /* Check if data dependences can be computed for loop nest starting from
     start_loop.  Retry with successively smaller nests until one works or
     fewer than two loops remain.  */
  loop = start_loop;
  do {
    loop_nest->truncate (0);

    /* On a retry, refs of the stripped outer loop must be removed.  */
    if (loop != start_loop)
      prune_datarefs_not_in_loop (start_loop, *datarefs);

    if (find_loop_nest (start_loop, loop_nest)
	&& tree_loop_interchange_compute_ddrs (*loop_nest, *datarefs, ddrs))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file,
		   "\nConsider loop interchange for loop_nest<%d - %d>\n",
		   start_loop->num, innermost->num);

	if (loop != start_loop)
	  prune_access_strides_not_in_loop (start_loop, innermost, *datarefs);

	if (dump_file && (dump_flags & TDF_DETAILS))
	  dump_access_strides (*datarefs);

	return true;
      }

    free_dependence_relations (*ddrs);
    *ddrs = vNULL;
    /* Try to compute data dependences with the outermost loop stripped.  */
    loop = start_loop;
    start_loop = start_loop->inner;
  } while (start_loop && start_loop->inner);

  return false;
}
2056 1.1 mrg
2057 1.1 mrg /* Main entry for loop interchange pass. */
2058 1.1 mrg
2059 1.1 mrg unsigned int
2060 1.1 mrg pass_linterchange::execute (function *fun)
2061 1.1 mrg {
2062 1.1 mrg if (number_of_loops (fun) <= 2)
2063 1.1 mrg return 0;
2064 1.1 mrg
2065 1.1 mrg bool changed_p = false;
2066 1.1 mrg struct loop *loop;
2067 1.1 mrg FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
2068 1.1 mrg {
2069 1.1 mrg vec<loop_p> loop_nest = vNULL;
2070 1.1 mrg vec<data_reference_p> datarefs = vNULL;
2071 1.1 mrg vec<ddr_p> ddrs = vNULL;
2072 1.1 mrg if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs))
2073 1.1 mrg {
2074 1.1 mrg tree_loop_interchange loop_interchange (loop_nest);
2075 1.1 mrg changed_p |= loop_interchange.interchange (datarefs, ddrs);
2076 1.1 mrg }
2077 1.1 mrg free_dependence_relations (ddrs);
2078 1.1 mrg free_data_refs_with_aux (datarefs);
2079 1.1 mrg loop_nest.release ();
2080 1.1 mrg }
2081 1.1 mrg
2082 1.1 mrg return changed_p ? (TODO_update_ssa_only_virtuals) : 0;
2083 1.1 mrg }
2084 1.1 mrg
2085 1.1 mrg } // anon namespace
2086 1.1 mrg
/* Factory function: create an instance of the loop interchange pass for
   the pass manager.  */

gimple_opt_pass *
make_pass_linterchange (gcc::context *ctxt)
{
  return new pass_linterchange (ctxt);
}
2092