/* Loop distribution.
   Copyright (C) 2006-2022 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   Loop distribution is the dual of loop fusion.  It separates statements
   of a loop (or loop nest) into multiple loops (or loop nests) with the
   same loop header.  The major goal is to separate statements which may
   be vectorized from those that can't.  This pass implements distribution
   in the following steps:

     1) Seed partitions with specific type statements.  For now we support
	two types of seed statements: statements defining a variable used
	outside of the loop; statements storing to memory.
     2) Build reduced dependence graph (RDG) for the loop to be distributed.
	The vertices (RDG:V) model all statements in the loop and the edges
	(RDG:E) model flow and control dependencies between statements.
     3) Apart from the RDG, compute data dependencies between memory
	references.
     4) Starting from a seed statement, build up a partition by adding
	dependent statements according to the RDG's dependence information.
	A partition is classified as parallel type if it can be executed
	in parallel; or as sequential type if it can't.  A parallel type
	partition is further classified as a builtin kind if it can be
	implemented as builtin function calls.
     5) Build partition dependence graph (PG) based on data dependencies.
	The vertices (PG:V) model all partitions and the edges (PG:E) model
	all data dependencies between every pair of partitions.  In general,
	a data dependence is either known at compilation time or unknown.
	In C family languages, there exists a large number of compilation
	time unknown dependencies because of possible alias relations
	between data references.  We categorize PG's edges into two types:
	"true" edges that represent compilation time known data
	dependencies; "alias" edges for all other data dependencies.
     6) Traverse the subgraph of PG as if all "alias" edges didn't exist.
	Merge partitions in each strongly connected component (SCC)
	correspondingly.  Build a new PG for the merged partitions.
     7) Traverse the PG again and this time with both "true" and "alias"
	edges included.  We try to break SCCs by removing some edges.
	Because SCCs by "true" edges are all fused in step 6), we can break
	SCCs by removing some "alias" edges.  It's NP-hard to choose an
	optimal edge set, fortunately a simple approximation is good enough
	for us given the small problem scale.
     8) Collect all data dependencies of the removed "alias" edges.  Create
	runtime alias checks for the collected data dependencies.
     9) Version the loop under the condition of runtime alias checks.
	Given loop distribution generally introduces additional overhead,
	it is only useful if vectorization is achieved in the distributed
	loop.  We version the loop with an internal function call
	IFN_LOOP_DIST_ALIAS.  If no distributed loop can be vectorized, we
	simply remove the distributed loops and recover to the original one.

   TODO:
     1) We only distribute innermost two-level loop nests now.  We should
	extend it for arbitrary loop nests in the future.
     2) We only fuse partitions in SCCs now.  A better fusion algorithm is
	desired to minimize loop overhead, maximize parallelism and maximize
	data reuse.  */
92 1.1 mrg #include "config.h"
93 1.1 mrg #include "system.h"
94 1.1 mrg #include "coretypes.h"
95 1.1 mrg #include "backend.h"
96 1.1 mrg #include "tree.h"
97 1.1 mrg #include "gimple.h"
98 1.1 mrg #include "cfghooks.h"
99 1.1 mrg #include "tree-pass.h"
100 1.1 mrg #include "ssa.h"
101 1.1 mrg #include "gimple-pretty-print.h"
102 1.1 mrg #include "fold-const.h"
103 1.1 mrg #include "cfganal.h"
104 1.1 mrg #include "gimple-iterator.h"
105 1.1 mrg #include "gimplify-me.h"
106 1.1 mrg #include "stor-layout.h"
107 1.1 mrg #include "tree-cfg.h"
108 1.1 mrg #include "tree-ssa-loop-manip.h"
109 1.1 mrg #include "tree-ssa-loop-ivopts.h"
110 1.1 mrg #include "tree-ssa-loop.h"
111 1.1 mrg #include "tree-into-ssa.h"
112 1.1 mrg #include "tree-ssa.h"
113 1.1 mrg #include "cfgloop.h"
114 1.1 mrg #include "tree-scalar-evolution.h"
115 1.1 mrg #include "tree-vectorizer.h"
116 1.1 mrg #include "tree-eh.h"
117 1.1 mrg #include "gimple-fold.h"
118 1.1 mrg #include "tree-affine.h"
119 1.1 mrg #include "intl.h"
120 1.1 mrg #include "rtl.h"
121 1.1 mrg #include "memmodel.h"
122 1.1 mrg #include "optabs.h"
123 1.1 mrg
124 1.1 mrg
/* Cap on the number of data references considered for dependence
   analysis; taken from the --param loop-max-datarefs-for-datadeps knob.  */
#define MAX_DATAREFS_NUM \
	((unsigned) param_loop_max_datarefs_for_datadeps)

/* Threshold controlling number of distributed partitions.  Given it may
   be unnecessary if a memory stream cost model is invented in the future,
   we define it as a temporary macro, rather than a parameter.  */
#define NUM_PARTITION_THRESHOLD (4)
132 1.1 mrg
133 1.1 mrg /* Hashtable helpers. */
134 1.1 mrg
135 1.1 mrg struct ddr_hasher : nofree_ptr_hash <struct data_dependence_relation>
136 1.1 mrg {
137 1.1 mrg static inline hashval_t hash (const data_dependence_relation *);
138 1.1 mrg static inline bool equal (const data_dependence_relation *,
139 1.1 mrg const data_dependence_relation *);
140 1.1 mrg };
141 1.1 mrg
142 1.1 mrg /* Hash function for data dependence. */
143 1.1 mrg
144 1.1 mrg inline hashval_t
145 1.1 mrg ddr_hasher::hash (const data_dependence_relation *ddr)
146 1.1 mrg {
147 1.1 mrg inchash::hash h;
148 1.1 mrg h.add_ptr (DDR_A (ddr));
149 1.1 mrg h.add_ptr (DDR_B (ddr));
150 1.1 mrg return h.end ();
151 1.1 mrg }
152 1.1 mrg
153 1.1 mrg /* Hash table equality function for data dependence. */
154 1.1 mrg
155 1.1 mrg inline bool
156 1.1 mrg ddr_hasher::equal (const data_dependence_relation *ddr1,
157 1.1 mrg const data_dependence_relation *ddr2)
158 1.1 mrg {
159 1.1 mrg return (DDR_A (ddr1) == DDR_A (ddr2) && DDR_B (ddr1) == DDR_B (ddr2));
160 1.1 mrg }
161 1.1 mrg
162 1.1 mrg
163 1.1 mrg
164 1.1 mrg #define DR_INDEX(dr) ((uintptr_t) (dr)->aux)
165 1.1 mrg
166 1.1 mrg /* A Reduced Dependence Graph (RDG) vertex representing a statement. */
167 1.1 mrg struct rdg_vertex
168 1.1 mrg {
169 1.1 mrg /* The statement represented by this vertex. */
170 1.1 mrg gimple *stmt;
171 1.1 mrg
172 1.1 mrg /* Vector of data-references in this statement. */
173 1.1 mrg vec<data_reference_p> datarefs;
174 1.1 mrg
175 1.1 mrg /* True when the statement contains a write to memory. */
176 1.1 mrg bool has_mem_write;
177 1.1 mrg
178 1.1 mrg /* True when the statement contains a read from memory. */
179 1.1 mrg bool has_mem_reads;
180 1.1 mrg };
181 1.1 mrg
182 1.1 mrg #define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt
183 1.1 mrg #define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
184 1.1 mrg #define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
185 1.1 mrg #define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
186 1.1 mrg #define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
187 1.1 mrg #define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
188 1.1 mrg #define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
189 1.1 mrg #define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
190 1.1 mrg
/* Data dependence type.  The enumerator values double as the single-char
   tags printed in dot dumps.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

/* Dependence information attached to an edge of the RDG.  */

struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
};

/* Accessor for the rdg_edge payload hanging off a graph edge E.  */
#define RDGE_TYPE(E)        ((struct rdg_edge *) ((E)->data))->type
211 1.1 mrg
212 1.1 mrg /* Kind of distributed loop. */
213 1.1 mrg enum partition_kind {
214 1.1 mrg PKIND_NORMAL,
215 1.1 mrg /* Partial memset stands for a paritition can be distributed into a loop
216 1.1 mrg of memset calls, rather than a single memset call. It's handled just
217 1.1 mrg like a normal parition, i.e, distributed as separate loop, no memset
218 1.1 mrg call is generated.
219 1.1 mrg
220 1.1 mrg Note: This is a hacking fix trying to distribute ZERO-ing stmt in a
221 1.1 mrg loop nest as deep as possible. As a result, parloop achieves better
222 1.1 mrg parallelization by parallelizing deeper loop nest. This hack should
223 1.1 mrg be unnecessary and removed once distributed memset can be understood
224 1.1 mrg and analyzed in data reference analysis. See PR82604 for more. */
225 1.1 mrg PKIND_PARTIAL_MEMSET,
226 1.1 mrg PKIND_MEMSET, PKIND_MEMCPY, PKIND_MEMMOVE
227 1.1 mrg };
228 1.1 mrg
229 1.1 mrg /* Type of distributed loop. */
230 1.1 mrg enum partition_type {
231 1.1 mrg /* The distributed loop can be executed parallelly. */
232 1.1 mrg PTYPE_PARALLEL = 0,
233 1.1 mrg /* The distributed loop has to be executed sequentially. */
234 1.1 mrg PTYPE_SEQUENTIAL
235 1.1 mrg };
236 1.1 mrg
237 1.1 mrg /* Builtin info for loop distribution. */
238 1.1 mrg struct builtin_info
239 1.1 mrg {
240 1.1 mrg /* data-references a kind != PKIND_NORMAL partition is about. */
241 1.1 mrg data_reference_p dst_dr;
242 1.1 mrg data_reference_p src_dr;
243 1.1 mrg /* Base address and size of memory objects operated by the builtin. Note
244 1.1 mrg both dest and source memory objects must have the same size. */
245 1.1 mrg tree dst_base;
246 1.1 mrg tree src_base;
247 1.1 mrg tree size;
248 1.1 mrg /* Base and offset part of dst_base after stripping constant offset. This
249 1.1 mrg is only used in memset builtin distribution for now. */
250 1.1 mrg tree dst_base_base;
251 1.1 mrg unsigned HOST_WIDE_INT dst_base_offset;
252 1.1 mrg };
253 1.1 mrg
254 1.1 mrg /* Partition for loop distribution. */
255 1.1 mrg struct partition
256 1.1 mrg {
257 1.1 mrg /* Statements of the partition. */
258 1.1 mrg bitmap stmts;
259 1.1 mrg /* True if the partition defines variable which is used outside of loop. */
260 1.1 mrg bool reduction_p;
261 1.1 mrg location_t loc;
262 1.1 mrg enum partition_kind kind;
263 1.1 mrg enum partition_type type;
264 1.1 mrg /* Data references in the partition. */
265 1.1 mrg bitmap datarefs;
266 1.1 mrg /* Information of builtin parition. */
267 1.1 mrg struct builtin_info *builtin;
268 1.1 mrg };
269 1.1 mrg
/* Partitions are fused because of different reasons.  */
enum fuse_type
{
  FUSE_NON_BUILTIN = 0,
  FUSE_REDUCTION = 1,
  FUSE_SHARE_REF = 2,
  FUSE_SAME_SCC = 3,
  FUSE_FINALIZE = 4
};

/* Description of the different fusing reasons, indexed by enum fuse_type.  */
static const char *fuse_message[] = {
  "they are non-builtins",
  "they have reductions",
  "they have shared memory refs",
  "they are in the same dependence scc",
  "there is no point to distribute loop"};
287 1.1 mrg
288 1.1 mrg
289 1.1 mrg /* Dump vertex I in RDG to FILE. */
290 1.1 mrg
291 1.1 mrg static void
292 1.1 mrg dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
293 1.1 mrg {
294 1.1 mrg struct vertex *v = &(rdg->vertices[i]);
295 1.1 mrg struct graph_edge *e;
296 1.1 mrg
297 1.1 mrg fprintf (file, "(vertex %d: (%s%s) (in:", i,
298 1.1 mrg RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
299 1.1 mrg RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
300 1.1 mrg
301 1.1 mrg if (v->pred)
302 1.1 mrg for (e = v->pred; e; e = e->pred_next)
303 1.1 mrg fprintf (file, " %d", e->src);
304 1.1 mrg
305 1.1 mrg fprintf (file, ") (out:");
306 1.1 mrg
307 1.1 mrg if (v->succ)
308 1.1 mrg for (e = v->succ; e; e = e->succ_next)
309 1.1 mrg fprintf (file, " %d", e->dest);
310 1.1 mrg
311 1.1 mrg fprintf (file, ")\n");
312 1.1 mrg print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
313 1.1 mrg fprintf (file, ")\n");
314 1.1 mrg }
315 1.1 mrg
316 1.1 mrg /* Call dump_rdg_vertex on stderr. */
317 1.1 mrg
318 1.1 mrg DEBUG_FUNCTION void
319 1.1 mrg debug_rdg_vertex (struct graph *rdg, int i)
320 1.1 mrg {
321 1.1 mrg dump_rdg_vertex (stderr, rdg, i);
322 1.1 mrg }
323 1.1 mrg
324 1.1 mrg /* Dump the reduced dependence graph RDG to FILE. */
325 1.1 mrg
326 1.1 mrg static void
327 1.1 mrg dump_rdg (FILE *file, struct graph *rdg)
328 1.1 mrg {
329 1.1 mrg fprintf (file, "(rdg\n");
330 1.1 mrg for (int i = 0; i < rdg->n_vertices; i++)
331 1.1 mrg dump_rdg_vertex (file, rdg, i);
332 1.1 mrg fprintf (file, ")\n");
333 1.1 mrg }
334 1.1 mrg
335 1.1 mrg /* Call dump_rdg on stderr. */
336 1.1 mrg
337 1.1 mrg DEBUG_FUNCTION void
338 1.1 mrg debug_rdg (struct graph *rdg)
339 1.1 mrg {
340 1.1 mrg dump_rdg (stderr, rdg);
341 1.1 mrg }
342 1.1 mrg
343 1.1 mrg static void
344 1.1 mrg dot_rdg_1 (FILE *file, struct graph *rdg)
345 1.1 mrg {
346 1.1 mrg int i;
347 1.1 mrg pretty_printer buffer;
348 1.1 mrg pp_needs_newline (&buffer) = false;
349 1.1 mrg buffer.buffer->stream = file;
350 1.1 mrg
351 1.1 mrg fprintf (file, "digraph RDG {\n");
352 1.1 mrg
353 1.1 mrg for (i = 0; i < rdg->n_vertices; i++)
354 1.1 mrg {
355 1.1 mrg struct vertex *v = &(rdg->vertices[i]);
356 1.1 mrg struct graph_edge *e;
357 1.1 mrg
358 1.1 mrg fprintf (file, "%d [label=\"[%d] ", i, i);
359 1.1 mrg pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
360 1.1 mrg pp_flush (&buffer);
361 1.1 mrg fprintf (file, "\"]\n");
362 1.1 mrg
363 1.1 mrg /* Highlight reads from memory. */
364 1.1 mrg if (RDG_MEM_READS_STMT (rdg, i))
365 1.1 mrg fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
366 1.1 mrg
367 1.1 mrg /* Highlight stores to memory. */
368 1.1 mrg if (RDG_MEM_WRITE_STMT (rdg, i))
369 1.1 mrg fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
370 1.1 mrg
371 1.1 mrg if (v->succ)
372 1.1 mrg for (e = v->succ; e; e = e->succ_next)
373 1.1 mrg switch (RDGE_TYPE (e))
374 1.1 mrg {
375 1.1 mrg case flow_dd:
376 1.1 mrg /* These are the most common dependences: don't print these. */
377 1.1 mrg fprintf (file, "%d -> %d \n", i, e->dest);
378 1.1 mrg break;
379 1.1 mrg
380 1.1 mrg case control_dd:
381 1.1 mrg fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
382 1.1 mrg break;
383 1.1 mrg
384 1.1 mrg default:
385 1.1 mrg gcc_unreachable ();
386 1.1 mrg }
387 1.1 mrg }
388 1.1 mrg
389 1.1 mrg fprintf (file, "}\n\n");
390 1.1 mrg }
391 1.1 mrg
392 1.1 mrg /* Display the Reduced Dependence Graph using dotty. */
393 1.1 mrg
394 1.1 mrg DEBUG_FUNCTION void
395 1.1 mrg dot_rdg (struct graph *rdg)
396 1.1 mrg {
397 1.1 mrg /* When debugging, you may want to enable the following code. */
398 1.1 mrg #ifdef HAVE_POPEN
399 1.1 mrg FILE *file = popen ("dot -Tx11", "w");
400 1.1 mrg if (!file)
401 1.1 mrg return;
402 1.1 mrg dot_rdg_1 (file, rdg);
403 1.1 mrg fflush (file);
404 1.1 mrg close (fileno (file));
405 1.1 mrg pclose (file);
406 1.1 mrg #else
407 1.1 mrg dot_rdg_1 (stderr, rdg);
408 1.1 mrg #endif
409 1.1 mrg }
410 1.1 mrg
411 1.1 mrg /* Returns the index of STMT in RDG. */
412 1.1 mrg
413 1.1 mrg static int
414 1.1 mrg rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple *stmt)
415 1.1 mrg {
416 1.1 mrg int index = gimple_uid (stmt);
417 1.1 mrg gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
418 1.1 mrg return index;
419 1.1 mrg }
420 1.1 mrg
421 1.1 mrg /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
422 1.1 mrg the index of DEF in RDG. */
423 1.1 mrg
424 1.1 mrg static void
425 1.1 mrg create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
426 1.1 mrg {
427 1.1 mrg use_operand_p imm_use_p;
428 1.1 mrg imm_use_iterator iterator;
429 1.1 mrg
430 1.1 mrg FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
431 1.1 mrg {
432 1.1 mrg struct graph_edge *e;
433 1.1 mrg int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
434 1.1 mrg
435 1.1 mrg if (use < 0)
436 1.1 mrg continue;
437 1.1 mrg
438 1.1 mrg e = add_edge (rdg, idef, use);
439 1.1 mrg e->data = XNEW (struct rdg_edge);
440 1.1 mrg RDGE_TYPE (e) = flow_dd;
441 1.1 mrg }
442 1.1 mrg }
443 1.1 mrg
444 1.1 mrg /* Creates an edge for the control dependences of BB to the vertex V. */
445 1.1 mrg
446 1.1 mrg static void
447 1.1 mrg create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
448 1.1 mrg int v, control_dependences *cd)
449 1.1 mrg {
450 1.1 mrg bitmap_iterator bi;
451 1.1 mrg unsigned edge_n;
452 1.1 mrg EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
453 1.1 mrg 0, edge_n, bi)
454 1.1 mrg {
455 1.1 mrg basic_block cond_bb = cd->get_edge_src (edge_n);
456 1.1 mrg gimple *stmt = last_stmt (cond_bb);
457 1.1 mrg if (stmt && is_ctrl_stmt (stmt))
458 1.1 mrg {
459 1.1 mrg struct graph_edge *e;
460 1.1 mrg int c = rdg_vertex_for_stmt (rdg, stmt);
461 1.1 mrg if (c < 0)
462 1.1 mrg continue;
463 1.1 mrg
464 1.1 mrg e = add_edge (rdg, c, v);
465 1.1 mrg e->data = XNEW (struct rdg_edge);
466 1.1 mrg RDGE_TYPE (e) = control_dd;
467 1.1 mrg }
468 1.1 mrg }
469 1.1 mrg }
470 1.1 mrg
471 1.1 mrg /* Creates the edges of the reduced dependence graph RDG. */
472 1.1 mrg
473 1.1 mrg static void
474 1.1 mrg create_rdg_flow_edges (struct graph *rdg)
475 1.1 mrg {
476 1.1 mrg int i;
477 1.1 mrg def_operand_p def_p;
478 1.1 mrg ssa_op_iter iter;
479 1.1 mrg
480 1.1 mrg for (i = 0; i < rdg->n_vertices; i++)
481 1.1 mrg FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
482 1.1 mrg iter, SSA_OP_DEF)
483 1.1 mrg create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
484 1.1 mrg }
485 1.1 mrg
486 1.1 mrg /* Creates the edges of the reduced dependence graph RDG. */
487 1.1 mrg
488 1.1 mrg static void
489 1.1 mrg create_rdg_cd_edges (struct graph *rdg, control_dependences *cd, loop_p loop)
490 1.1 mrg {
491 1.1 mrg int i;
492 1.1 mrg
493 1.1 mrg for (i = 0; i < rdg->n_vertices; i++)
494 1.1 mrg {
495 1.1 mrg gimple *stmt = RDG_STMT (rdg, i);
496 1.1 mrg if (gimple_code (stmt) == GIMPLE_PHI)
497 1.1 mrg {
498 1.1 mrg edge_iterator ei;
499 1.1 mrg edge e;
500 1.1 mrg FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
501 1.1 mrg if (flow_bb_inside_loop_p (loop, e->src))
502 1.1 mrg create_edge_for_control_dependence (rdg, e->src, i, cd);
503 1.1 mrg }
504 1.1 mrg else
505 1.1 mrg create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
506 1.1 mrg }
507 1.1 mrg }
508 1.1 mrg
509 1.1 mrg
510 1.1 mrg class loop_distribution
511 1.1 mrg {
512 1.1 mrg private:
513 1.1 mrg /* The loop (nest) to be distributed. */
514 1.1 mrg vec<loop_p> loop_nest;
515 1.1 mrg
516 1.1 mrg /* Vector of data references in the loop to be distributed. */
517 1.1 mrg vec<data_reference_p> datarefs_vec;
518 1.1 mrg
519 1.1 mrg /* If there is nonaddressable data reference in above vector. */
520 1.1 mrg bool has_nonaddressable_dataref_p;
521 1.1 mrg
522 1.1 mrg /* Store index of data reference in aux field. */
523 1.1 mrg
524 1.1 mrg /* Hash table for data dependence relation in the loop to be distributed. */
525 1.1 mrg hash_table<ddr_hasher> *ddrs_table;
526 1.1 mrg
527 1.1 mrg /* Array mapping basic block's index to its topological order. */
528 1.1 mrg int *bb_top_order_index;
529 1.1 mrg /* And size of the array. */
530 1.1 mrg int bb_top_order_index_size;
531 1.1 mrg
532 1.1 mrg /* Build the vertices of the reduced dependence graph RDG. Return false
533 1.1 mrg if that failed. */
534 1.1 mrg bool create_rdg_vertices (struct graph *rdg, const vec<gimple *> &stmts,
535 1.1 mrg loop_p loop);
536 1.1 mrg
537 1.1 mrg /* Initialize STMTS with all the statements of LOOP. We use topological
538 1.1 mrg order to discover all statements. The order is important because
539 1.1 mrg generate_loops_for_partition is using the same traversal for identifying
540 1.1 mrg statements in loop copies. */
541 1.1 mrg void stmts_from_loop (class loop *loop, vec<gimple *> *stmts);
542 1.1 mrg
543 1.1 mrg
544 1.1 mrg /* Build the Reduced Dependence Graph (RDG) with one vertex per statement of
545 1.1 mrg LOOP, and one edge per flow dependence or control dependence from control
546 1.1 mrg dependence CD. During visiting each statement, data references are also
547 1.1 mrg collected and recorded in global data DATAREFS_VEC. */
548 1.1 mrg struct graph * build_rdg (class loop *loop, control_dependences *cd);
549 1.1 mrg
550 1.1 mrg /* Merge PARTITION into the partition DEST. RDG is the reduced dependence
551 1.1 mrg graph and we update type for result partition if it is non-NULL. */
552 1.1 mrg void partition_merge_into (struct graph *rdg,
553 1.1 mrg partition *dest, partition *partition,
554 1.1 mrg enum fuse_type ft);
555 1.1 mrg
556 1.1 mrg
557 1.1 mrg /* Return data dependence relation for data references A and B. The two
558 1.1 mrg data references must be in lexicographic order wrto reduced dependence
559 1.1 mrg graph RDG. We firstly try to find ddr from global ddr hash table. If
560 1.1 mrg it doesn't exist, compute the ddr and cache it. */
561 1.1 mrg data_dependence_relation * get_data_dependence (struct graph *rdg,
562 1.1 mrg data_reference_p a,
563 1.1 mrg data_reference_p b);
564 1.1 mrg
565 1.1 mrg
566 1.1 mrg /* In reduced dependence graph RDG for loop distribution, return true if
567 1.1 mrg dependence between references DR1 and DR2 leads to a dependence cycle
568 1.1 mrg and such dependence cycle can't be resolved by runtime alias check. */
569 1.1 mrg bool data_dep_in_cycle_p (struct graph *rdg, data_reference_p dr1,
570 1.1 mrg data_reference_p dr2);
571 1.1 mrg
572 1.1 mrg
573 1.1 mrg /* Given reduced dependence graph RDG, PARTITION1 and PARTITION2, update
574 1.1 mrg PARTITION1's type after merging PARTITION2 into PARTITION1. */
575 1.1 mrg void update_type_for_merge (struct graph *rdg,
576 1.1 mrg partition *partition1, partition *partition2);
577 1.1 mrg
578 1.1 mrg
579 1.1 mrg /* Returns a partition with all the statements needed for computing
580 1.1 mrg the vertex V of the RDG, also including the loop exit conditions. */
581 1.1 mrg partition *build_rdg_partition_for_vertex (struct graph *rdg, int v);
582 1.1 mrg
583 1.1 mrg /* Given data references DST_DR and SRC_DR in loop nest LOOP and RDG, classify
584 1.1 mrg if it forms builtin memcpy or memmove call. */
585 1.1 mrg void classify_builtin_ldst (loop_p loop, struct graph *rdg, partition *partition,
586 1.1 mrg data_reference_p dst_dr, data_reference_p src_dr);
587 1.1 mrg
588 1.1 mrg /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
589 1.1 mrg For the moment we detect memset, memcpy and memmove patterns. Bitmap
590 1.1 mrg STMT_IN_ALL_PARTITIONS contains statements belonging to all partitions.
591 1.1 mrg Returns true if there is a reduction in all partitions and we
592 1.1 mrg possibly did not mark PARTITION as having one for this reason. */
593 1.1 mrg
594 1.1 mrg bool
595 1.1 mrg classify_partition (loop_p loop,
596 1.1 mrg struct graph *rdg, partition *partition,
597 1.1 mrg bitmap stmt_in_all_partitions);
598 1.1 mrg
599 1.1 mrg
600 1.1 mrg /* Returns true when PARTITION1 and PARTITION2 access the same memory
601 1.1 mrg object in RDG. */
602 1.1 mrg bool share_memory_accesses (struct graph *rdg,
603 1.1 mrg partition *partition1, partition *partition2);
604 1.1 mrg
605 1.1 mrg /* For each seed statement in STARTING_STMTS, this function builds
606 1.1 mrg partition for it by adding depended statements according to RDG.
607 1.1 mrg All partitions are recorded in PARTITIONS. */
608 1.1 mrg void rdg_build_partitions (struct graph *rdg,
609 1.1 mrg vec<gimple *> starting_stmts,
610 1.1 mrg vec<partition *> *partitions);
611 1.1 mrg
612 1.1 mrg /* Compute partition dependence created by the data references in DRS1
613 1.1 mrg and DRS2, modify and return DIR according to that. IF ALIAS_DDR is
614 1.1 mrg not NULL, we record dependence introduced by possible alias between
615 1.1 mrg two data references in ALIAS_DDRS; otherwise, we simply ignore such
616 1.1 mrg dependence as if it doesn't exist at all. */
617 1.1 mrg int pg_add_dependence_edges (struct graph *rdg, int dir, bitmap drs1,
618 1.1 mrg bitmap drs2, vec<ddr_p> *alias_ddrs);
619 1.1 mrg
620 1.1 mrg
621 1.1 mrg /* Build and return partition dependence graph for PARTITIONS. RDG is
622 1.1 mrg reduced dependence graph for the loop to be distributed. If IGNORE_ALIAS_P
623 1.1 mrg is true, data dependence caused by possible alias between references
624 1.1 mrg is ignored, as if it doesn't exist at all; otherwise all depdendences
625 1.1 mrg are considered. */
626 1.1 mrg struct graph *build_partition_graph (struct graph *rdg,
627 1.1 mrg vec<struct partition *> *partitions,
628 1.1 mrg bool ignore_alias_p);
629 1.1 mrg
630 1.1 mrg /* Given reduced dependence graph RDG merge strong connected components
631 1.1 mrg of PARTITIONS. If IGNORE_ALIAS_P is true, data dependence caused by
632 1.1 mrg possible alias between references is ignored, as if it doesn't exist
633 1.1 mrg at all; otherwise all depdendences are considered. */
634 1.1 mrg void merge_dep_scc_partitions (struct graph *rdg, vec<struct partition *>
635 1.1 mrg *partitions, bool ignore_alias_p);
636 1.1 mrg
637 1.1 mrg /* This is the main function breaking strong conected components in
638 1.1 mrg PARTITIONS giving reduced depdendence graph RDG. Store data dependence
639 1.1 mrg relations for runtime alias check in ALIAS_DDRS. */
640 1.1 mrg void break_alias_scc_partitions (struct graph *rdg, vec<struct partition *>
641 1.1 mrg *partitions, vec<ddr_p> *alias_ddrs);
642 1.1 mrg
643 1.1 mrg
644 1.1 mrg /* Fuse PARTITIONS of LOOP if necessary before finalizing distribution.
645 1.1 mrg ALIAS_DDRS contains ddrs which need runtime alias check. */
646 1.1 mrg void finalize_partitions (class loop *loop, vec<struct partition *>
647 1.1 mrg *partitions, vec<ddr_p> *alias_ddrs);
648 1.1 mrg
649 1.1 mrg /* Distributes the code from LOOP in such a way that producer statements
650 1.1 mrg are placed before consumer statements. Tries to separate only the
651 1.1 mrg statements from STMTS into separate loops. Returns the number of
652 1.1 mrg distributed loops. Set NB_CALLS to number of generated builtin calls.
653 1.1 mrg Set *DESTROY_P to whether LOOP needs to be destroyed. */
654 1.1 mrg int distribute_loop (class loop *loop, const vec<gimple *> &stmts,
655 1.1 mrg control_dependences *cd, int *nb_calls, bool *destroy_p,
656 1.1 mrg bool only_patterns_p);
657 1.1 mrg
658 1.1 mrg /* Transform loops which mimic the effects of builtins rawmemchr or strlen and
659 1.1 mrg replace them accordingly. */
660 1.1 mrg bool transform_reduction_loop (loop_p loop);
661 1.1 mrg
662 1.1 mrg /* Compute topological order for basic blocks. Topological order is
663 1.1 mrg needed because data dependence is computed for data references in
664 1.1 mrg lexicographical order. */
665 1.1 mrg void bb_top_order_init (void);
666 1.1 mrg
667 1.1 mrg void bb_top_order_destroy (void);
668 1.1 mrg
669 1.1 mrg public:
670 1.1 mrg
671 1.1 mrg /* Getter for bb_top_order. */
672 1.1 mrg
673 1.1 mrg inline int get_bb_top_order_index_size (void)
674 1.1 mrg {
675 1.1 mrg return bb_top_order_index_size;
676 1.1 mrg }
677 1.1 mrg
678 1.1 mrg inline int get_bb_top_order_index (int i)
679 1.1 mrg {
680 1.1 mrg return bb_top_order_index[i];
681 1.1 mrg }
682 1.1 mrg
683 1.1 mrg unsigned int execute (function *fun);
684 1.1 mrg };
685 1.1 mrg
686 1.1 mrg
687 1.1 mrg /* If X has a smaller topological sort number than Y, returns -1;
688 1.1 mrg if greater, returns 1. */
689 1.1 mrg static int
690 1.1 mrg bb_top_order_cmp_r (const void *x, const void *y, void *loop)
691 1.1 mrg {
692 1.1 mrg loop_distribution *_loop =
693 1.1 mrg (loop_distribution *) loop;
694 1.1 mrg
695 1.1 mrg basic_block bb1 = *(const basic_block *) x;
696 1.1 mrg basic_block bb2 = *(const basic_block *) y;
697 1.1 mrg
698 1.1 mrg int bb_top_order_index_size = _loop->get_bb_top_order_index_size ();
699 1.1 mrg
700 1.1 mrg gcc_assert (bb1->index < bb_top_order_index_size
701 1.1 mrg && bb2->index < bb_top_order_index_size);
702 1.1 mrg gcc_assert (bb1 == bb2
703 1.1 mrg || _loop->get_bb_top_order_index(bb1->index)
704 1.1 mrg != _loop->get_bb_top_order_index(bb2->index));
705 1.1 mrg
706 1.1 mrg return (_loop->get_bb_top_order_index(bb1->index) -
707 1.1 mrg _loop->get_bb_top_order_index(bb2->index));
708 1.1 mrg }
709 1.1 mrg
710 1.1 mrg bool
711 1.1 mrg loop_distribution::create_rdg_vertices (struct graph *rdg,
712 1.1 mrg const vec<gimple *> &stmts,
713 1.1 mrg loop_p loop)
714 1.1 mrg {
715 1.1 mrg int i;
716 1.1 mrg gimple *stmt;
717 1.1 mrg
718 1.1 mrg FOR_EACH_VEC_ELT (stmts, i, stmt)
719 1.1 mrg {
720 1.1 mrg struct vertex *v = &(rdg->vertices[i]);
721 1.1 mrg
722 1.1 mrg /* Record statement to vertex mapping. */
723 1.1 mrg gimple_set_uid (stmt, i);
724 1.1 mrg
725 1.1 mrg v->data = XNEW (struct rdg_vertex);
726 1.1 mrg RDGV_STMT (v) = stmt;
727 1.1 mrg RDGV_DATAREFS (v).create (0);
728 1.1 mrg RDGV_HAS_MEM_WRITE (v) = false;
729 1.1 mrg RDGV_HAS_MEM_READS (v) = false;
730 1.1 mrg if (gimple_code (stmt) == GIMPLE_PHI)
731 1.1 mrg continue;
732 1.1 mrg
733 1.1 mrg unsigned drp = datarefs_vec.length ();
734 1.1 mrg if (!find_data_references_in_stmt (loop, stmt, &datarefs_vec))
735 1.1 mrg return false;
736 1.1 mrg for (unsigned j = drp; j < datarefs_vec.length (); ++j)
737 1.1 mrg {
738 1.1 mrg data_reference_p dr = datarefs_vec[j];
739 1.1 mrg if (DR_IS_READ (dr))
740 1.1 mrg RDGV_HAS_MEM_READS (v) = true;
741 1.1 mrg else
742 1.1 mrg RDGV_HAS_MEM_WRITE (v) = true;
743 1.1 mrg RDGV_DATAREFS (v).safe_push (dr);
744 1.1 mrg has_nonaddressable_dataref_p |= may_be_nonaddressable_p (dr->ref);
745 1.1 mrg }
746 1.1 mrg }
747 1.1 mrg return true;
748 1.1 mrg }
749 1.1 mrg
750 1.1 mrg void
751 1.1 mrg loop_distribution::stmts_from_loop (class loop *loop, vec<gimple *> *stmts)
752 1.1 mrg {
753 1.1 mrg unsigned int i;
754 1.1 mrg basic_block *bbs = get_loop_body_in_custom_order (loop, this, bb_top_order_cmp_r);
755 1.1 mrg
756 1.1 mrg for (i = 0; i < loop->num_nodes; i++)
757 1.1 mrg {
758 1.1 mrg basic_block bb = bbs[i];
759 1.1 mrg
760 1.1 mrg for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
761 1.1 mrg gsi_next (&bsi))
762 1.1 mrg if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
763 1.1 mrg stmts->safe_push (bsi.phi ());
764 1.1 mrg
765 1.1 mrg for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
766 1.1 mrg gsi_next (&bsi))
767 1.1 mrg {
768 1.1 mrg gimple *stmt = gsi_stmt (bsi);
769 1.1 mrg if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
770 1.1 mrg stmts->safe_push (stmt);
771 1.1 mrg }
772 1.1 mrg }
773 1.1 mrg
774 1.1 mrg free (bbs);
775 1.1 mrg }
776 1.1 mrg
777 1.1 mrg /* Free the reduced dependence graph RDG. */
778 1.1 mrg
779 1.1 mrg static void
780 1.1 mrg free_rdg (struct graph *rdg)
781 1.1 mrg {
782 1.1 mrg int i;
783 1.1 mrg
784 1.1 mrg for (i = 0; i < rdg->n_vertices; i++)
785 1.1 mrg {
786 1.1 mrg struct vertex *v = &(rdg->vertices[i]);
787 1.1 mrg struct graph_edge *e;
788 1.1 mrg
789 1.1 mrg for (e = v->succ; e; e = e->succ_next)
790 1.1 mrg free (e->data);
791 1.1 mrg
792 1.1 mrg if (v->data)
793 1.1 mrg {
794 1.1 mrg gimple_set_uid (RDGV_STMT (v), -1);
795 1.1 mrg (RDGV_DATAREFS (v)).release ();
796 1.1 mrg free (v->data);
797 1.1 mrg }
798 1.1 mrg }
799 1.1 mrg
800 1.1 mrg free_graph (rdg);
801 1.1 mrg }
802 1.1 mrg
803 1.1 mrg struct graph *
804 1.1 mrg loop_distribution::build_rdg (class loop *loop, control_dependences *cd)
805 1.1 mrg {
806 1.1 mrg struct graph *rdg;
807 1.1 mrg
808 1.1 mrg /* Create the RDG vertices from the stmts of the loop nest. */
809 1.1 mrg auto_vec<gimple *, 10> stmts;
810 1.1 mrg stmts_from_loop (loop, &stmts);
811 1.1 mrg rdg = new_graph (stmts.length ());
812 1.1 mrg if (!create_rdg_vertices (rdg, stmts, loop))
813 1.1 mrg {
814 1.1 mrg free_rdg (rdg);
815 1.1 mrg return NULL;
816 1.1 mrg }
817 1.1 mrg stmts.release ();
818 1.1 mrg
819 1.1 mrg create_rdg_flow_edges (rdg);
820 1.1 mrg if (cd)
821 1.1 mrg create_rdg_cd_edges (rdg, cd, loop);
822 1.1 mrg
823 1.1 mrg return rdg;
824 1.1 mrg }
825 1.1 mrg
826 1.1 mrg
827 1.1 mrg /* Allocate and initialize a partition from BITMAP. */
828 1.1 mrg
829 1.1 mrg static partition *
830 1.1 mrg partition_alloc (void)
831 1.1 mrg {
832 1.1 mrg partition *partition = XCNEW (struct partition);
833 1.1 mrg partition->stmts = BITMAP_ALLOC (NULL);
834 1.1 mrg partition->reduction_p = false;
835 1.1 mrg partition->loc = UNKNOWN_LOCATION;
836 1.1 mrg partition->kind = PKIND_NORMAL;
837 1.1 mrg partition->type = PTYPE_PARALLEL;
838 1.1 mrg partition->datarefs = BITMAP_ALLOC (NULL);
839 1.1 mrg return partition;
840 1.1 mrg }
841 1.1 mrg
842 1.1 mrg /* Free PARTITION. */
843 1.1 mrg
844 1.1 mrg static void
845 1.1 mrg partition_free (partition *partition)
846 1.1 mrg {
847 1.1 mrg BITMAP_FREE (partition->stmts);
848 1.1 mrg BITMAP_FREE (partition->datarefs);
849 1.1 mrg if (partition->builtin)
850 1.1 mrg free (partition->builtin);
851 1.1 mrg
852 1.1 mrg free (partition);
853 1.1 mrg }
854 1.1 mrg
855 1.1 mrg /* Returns true if the partition can be generated as a builtin. */
856 1.1 mrg
857 1.1 mrg static bool
858 1.1 mrg partition_builtin_p (partition *partition)
859 1.1 mrg {
860 1.1 mrg return partition->kind > PKIND_PARTIAL_MEMSET;
861 1.1 mrg }
862 1.1 mrg
863 1.1 mrg /* Returns true if the partition contains a reduction. */
864 1.1 mrg
865 1.1 mrg static bool
866 1.1 mrg partition_reduction_p (partition *partition)
867 1.1 mrg {
868 1.1 mrg return partition->reduction_p;
869 1.1 mrg }
870 1.1 mrg
871 1.1 mrg void
872 1.1 mrg loop_distribution::partition_merge_into (struct graph *rdg,
873 1.1 mrg partition *dest, partition *partition, enum fuse_type ft)
874 1.1 mrg {
875 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
876 1.1 mrg {
877 1.1 mrg fprintf (dump_file, "Fuse partitions because %s:\n", fuse_message[ft]);
878 1.1 mrg fprintf (dump_file, " Part 1: ");
879 1.1 mrg dump_bitmap (dump_file, dest->stmts);
880 1.1 mrg fprintf (dump_file, " Part 2: ");
881 1.1 mrg dump_bitmap (dump_file, partition->stmts);
882 1.1 mrg }
883 1.1 mrg
884 1.1 mrg dest->kind = PKIND_NORMAL;
885 1.1 mrg if (dest->type == PTYPE_PARALLEL)
886 1.1 mrg dest->type = partition->type;
887 1.1 mrg
888 1.1 mrg bitmap_ior_into (dest->stmts, partition->stmts);
889 1.1 mrg if (partition_reduction_p (partition))
890 1.1 mrg dest->reduction_p = true;
891 1.1 mrg
892 1.1 mrg /* Further check if any data dependence prevents us from executing the
893 1.1 mrg new partition parallelly. */
894 1.1 mrg if (dest->type == PTYPE_PARALLEL && rdg != NULL)
895 1.1 mrg update_type_for_merge (rdg, dest, partition);
896 1.1 mrg
897 1.1 mrg bitmap_ior_into (dest->datarefs, partition->datarefs);
898 1.1 mrg }
899 1.1 mrg
900 1.1 mrg
901 1.1 mrg /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
902 1.1 mrg the LOOP. */
903 1.1 mrg
904 1.1 mrg static bool
905 1.1 mrg ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
906 1.1 mrg {
907 1.1 mrg imm_use_iterator imm_iter;
908 1.1 mrg use_operand_p use_p;
909 1.1 mrg
910 1.1 mrg FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
911 1.1 mrg {
912 1.1 mrg if (is_gimple_debug (USE_STMT (use_p)))
913 1.1 mrg continue;
914 1.1 mrg
915 1.1 mrg basic_block use_bb = gimple_bb (USE_STMT (use_p));
916 1.1 mrg if (!flow_bb_inside_loop_p (loop, use_bb))
917 1.1 mrg return true;
918 1.1 mrg }
919 1.1 mrg
920 1.1 mrg return false;
921 1.1 mrg }
922 1.1 mrg
923 1.1 mrg /* Returns true when STMT defines a scalar variable used after the
924 1.1 mrg loop LOOP. */
925 1.1 mrg
926 1.1 mrg static bool
927 1.1 mrg stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple *stmt)
928 1.1 mrg {
929 1.1 mrg def_operand_p def_p;
930 1.1 mrg ssa_op_iter op_iter;
931 1.1 mrg
932 1.1 mrg if (gimple_code (stmt) == GIMPLE_PHI)
933 1.1 mrg return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);
934 1.1 mrg
935 1.1 mrg FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
936 1.1 mrg if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
937 1.1 mrg return true;
938 1.1 mrg
939 1.1 mrg return false;
940 1.1 mrg }
941 1.1 mrg
942 1.1 mrg /* Return a copy of LOOP placed before LOOP. */
943 1.1 mrg
944 1.1 mrg static class loop *
945 1.1 mrg copy_loop_before (class loop *loop)
946 1.1 mrg {
947 1.1 mrg class loop *res;
948 1.1 mrg edge preheader = loop_preheader_edge (loop);
949 1.1 mrg
950 1.1 mrg initialize_original_copy_tables ();
951 1.1 mrg res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
952 1.1 mrg gcc_assert (res != NULL);
953 1.1 mrg free_original_copy_tables ();
954 1.1 mrg delete_update_ssa ();
955 1.1 mrg
956 1.1 mrg return res;
957 1.1 mrg }
958 1.1 mrg
959 1.1 mrg /* Creates an empty basic block after LOOP. */
960 1.1 mrg
961 1.1 mrg static void
962 1.1 mrg create_bb_after_loop (class loop *loop)
963 1.1 mrg {
964 1.1 mrg edge exit = single_exit (loop);
965 1.1 mrg
966 1.1 mrg if (!exit)
967 1.1 mrg return;
968 1.1 mrg
969 1.1 mrg split_edge (exit);
970 1.1 mrg }
971 1.1 mrg
972 1.1 mrg /* Generate code for PARTITION from the code in LOOP. The loop is
973 1.1 mrg copied when COPY_P is true. All the statements not flagged in the
974 1.1 mrg PARTITION bitmap are removed from the loop or from its copy. The
975 1.1 mrg statements are indexed in sequence inside a basic block, and the
976 1.1 mrg basic blocks of a loop are taken in dom order. */
977 1.1 mrg
978 1.1 mrg static void
979 1.1 mrg generate_loops_for_partition (class loop *loop, partition *partition,
980 1.1 mrg bool copy_p)
981 1.1 mrg {
982 1.1 mrg unsigned i;
983 1.1 mrg basic_block *bbs;
984 1.1 mrg
985 1.1 mrg if (copy_p)
986 1.1 mrg {
987 1.1 mrg int orig_loop_num = loop->orig_loop_num;
988 1.1 mrg loop = copy_loop_before (loop);
989 1.1 mrg gcc_assert (loop != NULL);
990 1.1 mrg loop->orig_loop_num = orig_loop_num;
991 1.1 mrg create_preheader (loop, CP_SIMPLE_PREHEADERS);
992 1.1 mrg create_bb_after_loop (loop);
993 1.1 mrg }
994 1.1 mrg else
995 1.1 mrg {
996 1.1 mrg /* Origin number is set to the new versioned loop's num. */
997 1.1 mrg gcc_assert (loop->orig_loop_num != loop->num);
998 1.1 mrg }
999 1.1 mrg
1000 1.1 mrg /* Remove stmts not in the PARTITION bitmap. */
1001 1.1 mrg bbs = get_loop_body_in_dom_order (loop);
1002 1.1 mrg
1003 1.1 mrg if (MAY_HAVE_DEBUG_BIND_STMTS)
1004 1.1 mrg for (i = 0; i < loop->num_nodes; i++)
1005 1.1 mrg {
1006 1.1 mrg basic_block bb = bbs[i];
1007 1.1 mrg
1008 1.1 mrg for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
1009 1.1 mrg gsi_next (&bsi))
1010 1.1 mrg {
1011 1.1 mrg gphi *phi = bsi.phi ();
1012 1.1 mrg if (!virtual_operand_p (gimple_phi_result (phi))
1013 1.1 mrg && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
1014 1.1 mrg reset_debug_uses (phi);
1015 1.1 mrg }
1016 1.1 mrg
1017 1.1 mrg for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1018 1.1 mrg {
1019 1.1 mrg gimple *stmt = gsi_stmt (bsi);
1020 1.1 mrg if (gimple_code (stmt) != GIMPLE_LABEL
1021 1.1 mrg && !is_gimple_debug (stmt)
1022 1.1 mrg && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
1023 1.1 mrg reset_debug_uses (stmt);
1024 1.1 mrg }
1025 1.1 mrg }
1026 1.1 mrg
1027 1.1 mrg for (i = 0; i < loop->num_nodes; i++)
1028 1.1 mrg {
1029 1.1 mrg basic_block bb = bbs[i];
1030 1.1 mrg edge inner_exit = NULL;
1031 1.1 mrg
1032 1.1 mrg if (loop != bb->loop_father)
1033 1.1 mrg inner_exit = single_exit (bb->loop_father);
1034 1.1 mrg
1035 1.1 mrg for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
1036 1.1 mrg {
1037 1.1 mrg gphi *phi = bsi.phi ();
1038 1.1 mrg if (!virtual_operand_p (gimple_phi_result (phi))
1039 1.1 mrg && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
1040 1.1 mrg remove_phi_node (&bsi, true);
1041 1.1 mrg else
1042 1.1 mrg gsi_next (&bsi);
1043 1.1 mrg }
1044 1.1 mrg
1045 1.1 mrg for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
1046 1.1 mrg {
1047 1.1 mrg gimple *stmt = gsi_stmt (bsi);
1048 1.1 mrg if (gimple_code (stmt) != GIMPLE_LABEL
1049 1.1 mrg && !is_gimple_debug (stmt)
1050 1.1 mrg && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
1051 1.1 mrg {
1052 1.1 mrg /* In distribution of loop nest, if bb is inner loop's exit_bb,
1053 1.1 mrg we choose its exit edge/path in order to avoid generating
1054 1.1 mrg infinite loop. For all other cases, we choose an arbitrary
1055 1.1 mrg path through the empty CFG part that this unnecessary
1056 1.1 mrg control stmt controls. */
1057 1.1 mrg if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
1058 1.1 mrg {
1059 1.1 mrg if (inner_exit && inner_exit->flags & EDGE_TRUE_VALUE)
1060 1.1 mrg gimple_cond_make_true (cond_stmt);
1061 1.1 mrg else
1062 1.1 mrg gimple_cond_make_false (cond_stmt);
1063 1.1 mrg update_stmt (stmt);
1064 1.1 mrg }
1065 1.1 mrg else if (gimple_code (stmt) == GIMPLE_SWITCH)
1066 1.1 mrg {
1067 1.1 mrg gswitch *switch_stmt = as_a <gswitch *> (stmt);
1068 1.1 mrg gimple_switch_set_index
1069 1.1 mrg (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
1070 1.1 mrg update_stmt (stmt);
1071 1.1 mrg }
1072 1.1 mrg else
1073 1.1 mrg {
1074 1.1 mrg unlink_stmt_vdef (stmt);
1075 1.1 mrg gsi_remove (&bsi, true);
1076 1.1 mrg release_defs (stmt);
1077 1.1 mrg continue;
1078 1.1 mrg }
1079 1.1 mrg }
1080 1.1 mrg gsi_next (&bsi);
1081 1.1 mrg }
1082 1.1 mrg }
1083 1.1 mrg
1084 1.1 mrg free (bbs);
1085 1.1 mrg }
1086 1.1 mrg
1087 1.1 mrg /* If VAL memory representation contains the same value in all bytes,
1088 1.1 mrg return that value, otherwise return -1.
1089 1.1 mrg E.g. for 0x24242424 return 0x24, for IEEE double
1090 1.1 mrg 747708026454360457216.0 return 0x44, etc. */
1091 1.1 mrg
1092 1.1 mrg static int
1093 1.1 mrg const_with_all_bytes_same (tree val)
1094 1.1 mrg {
1095 1.1 mrg unsigned char buf[64];
1096 1.1 mrg int i, len;
1097 1.1 mrg
1098 1.1 mrg if (integer_zerop (val)
1099 1.1 mrg || (TREE_CODE (val) == CONSTRUCTOR
1100 1.1 mrg && !TREE_CLOBBER_P (val)
1101 1.1 mrg && CONSTRUCTOR_NELTS (val) == 0))
1102 1.1 mrg return 0;
1103 1.1 mrg
1104 1.1 mrg if (real_zerop (val))
1105 1.1 mrg {
1106 1.1 mrg /* Only return 0 for +0.0, not for -0.0, which doesn't have
1107 1.1 mrg an all bytes same memory representation. Don't transform
1108 1.1 mrg -0.0 stores into +0.0 even for !HONOR_SIGNED_ZEROS. */
1109 1.1 mrg switch (TREE_CODE (val))
1110 1.1 mrg {
1111 1.1 mrg case REAL_CST:
1112 1.1 mrg if (!real_isneg (TREE_REAL_CST_PTR (val)))
1113 1.1 mrg return 0;
1114 1.1 mrg break;
1115 1.1 mrg case COMPLEX_CST:
1116 1.1 mrg if (!const_with_all_bytes_same (TREE_REALPART (val))
1117 1.1 mrg && !const_with_all_bytes_same (TREE_IMAGPART (val)))
1118 1.1 mrg return 0;
1119 1.1 mrg break;
1120 1.1 mrg case VECTOR_CST:
1121 1.1 mrg {
1122 1.1 mrg unsigned int count = vector_cst_encoded_nelts (val);
1123 1.1 mrg unsigned int j;
1124 1.1 mrg for (j = 0; j < count; ++j)
1125 1.1 mrg if (const_with_all_bytes_same (VECTOR_CST_ENCODED_ELT (val, j)))
1126 1.1 mrg break;
1127 1.1 mrg if (j == count)
1128 1.1 mrg return 0;
1129 1.1 mrg break;
1130 1.1 mrg }
1131 1.1 mrg default:
1132 1.1 mrg break;
1133 1.1 mrg }
1134 1.1 mrg }
1135 1.1 mrg
1136 1.1 mrg if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
1137 1.1 mrg return -1;
1138 1.1 mrg
1139 1.1 mrg len = native_encode_expr (val, buf, sizeof (buf));
1140 1.1 mrg if (len == 0)
1141 1.1 mrg return -1;
1142 1.1 mrg for (i = 1; i < len; i++)
1143 1.1 mrg if (buf[i] != buf[0])
1144 1.1 mrg return -1;
1145 1.1 mrg return buf[0];
1146 1.1 mrg }
1147 1.1 mrg
1148 1.1 mrg /* Generate a call to memset for PARTITION in LOOP. */
1149 1.1 mrg
1150 1.1 mrg static void
1151 1.1 mrg generate_memset_builtin (class loop *loop, partition *partition)
1152 1.1 mrg {
1153 1.1 mrg gimple_stmt_iterator gsi;
1154 1.1 mrg tree mem, fn, nb_bytes;
1155 1.1 mrg tree val;
1156 1.1 mrg struct builtin_info *builtin = partition->builtin;
1157 1.1 mrg gimple *fn_call;
1158 1.1 mrg
1159 1.1 mrg /* The new statements will be placed before LOOP. */
1160 1.1 mrg gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
1161 1.1 mrg
1162 1.1 mrg nb_bytes = rewrite_to_non_trapping_overflow (builtin->size);
1163 1.1 mrg nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
1164 1.1 mrg false, GSI_CONTINUE_LINKING);
1165 1.1 mrg mem = rewrite_to_non_trapping_overflow (builtin->dst_base);
1166 1.1 mrg mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
1167 1.1 mrg false, GSI_CONTINUE_LINKING);
1168 1.1 mrg
1169 1.1 mrg /* This exactly matches the pattern recognition in classify_partition. */
1170 1.1 mrg val = gimple_assign_rhs1 (DR_STMT (builtin->dst_dr));
1171 1.1 mrg /* Handle constants like 0x15151515 and similarly
1172 1.1 mrg floating point constants etc. where all bytes are the same. */
1173 1.1 mrg int bytev = const_with_all_bytes_same (val);
1174 1.1 mrg if (bytev != -1)
1175 1.1 mrg val = build_int_cst (integer_type_node, bytev);
1176 1.1 mrg else if (TREE_CODE (val) == INTEGER_CST)
1177 1.1 mrg val = fold_convert (integer_type_node, val);
1178 1.1 mrg else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
1179 1.1 mrg {
1180 1.1 mrg tree tem = make_ssa_name (integer_type_node);
1181 1.1 mrg gimple *cstmt = gimple_build_assign (tem, NOP_EXPR, val);
1182 1.1 mrg gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
1183 1.1 mrg val = tem;
1184 1.1 mrg }
1185 1.1 mrg
1186 1.1 mrg fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
1187 1.1 mrg fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
1188 1.1 mrg gimple_set_location (fn_call, partition->loc);
1189 1.1 mrg gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
1190 1.1 mrg fold_stmt (&gsi);
1191 1.1 mrg
1192 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
1193 1.1 mrg {
1194 1.1 mrg fprintf (dump_file, "generated memset");
1195 1.1 mrg if (bytev == 0)
1196 1.1 mrg fprintf (dump_file, " zero\n");
1197 1.1 mrg else
1198 1.1 mrg fprintf (dump_file, "\n");
1199 1.1 mrg }
1200 1.1 mrg }
1201 1.1 mrg
1202 1.1 mrg /* Generate a call to memcpy for PARTITION in LOOP. */
1203 1.1 mrg
1204 1.1 mrg static void
1205 1.1 mrg generate_memcpy_builtin (class loop *loop, partition *partition)
1206 1.1 mrg {
1207 1.1 mrg gimple_stmt_iterator gsi;
1208 1.1 mrg gimple *fn_call;
1209 1.1 mrg tree dest, src, fn, nb_bytes;
1210 1.1 mrg enum built_in_function kind;
1211 1.1 mrg struct builtin_info *builtin = partition->builtin;
1212 1.1 mrg
1213 1.1 mrg /* The new statements will be placed before LOOP. */
1214 1.1 mrg gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
1215 1.1 mrg
1216 1.1 mrg nb_bytes = rewrite_to_non_trapping_overflow (builtin->size);
1217 1.1 mrg nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
1218 1.1 mrg false, GSI_CONTINUE_LINKING);
1219 1.1 mrg dest = rewrite_to_non_trapping_overflow (builtin->dst_base);
1220 1.1 mrg src = rewrite_to_non_trapping_overflow (builtin->src_base);
1221 1.1 mrg if (partition->kind == PKIND_MEMCPY
1222 1.1 mrg || ! ptr_derefs_may_alias_p (dest, src))
1223 1.1 mrg kind = BUILT_IN_MEMCPY;
1224 1.1 mrg else
1225 1.1 mrg kind = BUILT_IN_MEMMOVE;
1226 1.1 mrg /* Try harder if we're copying a constant size. */
1227 1.1 mrg if (kind == BUILT_IN_MEMMOVE && poly_int_tree_p (nb_bytes))
1228 1.1 mrg {
1229 1.1 mrg aff_tree asrc, adest;
1230 1.1 mrg tree_to_aff_combination (src, ptr_type_node, &asrc);
1231 1.1 mrg tree_to_aff_combination (dest, ptr_type_node, &adest);
1232 1.1 mrg aff_combination_scale (&adest, -1);
1233 1.1 mrg aff_combination_add (&asrc, &adest);
1234 1.1 mrg if (aff_comb_cannot_overlap_p (&asrc, wi::to_poly_widest (nb_bytes),
1235 1.1 mrg wi::to_poly_widest (nb_bytes)))
1236 1.1 mrg kind = BUILT_IN_MEMCPY;
1237 1.1 mrg }
1238 1.1 mrg
1239 1.1 mrg dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
1240 1.1 mrg false, GSI_CONTINUE_LINKING);
1241 1.1 mrg src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
1242 1.1 mrg false, GSI_CONTINUE_LINKING);
1243 1.1 mrg fn = build_fold_addr_expr (builtin_decl_implicit (kind));
1244 1.1 mrg fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
1245 1.1 mrg gimple_set_location (fn_call, partition->loc);
1246 1.1 mrg gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
1247 1.1 mrg fold_stmt (&gsi);
1248 1.1 mrg
1249 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
1250 1.1 mrg {
1251 1.1 mrg if (kind == BUILT_IN_MEMCPY)
1252 1.1 mrg fprintf (dump_file, "generated memcpy\n");
1253 1.1 mrg else
1254 1.1 mrg fprintf (dump_file, "generated memmove\n");
1255 1.1 mrg }
1256 1.1 mrg }
1257 1.1 mrg
1258 1.1 mrg /* Remove and destroy the loop LOOP. */
1259 1.1 mrg
1260 1.1 mrg static void
1261 1.1 mrg destroy_loop (class loop *loop)
1262 1.1 mrg {
1263 1.1 mrg unsigned nbbs = loop->num_nodes;
1264 1.1 mrg edge exit = single_exit (loop);
1265 1.1 mrg basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
1266 1.1 mrg basic_block *bbs;
1267 1.1 mrg unsigned i;
1268 1.1 mrg
1269 1.1 mrg bbs = get_loop_body_in_dom_order (loop);
1270 1.1 mrg
1271 1.1 mrg gimple_stmt_iterator dst_gsi = gsi_after_labels (exit->dest);
1272 1.1 mrg bool safe_p = single_pred_p (exit->dest);
1273 1.1 mrg for (unsigned i = 0; i < nbbs; ++i)
1274 1.1 mrg {
1275 1.1 mrg /* We have made sure to not leave any dangling uses of SSA
1276 1.1 mrg names defined in the loop. With the exception of virtuals.
1277 1.1 mrg Make sure we replace all uses of virtual defs that will remain
1278 1.1 mrg outside of the loop with the bare symbol as delete_basic_block
1279 1.1 mrg will release them. */
1280 1.1 mrg for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
1281 1.1 mrg gsi_next (&gsi))
1282 1.1 mrg {
1283 1.1 mrg gphi *phi = gsi.phi ();
1284 1.1 mrg if (virtual_operand_p (gimple_phi_result (phi)))
1285 1.1 mrg mark_virtual_phi_result_for_renaming (phi);
1286 1.1 mrg }
1287 1.1 mrg for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);)
1288 1.1 mrg {
1289 1.1 mrg gimple *stmt = gsi_stmt (gsi);
1290 1.1 mrg tree vdef = gimple_vdef (stmt);
1291 1.1 mrg if (vdef && TREE_CODE (vdef) == SSA_NAME)
1292 1.1 mrg mark_virtual_operand_for_renaming (vdef);
1293 1.1 mrg /* Also move and eventually reset debug stmts. We can leave
1294 1.1 mrg constant values in place in case the stmt dominates the exit.
1295 1.1 mrg ??? Non-constant values from the last iteration can be
1296 1.1 mrg replaced with final values if we can compute them. */
1297 1.1 mrg if (gimple_debug_bind_p (stmt))
1298 1.1 mrg {
1299 1.1 mrg tree val = gimple_debug_bind_get_value (stmt);
1300 1.1 mrg gsi_move_before (&gsi, &dst_gsi);
1301 1.1 mrg if (val
1302 1.1 mrg && (!safe_p
1303 1.1 mrg || !is_gimple_min_invariant (val)
1304 1.1 mrg || !dominated_by_p (CDI_DOMINATORS, exit->src, bbs[i])))
1305 1.1 mrg {
1306 1.1 mrg gimple_debug_bind_reset_value (stmt);
1307 1.1 mrg update_stmt (stmt);
1308 1.1 mrg }
1309 1.1 mrg }
1310 1.1 mrg else
1311 1.1 mrg gsi_next (&gsi);
1312 1.1 mrg }
1313 1.1 mrg }
1314 1.1 mrg
1315 1.1 mrg redirect_edge_pred (exit, src);
1316 1.1 mrg exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
1317 1.1 mrg exit->flags |= EDGE_FALLTHRU;
1318 1.1 mrg cancel_loop_tree (loop);
1319 1.1 mrg rescan_loop_exit (exit, false, true);
1320 1.1 mrg
1321 1.1 mrg i = nbbs;
1322 1.1 mrg do
1323 1.1 mrg {
1324 1.1 mrg --i;
1325 1.1 mrg delete_basic_block (bbs[i]);
1326 1.1 mrg }
1327 1.1 mrg while (i != 0);
1328 1.1 mrg
1329 1.1 mrg free (bbs);
1330 1.1 mrg
1331 1.1 mrg set_immediate_dominator (CDI_DOMINATORS, dest,
1332 1.1 mrg recompute_dominator (CDI_DOMINATORS, dest));
1333 1.1 mrg }
1334 1.1 mrg
1335 1.1 mrg /* Generates code for PARTITION. Return whether LOOP needs to be destroyed. */
1336 1.1 mrg
1337 1.1 mrg static bool
1338 1.1 mrg generate_code_for_partition (class loop *loop,
1339 1.1 mrg partition *partition, bool copy_p)
1340 1.1 mrg {
1341 1.1 mrg switch (partition->kind)
1342 1.1 mrg {
1343 1.1 mrg case PKIND_NORMAL:
1344 1.1 mrg case PKIND_PARTIAL_MEMSET:
1345 1.1 mrg /* Reductions all have to be in the last partition. */
1346 1.1 mrg gcc_assert (!partition_reduction_p (partition)
1347 1.1 mrg || !copy_p);
1348 1.1 mrg generate_loops_for_partition (loop, partition, copy_p);
1349 1.1 mrg return false;
1350 1.1 mrg
1351 1.1 mrg case PKIND_MEMSET:
1352 1.1 mrg generate_memset_builtin (loop, partition);
1353 1.1 mrg break;
1354 1.1 mrg
1355 1.1 mrg case PKIND_MEMCPY:
1356 1.1 mrg case PKIND_MEMMOVE:
1357 1.1 mrg generate_memcpy_builtin (loop, partition);
1358 1.1 mrg break;
1359 1.1 mrg
1360 1.1 mrg default:
1361 1.1 mrg gcc_unreachable ();
1362 1.1 mrg }
1363 1.1 mrg
1364 1.1 mrg /* Common tail for partitions we turn into a call. If this was the last
1365 1.1 mrg partition for which we generate code, we have to destroy the loop. */
1366 1.1 mrg if (!copy_p)
1367 1.1 mrg return true;
1368 1.1 mrg return false;
1369 1.1 mrg }
1370 1.1 mrg
1371 1.1 mrg data_dependence_relation *
1372 1.1 mrg loop_distribution::get_data_dependence (struct graph *rdg, data_reference_p a,
1373 1.1 mrg data_reference_p b)
1374 1.1 mrg {
1375 1.1 mrg struct data_dependence_relation ent, **slot;
1376 1.1 mrg struct data_dependence_relation *ddr;
1377 1.1 mrg
1378 1.1 mrg gcc_assert (DR_IS_WRITE (a) || DR_IS_WRITE (b));
1379 1.1 mrg gcc_assert (rdg_vertex_for_stmt (rdg, DR_STMT (a))
1380 1.1 mrg <= rdg_vertex_for_stmt (rdg, DR_STMT (b)));
1381 1.1 mrg ent.a = a;
1382 1.1 mrg ent.b = b;
1383 1.1 mrg slot = ddrs_table->find_slot (&ent, INSERT);
1384 1.1 mrg if (*slot == NULL)
1385 1.1 mrg {
1386 1.1 mrg ddr = initialize_data_dependence_relation (a, b, loop_nest);
1387 1.1 mrg compute_affine_dependence (ddr, loop_nest[0]);
1388 1.1 mrg *slot = ddr;
1389 1.1 mrg }
1390 1.1 mrg
1391 1.1 mrg return *slot;
1392 1.1 mrg }
1393 1.1 mrg
1394 1.1 mrg bool
1395 1.1 mrg loop_distribution::data_dep_in_cycle_p (struct graph *rdg,
1396 1.1 mrg data_reference_p dr1,
1397 1.1 mrg data_reference_p dr2)
1398 1.1 mrg {
1399 1.1 mrg struct data_dependence_relation *ddr;
1400 1.1 mrg
1401 1.1 mrg /* Re-shuffle data-refs to be in topological order. */
1402 1.1 mrg if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
1403 1.1 mrg > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
1404 1.1 mrg std::swap (dr1, dr2);
1405 1.1 mrg
1406 1.1 mrg ddr = get_data_dependence (rdg, dr1, dr2);
1407 1.1 mrg
1408 1.1 mrg /* In case of no data dependence. */
1409 1.1 mrg if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1410 1.1 mrg return false;
1411 1.1 mrg /* For unknown data dependence or known data dependence which can't be
1412 1.1 mrg expressed in classic distance vector, we check if it can be resolved
1413 1.1 mrg by runtime alias check. If yes, we still consider data dependence
1414 1.1 mrg as won't introduce data dependence cycle. */
1415 1.1 mrg else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
1416 1.1 mrg || DDR_NUM_DIST_VECTS (ddr) == 0)
1417 1.1 mrg return !runtime_alias_check_p (ddr, NULL, true);
1418 1.1 mrg else if (DDR_NUM_DIST_VECTS (ddr) > 1)
1419 1.1 mrg return true;
1420 1.1 mrg else if (DDR_REVERSED_P (ddr)
1421 1.1 mrg || lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
1422 1.1 mrg return false;
1423 1.1 mrg
1424 1.1 mrg return true;
1425 1.1 mrg }
1426 1.1 mrg
1427 1.1 mrg void
1428 1.1 mrg loop_distribution::update_type_for_merge (struct graph *rdg,
1429 1.1 mrg partition *partition1,
1430 1.1 mrg partition *partition2)
1431 1.1 mrg {
1432 1.1 mrg unsigned i, j;
1433 1.1 mrg bitmap_iterator bi, bj;
1434 1.1 mrg data_reference_p dr1, dr2;
1435 1.1 mrg
1436 1.1 mrg EXECUTE_IF_SET_IN_BITMAP (partition1->datarefs, 0, i, bi)
1437 1.1 mrg {
1438 1.1 mrg unsigned start = (partition1 == partition2) ? i + 1 : 0;
1439 1.1 mrg
1440 1.1 mrg dr1 = datarefs_vec[i];
1441 1.1 mrg EXECUTE_IF_SET_IN_BITMAP (partition2->datarefs, start, j, bj)
1442 1.1 mrg {
1443 1.1 mrg dr2 = datarefs_vec[j];
1444 1.1 mrg if (DR_IS_READ (dr1) && DR_IS_READ (dr2))
1445 1.1 mrg continue;
1446 1.1 mrg
1447 1.1 mrg /* Partition can only be executed sequentially if there is any
1448 1.1 mrg data dependence cycle. */
1449 1.1 mrg if (data_dep_in_cycle_p (rdg, dr1, dr2))
1450 1.1 mrg {
1451 1.1 mrg partition1->type = PTYPE_SEQUENTIAL;
1452 1.1 mrg return;
1453 1.1 mrg }
1454 1.1 mrg }
1455 1.1 mrg }
1456 1.1 mrg }
1457 1.1 mrg
1458 1.1 mrg partition *
1459 1.1 mrg loop_distribution::build_rdg_partition_for_vertex (struct graph *rdg, int v)
1460 1.1 mrg {
1461 1.1 mrg partition *partition = partition_alloc ();
1462 1.1 mrg auto_vec<int, 3> nodes;
1463 1.1 mrg unsigned i, j;
1464 1.1 mrg int x;
1465 1.1 mrg data_reference_p dr;
1466 1.1 mrg
1467 1.1 mrg graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
1468 1.1 mrg
1469 1.1 mrg FOR_EACH_VEC_ELT (nodes, i, x)
1470 1.1 mrg {
1471 1.1 mrg bitmap_set_bit (partition->stmts, x);
1472 1.1 mrg
1473 1.1 mrg for (j = 0; RDG_DATAREFS (rdg, x).iterate (j, &dr); ++j)
1474 1.1 mrg {
1475 1.1 mrg unsigned idx = (unsigned) DR_INDEX (dr);
1476 1.1 mrg gcc_assert (idx < datarefs_vec.length ());
1477 1.1 mrg
1478 1.1 mrg /* Partition can only be executed sequentially if there is any
1479 1.1 mrg unknown data reference. */
1480 1.1 mrg if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr)
1481 1.1 mrg || !DR_INIT (dr) || !DR_STEP (dr))
1482 1.1 mrg partition->type = PTYPE_SEQUENTIAL;
1483 1.1 mrg
1484 1.1 mrg bitmap_set_bit (partition->datarefs, idx);
1485 1.1 mrg }
1486 1.1 mrg }
1487 1.1 mrg
1488 1.1 mrg if (partition->type == PTYPE_SEQUENTIAL)
1489 1.1 mrg return partition;
1490 1.1 mrg
1491 1.1 mrg /* Further check if any data dependence prevents us from executing the
1492 1.1 mrg partition parallelly. */
1493 1.1 mrg update_type_for_merge (rdg, partition, partition);
1494 1.1 mrg
1495 1.1 mrg return partition;
1496 1.1 mrg }
1497 1.1 mrg
1498 1.1 mrg /* Given PARTITION of LOOP and RDG, record single load/store data references
1499 1.1 mrg for builtin partition in SRC_DR/DST_DR, return false if there is no such
1500 1.1 mrg data references. */
1501 1.1 mrg
1502 1.1 mrg static bool
1503 1.1 mrg find_single_drs (class loop *loop, struct graph *rdg, const bitmap &partition_stmts,
1504 1.1 mrg data_reference_p *dst_dr, data_reference_p *src_dr)
1505 1.1 mrg {
1506 1.1 mrg unsigned i;
1507 1.1 mrg data_reference_p single_ld = NULL, single_st = NULL;
1508 1.1 mrg bitmap_iterator bi;
1509 1.1 mrg
1510 1.1 mrg EXECUTE_IF_SET_IN_BITMAP (partition_stmts, 0, i, bi)
1511 1.1 mrg {
1512 1.1 mrg gimple *stmt = RDG_STMT (rdg, i);
1513 1.1 mrg data_reference_p dr;
1514 1.1 mrg
1515 1.1 mrg if (gimple_code (stmt) == GIMPLE_PHI)
1516 1.1 mrg continue;
1517 1.1 mrg
1518 1.1 mrg /* Any scalar stmts are ok. */
1519 1.1 mrg if (!gimple_vuse (stmt))
1520 1.1 mrg continue;
1521 1.1 mrg
1522 1.1 mrg /* Otherwise just regular loads/stores. */
1523 1.1 mrg if (!gimple_assign_single_p (stmt))
1524 1.1 mrg return false;
1525 1.1 mrg
1526 1.1 mrg /* But exactly one store and/or load. */
1527 1.1 mrg for (unsigned j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
1528 1.1 mrg {
1529 1.1 mrg tree type = TREE_TYPE (DR_REF (dr));
1530 1.1 mrg
1531 1.1 mrg /* The memset, memcpy and memmove library calls are only
1532 1.1 mrg able to deal with generic address space. */
1533 1.1 mrg if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (type)))
1534 1.1 mrg return false;
1535 1.1 mrg
1536 1.1 mrg if (DR_IS_READ (dr))
1537 1.1 mrg {
1538 1.1 mrg if (single_ld != NULL)
1539 1.1 mrg return false;
1540 1.1 mrg single_ld = dr;
1541 1.1 mrg }
1542 1.1 mrg else
1543 1.1 mrg {
1544 1.1 mrg if (single_st != NULL)
1545 1.1 mrg return false;
1546 1.1 mrg single_st = dr;
1547 1.1 mrg }
1548 1.1 mrg }
1549 1.1 mrg }
1550 1.1 mrg
1551 1.1 mrg if (!single_ld && !single_st)
1552 1.1 mrg return false;
1553 1.1 mrg
1554 1.1 mrg basic_block bb_ld = NULL;
1555 1.1 mrg basic_block bb_st = NULL;
1556 1.1 mrg
1557 1.1 mrg if (single_ld)
1558 1.1 mrg {
1559 1.1 mrg /* Bail out if this is a bitfield memory reference. */
1560 1.1 mrg if (TREE_CODE (DR_REF (single_ld)) == COMPONENT_REF
1561 1.1 mrg && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (single_ld), 1)))
1562 1.1 mrg return false;
1563 1.1 mrg
1564 1.1 mrg /* Data reference must be executed exactly once per iteration of each
1565 1.1 mrg loop in the loop nest. We only need to check dominance information
1566 1.1 mrg against the outermost one in a perfect loop nest because a bb can't
1567 1.1 mrg dominate outermost loop's latch without dominating inner loop's. */
1568 1.1 mrg bb_ld = gimple_bb (DR_STMT (single_ld));
1569 1.1 mrg if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb_ld))
1570 1.1 mrg return false;
1571 1.1 mrg }
1572 1.1 mrg
1573 1.1 mrg if (single_st)
1574 1.1 mrg {
1575 1.1 mrg /* Bail out if this is a bitfield memory reference. */
1576 1.1 mrg if (TREE_CODE (DR_REF (single_st)) == COMPONENT_REF
1577 1.1 mrg && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (single_st), 1)))
1578 1.1 mrg return false;
1579 1.1 mrg
1580 1.1 mrg /* Data reference must be executed exactly once per iteration.
1581 1.1 mrg Same as single_ld, we only need to check against the outermost
1582 1.1 mrg loop. */
1583 1.1 mrg bb_st = gimple_bb (DR_STMT (single_st));
1584 1.1 mrg if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb_st))
1585 1.1 mrg return false;
1586 1.1 mrg }
1587 1.1 mrg
1588 1.1 mrg if (single_ld && single_st)
1589 1.1 mrg {
1590 1.1 mrg /* Load and store must be in the same loop nest. */
1591 1.1 mrg if (bb_st->loop_father != bb_ld->loop_father)
1592 1.1 mrg return false;
1593 1.1 mrg
1594 1.1 mrg edge e = single_exit (bb_st->loop_father);
1595 1.1 mrg bool dom_ld = dominated_by_p (CDI_DOMINATORS, e->src, bb_ld);
1596 1.1 mrg bool dom_st = dominated_by_p (CDI_DOMINATORS, e->src, bb_st);
1597 1.1 mrg if (dom_ld != dom_st)
1598 1.1 mrg return false;
1599 1.1 mrg }
1600 1.1 mrg
1601 1.1 mrg *src_dr = single_ld;
1602 1.1 mrg *dst_dr = single_st;
1603 1.1 mrg return true;
1604 1.1 mrg }
1605 1.1 mrg
/* Given data reference DR in LOOP_NEST, this function checks the enclosing
   loops from inner to outer to see if loop's step equals to access size at
   each level of loop.  Return 2 if we can prove this at all level loops;
   record access base and size in BASE and SIZE; save loop's step at each
   level of loop in STEPS if it is not null.  For example:

     int arr[100][100][100];
     for (i = 0; i < 100; i++)        ;steps[2] = 40000
       for (j = 100; j > 0; j--)      ;steps[1] = -400
	 for (k = 0; k < 100; k++)    ;steps[0] = 4
	   arr[i][j - 1][k] = 0;      ;base = &arr, size = 4000000

   Return 1 if we can prove the equality at the innermost loop, but not all
   level loops.  In this case, no information is recorded.

   Return 0 if no equality can be proven at any level loops.  */

static int
compute_access_range (loop_p loop_nest, data_reference_p dr, tree *base,
		      tree *size, vec<tree> *steps = NULL)
{
  location_t loc = gimple_location (DR_STMT (dr));
  basic_block bb = gimple_bb (DR_STMT (dr));
  /* Start from the innermost loop containing DR's statement.  */
  class loop *loop = bb->loop_father;
  tree ref = DR_REF (dr);
  tree access_base = build_fold_addr_expr (ref);
  tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (ref));
  int res = 0;

  /* Peel one CHREC level per iteration, walking outward from the innermost
     loop up to LOOP_NEST.  On any early return RES tells how far we got:
     0 before the first level is proven, 1 afterwards.  */
  do {
    tree scev_fn = analyze_scalar_evolution (loop, access_base);
    if (TREE_CODE (scev_fn) != POLYNOMIAL_CHREC)
      return res;

    /* The base for the next outer level; it must itself be loop-invariant
       (free of CHRECs) at this point.  */
    access_base = CHREC_LEFT (scev_fn);
    if (tree_contains_chrecs (access_base, NULL))
      return res;

    tree scev_step = CHREC_RIGHT (scev_fn);
    /* Only support constant steps.  */
    if (TREE_CODE (scev_step) != INTEGER_CST)
      return res;

    enum ev_direction access_dir = scev_direction (scev_fn);
    if (access_dir == EV_DIR_UNKNOWN)
      return res;

    /* Record the signed step before we take its absolute value below.  */
    if (steps != NULL)
      steps->safe_push (scev_step);

    scev_step = fold_convert_loc (loc, sizetype, scev_step);
    /* Compute absolute value of scev step.  */
    if (access_dir == EV_DIR_DECREASES)
      scev_step = fold_build1_loc (loc, NEGATE_EXPR, sizetype, scev_step);

    /* At each level of loop, scev step must equal to access size.  In other
       words, DR must access consecutive memory between loop iterations.  */
    if (!operand_equal_p (scev_step, access_size, 0))
      return res;

    /* Access stride can be computed for data reference at least for the
       innermost loop.  */
    res = 1;

    /* Compute DR's execution times in loop.  If DR's block dominates the
       single exit, it also runs in the final (partial) iteration, hence
       the "+ 1".  */
    tree niters = number_of_latch_executions (loop);
    niters = fold_convert_loc (loc, sizetype, niters);
    if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src, bb))
      niters = size_binop_loc (loc, PLUS_EXPR, niters, size_one_node);

    /* Compute DR's overall access size in loop.  */
    access_size = fold_build2_loc (loc, MULT_EXPR, sizetype,
				   niters, scev_step);
    /* Adjust base address in case of negative step.  */
    if (access_dir == EV_DIR_DECREASES)
      {
	tree adj = fold_build2_loc (loc, MINUS_EXPR, sizetype,
				    scev_step, access_size);
	access_base = fold_build_pointer_plus_loc (loc, access_base, adj);
      }
  } while (loop != loop_nest && (loop = loop_outer (loop)) != NULL);

  *base = access_base;
  *size = access_size;
  /* Access stride can be computed for data reference at each level loop.  */
  return 2;
}
1693 1.1 mrg
1694 1.1 mrg /* Allocate and return builtin struct. Record information like DST_DR,
1695 1.1 mrg SRC_DR, DST_BASE, SRC_BASE and SIZE in the allocated struct. */
1696 1.1 mrg
1697 1.1 mrg static struct builtin_info *
1698 1.1 mrg alloc_builtin (data_reference_p dst_dr, data_reference_p src_dr,
1699 1.1 mrg tree dst_base, tree src_base, tree size)
1700 1.1 mrg {
1701 1.1 mrg struct builtin_info *builtin = XNEW (struct builtin_info);
1702 1.1 mrg builtin->dst_dr = dst_dr;
1703 1.1 mrg builtin->src_dr = src_dr;
1704 1.1 mrg builtin->dst_base = dst_base;
1705 1.1 mrg builtin->src_base = src_base;
1706 1.1 mrg builtin->size = size;
1707 1.1 mrg return builtin;
1708 1.1 mrg }
1709 1.1 mrg
/* Given single store data reference DR in loop nest LOOP, classify whether
   PARTITION forms a builtin memset call; on success record the builtin
   information in PARTITION and set its kind to PKIND_MEMSET.  */

static void
classify_builtin_st (loop_p loop, partition *partition, data_reference_p dr)
{
  gimple *stmt = DR_STMT (dr);
  tree base, size, rhs = gimple_assign_rhs1 (stmt);

  /* The stored value must be usable as a memset byte: either a constant
     whose bytes are all the same, or a value of byte-sized integral mode
     (checked via const_with_all_bytes_same / QImode comparison).  */
  if (const_with_all_bytes_same (rhs) == -1
      && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
	  || (TYPE_MODE (TREE_TYPE (rhs))
	      != TYPE_MODE (unsigned_char_type_node))))
    return;

  /* A non-constant stored value must be invariant in LOOP, i.e. not
     defined by a statement inside the loop.  */
  if (TREE_CODE (rhs) == SSA_NAME
      && !SSA_NAME_IS_DEFAULT_DEF (rhs)
      && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
    return;

  /* 2: consecutive at every loop level -> full memset candidate;
     1: consecutive only in the innermost loop -> partial memset;
     0: not consecutive at all -> give up.  */
  int res = compute_access_range (loop, dr, &base, &size);
  if (res == 0)
    return;
  if (res == 1)
    {
      partition->kind = PKIND_PARTIAL_MEMSET;
      return;
    }

  /* Split BASE into a base object and a byte offset; the offset must be a
     compile-time constant for the later builtin generation.  */
  poly_uint64 base_offset;
  unsigned HOST_WIDE_INT const_base_offset;
  tree base_base = strip_offset (base, &base_offset);
  if (!base_offset.is_constant (&const_base_offset))
    return;

  struct builtin_info *builtin;
  builtin = alloc_builtin (dr, NULL, base, NULL_TREE, size);
  builtin->dst_base_base = base_base;
  builtin->dst_base_offset = const_base_offset;
  partition->builtin = builtin;
  partition->kind = PKIND_MEMSET;
}
1752 1.1 mrg
/* Given single load SRC_DR and single store DST_DR in loop nest LOOP and
   RDG, classify whether PARTITION forms a builtin memcpy or memmove call;
   on success record the builtin information in PARTITION and set its kind
   to PKIND_MEMMOVE.  */

void
loop_distribution::classify_builtin_ldst (loop_p loop, struct graph *rdg,
					  partition *partition,
					  data_reference_p dst_dr,
					  data_reference_p src_dr)
{
  tree base, size, src_base, src_size;
  auto_vec<tree> dst_steps, src_steps;

  /* Compute access range of both load and store.  Both must be
     consecutive at every level of the loop nest (result 2).  */
  int res = compute_access_range (loop, dst_dr, &base, &size, &dst_steps);
  if (res != 2)
    return;
  res = compute_access_range (loop, src_dr, &src_base, &src_size, &src_steps);
  if (res != 2)
    return;

  /* They must have the same access size.  */
  if (!operand_equal_p (size, src_size, 0))
    return;

  /* They must have the same storage order.  */
  if (reverse_storage_order_for_component_p (DR_REF (dst_dr))
      != reverse_storage_order_for_component_p (DR_REF (src_dr)))
    return;

  /* Load and store in loop nest must access memory in the same way, i.e,
     they must have the same steps in each loop of the nest.  */
  if (dst_steps.length () != src_steps.length ())
    return;
  for (unsigned i = 0; i < dst_steps.length (); ++i)
    if (!operand_equal_p (dst_steps[i], src_steps[i], 0))
      return;

  /* Now check that if there is a dependence.  */
  ddr_p ddr = get_data_dependence (rdg, src_dr, dst_dr);

  /* Classify as memmove if no dependence between load and store.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    {
      partition->builtin = alloc_builtin (dst_dr, src_dr, base, src_base, size);
      partition->kind = PKIND_MEMMOVE;
      return;
    }

  /* Can't do memmove in case of unknown dependence or dependence without
     classical distance vector.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
      || DDR_NUM_DIST_VECTS (ddr) == 0)
    return;

  unsigned i;
  lambda_vector dist_v;
  int num_lev = (DDR_LOOP_NEST (ddr)).length ();
  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
    {
      unsigned dep_lev = dependence_level (dist_v, num_lev);
      /* Can't do memmove if load depends on store.  */
      if (dep_lev > 0 && dist_v[dep_lev - 1] > 0 && !DDR_REVERSED_P (ddr))
	return;
    }

  /* All distance vectors are compatible with memmove semantics.  */
  partition->builtin = alloc_builtin (dst_dr, src_dr, base, src_base, size);
  partition->kind = PKIND_MEMMOVE;
  return;
}
1822 1.1 mrg
/* Classify PARTITION of RDG in loop LOOP: mark it as a reduction partition
   if it has scalar uses outside of the loop, and otherwise try to classify
   it as a builtin (memset/memcpy/memmove) candidate.  STMT_IN_ALL_PARTITIONS
   is the bitmap of statements shared by every partition.  Return true if
   some reduction statement was found that belongs to all partitions.  */

bool
loop_distribution::classify_partition (loop_p loop,
				       struct graph *rdg, partition *partition,
				       bitmap stmt_in_all_partitions)
{
  bitmap_iterator bi;
  unsigned i;
  data_reference_p single_ld = NULL, single_st = NULL;
  bool volatiles_p = false, has_reduction = false;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple *stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
	volatiles_p = true;

      /* If the stmt is not included by all partitions and there are uses
	 outside of the loop, then mark the partition as reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
	{
	  /* Due to limitation in the transform phase we have to fuse all
	     reduction partitions.  As a result, this could cancel valid
	     loop distribution especially for loop that induction variable
	     is used outside of loop.  To workaround this issue, we skip
	     marking partition as reduction if the reduction stmt belongs
	     to all partitions.  In such case, reduction will be computed
	     correctly no matter how partitions are fused/distributed.  */
	  if (!bitmap_bit_p (stmt_in_all_partitions, i))
	    partition->reduction_p = true;
	  else
	    has_reduction = true;
	}
    }

  /* Simple workaround to prevent classifying the partition as builtin
     if it contains any use outside of loop.  For the case where all
     partitions have the reduction this simple workaround is delayed
     to only affect the last partition.  */
  if (partition->reduction_p)
    return has_reduction;

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return has_reduction;

  /* Find single load/store data references for builtin partition.  A
     builtin candidate needs at least a single store.  */
  if (!find_single_drs (loop, rdg, partition->stmts, &single_st, &single_ld)
      || !single_st)
    return has_reduction;

  if (single_ld && single_st)
    {
      gimple *store = DR_STMT (single_st), *load = DR_STMT (single_ld);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
	  && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
	return has_reduction;
    }

  partition->loc = gimple_location (DR_STMT (single_st));

  /* Classify the builtin kind: store only -> memset candidate;
     load + store -> memcpy/memmove candidate.  */
  if (single_ld == NULL)
    classify_builtin_st (loop, partition, single_st);
  else
    classify_builtin_ldst (loop, rdg, partition, single_st, single_ld);
  return has_reduction;
}
1893 1.1 mrg
/* Return true if PARTITION1 and PARTITION2 of RDG share a memory access:
   either a common load/store statement, or two data references with
   identical base address, offset, init and step.  */

bool
loop_distribution::share_memory_accesses (struct graph *rdg,
		      partition *partition1, partition *partition2)
{
  unsigned i, j;
  bitmap_iterator bi, bj;
  data_reference_p dr1, dr2;

  /* First check whether in the intersection of the two partitions are
     any loads or stores.  Common loads are the situation that happens
     most often.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check whether the two partitions access the same memory object.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->datarefs, 0, i, bi)
    {
      dr1 = datarefs_vec[i];

      /* Skip data references that were not fully analyzed.  */
      if (!DR_BASE_ADDRESS (dr1)
	  || !DR_OFFSET (dr1) || !DR_INIT (dr1) || !DR_STEP (dr1))
	continue;

      EXECUTE_IF_SET_IN_BITMAP (partition2->datarefs, 0, j, bj)
	{
	  dr2 = datarefs_vec[j];

	  if (!DR_BASE_ADDRESS (dr2)
	      || !DR_OFFSET (dr2) || !DR_INIT (dr2) || !DR_STEP (dr2))
	    continue;

	  /* All four address components must match exactly for the two
	     references to be considered the same object.  */
	  if (operand_equal_p (DR_BASE_ADDRESS (dr1), DR_BASE_ADDRESS (dr2), 0)
	      && operand_equal_p (DR_OFFSET (dr1), DR_OFFSET (dr2), 0)
	      && operand_equal_p (DR_INIT (dr1), DR_INIT (dr2), 0)
	      && operand_equal_p (DR_STEP (dr1), DR_STEP (dr2), 0))
	    return true;
	}
    }

  return false;
}
1937 1.1 mrg
/* For each seed statement in STARTING_STMTS, this function builds
   partition for it by adding depended statements according to RDG.
   All partitions are recorded in PARTITIONS.  */

void
loop_distribution::rdg_build_partitions (struct graph *rdg,
					 vec<gimple *> starting_stmts,
					 vec<partition *> *partitions)
{
  /* Tracks RDG vertices already absorbed into some partition so a seed
     inside an existing partition does not create a duplicate.  */
  auto_bitmap processed;
  int i;
  gimple *stmt;

  FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
    {
      int v = rdg_vertex_for_stmt (rdg, stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "ldist asked to generate code for vertex %d\n", v);

      /* If the vertex is already contained in another partition so
	 is the partition rooted at it.  */
      if (bitmap_bit_p (processed, v))
	continue;

      partition *partition = build_rdg_partition_for_vertex (rdg, v);
      bitmap_ior_into (processed, partition->stmts);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "ldist creates useful %s partition:\n",
		   partition->type == PTYPE_PARALLEL ? "parallel" : "sequent");
	  bitmap_print (dump_file, partition->stmts, "  ", "\n");
	}

      partitions->safe_push (partition);
    }

  /* All vertices should have been assigned to at least one partition now,
     other than vertices belonging to dead code.  */
}
1980 1.1 mrg
1981 1.1 mrg /* Dump to FILE the PARTITIONS. */
1982 1.1 mrg
1983 1.1 mrg static void
1984 1.1 mrg dump_rdg_partitions (FILE *file, const vec<partition *> &partitions)
1985 1.1 mrg {
1986 1.1 mrg int i;
1987 1.1 mrg partition *partition;
1988 1.1 mrg
1989 1.1 mrg FOR_EACH_VEC_ELT (partitions, i, partition)
1990 1.1 mrg debug_bitmap_file (file, partition->stmts);
1991 1.1 mrg }
1992 1.1 mrg
/* Debug PARTITIONS: dump each partition's statement bitmap to stderr.
   Intended to be called from the debugger.  */
extern void debug_rdg_partitions (const vec<partition *> &);

DEBUG_FUNCTION void
debug_rdg_partitions (const vec<partition *> &partitions)
{
  dump_rdg_partitions (stderr, partitions);
}
2001 1.1 mrg
2002 1.1 mrg /* Returns the number of read and write operations in the RDG. */
2003 1.1 mrg
2004 1.1 mrg static int
2005 1.1 mrg number_of_rw_in_rdg (struct graph *rdg)
2006 1.1 mrg {
2007 1.1 mrg int i, res = 0;
2008 1.1 mrg
2009 1.1 mrg for (i = 0; i < rdg->n_vertices; i++)
2010 1.1 mrg {
2011 1.1 mrg if (RDG_MEM_WRITE_STMT (rdg, i))
2012 1.1 mrg ++res;
2013 1.1 mrg
2014 1.1 mrg if (RDG_MEM_READS_STMT (rdg, i))
2015 1.1 mrg ++res;
2016 1.1 mrg }
2017 1.1 mrg
2018 1.1 mrg return res;
2019 1.1 mrg }
2020 1.1 mrg
2021 1.1 mrg /* Returns the number of read and write operations in a PARTITION of
2022 1.1 mrg the RDG. */
2023 1.1 mrg
2024 1.1 mrg static int
2025 1.1 mrg number_of_rw_in_partition (struct graph *rdg, partition *partition)
2026 1.1 mrg {
2027 1.1 mrg int res = 0;
2028 1.1 mrg unsigned i;
2029 1.1 mrg bitmap_iterator ii;
2030 1.1 mrg
2031 1.1 mrg EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
2032 1.1 mrg {
2033 1.1 mrg if (RDG_MEM_WRITE_STMT (rdg, i))
2034 1.1 mrg ++res;
2035 1.1 mrg
2036 1.1 mrg if (RDG_MEM_READS_STMT (rdg, i))
2037 1.1 mrg ++res;
2038 1.1 mrg }
2039 1.1 mrg
2040 1.1 mrg return res;
2041 1.1 mrg }
2042 1.1 mrg
2043 1.1 mrg /* Returns true when one of the PARTITIONS contains all the read or
2044 1.1 mrg write operations of RDG. */
2045 1.1 mrg
2046 1.1 mrg static bool
2047 1.1 mrg partition_contains_all_rw (struct graph *rdg,
2048 1.1 mrg const vec<partition *> &partitions)
2049 1.1 mrg {
2050 1.1 mrg int i;
2051 1.1 mrg partition *partition;
2052 1.1 mrg int nrw = number_of_rw_in_rdg (rdg);
2053 1.1 mrg
2054 1.1 mrg FOR_EACH_VEC_ELT (partitions, i, partition)
2055 1.1 mrg if (nrw == number_of_rw_in_partition (rdg, partition))
2056 1.1 mrg return true;
2057 1.1 mrg
2058 1.1 mrg return false;
2059 1.1 mrg }
2060 1.1 mrg
/* Compute the dependence direction between the data references in bitmap
   DRS1 and those in DRS2 over RDG.  DIR is the direction established so
   far (0 no dependence, -1 back, 1 forth, 2 both).  Dependences that could
   be resolved by a runtime alias check are pushed onto ALIAS_DDRS when it
   is non-NULL (and are then treated as no dependence here).  Return the
   combined direction; 2 means conflicting directions were found.  */

int
loop_distribution::pg_add_dependence_edges (struct graph *rdg, int dir,
					 bitmap drs1, bitmap drs2, vec<ddr_p> *alias_ddrs)
{
  unsigned i, j;
  bitmap_iterator bi, bj;
  data_reference_p dr1, dr2, saved_dr1;

  /* dependence direction - 0 is no dependence, -1 is back,
     1 is forth, 2 is both (we can stop then, merging will occur).  */
  EXECUTE_IF_SET_IN_BITMAP (drs1, 0, i, bi)
    {
      dr1 = datarefs_vec[i];

      EXECUTE_IF_SET_IN_BITMAP (drs2, 0, j, bj)
	{
	  int res, this_dir = 1;
	  ddr_p ddr;

	  dr2 = datarefs_vec[j];

	  /* Skip all <read, read> data dependence.  */
	  if (DR_IS_READ (dr1) && DR_IS_READ (dr2))
	    continue;

	  saved_dr1 = dr1;
	  /* Re-shuffle data-refs to be in topological order.  Remember
	     the swap by negating THIS_DIR so the reported direction is
	     still relative to the original (DR1, DR2) order.  */
	  if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
	      > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
	    {
	      std::swap (dr1, dr2);
	      this_dir = -this_dir;
	    }
	  ddr = get_data_dependence (rdg, dr1, dr2);
	  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	    {
	      this_dir = 0;
	      res = data_ref_compare_tree (DR_BASE_ADDRESS (dr1),
					   DR_BASE_ADDRESS (dr2));
	      /* Be conservative.  If data references are not well analyzed,
		 or the two data references have the same base address and
		 offset, add dependence and consider it alias to each other.
		 In other words, the dependence cannot be resolved by
		 runtime alias check.  */
	      if (!DR_BASE_ADDRESS (dr1) || !DR_BASE_ADDRESS (dr2)
		  || !DR_OFFSET (dr1) || !DR_OFFSET (dr2)
		  || !DR_INIT (dr1) || !DR_INIT (dr2)
		  || !DR_STEP (dr1) || !tree_fits_uhwi_p (DR_STEP (dr1))
		  || !DR_STEP (dr2) || !tree_fits_uhwi_p (DR_STEP (dr2))
		  || res == 0)
		this_dir = 2;
	      /* Data dependence could be resolved by runtime alias check,
		 record it in ALIAS_DDRS.  */
	      else if (alias_ddrs != NULL)
		alias_ddrs->safe_push (ddr);
	      /* Or simply ignore it.  */
	    }
	  else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
	    {
	      /* Known dependences can still be unordered throughout the
		 iteration space, see gcc.dg/tree-ssa/ldist-16.c and
		 gcc.dg/tree-ssa/pr94969.c.  */
	      if (DDR_NUM_DIST_VECTS (ddr) != 1)
		this_dir = 2;
	      else
		{
		  /* If the overlap is exact preserve stmt order.  */
		  if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0),
					   DDR_NB_LOOPS (ddr)))
		    ;
		  /* Else as the distance vector is lexicographic positive swap
		     the dependence direction.  */
		  else
		    {
		      if (DDR_REVERSED_P (ddr))
			this_dir = -this_dir;
		      this_dir = -this_dir;
		    }
		  /* When the dependence distance of the innermost common
		     loop of the DRs is zero we have a conflict.  This is
		     due to wonky dependence analysis which sometimes
		     ends up using a zero distance in place of unknown.  */
		  auto l1 = gimple_bb (DR_STMT (dr1))->loop_father;
		  auto l2 = gimple_bb (DR_STMT (dr2))->loop_father;
		  int idx = index_in_loop_nest (find_common_loop (l1, l2)->num,
						DDR_LOOP_NEST (ddr));
		  if (DDR_DIST_VECT (ddr, 0)[idx] == 0
		      /* Unless it is the outermost loop which is the one
			 we eventually distribute.  */
		      && idx != 0)
		    this_dir = 2;
		}
	    }
	  else
	    this_dir = 0;
	  /* Merge THIS_DIR into the accumulated DIR; opposite non-zero
	     directions mean a dependence cycle -> return 2.  */
	  if (this_dir == 2)
	    return 2;
	  else if (dir == 0)
	    dir = this_dir;
	  else if (this_dir != 0 && dir != this_dir)
	    return 2;
	  /* Shuffle "back" dr1.  */
	  dr1 = saved_dr1;
	}
    }
  return dir;
}
2168 1.1 mrg
2169 1.1 mrg /* Compare postorder number of the partition graph vertices V1 and V2. */
2170 1.1 mrg
2171 1.1 mrg static int
2172 1.1 mrg pgcmp (const void *v1_, const void *v2_)
2173 1.1 mrg {
2174 1.1 mrg const vertex *v1 = (const vertex *)v1_;
2175 1.1 mrg const vertex *v2 = (const vertex *)v2_;
2176 1.1 mrg return v2->post - v1->post;
2177 1.1 mrg }
2178 1.1 mrg
/* Data attached to vertices of partition dependence graph.  */
struct pg_vdata
{
  /* ID of the corresponding partition.  */
  int id;
  /* The partition.  */
  struct partition *partition;
};
2187 1.1 mrg
/* Data attached to edges of partition dependence graph.  */
struct pg_edata
{
  /* If the dependence edge can be resolved by runtime alias check,
     this vector contains data dependence relations for runtime alias
     check.  On the other hand, if the dependence edge is introduced
     because of compilation time known data dependence, this vector
     contains nothing.  */
  vec<ddr_p> alias_ddrs;
};
2198 1.1 mrg
/* Callback data for traversing edges in graph.  */
struct pg_edge_callback_data
{
  /* Bitmap of strong connected components that should be merged.  */
  bitmap sccs_to_merge;
  /* Array containing component information for all vertices.  */
  int *vertices_component;
  /* Vector to record all data dependence relations which are needed
     to break strong connected components by runtime alias checks.  */
  vec<ddr_p> *alias_ddrs;
};
2210 1.1 mrg
2211 1.1 mrg /* Initialize vertice's data for partition dependence graph PG with
2212 1.1 mrg PARTITIONS. */
2213 1.1 mrg
2214 1.1 mrg static void
2215 1.1 mrg init_partition_graph_vertices (struct graph *pg,
2216 1.1 mrg vec<struct partition *> *partitions)
2217 1.1 mrg {
2218 1.1 mrg int i;
2219 1.1 mrg partition *partition;
2220 1.1 mrg struct pg_vdata *data;
2221 1.1 mrg
2222 1.1 mrg for (i = 0; partitions->iterate (i, &partition); ++i)
2223 1.1 mrg {
2224 1.1 mrg data = new pg_vdata;
2225 1.1 mrg pg->vertices[i].data = data;
2226 1.1 mrg data->id = i;
2227 1.1 mrg data->partition = partition;
2228 1.1 mrg }
2229 1.1 mrg }
2230 1.1 mrg
2231 1.1 mrg /* Add edge <I, J> to partition dependence graph PG. Attach vector of data
2232 1.1 mrg dependence relations to the EDGE if DDRS isn't NULL. */
2233 1.1 mrg
2234 1.1 mrg static void
2235 1.1 mrg add_partition_graph_edge (struct graph *pg, int i, int j, vec<ddr_p> *ddrs)
2236 1.1 mrg {
2237 1.1 mrg struct graph_edge *e = add_edge (pg, i, j);
2238 1.1 mrg
2239 1.1 mrg /* If the edge is attached with data dependence relations, it means this
2240 1.1 mrg dependence edge can be resolved by runtime alias checks. */
2241 1.1 mrg if (ddrs != NULL)
2242 1.1 mrg {
2243 1.1 mrg struct pg_edata *data = new pg_edata;
2244 1.1 mrg
2245 1.1 mrg gcc_assert (ddrs->length () > 0);
2246 1.1 mrg e->data = data;
2247 1.1 mrg data->alias_ddrs = vNULL;
2248 1.1 mrg data->alias_ddrs.safe_splice (*ddrs);
2249 1.1 mrg }
2250 1.1 mrg }
2251 1.1 mrg
2252 1.1 mrg /* Callback function for graph travesal algorithm. It returns true
2253 1.1 mrg if edge E should skipped when traversing the graph. */
2254 1.1 mrg
2255 1.1 mrg static bool
2256 1.1 mrg pg_skip_alias_edge (struct graph_edge *e)
2257 1.1 mrg {
2258 1.1 mrg struct pg_edata *data = (struct pg_edata *)e->data;
2259 1.1 mrg return (data != NULL && data->alias_ddrs.length () > 0);
2260 1.1 mrg }
2261 1.1 mrg
2262 1.1 mrg /* Callback function freeing data attached to edge E of graph. */
2263 1.1 mrg
2264 1.1 mrg static void
2265 1.1 mrg free_partition_graph_edata_cb (struct graph *, struct graph_edge *e, void *)
2266 1.1 mrg {
2267 1.1 mrg if (e->data != NULL)
2268 1.1 mrg {
2269 1.1 mrg struct pg_edata *data = (struct pg_edata *)e->data;
2270 1.1 mrg data->alias_ddrs.release ();
2271 1.1 mrg delete data;
2272 1.1 mrg }
2273 1.1 mrg }
2274 1.1 mrg
2275 1.1 mrg /* Free data attached to vertice of partition dependence graph PG. */
2276 1.1 mrg
2277 1.1 mrg static void
2278 1.1 mrg free_partition_graph_vdata (struct graph *pg)
2279 1.1 mrg {
2280 1.1 mrg int i;
2281 1.1 mrg struct pg_vdata *data;
2282 1.1 mrg
2283 1.1 mrg for (i = 0; i < pg->n_vertices; ++i)
2284 1.1 mrg {
2285 1.1 mrg data = (struct pg_vdata *)pg->vertices[i].data;
2286 1.1 mrg delete data;
2287 1.1 mrg }
2288 1.1 mrg }
2289 1.1 mrg
/* Build and return partition dependence graph for PARTITIONS.  RDG is
   reduced dependence graph for the loop to be distributed.  If IGNORE_ALIAS_P
   is true, data dependence caused by possible alias between references
   is ignored, as if it doesn't exist at all; otherwise all dependences
   are considered.  */

struct graph *
loop_distribution::build_partition_graph (struct graph *rdg,
					  vec<struct partition *> *partitions,
					  bool ignore_alias_p)
{
  int i, j;
  struct partition *partition1, *partition2;
  graph *pg = new_graph (partitions->length ());
  auto_vec<ddr_p> alias_ddrs, *alias_ddrs_p;

  /* Pass NULL to pg_add_dependence_edges when aliases are ignored so no
     runtime-alias DDRs get collected.  */
  alias_ddrs_p = ignore_alias_p ? NULL : &alias_ddrs;

  init_partition_graph_vertices (pg, partitions);

  /* Examine every unordered pair of partitions.  */
  for (i = 0; partitions->iterate (i, &partition1); ++i)
    {
      for (j = i + 1; partitions->iterate (j, &partition2); ++j)
	{
	  /* dependence direction - 0 is no dependence, -1 is back,
	     1 is forth, 2 is both (we can stop then, merging will occur).  */
	  int dir = 0;

	  /* If the first partition has reduction, add back edge; if the
	     second partition has reduction, add forth edge.  This makes
	     sure that reduction partition will be sorted as the last one.  */
	  if (partition_reduction_p (partition1))
	    dir = -1;
	  else if (partition_reduction_p (partition2))
	    dir = 1;

	  /* Cleanup the temporary vector.  */
	  alias_ddrs.truncate (0);

	  dir = pg_add_dependence_edges (rdg, dir, partition1->datarefs,
					 partition2->datarefs, alias_ddrs_p);

	  /* Add edge to partition graph if there exists dependence.  There
	     are two types of edges.  One type edge is caused by compilation
	     time known dependence, this type cannot be resolved by runtime
	     alias check.  The other type can be resolved by runtime alias
	     check.  */
	  if (dir == 1 || dir == 2
	      || alias_ddrs.length () > 0)
	    {
	      /* Attach data dependence relations to edge that can be resolved
		 by runtime alias check.  */
	      bool alias_edge_p = (dir != 1 && dir != 2);
	      add_partition_graph_edge (pg, i, j,
					(alias_edge_p) ? &alias_ddrs : NULL);
	    }
	  if (dir == -1 || dir == 2
	      || alias_ddrs.length () > 0)
	    {
	      /* Attach data dependence relations to edge that can be resolved
		 by runtime alias check.  */
	      bool alias_edge_p = (dir != -1 && dir != 2);
	      add_partition_graph_edge (pg, j, i,
					(alias_edge_p) ? &alias_ddrs : NULL);
	    }
	}
    }
  return pg;
}
2359 1.1 mrg
2360 1.1 mrg /* Sort partitions in PG in descending post order and store them in
2361 1.1 mrg PARTITIONS. */
2362 1.1 mrg
2363 1.1 mrg static void
2364 1.1 mrg sort_partitions_by_post_order (struct graph *pg,
2365 1.1 mrg vec<struct partition *> *partitions)
2366 1.1 mrg {
2367 1.1 mrg int i;
2368 1.1 mrg struct pg_vdata *data;
2369 1.1 mrg
2370 1.1 mrg /* Now order the remaining nodes in descending postorder. */
2371 1.1 mrg qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
2372 1.1 mrg partitions->truncate (0);
2373 1.1 mrg for (i = 0; i < pg->n_vertices; ++i)
2374 1.1 mrg {
2375 1.1 mrg data = (struct pg_vdata *)pg->vertices[i].data;
2376 1.1 mrg if (data->partition)
2377 1.1 mrg partitions->safe_push (data->partition);
2378 1.1 mrg }
2379 1.1 mrg }
2380 1.1 mrg
/* Build partition dependence graph for PARTITIONS over RDG, fuse all
   partitions belonging to the same strong connected component (such
   dependence cycles cannot be distributed), then sort the surviving
   partitions in topological order.  IGNORE_ALIAS_P is passed through to
   build_partition_graph.  */

void
loop_distribution::merge_dep_scc_partitions (struct graph *rdg,
					     vec<struct partition *> *partitions,
					     bool ignore_alias_p)
{
  struct partition *partition1, *partition2;
  struct pg_vdata *data;
  graph *pg = build_partition_graph (rdg, partitions, ignore_alias_p);
  int i, j, num_sccs = graphds_scc (pg, NULL);

  /* Strong connected component means dependence cycle, we cannot distribute
     them.  So fuse them together.  */
  if ((unsigned) num_sccs < partitions->length ())
    {
      for (i = 0; i < num_sccs; ++i)
	{
	  /* Find the first partition of component I, then fuse every later
	     partition of the same component into it.  */
	  for (j = 0; partitions->iterate (j, &partition1); ++j)
	    if (pg->vertices[j].component == i)
	      break;
	  for (j = j + 1; partitions->iterate (j, &partition2); ++j)
	    if (pg->vertices[j].component == i)
	      {
		partition_merge_into (NULL, partition1,
				      partition2, FUSE_SAME_SCC);
		partition1->type = PTYPE_SEQUENTIAL;
		/* Clear the fused partition both in the vector and in the
		   vertex data so sorting below skips it.  */
		(*partitions)[j] = NULL;
		partition_free (partition2);
		data = (struct pg_vdata *)pg->vertices[j].data;
		data->partition = NULL;
	      }
	}
    }

  sort_partitions_by_post_order (pg, partitions);
  gcc_assert (partitions->length () == (unsigned)num_sccs);
  free_partition_graph_vdata (pg);
  for_each_edge (pg, free_partition_graph_edata_cb, NULL);
  free_graph (pg);
}
2420 1.1 mrg
2421 1.1 mrg /* Callback function for traversing edge E in graph G. DATA is private
2422 1.1 mrg callback data. */
2423 1.1 mrg
2424 1.1 mrg static void
2425 1.1 mrg pg_collect_alias_ddrs (struct graph *g, struct graph_edge *e, void *data)
2426 1.1 mrg {
2427 1.1 mrg int i, j, component;
2428 1.1 mrg struct pg_edge_callback_data *cbdata;
2429 1.1 mrg struct pg_edata *edata = (struct pg_edata *) e->data;
2430 1.1 mrg
2431 1.1 mrg /* If the edge doesn't have attached data dependence, it represents
2432 1.1 mrg compilation time known dependences. This type dependence cannot
2433 1.1 mrg be resolved by runtime alias check. */
2434 1.1 mrg if (edata == NULL || edata->alias_ddrs.length () == 0)
2435 1.1 mrg return;
2436 1.1 mrg
2437 1.1 mrg cbdata = (struct pg_edge_callback_data *) data;
2438 1.1 mrg i = e->src;
2439 1.1 mrg j = e->dest;
2440 1.1 mrg component = cbdata->vertices_component[i];
2441 1.1 mrg /* Vertices are topologically sorted according to compilation time
2442 1.1 mrg known dependences, so we can break strong connected components
2443 1.1 mrg by removing edges of the opposite direction, i.e, edges pointing
2444 1.1 mrg from vertice with smaller post number to vertice with bigger post
2445 1.1 mrg number. */
2446 1.1 mrg if (g->vertices[i].post < g->vertices[j].post
2447 1.1 mrg /* We only need to remove edges connecting vertices in the same
2448 1.1 mrg strong connected component to break it. */
2449 1.1 mrg && component == cbdata->vertices_component[j]
2450 1.1 mrg /* Check if we want to break the strong connected component or not. */
2451 1.1 mrg && !bitmap_bit_p (cbdata->sccs_to_merge, component))
2452 1.1 mrg cbdata->alias_ddrs->safe_splice (edata->alias_ddrs);
2453 1.1 mrg }
2454 1.1 mrg
2455 1.1 mrg /* Callback function for traversing edge E. DATA is private
2456 1.1 mrg callback data. */
2457 1.1 mrg
2458 1.1 mrg static void
2459 1.1 mrg pg_unmark_merged_alias_ddrs (struct graph *, struct graph_edge *e, void *data)
2460 1.1 mrg {
2461 1.1 mrg int i, j, component;
2462 1.1 mrg struct pg_edge_callback_data *cbdata;
2463 1.1 mrg struct pg_edata *edata = (struct pg_edata *) e->data;
2464 1.1 mrg
2465 1.1 mrg if (edata == NULL || edata->alias_ddrs.length () == 0)
2466 1.1 mrg return;
2467 1.1 mrg
2468 1.1 mrg cbdata = (struct pg_edge_callback_data *) data;
2469 1.1 mrg i = e->src;
2470 1.1 mrg j = e->dest;
2471 1.1 mrg component = cbdata->vertices_component[i];
2472 1.1 mrg /* Make sure to not skip vertices inside SCCs we are going to merge. */
2473 1.1 mrg if (component == cbdata->vertices_component[j]
2474 1.1 mrg && bitmap_bit_p (cbdata->sccs_to_merge, component))
2475 1.1 mrg {
2476 1.1 mrg edata->alias_ddrs.release ();
2477 1.1 mrg delete edata;
2478 1.1 mrg e->data = NULL;
2479 1.1 mrg }
2480 1.1 mrg }
2481 1.1 mrg
2482 1.1 mrg /* This is the main function breaking strong conected components in
2483 1.1 mrg PARTITIONS giving reduced depdendence graph RDG. Store data dependence
2484 1.1 mrg relations for runtime alias check in ALIAS_DDRS. */
2485 1.1 mrg void
2486 1.1 mrg loop_distribution::break_alias_scc_partitions (struct graph *rdg,
2487 1.1 mrg vec<struct partition *> *partitions,
2488 1.1 mrg vec<ddr_p> *alias_ddrs)
2489 1.1 mrg {
2490 1.1 mrg int i, j, k, num_sccs, num_sccs_no_alias = 0;
2491 1.1 mrg /* Build partition dependence graph. */
2492 1.1 mrg graph *pg = build_partition_graph (rdg, partitions, false);
2493 1.1 mrg
2494 1.1 mrg alias_ddrs->truncate (0);
2495 1.1 mrg /* Find strong connected components in the graph, with all dependence edges
2496 1.1 mrg considered. */
2497 1.1 mrg num_sccs = graphds_scc (pg, NULL);
2498 1.1 mrg /* All SCCs now can be broken by runtime alias checks because SCCs caused by
2499 1.1 mrg compilation time known dependences are merged before this function. */
2500 1.1 mrg if ((unsigned) num_sccs < partitions->length ())
2501 1.1 mrg {
2502 1.1 mrg struct pg_edge_callback_data cbdata;
2503 1.1 mrg auto_bitmap sccs_to_merge;
2504 1.1 mrg auto_vec<enum partition_type> scc_types;
2505 1.1 mrg struct partition *partition, *first;
2506 1.1 mrg
2507 1.1 mrg /* If all partitions in a SCC have the same type, we can simply merge the
2508 1.1 mrg SCC. This loop finds out such SCCS and record them in bitmap. */
2509 1.1 mrg bitmap_set_range (sccs_to_merge, 0, (unsigned) num_sccs);
2510 1.1 mrg for (i = 0; i < num_sccs; ++i)
2511 1.1 mrg {
2512 1.1 mrg for (j = 0; partitions->iterate (j, &first); ++j)
2513 1.1 mrg if (pg->vertices[j].component == i)
2514 1.1 mrg break;
2515 1.1 mrg
2516 1.1 mrg bool same_type = true, all_builtins = partition_builtin_p (first);
2517 1.1 mrg for (++j; partitions->iterate (j, &partition); ++j)
2518 1.1 mrg {
2519 1.1 mrg if (pg->vertices[j].component != i)
2520 1.1 mrg continue;
2521 1.1 mrg
2522 1.1 mrg if (first->type != partition->type)
2523 1.1 mrg {
2524 1.1 mrg same_type = false;
2525 1.1 mrg break;
2526 1.1 mrg }
2527 1.1 mrg all_builtins &= partition_builtin_p (partition);
2528 1.1 mrg }
2529 1.1 mrg /* Merge SCC if all partitions in SCC have the same type, though the
2530 1.1 mrg result partition is sequential, because vectorizer can do better
2531 1.1 mrg runtime alias check. One expecption is all partitions in SCC are
2532 1.1 mrg builtins. */
2533 1.1 mrg if (!same_type || all_builtins)
2534 1.1 mrg bitmap_clear_bit (sccs_to_merge, i);
2535 1.1 mrg }
2536 1.1 mrg
2537 1.1 mrg /* Initialize callback data for traversing. */
2538 1.1 mrg cbdata.sccs_to_merge = sccs_to_merge;
2539 1.1 mrg cbdata.alias_ddrs = alias_ddrs;
2540 1.1 mrg cbdata.vertices_component = XNEWVEC (int, pg->n_vertices);
2541 1.1 mrg /* Record the component information which will be corrupted by next
2542 1.1 mrg graph scc finding call. */
2543 1.1 mrg for (i = 0; i < pg->n_vertices; ++i)
2544 1.1 mrg cbdata.vertices_component[i] = pg->vertices[i].component;
2545 1.1 mrg
2546 1.1 mrg /* Collect data dependences for runtime alias checks to break SCCs. */
2547 1.1 mrg if (bitmap_count_bits (sccs_to_merge) != (unsigned) num_sccs)
2548 1.1 mrg {
2549 1.1 mrg /* For SCCs we want to merge clear all alias_ddrs for edges
2550 1.1 mrg inside the component. */
2551 1.1 mrg for_each_edge (pg, pg_unmark_merged_alias_ddrs, &cbdata);
2552 1.1 mrg
2553 1.1 mrg /* Run SCC finding algorithm again, with alias dependence edges
2554 1.1 mrg skipped. This is to topologically sort partitions according to
2555 1.1 mrg compilation time known dependence. Note the topological order
2556 1.1 mrg is stored in the form of pg's post order number. */
2557 1.1 mrg num_sccs_no_alias = graphds_scc (pg, NULL, pg_skip_alias_edge);
2558 1.1 mrg /* We cannot assert partitions->length () == num_sccs_no_alias
2559 1.1 mrg since we are not ignoring alias edges in cycles we are
2560 1.1 mrg going to merge. That's required to compute correct postorder. */
2561 1.1 mrg /* With topological order, we can construct two subgraphs L and R.
2562 1.1 mrg L contains edge <x, y> where x < y in terms of post order, while
2563 1.1 mrg R contains edge <x, y> where x > y. Edges for compilation time
2564 1.1 mrg known dependence all fall in R, so we break SCCs by removing all
2565 1.1 mrg (alias) edges of in subgraph L. */
2566 1.1 mrg for_each_edge (pg, pg_collect_alias_ddrs, &cbdata);
2567 1.1 mrg }
2568 1.1 mrg
2569 1.1 mrg /* For SCC that doesn't need to be broken, merge it. */
2570 1.1 mrg for (i = 0; i < num_sccs; ++i)
2571 1.1 mrg {
2572 1.1 mrg if (!bitmap_bit_p (sccs_to_merge, i))
2573 1.1 mrg continue;
2574 1.1 mrg
2575 1.1 mrg for (j = 0; partitions->iterate (j, &first); ++j)
2576 1.1 mrg if (cbdata.vertices_component[j] == i)
2577 1.1 mrg break;
2578 1.1 mrg for (k = j + 1; partitions->iterate (k, &partition); ++k)
2579 1.1 mrg {
2580 1.1 mrg struct pg_vdata *data;
2581 1.1 mrg
2582 1.1 mrg if (cbdata.vertices_component[k] != i)
2583 1.1 mrg continue;
2584 1.1 mrg
2585 1.1 mrg partition_merge_into (NULL, first, partition, FUSE_SAME_SCC);
2586 1.1 mrg (*partitions)[k] = NULL;
2587 1.1 mrg partition_free (partition);
2588 1.1 mrg data = (struct pg_vdata *)pg->vertices[k].data;
2589 1.1 mrg gcc_assert (data->id == k);
2590 1.1 mrg data->partition = NULL;
2591 1.1 mrg /* The result partition of merged SCC must be sequential. */
2592 1.1 mrg first->type = PTYPE_SEQUENTIAL;
2593 1.1 mrg }
2594 1.1 mrg }
2595 1.1 mrg /* If reduction partition's SCC is broken by runtime alias checks,
2596 1.1 mrg we force a negative post order to it making sure it will be scheduled
2597 1.1 mrg in the last. */
2598 1.1 mrg if (num_sccs_no_alias > 0)
2599 1.1 mrg {
2600 1.1 mrg j = -1;
2601 1.1 mrg for (i = 0; i < pg->n_vertices; ++i)
2602 1.1 mrg {
2603 1.1 mrg struct pg_vdata *data = (struct pg_vdata *)pg->vertices[i].data;
2604 1.1 mrg if (data->partition && partition_reduction_p (data->partition))
2605 1.1 mrg {
2606 1.1 mrg gcc_assert (j == -1);
2607 1.1 mrg j = i;
2608 1.1 mrg }
2609 1.1 mrg }
2610 1.1 mrg if (j >= 0)
2611 1.1 mrg pg->vertices[j].post = -1;
2612 1.1 mrg }
2613 1.1 mrg
2614 1.1 mrg free (cbdata.vertices_component);
2615 1.1 mrg }
2616 1.1 mrg
2617 1.1 mrg sort_partitions_by_post_order (pg, partitions);
2618 1.1 mrg free_partition_graph_vdata (pg);
2619 1.1 mrg for_each_edge (pg, free_partition_graph_edata_cb, NULL);
2620 1.1 mrg free_graph (pg);
2621 1.1 mrg
2622 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
2623 1.1 mrg {
2624 1.1 mrg fprintf (dump_file, "Possible alias data dependence to break:\n");
2625 1.1 mrg dump_data_dependence_relations (dump_file, *alias_ddrs);
2626 1.1 mrg }
2627 1.1 mrg }
2628 1.1 mrg
2629 1.1 mrg /* Compute and return an expression whose value is the segment length which
2630 1.1 mrg will be accessed by DR in NITERS iterations. */
2631 1.1 mrg
2632 1.1 mrg static tree
2633 1.1 mrg data_ref_segment_size (struct data_reference *dr, tree niters)
2634 1.1 mrg {
2635 1.1 mrg niters = size_binop (MINUS_EXPR,
2636 1.1 mrg fold_convert (sizetype, niters),
2637 1.1 mrg size_one_node);
2638 1.1 mrg return size_binop (MULT_EXPR,
2639 1.1 mrg fold_convert (sizetype, DR_STEP (dr)),
2640 1.1 mrg fold_convert (sizetype, niters));
2641 1.1 mrg }
2642 1.1 mrg
2643 1.1 mrg /* Return true if LOOP's latch is dominated by statement for data reference
2644 1.1 mrg DR. */
2645 1.1 mrg
2646 1.1 mrg static inline bool
2647 1.1 mrg latch_dominated_by_data_ref (class loop *loop, data_reference *dr)
2648 1.1 mrg {
2649 1.1 mrg return dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
2650 1.1 mrg gimple_bb (DR_STMT (dr)));
2651 1.1 mrg }
2652 1.1 mrg
2653 1.1 mrg /* Compute alias check pairs and store them in COMP_ALIAS_PAIRS for LOOP's
2654 1.1 mrg data dependence relations ALIAS_DDRS. */
2655 1.1 mrg
2656 1.1 mrg static void
2657 1.1 mrg compute_alias_check_pairs (class loop *loop, vec<ddr_p> *alias_ddrs,
2658 1.1 mrg vec<dr_with_seg_len_pair_t> *comp_alias_pairs)
2659 1.1 mrg {
2660 1.1 mrg unsigned int i;
2661 1.1 mrg unsigned HOST_WIDE_INT factor = 1;
2662 1.1 mrg tree niters_plus_one, niters = number_of_latch_executions (loop);
2663 1.1 mrg
2664 1.1 mrg gcc_assert (niters != NULL_TREE && niters != chrec_dont_know);
2665 1.1 mrg niters = fold_convert (sizetype, niters);
2666 1.1 mrg niters_plus_one = size_binop (PLUS_EXPR, niters, size_one_node);
2667 1.1 mrg
2668 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
2669 1.1 mrg fprintf (dump_file, "Creating alias check pairs:\n");
2670 1.1 mrg
2671 1.1 mrg /* Iterate all data dependence relations and compute alias check pairs. */
2672 1.1 mrg for (i = 0; i < alias_ddrs->length (); i++)
2673 1.1 mrg {
2674 1.1 mrg ddr_p ddr = (*alias_ddrs)[i];
2675 1.1 mrg struct data_reference *dr_a = DDR_A (ddr);
2676 1.1 mrg struct data_reference *dr_b = DDR_B (ddr);
2677 1.1 mrg tree seg_length_a, seg_length_b;
2678 1.1 mrg
2679 1.1 mrg if (latch_dominated_by_data_ref (loop, dr_a))
2680 1.1 mrg seg_length_a = data_ref_segment_size (dr_a, niters_plus_one);
2681 1.1 mrg else
2682 1.1 mrg seg_length_a = data_ref_segment_size (dr_a, niters);
2683 1.1 mrg
2684 1.1 mrg if (latch_dominated_by_data_ref (loop, dr_b))
2685 1.1 mrg seg_length_b = data_ref_segment_size (dr_b, niters_plus_one);
2686 1.1 mrg else
2687 1.1 mrg seg_length_b = data_ref_segment_size (dr_b, niters);
2688 1.1 mrg
2689 1.1 mrg unsigned HOST_WIDE_INT access_size_a
2690 1.1 mrg = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a))));
2691 1.1 mrg unsigned HOST_WIDE_INT access_size_b
2692 1.1 mrg = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b))));
2693 1.1 mrg unsigned int align_a = TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_a)));
2694 1.1 mrg unsigned int align_b = TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_b)));
2695 1.1 mrg
2696 1.1 mrg dr_with_seg_len_pair_t dr_with_seg_len_pair
2697 1.1 mrg (dr_with_seg_len (dr_a, seg_length_a, access_size_a, align_a),
2698 1.1 mrg dr_with_seg_len (dr_b, seg_length_b, access_size_b, align_b),
2699 1.1 mrg /* ??? Would WELL_ORDERED be safe? */
2700 1.1 mrg dr_with_seg_len_pair_t::REORDERED);
2701 1.1 mrg
2702 1.1 mrg comp_alias_pairs->safe_push (dr_with_seg_len_pair);
2703 1.1 mrg }
2704 1.1 mrg
2705 1.1 mrg if (tree_fits_uhwi_p (niters))
2706 1.1 mrg factor = tree_to_uhwi (niters);
2707 1.1 mrg
2708 1.1 mrg /* Prune alias check pairs. */
2709 1.1 mrg prune_runtime_alias_test_list (comp_alias_pairs, factor);
2710 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
2711 1.1 mrg fprintf (dump_file,
2712 1.1 mrg "Improved number of alias checks from %d to %d\n",
2713 1.1 mrg alias_ddrs->length (), comp_alias_pairs->length ());
2714 1.1 mrg }
2715 1.1 mrg
2716 1.1 mrg /* Given data dependence relations in ALIAS_DDRS, generate runtime alias
2717 1.1 mrg checks and version LOOP under condition of these runtime alias checks. */
2718 1.1 mrg
2719 1.1 mrg static void
2720 1.1 mrg version_loop_by_alias_check (vec<struct partition *> *partitions,
2721 1.1 mrg class loop *loop, vec<ddr_p> *alias_ddrs)
2722 1.1 mrg {
2723 1.1 mrg profile_probability prob;
2724 1.1 mrg basic_block cond_bb;
2725 1.1 mrg class loop *nloop;
2726 1.1 mrg tree lhs, arg0, cond_expr = NULL_TREE;
2727 1.1 mrg gimple_seq cond_stmts = NULL;
2728 1.1 mrg gimple *call_stmt = NULL;
2729 1.1 mrg auto_vec<dr_with_seg_len_pair_t> comp_alias_pairs;
2730 1.1 mrg
2731 1.1 mrg /* Generate code for runtime alias checks if necessary. */
2732 1.1 mrg gcc_assert (alias_ddrs->length () > 0);
2733 1.1 mrg
2734 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
2735 1.1 mrg fprintf (dump_file,
2736 1.1 mrg "Version loop <%d> with runtime alias check\n", loop->num);
2737 1.1 mrg
2738 1.1 mrg compute_alias_check_pairs (loop, alias_ddrs, &comp_alias_pairs);
2739 1.1 mrg create_runtime_alias_checks (loop, &comp_alias_pairs, &cond_expr);
2740 1.1 mrg cond_expr = force_gimple_operand_1 (cond_expr, &cond_stmts,
2741 1.1 mrg is_gimple_val, NULL_TREE);
2742 1.1 mrg
2743 1.1 mrg /* Depend on vectorizer to fold IFN_LOOP_DIST_ALIAS. */
2744 1.1 mrg bool cancelable_p = flag_tree_loop_vectorize;
2745 1.1 mrg if (cancelable_p)
2746 1.1 mrg {
2747 1.1 mrg unsigned i = 0;
2748 1.1 mrg struct partition *partition;
2749 1.1 mrg for (; partitions->iterate (i, &partition); ++i)
2750 1.1 mrg if (!partition_builtin_p (partition))
2751 1.1 mrg break;
2752 1.1 mrg
2753 1.1 mrg /* If all partitions are builtins, distributing it would be profitable and
2754 1.1 mrg we don't want to cancel the runtime alias checks. */
2755 1.1 mrg if (i == partitions->length ())
2756 1.1 mrg cancelable_p = false;
2757 1.1 mrg }
2758 1.1 mrg
2759 1.1 mrg /* Generate internal function call for loop distribution alias check if the
2760 1.1 mrg runtime alias check should be cancelable. */
2761 1.1 mrg if (cancelable_p)
2762 1.1 mrg {
2763 1.1 mrg call_stmt = gimple_build_call_internal (IFN_LOOP_DIST_ALIAS,
2764 1.1 mrg 2, NULL_TREE, cond_expr);
2765 1.1 mrg lhs = make_ssa_name (boolean_type_node);
2766 1.1 mrg gimple_call_set_lhs (call_stmt, lhs);
2767 1.1 mrg }
2768 1.1 mrg else
2769 1.1 mrg lhs = cond_expr;
2770 1.1 mrg
2771 1.1 mrg prob = profile_probability::guessed_always ().apply_scale (9, 10);
2772 1.1 mrg initialize_original_copy_tables ();
2773 1.1 mrg nloop = loop_version (loop, lhs, &cond_bb, prob, prob.invert (),
2774 1.1 mrg prob, prob.invert (), true);
2775 1.1 mrg free_original_copy_tables ();
2776 1.1 mrg /* Record the original loop number in newly generated loops. In case of
2777 1.1 mrg distribution, the original loop will be distributed and the new loop
2778 1.1 mrg is kept. */
2779 1.1 mrg loop->orig_loop_num = nloop->num;
2780 1.1 mrg nloop->orig_loop_num = nloop->num;
2781 1.1 mrg nloop->dont_vectorize = true;
2782 1.1 mrg nloop->force_vectorize = false;
2783 1.1 mrg
2784 1.1 mrg if (call_stmt)
2785 1.1 mrg {
2786 1.1 mrg /* Record new loop's num in IFN_LOOP_DIST_ALIAS because the original
2787 1.1 mrg loop could be destroyed. */
2788 1.1 mrg arg0 = build_int_cst (integer_type_node, loop->orig_loop_num);
2789 1.1 mrg gimple_call_set_arg (call_stmt, 0, arg0);
2790 1.1 mrg gimple_seq_add_stmt_without_update (&cond_stmts, call_stmt);
2791 1.1 mrg }
2792 1.1 mrg
2793 1.1 mrg if (cond_stmts)
2794 1.1 mrg {
2795 1.1 mrg gimple_stmt_iterator cond_gsi = gsi_last_bb (cond_bb);
2796 1.1 mrg gsi_insert_seq_before (&cond_gsi, cond_stmts, GSI_SAME_STMT);
2797 1.1 mrg }
2798 1.1 mrg update_ssa (TODO_update_ssa);
2799 1.1 mrg }
2800 1.1 mrg
2801 1.1 mrg /* Return true if loop versioning is needed to distrubute PARTITIONS.
2802 1.1 mrg ALIAS_DDRS are data dependence relations for runtime alias check. */
2803 1.1 mrg
2804 1.1 mrg static inline bool
2805 1.1 mrg version_for_distribution_p (vec<struct partition *> *partitions,
2806 1.1 mrg vec<ddr_p> *alias_ddrs)
2807 1.1 mrg {
2808 1.1 mrg /* No need to version loop if we have only one partition. */
2809 1.1 mrg if (partitions->length () == 1)
2810 1.1 mrg return false;
2811 1.1 mrg
2812 1.1 mrg /* Need to version loop if runtime alias check is necessary. */
2813 1.1 mrg return (alias_ddrs->length () > 0);
2814 1.1 mrg }
2815 1.1 mrg
2816 1.1 mrg /* Compare base offset of builtin mem* partitions P1 and P2. */
2817 1.1 mrg
2818 1.1 mrg static int
2819 1.1 mrg offset_cmp (const void *vp1, const void *vp2)
2820 1.1 mrg {
2821 1.1 mrg struct partition *p1 = *(struct partition *const *) vp1;
2822 1.1 mrg struct partition *p2 = *(struct partition *const *) vp2;
2823 1.1 mrg unsigned HOST_WIDE_INT o1 = p1->builtin->dst_base_offset;
2824 1.1 mrg unsigned HOST_WIDE_INT o2 = p2->builtin->dst_base_offset;
2825 1.1 mrg return (o2 < o1) - (o1 < o2);
2826 1.1 mrg }
2827 1.1 mrg
2828 1.1 mrg /* Fuse adjacent memset builtin PARTITIONS if possible. This is a special
2829 1.1 mrg case optimization transforming below code:
2830 1.1 mrg
2831 1.1 mrg __builtin_memset (&obj, 0, 100);
2832 1.1 mrg _1 = &obj + 100;
2833 1.1 mrg __builtin_memset (_1, 0, 200);
2834 1.1 mrg _2 = &obj + 300;
2835 1.1 mrg __builtin_memset (_2, 0, 100);
2836 1.1 mrg
2837 1.1 mrg into:
2838 1.1 mrg
2839 1.1 mrg __builtin_memset (&obj, 0, 400);
2840 1.1 mrg
2841 1.1 mrg Note we don't have dependence information between different partitions
2842 1.1 mrg at this point, as a result, we can't handle nonadjacent memset builtin
2843 1.1 mrg partitions since dependence might be broken. */
2844 1.1 mrg
2845 1.1 mrg static void
2846 1.1 mrg fuse_memset_builtins (vec<struct partition *> *partitions)
2847 1.1 mrg {
2848 1.1 mrg unsigned i, j;
2849 1.1 mrg struct partition *part1, *part2;
2850 1.1 mrg tree rhs1, rhs2;
2851 1.1 mrg
2852 1.1 mrg for (i = 0; partitions->iterate (i, &part1);)
2853 1.1 mrg {
2854 1.1 mrg if (part1->kind != PKIND_MEMSET)
2855 1.1 mrg {
2856 1.1 mrg i++;
2857 1.1 mrg continue;
2858 1.1 mrg }
2859 1.1 mrg
2860 1.1 mrg /* Find sub-array of memset builtins of the same base. Index range
2861 1.1 mrg of the sub-array is [i, j) with "j > i". */
2862 1.1 mrg for (j = i + 1; partitions->iterate (j, &part2); ++j)
2863 1.1 mrg {
2864 1.1 mrg if (part2->kind != PKIND_MEMSET
2865 1.1 mrg || !operand_equal_p (part1->builtin->dst_base_base,
2866 1.1 mrg part2->builtin->dst_base_base, 0))
2867 1.1 mrg break;
2868 1.1 mrg
2869 1.1 mrg /* Memset calls setting different values can't be merged. */
2870 1.1 mrg rhs1 = gimple_assign_rhs1 (DR_STMT (part1->builtin->dst_dr));
2871 1.1 mrg rhs2 = gimple_assign_rhs1 (DR_STMT (part2->builtin->dst_dr));
2872 1.1 mrg if (!operand_equal_p (rhs1, rhs2, 0))
2873 1.1 mrg break;
2874 1.1 mrg }
2875 1.1 mrg
2876 1.1 mrg /* Stable sort is required in order to avoid breaking dependence. */
2877 1.1 mrg gcc_stablesort (&(*partitions)[i], j - i, sizeof (*partitions)[i],
2878 1.1 mrg offset_cmp);
2879 1.1 mrg /* Continue with next partition. */
2880 1.1 mrg i = j;
2881 1.1 mrg }
2882 1.1 mrg
2883 1.1 mrg /* Merge all consecutive memset builtin partitions. */
2884 1.1 mrg for (i = 0; i < partitions->length () - 1;)
2885 1.1 mrg {
2886 1.1 mrg part1 = (*partitions)[i];
2887 1.1 mrg if (part1->kind != PKIND_MEMSET)
2888 1.1 mrg {
2889 1.1 mrg i++;
2890 1.1 mrg continue;
2891 1.1 mrg }
2892 1.1 mrg
2893 1.1 mrg part2 = (*partitions)[i + 1];
2894 1.1 mrg /* Only merge memset partitions of the same base and with constant
2895 1.1 mrg access sizes. */
2896 1.1 mrg if (part2->kind != PKIND_MEMSET
2897 1.1 mrg || TREE_CODE (part1->builtin->size) != INTEGER_CST
2898 1.1 mrg || TREE_CODE (part2->builtin->size) != INTEGER_CST
2899 1.1 mrg || !operand_equal_p (part1->builtin->dst_base_base,
2900 1.1 mrg part2->builtin->dst_base_base, 0))
2901 1.1 mrg {
2902 1.1 mrg i++;
2903 1.1 mrg continue;
2904 1.1 mrg }
2905 1.1 mrg rhs1 = gimple_assign_rhs1 (DR_STMT (part1->builtin->dst_dr));
2906 1.1 mrg rhs2 = gimple_assign_rhs1 (DR_STMT (part2->builtin->dst_dr));
2907 1.1 mrg int bytev1 = const_with_all_bytes_same (rhs1);
2908 1.1 mrg int bytev2 = const_with_all_bytes_same (rhs2);
2909 1.1 mrg /* Only merge memset partitions of the same value. */
2910 1.1 mrg if (bytev1 != bytev2 || bytev1 == -1)
2911 1.1 mrg {
2912 1.1 mrg i++;
2913 1.1 mrg continue;
2914 1.1 mrg }
2915 1.1 mrg wide_int end1 = wi::add (part1->builtin->dst_base_offset,
2916 1.1 mrg wi::to_wide (part1->builtin->size));
2917 1.1 mrg /* Only merge adjacent memset partitions. */
2918 1.1 mrg if (wi::ne_p (end1, part2->builtin->dst_base_offset))
2919 1.1 mrg {
2920 1.1 mrg i++;
2921 1.1 mrg continue;
2922 1.1 mrg }
2923 1.1 mrg /* Merge partitions[i] and partitions[i+1]. */
2924 1.1 mrg part1->builtin->size = fold_build2 (PLUS_EXPR, sizetype,
2925 1.1 mrg part1->builtin->size,
2926 1.1 mrg part2->builtin->size);
2927 1.1 mrg partition_free (part2);
2928 1.1 mrg partitions->ordered_remove (i + 1);
2929 1.1 mrg }
2930 1.1 mrg }
2931 1.1 mrg
2932 1.1 mrg void
2933 1.1 mrg loop_distribution::finalize_partitions (class loop *loop,
2934 1.1 mrg vec<struct partition *> *partitions,
2935 1.1 mrg vec<ddr_p> *alias_ddrs)
2936 1.1 mrg {
2937 1.1 mrg unsigned i;
2938 1.1 mrg struct partition *partition, *a;
2939 1.1 mrg
2940 1.1 mrg if (partitions->length () == 1
2941 1.1 mrg || alias_ddrs->length () > 0)
2942 1.1 mrg return;
2943 1.1 mrg
2944 1.1 mrg unsigned num_builtin = 0, num_normal = 0, num_partial_memset = 0;
2945 1.1 mrg bool same_type_p = true;
2946 1.1 mrg enum partition_type type = ((*partitions)[0])->type;
2947 1.1 mrg for (i = 0; partitions->iterate (i, &partition); ++i)
2948 1.1 mrg {
2949 1.1 mrg same_type_p &= (type == partition->type);
2950 1.1 mrg if (partition_builtin_p (partition))
2951 1.1 mrg {
2952 1.1 mrg num_builtin++;
2953 1.1 mrg continue;
2954 1.1 mrg }
2955 1.1 mrg num_normal++;
2956 1.1 mrg if (partition->kind == PKIND_PARTIAL_MEMSET)
2957 1.1 mrg num_partial_memset++;
2958 1.1 mrg }
2959 1.1 mrg
2960 1.1 mrg /* Don't distribute current loop into too many loops given we don't have
2961 1.1 mrg memory stream cost model. Be even more conservative in case of loop
2962 1.1 mrg nest distribution. */
2963 1.1 mrg if ((same_type_p && num_builtin == 0
2964 1.1 mrg && (loop->inner == NULL || num_normal != 2 || num_partial_memset != 1))
2965 1.1 mrg || (loop->inner != NULL
2966 1.1 mrg && i >= NUM_PARTITION_THRESHOLD && num_normal > 1)
2967 1.1 mrg || (loop->inner == NULL
2968 1.1 mrg && i >= NUM_PARTITION_THRESHOLD && num_normal > num_builtin))
2969 1.1 mrg {
2970 1.1 mrg a = (*partitions)[0];
2971 1.1 mrg for (i = 1; partitions->iterate (i, &partition); ++i)
2972 1.1 mrg {
2973 1.1 mrg partition_merge_into (NULL, a, partition, FUSE_FINALIZE);
2974 1.1 mrg partition_free (partition);
2975 1.1 mrg }
2976 1.1 mrg partitions->truncate (1);
2977 1.1 mrg }
2978 1.1 mrg
2979 1.1 mrg /* Fuse memset builtins if possible. */
2980 1.1 mrg if (partitions->length () > 1)
2981 1.1 mrg fuse_memset_builtins (partitions);
2982 1.1 mrg }
2983 1.1 mrg
2984 1.1 mrg /* Distributes the code from LOOP in such a way that producer statements
2985 1.1 mrg are placed before consumer statements. Tries to separate only the
2986 1.1 mrg statements from STMTS into separate loops. Returns the number of
2987 1.1 mrg distributed loops. Set NB_CALLS to number of generated builtin calls.
2988 1.1 mrg Set *DESTROY_P to whether LOOP needs to be destroyed. */
2989 1.1 mrg
2990 1.1 mrg int
2991 1.1 mrg loop_distribution::distribute_loop (class loop *loop,
2992 1.1 mrg const vec<gimple *> &stmts,
2993 1.1 mrg control_dependences *cd, int *nb_calls, bool *destroy_p,
2994 1.1 mrg bool only_patterns_p)
2995 1.1 mrg {
2996 1.1 mrg ddrs_table = new hash_table<ddr_hasher> (389);
2997 1.1 mrg struct graph *rdg;
2998 1.1 mrg partition *partition;
2999 1.1 mrg int i, nbp;
3000 1.1 mrg
3001 1.1 mrg *destroy_p = false;
3002 1.1 mrg *nb_calls = 0;
3003 1.1 mrg loop_nest.create (0);
3004 1.1 mrg if (!find_loop_nest (loop, &loop_nest))
3005 1.1 mrg {
3006 1.1 mrg loop_nest.release ();
3007 1.1 mrg delete ddrs_table;
3008 1.1 mrg return 0;
3009 1.1 mrg }
3010 1.1 mrg
3011 1.1 mrg datarefs_vec.create (20);
3012 1.1 mrg has_nonaddressable_dataref_p = false;
3013 1.1 mrg rdg = build_rdg (loop, cd);
3014 1.1 mrg if (!rdg)
3015 1.1 mrg {
3016 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
3017 1.1 mrg fprintf (dump_file,
3018 1.1 mrg "Loop %d not distributed: failed to build the RDG.\n",
3019 1.1 mrg loop->num);
3020 1.1 mrg
3021 1.1 mrg loop_nest.release ();
3022 1.1 mrg free_data_refs (datarefs_vec);
3023 1.1 mrg delete ddrs_table;
3024 1.1 mrg return 0;
3025 1.1 mrg }
3026 1.1 mrg
3027 1.1 mrg if (datarefs_vec.length () > MAX_DATAREFS_NUM)
3028 1.1 mrg {
3029 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
3030 1.1 mrg fprintf (dump_file,
3031 1.1 mrg "Loop %d not distributed: too many memory references.\n",
3032 1.1 mrg loop->num);
3033 1.1 mrg
3034 1.1 mrg free_rdg (rdg);
3035 1.1 mrg loop_nest.release ();
3036 1.1 mrg free_data_refs (datarefs_vec);
3037 1.1 mrg delete ddrs_table;
3038 1.1 mrg return 0;
3039 1.1 mrg }
3040 1.1 mrg
3041 1.1 mrg data_reference_p dref;
3042 1.1 mrg for (i = 0; datarefs_vec.iterate (i, &dref); ++i)
3043 1.1 mrg dref->aux = (void *) (uintptr_t) i;
3044 1.1 mrg
3045 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
3046 1.1 mrg dump_rdg (dump_file, rdg);
3047 1.1 mrg
3048 1.1 mrg auto_vec<struct partition *, 3> partitions;
3049 1.1 mrg rdg_build_partitions (rdg, stmts, &partitions);
3050 1.1 mrg
3051 1.1 mrg auto_vec<ddr_p> alias_ddrs;
3052 1.1 mrg
3053 1.1 mrg auto_bitmap stmt_in_all_partitions;
3054 1.1 mrg bitmap_copy (stmt_in_all_partitions, partitions[0]->stmts);
3055 1.1 mrg for (i = 1; partitions.iterate (i, &partition); ++i)
3056 1.1 mrg bitmap_and_into (stmt_in_all_partitions, partitions[i]->stmts);
3057 1.1 mrg
3058 1.1 mrg bool any_builtin = false;
3059 1.1 mrg bool reduction_in_all = false;
3060 1.1 mrg FOR_EACH_VEC_ELT (partitions, i, partition)
3061 1.1 mrg {
3062 1.1 mrg reduction_in_all
3063 1.1 mrg |= classify_partition (loop, rdg, partition, stmt_in_all_partitions);
3064 1.1 mrg any_builtin |= partition_builtin_p (partition);
3065 1.1 mrg }
3066 1.1 mrg
3067 1.1 mrg /* If we are only distributing patterns but did not detect any,
3068 1.1 mrg simply bail out. */
3069 1.1 mrg if (only_patterns_p
3070 1.1 mrg && !any_builtin)
3071 1.1 mrg {
3072 1.1 mrg nbp = 0;
3073 1.1 mrg goto ldist_done;
3074 1.1 mrg }
3075 1.1 mrg
3076 1.1 mrg /* If we are only distributing patterns fuse all partitions that
3077 1.1 mrg were not classified as builtins. This also avoids chopping
3078 1.1 mrg a loop into pieces, separated by builtin calls. That is, we
3079 1.1 mrg only want no or a single loop body remaining. */
3080 1.1 mrg struct partition *into;
3081 1.1 mrg if (only_patterns_p)
3082 1.1 mrg {
3083 1.1 mrg for (i = 0; partitions.iterate (i, &into); ++i)
3084 1.1 mrg if (!partition_builtin_p (into))
3085 1.1 mrg break;
3086 1.1 mrg for (++i; partitions.iterate (i, &partition); ++i)
3087 1.1 mrg if (!partition_builtin_p (partition))
3088 1.1 mrg {
3089 1.1 mrg partition_merge_into (NULL, into, partition, FUSE_NON_BUILTIN);
3090 1.1 mrg partitions.unordered_remove (i);
3091 1.1 mrg partition_free (partition);
3092 1.1 mrg i--;
3093 1.1 mrg }
3094 1.1 mrg }
3095 1.1 mrg
3096 1.1 mrg /* Due to limitations in the transform phase we have to fuse all
3097 1.1 mrg reduction partitions into the last partition so the existing
3098 1.1 mrg loop will contain all loop-closed PHI nodes. */
3099 1.1 mrg for (i = 0; partitions.iterate (i, &into); ++i)
3100 1.1 mrg if (partition_reduction_p (into))
3101 1.1 mrg break;
3102 1.1 mrg for (i = i + 1; partitions.iterate (i, &partition); ++i)
3103 1.1 mrg if (partition_reduction_p (partition))
3104 1.1 mrg {
3105 1.1 mrg partition_merge_into (rdg, into, partition, FUSE_REDUCTION);
3106 1.1 mrg partitions.unordered_remove (i);
3107 1.1 mrg partition_free (partition);
3108 1.1 mrg i--;
3109 1.1 mrg }
3110 1.1 mrg
3111 1.1 mrg /* Apply our simple cost model - fuse partitions with similar
3112 1.1 mrg memory accesses. */
3113 1.1 mrg for (i = 0; partitions.iterate (i, &into); ++i)
3114 1.1 mrg {
3115 1.1 mrg bool changed = false;
3116 1.1 mrg if (partition_builtin_p (into) || into->kind == PKIND_PARTIAL_MEMSET)
3117 1.1 mrg continue;
3118 1.1 mrg for (int j = i + 1;
3119 1.1 mrg partitions.iterate (j, &partition); ++j)
3120 1.1 mrg {
3121 1.1 mrg if (share_memory_accesses (rdg, into, partition))
3122 1.1 mrg {
3123 1.1 mrg partition_merge_into (rdg, into, partition, FUSE_SHARE_REF);
3124 1.1 mrg partitions.unordered_remove (j);
3125 1.1 mrg partition_free (partition);
3126 1.1 mrg j--;
3127 1.1 mrg changed = true;
3128 1.1 mrg }
3129 1.1 mrg }
3130 1.1 mrg /* If we fused 0 1 2 in step 1 to 0,2 1 as 0 and 2 have similar
3131 1.1 mrg accesses when 1 and 2 have similar accesses but not 0 and 1
3132 1.1 mrg then in the next iteration we will fail to consider merging
3133 1.1 mrg 1 into 0,2. So try again if we did any merging into 0. */
3134 1.1 mrg if (changed)
3135 1.1 mrg i--;
3136 1.1 mrg }
3137 1.1 mrg
3138 1.1 mrg /* Put a non-builtin partition last if we need to preserve a reduction.
3139 1.1 mrg ??? This is a workaround that makes sort_partitions_by_post_order do
3140 1.1 mrg the correct thing while in reality it should sort each component
3141 1.1 mrg separately and then put the component with a reduction or a non-builtin
3142 1.1 mrg last. */
3143 1.1 mrg if (reduction_in_all
3144 1.1 mrg && partition_builtin_p (partitions.last()))
3145 1.1 mrg FOR_EACH_VEC_ELT (partitions, i, partition)
3146 1.1 mrg if (!partition_builtin_p (partition))
3147 1.1 mrg {
3148 1.1 mrg partitions.unordered_remove (i);
3149 1.1 mrg partitions.quick_push (partition);
3150 1.1 mrg break;
3151 1.1 mrg }
3152 1.1 mrg
3153 1.1 mrg /* Build the partition dependency graph and fuse partitions in strong
3154 1.1 mrg connected component. */
3155 1.1 mrg if (partitions.length () > 1)
3156 1.1 mrg {
3157 1.1 mrg /* Don't support loop nest distribution under runtime alias check
3158 1.1 mrg since it's not likely to enable many vectorization opportunities.
3159 1.1 mrg Also if loop has any data reference which may be not addressable
3160 1.1 mrg since alias check needs to take, compare address of the object. */
3161 1.1 mrg if (loop->inner || has_nonaddressable_dataref_p)
3162 1.1 mrg merge_dep_scc_partitions (rdg, &partitions, false);
3163 1.1 mrg else
3164 1.1 mrg {
3165 1.1 mrg merge_dep_scc_partitions (rdg, &partitions, true);
3166 1.1 mrg if (partitions.length () > 1)
3167 1.1 mrg break_alias_scc_partitions (rdg, &partitions, &alias_ddrs);
3168 1.1 mrg }
3169 1.1 mrg }
3170 1.1 mrg
3171 1.1 mrg finalize_partitions (loop, &partitions, &alias_ddrs);
3172 1.1 mrg
3173 1.1 mrg /* If there is a reduction in all partitions make sure the last one
3174 1.1 mrg is not classified for builtin code generation. */
3175 1.1 mrg if (reduction_in_all)
3176 1.1 mrg {
3177 1.1 mrg partition = partitions.last ();
3178 1.1 mrg if (only_patterns_p
3179 1.1 mrg && partition_builtin_p (partition)
3180 1.1 mrg && !partition_builtin_p (partitions[0]))
3181 1.1 mrg {
3182 1.1 mrg nbp = 0;
3183 1.1 mrg goto ldist_done;
3184 1.1 mrg }
3185 1.1 mrg partition->kind = PKIND_NORMAL;
3186 1.1 mrg }
3187 1.1 mrg
3188 1.1 mrg nbp = partitions.length ();
3189 1.1 mrg if (nbp == 0
3190 1.1 mrg || (nbp == 1 && !partition_builtin_p (partitions[0]))
3191 1.1 mrg || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
3192 1.1 mrg {
3193 1.1 mrg nbp = 0;
3194 1.1 mrg goto ldist_done;
3195 1.1 mrg }
3196 1.1 mrg
3197 1.1 mrg if (version_for_distribution_p (&partitions, &alias_ddrs))
3198 1.1 mrg version_loop_by_alias_check (&partitions, loop, &alias_ddrs);
3199 1.1 mrg
3200 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
3201 1.1 mrg {
3202 1.1 mrg fprintf (dump_file,
3203 1.1 mrg "distribute loop <%d> into partitions:\n", loop->num);
3204 1.1 mrg dump_rdg_partitions (dump_file, partitions);
3205 1.1 mrg }
3206 1.1 mrg
3207 1.1 mrg FOR_EACH_VEC_ELT (partitions, i, partition)
3208 1.1 mrg {
3209 1.1 mrg if (partition_builtin_p (partition))
3210 1.1 mrg (*nb_calls)++;
3211 1.1 mrg *destroy_p |= generate_code_for_partition (loop, partition, i < nbp - 1);
3212 1.1 mrg }
3213 1.1 mrg
3214 1.1 mrg ldist_done:
3215 1.1 mrg loop_nest.release ();
3216 1.1 mrg free_data_refs (datarefs_vec);
3217 1.1 mrg for (hash_table<ddr_hasher>::iterator iter = ddrs_table->begin ();
3218 1.1 mrg iter != ddrs_table->end (); ++iter)
3219 1.1 mrg {
3220 1.1 mrg free_dependence_relation (*iter);
3221 1.1 mrg *iter = NULL;
3222 1.1 mrg }
3223 1.1 mrg delete ddrs_table;
3224 1.1 mrg
3225 1.1 mrg FOR_EACH_VEC_ELT (partitions, i, partition)
3226 1.1 mrg partition_free (partition);
3227 1.1 mrg
3228 1.1 mrg free_rdg (rdg);
3229 1.1 mrg return nbp - *nb_calls;
3230 1.1 mrg }
3231 1.1 mrg
3232 1.1 mrg
3233 1.1 mrg void loop_distribution::bb_top_order_init (void)
3234 1.1 mrg {
3235 1.1 mrg int rpo_num;
3236 1.1 mrg int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
3237 1.1 mrg edge entry = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
3238 1.1 mrg bitmap exit_bbs = BITMAP_ALLOC (NULL);
3239 1.1 mrg
3240 1.1 mrg bb_top_order_index = XNEWVEC (int, last_basic_block_for_fn (cfun));
3241 1.1 mrg bb_top_order_index_size = last_basic_block_for_fn (cfun);
3242 1.1 mrg
3243 1.1 mrg entry->flags &= ~EDGE_DFS_BACK;
3244 1.1 mrg bitmap_set_bit (exit_bbs, EXIT_BLOCK);
3245 1.1 mrg rpo_num = rev_post_order_and_mark_dfs_back_seme (cfun, entry, exit_bbs, true,
3246 1.1 mrg rpo, NULL);
3247 1.1 mrg BITMAP_FREE (exit_bbs);
3248 1.1 mrg
3249 1.1 mrg for (int i = 0; i < rpo_num; i++)
3250 1.1 mrg bb_top_order_index[rpo[i]] = i;
3251 1.1 mrg
3252 1.1 mrg free (rpo);
3253 1.1 mrg }
3254 1.1 mrg
3255 1.1 mrg void loop_distribution::bb_top_order_destroy ()
3256 1.1 mrg {
3257 1.1 mrg free (bb_top_order_index);
3258 1.1 mrg bb_top_order_index = NULL;
3259 1.1 mrg bb_top_order_index_size = 0;
3260 1.1 mrg }
3261 1.1 mrg
3262 1.1 mrg
3263 1.1 mrg /* Given LOOP, this function records seed statements for distribution in
3264 1.1 mrg WORK_LIST. Return false if there is nothing for distribution. */
3265 1.1 mrg
3266 1.1 mrg static bool
3267 1.1 mrg find_seed_stmts_for_distribution (class loop *loop, vec<gimple *> *work_list)
3268 1.1 mrg {
3269 1.1 mrg basic_block *bbs = get_loop_body_in_dom_order (loop);
3270 1.1 mrg
3271 1.1 mrg /* Initialize the worklist with stmts we seed the partitions with. */
3272 1.1 mrg for (unsigned i = 0; i < loop->num_nodes; ++i)
3273 1.1 mrg {
3274 1.1 mrg /* In irreducible sub-regions we don't know how to redirect
3275 1.1 mrg conditions, so fail. See PR100492. */
3276 1.1 mrg if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
3277 1.1 mrg {
3278 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
3279 1.1 mrg fprintf (dump_file, "loop %d contains an irreducible region.\n",
3280 1.1 mrg loop->num);
3281 1.1 mrg work_list->truncate (0);
3282 1.1 mrg break;
3283 1.1 mrg }
3284 1.1 mrg for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
3285 1.1 mrg !gsi_end_p (gsi); gsi_next (&gsi))
3286 1.1 mrg {
3287 1.1 mrg gphi *phi = gsi.phi ();
3288 1.1 mrg if (virtual_operand_p (gimple_phi_result (phi)))
3289 1.1 mrg continue;
3290 1.1 mrg /* Distribute stmts which have defs that are used outside of
3291 1.1 mrg the loop. */
3292 1.1 mrg if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
3293 1.1 mrg continue;
3294 1.1 mrg work_list->safe_push (phi);
3295 1.1 mrg }
3296 1.1 mrg for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
3297 1.1 mrg !gsi_end_p (gsi); gsi_next (&gsi))
3298 1.1 mrg {
3299 1.1 mrg gimple *stmt = gsi_stmt (gsi);
3300 1.1 mrg
3301 1.1 mrg /* Ignore clobbers, they do not have true side effects. */
3302 1.1 mrg if (gimple_clobber_p (stmt))
3303 1.1 mrg continue;
3304 1.1 mrg
3305 1.1 mrg /* If there is a stmt with side-effects bail out - we
3306 1.1 mrg cannot and should not distribute this loop. */
3307 1.1 mrg if (gimple_has_side_effects (stmt))
3308 1.1 mrg {
3309 1.1 mrg free (bbs);
3310 1.1 mrg return false;
3311 1.1 mrg }
3312 1.1 mrg
3313 1.1 mrg /* Distribute stmts which have defs that are used outside of
3314 1.1 mrg the loop. */
3315 1.1 mrg if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
3316 1.1 mrg ;
3317 1.1 mrg /* Otherwise only distribute stores for now. */
3318 1.1 mrg else if (!gimple_vdef (stmt))
3319 1.1 mrg continue;
3320 1.1 mrg
3321 1.1 mrg work_list->safe_push (stmt);
3322 1.1 mrg }
3323 1.1 mrg }
3324 1.1 mrg bool res = work_list->length () > 0;
3325 1.1 mrg if (res && !can_copy_bbs_p (bbs, loop->num_nodes))
3326 1.1 mrg {
3327 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
3328 1.1 mrg fprintf (dump_file, "cannot copy loop %d.\n", loop->num);
3329 1.1 mrg res = false;
3330 1.1 mrg }
3331 1.1 mrg free (bbs);
3332 1.1 mrg return res;
3333 1.1 mrg }
3334 1.1 mrg
3335 1.1 mrg /* A helper function for generate_{rawmemchr,strlen}_builtin functions in order
3336 1.1 mrg to place new statements SEQ before LOOP and replace the old reduction
3337 1.1 mrg variable with the new one. */
3338 1.1 mrg
3339 1.1 mrg static void
3340 1.1 mrg generate_reduction_builtin_1 (loop_p loop, gimple_seq &seq,
3341 1.1 mrg tree reduction_var_old, tree reduction_var_new,
3342 1.1 mrg const char *info, machine_mode load_mode)
3343 1.1 mrg {
3344 1.1 mrg gcc_assert (flag_tree_loop_distribute_patterns);
3345 1.1 mrg
3346 1.1 mrg /* Place new statements before LOOP. */
3347 1.1 mrg gimple_stmt_iterator gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
3348 1.1 mrg gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
3349 1.1 mrg
3350 1.1 mrg /* Replace old reduction variable with new one. */
3351 1.1 mrg imm_use_iterator iter;
3352 1.1 mrg gimple *stmt;
3353 1.1 mrg use_operand_p use_p;
3354 1.1 mrg FOR_EACH_IMM_USE_STMT (stmt, iter, reduction_var_old)
3355 1.1 mrg {
3356 1.1 mrg FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3357 1.1 mrg SET_USE (use_p, reduction_var_new);
3358 1.1 mrg
3359 1.1 mrg update_stmt (stmt);
3360 1.1 mrg }
3361 1.1 mrg
3362 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
3363 1.1 mrg fprintf (dump_file, info, GET_MODE_NAME (load_mode));
3364 1.1 mrg }
3365 1.1 mrg
3366 1.1 mrg /* Generate a call to rawmemchr and place it before LOOP. REDUCTION_VAR is
3367 1.1 mrg replaced with a fresh SSA name representing the result of the call. */
3368 1.1 mrg
3369 1.1 mrg static void
3370 1.1 mrg generate_rawmemchr_builtin (loop_p loop, tree reduction_var,
3371 1.1 mrg data_reference_p store_dr, tree base, tree pattern,
3372 1.1 mrg location_t loc)
3373 1.1 mrg {
3374 1.1 mrg gimple_seq seq = NULL;
3375 1.1 mrg
3376 1.1 mrg tree mem = force_gimple_operand (base, &seq, true, NULL_TREE);
3377 1.1 mrg gimple *fn_call = gimple_build_call_internal (IFN_RAWMEMCHR, 2, mem, pattern);
3378 1.1 mrg tree reduction_var_new = copy_ssa_name (reduction_var);
3379 1.1 mrg gimple_call_set_lhs (fn_call, reduction_var_new);
3380 1.1 mrg gimple_set_location (fn_call, loc);
3381 1.1 mrg gimple_seq_add_stmt (&seq, fn_call);
3382 1.1 mrg
3383 1.1 mrg if (store_dr)
3384 1.1 mrg {
3385 1.1 mrg gassign *g = gimple_build_assign (DR_REF (store_dr), reduction_var_new);
3386 1.1 mrg gimple_seq_add_stmt (&seq, g);
3387 1.1 mrg }
3388 1.1 mrg
3389 1.1 mrg generate_reduction_builtin_1 (loop, seq, reduction_var, reduction_var_new,
3390 1.1 mrg "generated rawmemchr%s\n",
3391 1.1 mrg TYPE_MODE (TREE_TYPE (TREE_TYPE (base))));
3392 1.1 mrg }
3393 1.1 mrg
3394 1.1 mrg /* Helper function for generate_strlen_builtin(,_using_rawmemchr) */
3395 1.1 mrg
3396 1.1 mrg static void
3397 1.1 mrg generate_strlen_builtin_1 (loop_p loop, gimple_seq &seq,
3398 1.1 mrg tree reduction_var_old, tree reduction_var_new,
3399 1.1 mrg machine_mode mode, tree start_len)
3400 1.1 mrg {
3401 1.1 mrg /* REDUCTION_VAR_NEW has either size type or ptrdiff type and must be
3402 1.1 mrg converted if types of old and new reduction variable are not compatible. */
3403 1.1 mrg reduction_var_new = gimple_convert (&seq, TREE_TYPE (reduction_var_old),
3404 1.1 mrg reduction_var_new);
3405 1.1 mrg
3406 1.1 mrg /* Loops of the form `for (i=42; s[i]; ++i);` have an additional start
3407 1.1 mrg length. */
3408 1.1 mrg if (!integer_zerop (start_len))
3409 1.1 mrg {
3410 1.1 mrg tree lhs = make_ssa_name (TREE_TYPE (reduction_var_new));
3411 1.1 mrg gimple *g = gimple_build_assign (lhs, PLUS_EXPR, reduction_var_new,
3412 1.1 mrg start_len);
3413 1.1 mrg gimple_seq_add_stmt (&seq, g);
3414 1.1 mrg reduction_var_new = lhs;
3415 1.1 mrg }
3416 1.1 mrg
3417 1.1 mrg generate_reduction_builtin_1 (loop, seq, reduction_var_old, reduction_var_new,
3418 1.1 mrg "generated strlen%s\n", mode);
3419 1.1 mrg }
3420 1.1 mrg
3421 1.1 mrg /* Generate a call to strlen and place it before LOOP. REDUCTION_VAR is
3422 1.1 mrg replaced with a fresh SSA name representing the result of the call. */
3423 1.1 mrg
3424 1.1 mrg static void
3425 1.1 mrg generate_strlen_builtin (loop_p loop, tree reduction_var, tree base,
3426 1.1 mrg tree start_len, location_t loc)
3427 1.1 mrg {
3428 1.1 mrg gimple_seq seq = NULL;
3429 1.1 mrg
3430 1.1 mrg tree reduction_var_new = make_ssa_name (size_type_node);
3431 1.1 mrg
3432 1.1 mrg tree mem = force_gimple_operand (base, &seq, true, NULL_TREE);
3433 1.1 mrg tree fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_STRLEN));
3434 1.1 mrg gimple *fn_call = gimple_build_call (fn, 1, mem);
3435 1.1 mrg gimple_call_set_lhs (fn_call, reduction_var_new);
3436 1.1 mrg gimple_set_location (fn_call, loc);
3437 1.1 mrg gimple_seq_add_stmt (&seq, fn_call);
3438 1.1 mrg
3439 1.1 mrg generate_strlen_builtin_1 (loop, seq, reduction_var, reduction_var_new,
3440 1.1 mrg QImode, start_len);
3441 1.1 mrg }
3442 1.1 mrg
3443 1.1 mrg /* Generate code in order to mimic the behaviour of strlen but this time over
3444 1.1 mrg an array of elements with mode different than QI. REDUCTION_VAR is replaced
3445 1.1 mrg with a fresh SSA name representing the result, i.e., the length. */
3446 1.1 mrg
3447 1.1 mrg static void
3448 1.1 mrg generate_strlen_builtin_using_rawmemchr (loop_p loop, tree reduction_var,
3449 1.1 mrg tree base, tree load_type,
3450 1.1 mrg tree start_len, location_t loc)
3451 1.1 mrg {
3452 1.1 mrg gimple_seq seq = NULL;
3453 1.1 mrg
3454 1.1 mrg tree start = force_gimple_operand (base, &seq, true, NULL_TREE);
3455 1.1 mrg tree zero = build_zero_cst (load_type);
3456 1.1 mrg gimple *fn_call = gimple_build_call_internal (IFN_RAWMEMCHR, 2, start, zero);
3457 1.1 mrg tree end = make_ssa_name (TREE_TYPE (base));
3458 1.1 mrg gimple_call_set_lhs (fn_call, end);
3459 1.1 mrg gimple_set_location (fn_call, loc);
3460 1.1 mrg gimple_seq_add_stmt (&seq, fn_call);
3461 1.1 mrg
3462 1.1 mrg /* Determine the number of elements between START and END by
3463 1.1 mrg evaluating (END - START) / sizeof (*START). */
3464 1.1 mrg tree diff = make_ssa_name (ptrdiff_type_node);
3465 1.1 mrg gimple *diff_stmt = gimple_build_assign (diff, POINTER_DIFF_EXPR, end, start);
3466 1.1 mrg gimple_seq_add_stmt (&seq, diff_stmt);
3467 1.1 mrg /* Let SIZE be the size of each character. */
3468 1.1 mrg tree size = gimple_convert (&seq, ptrdiff_type_node,
3469 1.1 mrg TYPE_SIZE_UNIT (load_type));
3470 1.1 mrg tree count = make_ssa_name (ptrdiff_type_node);
3471 1.1 mrg gimple *count_stmt = gimple_build_assign (count, TRUNC_DIV_EXPR, diff, size);
3472 1.1 mrg gimple_seq_add_stmt (&seq, count_stmt);
3473 1.1 mrg
3474 1.1 mrg generate_strlen_builtin_1 (loop, seq, reduction_var, count,
3475 1.1 mrg TYPE_MODE (load_type),
3476 1.1 mrg start_len);
3477 1.1 mrg }
3478 1.1 mrg
3479 1.1 mrg /* Return true if we can count at least as many characters by taking pointer
3480 1.1 mrg difference as we can count via reduction_var without an overflow. Thus
3481 1.1 mrg compute 2^n < (2^(m-1) / s) where n = TYPE_PRECISION (reduction_var_type),
3482 1.1 mrg m = TYPE_PRECISION (ptrdiff_type_node), and s = size of each character. */
3483 1.1 mrg static bool
3484 1.1 mrg reduction_var_overflows_first (tree reduction_var_type, tree load_type)
3485 1.1 mrg {
3486 1.1 mrg widest_int n2 = wi::lshift (1, TYPE_PRECISION (reduction_var_type));;
3487 1.1 mrg widest_int m2 = wi::lshift (1, TYPE_PRECISION (ptrdiff_type_node) - 1);
3488 1.1 mrg widest_int s = wi::to_widest (TYPE_SIZE_UNIT (load_type));
3489 1.1 mrg return wi::ltu_p (n2, wi::udiv_trunc (m2, s));
3490 1.1 mrg }
3491 1.1 mrg
3492 1.1 mrg static gimple *
3493 1.1 mrg determine_reduction_stmt_1 (const loop_p loop, const basic_block *bbs)
3494 1.1 mrg {
3495 1.1 mrg gimple *reduction_stmt = NULL;
3496 1.1 mrg
3497 1.1 mrg for (unsigned i = 0, ninsns = 0; i < loop->num_nodes; ++i)
3498 1.1 mrg {
3499 1.1 mrg basic_block bb = bbs[i];
3500 1.1 mrg
3501 1.1 mrg for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
3502 1.1 mrg gsi_next (&bsi))
3503 1.1 mrg {
3504 1.1 mrg gphi *phi = bsi.phi ();
3505 1.1 mrg if (virtual_operand_p (gimple_phi_result (phi)))
3506 1.1 mrg continue;
3507 1.1 mrg if (stmt_has_scalar_dependences_outside_loop (loop, phi))
3508 1.1 mrg {
3509 1.1 mrg if (reduction_stmt)
3510 1.1 mrg return NULL;
3511 1.1 mrg reduction_stmt = phi;
3512 1.1 mrg }
3513 1.1 mrg }
3514 1.1 mrg
3515 1.1 mrg for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb);
3516 1.1 mrg !gsi_end_p (bsi); gsi_next_nondebug (&bsi), ++ninsns)
3517 1.1 mrg {
3518 1.1 mrg /* Bail out early for loops which are unlikely to match. */
3519 1.1 mrg if (ninsns > 16)
3520 1.1 mrg return NULL;
3521 1.1 mrg gimple *stmt = gsi_stmt (bsi);
3522 1.1 mrg if (gimple_clobber_p (stmt))
3523 1.1 mrg continue;
3524 1.1 mrg if (gimple_code (stmt) == GIMPLE_LABEL)
3525 1.1 mrg continue;
3526 1.1 mrg if (gimple_has_volatile_ops (stmt))
3527 1.1 mrg return NULL;
3528 1.1 mrg if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
3529 1.1 mrg {
3530 1.1 mrg if (reduction_stmt)
3531 1.1 mrg return NULL;
3532 1.1 mrg reduction_stmt = stmt;
3533 1.1 mrg }
3534 1.1 mrg }
3535 1.1 mrg }
3536 1.1 mrg
3537 1.1 mrg return reduction_stmt;
3538 1.1 mrg }
3539 1.1 mrg
3540 1.1 mrg /* If LOOP has a single non-volatile reduction statement, then return a pointer
3541 1.1 mrg to it. Otherwise return NULL. */
3542 1.1 mrg static gimple *
3543 1.1 mrg determine_reduction_stmt (const loop_p loop)
3544 1.1 mrg {
3545 1.1 mrg basic_block *bbs = get_loop_body (loop);
3546 1.1 mrg gimple *reduction_stmt = determine_reduction_stmt_1 (loop, bbs);
3547 1.1 mrg XDELETEVEC (bbs);
3548 1.1 mrg return reduction_stmt;
3549 1.1 mrg }
3550 1.1 mrg
3551 1.1 mrg /* Transform loops which mimic the effects of builtins rawmemchr or strlen and
3552 1.1 mrg replace them accordingly. For example, a loop of the form
3553 1.1 mrg
3554 1.1 mrg for (; *p != 42; ++p);
3555 1.1 mrg
3556 1.1 mrg is replaced by
3557 1.1 mrg
3558 1.1 mrg p = rawmemchr<MODE> (p, 42);
3559 1.1 mrg
3560 1.1 mrg under the assumption that rawmemchr is available for a particular MODE.
3561 1.1 mrg Another example is
3562 1.1 mrg
3563 1.1 mrg int i;
3564 1.1 mrg for (i = 42; s[i]; ++i);
3565 1.1 mrg
3566 1.1 mrg which is replaced by
3567 1.1 mrg
3568 1.1 mrg i = (int)strlen (&s[42]) + 42;
3569 1.1 mrg
3570 1.1 mrg for some character array S. In case array S is not of type character array
3571 1.1 mrg we end up with
3572 1.1 mrg
3573 1.1 mrg i = (int)(rawmemchr<MODE> (&s[42], 0) - &s[42]) + 42;
3574 1.1 mrg
3575 1.1 mrg assuming that rawmemchr is available for a particular MODE. */
3576 1.1 mrg
bool
loop_distribution::transform_reduction_loop (loop_p loop)
{
  gimple *reduction_stmt;
  data_reference_p load_dr = NULL, store_dr = NULL;

  /* The single exit of LOOP must be controlled by a conditional jump.  */
  edge e = single_exit (loop);
  gcond *cond = safe_dyn_cast <gcond *> (last_stmt (e->src));
  if (!cond)
    return false;
  /* Ensure loop condition is an (in)equality test and loop is exited either if
     the inequality test fails or the equality test succeeds.  */
  if (!(e->flags & EDGE_FALSE_VALUE && gimple_cond_code (cond) == NE_EXPR)
      && !(e->flags & EDGE_TRUE_VALUE && gimple_cond_code (cond) == EQ_EXPR))
    return false;
  /* A limitation of the current implementation is that we only support
     constant patterns in (in)equality tests.  */
  tree pattern = gimple_cond_rhs (cond);
  if (TREE_CODE (pattern) != INTEGER_CST)
    return false;

  reduction_stmt = determine_reduction_stmt (loop);

  /* A limitation of the current implementation is that we require a reduction
     statement.  Therefore, loops without a reduction statement as in the
     following are not recognized:
     int *p;
     void foo (void) { for (; *p; ++p); }  */
  if (reduction_stmt == NULL)
    return false;

  /* Reduction variables are guaranteed to be SSA names.  */
  tree reduction_var;
  switch (gimple_code (reduction_stmt))
    {
    case GIMPLE_ASSIGN:
    case GIMPLE_PHI:
      reduction_var = gimple_get_lhs (reduction_stmt);
      break;
    default:
      /* Bail out e.g. for GIMPLE_CALL.  */
      return false;
    }

  /* Build the RDG over the whole loop body in order to determine the
     single load and the optional single store.  */
  struct graph *rdg = build_rdg (loop, NULL);
  if (rdg == NULL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %d not transformed: failed to build the RDG.\n",
		 loop->num);

      return false;
    }
  auto_bitmap partition_stmts;
  bitmap_set_range (partition_stmts, 0, rdg->n_vertices);
  find_single_drs (loop, rdg, partition_stmts, &store_dr, &load_dr);
  free_rdg (rdg);

  /* Bail out if there is no single load.  */
  if (load_dr == NULL)
    return false;

  /* Reaching this point we have a loop with a single reduction variable,
     a single load, and an optional single store.  */

  tree load_ref = DR_REF (load_dr);
  tree load_type = TREE_TYPE (load_ref);
  tree load_access_base = build_fold_addr_expr (load_ref);
  tree load_access_size = TYPE_SIZE_UNIT (load_type);
  affine_iv load_iv, reduction_iv;

  /* Only integral loads whose precision matches their mode are handled.  */
  if (!INTEGRAL_TYPE_P (load_type)
      || !type_has_mode_precision_p (load_type))
    return false;

  /* We already ensured that the loop condition tests for (in)equality where the
     rhs is a constant pattern. Now ensure that the lhs is the result of the
     load.  */
  if (gimple_cond_lhs (cond) != gimple_assign_lhs (DR_STMT (load_dr)))
    return false;

  /* Bail out if no affine induction variable with constant step can be
     determined.  */
  if (!simple_iv (loop, loop, load_access_base, &load_iv, false))
    return false;

  /* Bail out if memory accesses are not consecutive or not growing.  */
  if (!operand_equal_p (load_iv.step, load_access_size, 0))
    return false;

  /* The reduction variable itself must also evolve as a simple affine IV.  */
  if (!simple_iv (loop, loop, reduction_var, &reduction_iv, false))
    return false;

  /* Handle rawmemchr like loops: the reduction IV coincides with the load
     address IV, i.e. the reduction is the pointer itself.  */
  if (operand_equal_p (load_iv.base, reduction_iv.base)
      && operand_equal_p (load_iv.step, reduction_iv.step))
    {
      if (store_dr)
	{
	  /* Ensure that we store to X and load from X+I where I>0.  */
	  if (TREE_CODE (load_iv.base) != POINTER_PLUS_EXPR
	      || !integer_onep (TREE_OPERAND (load_iv.base, 1)))
	    return false;
	  tree ptr_base = TREE_OPERAND (load_iv.base, 0);
	  if (TREE_CODE (ptr_base) != SSA_NAME)
	    return false;
	  gimple *def = SSA_NAME_DEF_STMT (ptr_base);
	  if (!gimple_assign_single_p (def)
	      || gimple_assign_rhs1 (def) != DR_REF (store_dr))
	    return false;
	  /* Ensure that the reduction value is stored.  */
	  if (gimple_assign_rhs1 (DR_STMT (store_dr)) != reduction_var)
	    return false;
	}
      /* Bail out if target does not provide rawmemchr for a certain mode.  */
      machine_mode mode = TYPE_MODE (load_type);
      if (direct_optab_handler (rawmemchr_optab, mode) == CODE_FOR_nothing)
	return false;
      location_t loc = gimple_location (DR_STMT (load_dr));
      generate_rawmemchr_builtin (loop, reduction_var, store_dr, load_iv.base,
				  pattern, loc);
      return true;
    }

  /* Handle strlen like loops: no store, zero pattern, and an integral
     reduction counting up by one from a constant start value.  */
  if (store_dr == NULL
      && integer_zerop (pattern)
      && INTEGRAL_TYPE_P (TREE_TYPE (reduction_var))
      && TREE_CODE (reduction_iv.base) == INTEGER_CST
      && TREE_CODE (reduction_iv.step) == INTEGER_CST
      && integer_onep (reduction_iv.step))
    {
      location_t loc = gimple_location (DR_STMT (load_dr));
      tree reduction_var_type = TREE_TYPE (reduction_var);
      /* While determining the length of a string an overflow might occur.
	 If an overflow only occurs in the loop implementation and not in the
	 strlen implementation, then either the overflow is undefined or the
	 truncated result of strlen equals the one of the loop.  Otherwise if
	 an overflow may also occur in the strlen implementation, then
	 replacing a loop by a call to strlen is sound whenever we ensure that
	 if an overflow occurs in the strlen implementation, then also an
	 overflow occurs in the loop implementation which is undefined.  It
	 seems reasonable to relax this and assume that the strlen
	 implementation cannot overflow in case sizetype is big enough in the
	 sense that an overflow can only happen for string objects which are
	 bigger than half of the address space; at least for 32-bit targets and
	 up.

	 For strlen which makes use of rawmemchr the maximal length of a string
	 which can be determined without an overflow is PTRDIFF_MAX / S where
	 each character has size S.  Since an overflow for ptrdiff type is
	 undefined we have to make sure that if an overflow occurs, then an
	 overflow occurs in the loop implementation, too, and this is
	 undefined, too.  Similar as before we relax this and assume that no
	 string object is larger than half of the address space; at least for
	 32-bit targets and up.  */
      if (TYPE_MODE (load_type) == TYPE_MODE (char_type_node)
	  && TYPE_PRECISION (load_type) == TYPE_PRECISION (char_type_node)
	  && ((TYPE_PRECISION (sizetype) >= TYPE_PRECISION (ptr_type_node) - 1
	       && TYPE_PRECISION (ptr_type_node) >= 32)
	      || (TYPE_OVERFLOW_UNDEFINED (reduction_var_type)
		  && TYPE_PRECISION (reduction_var_type) <= TYPE_PRECISION (sizetype)))
	  && builtin_decl_implicit (BUILT_IN_STRLEN))
	generate_strlen_builtin (loop, reduction_var, load_iv.base,
				 reduction_iv.base, loc);
      else if (direct_optab_handler (rawmemchr_optab, TYPE_MODE (load_type))
	       != CODE_FOR_nothing
	       && ((TYPE_PRECISION (ptrdiff_type_node) == TYPE_PRECISION (ptr_type_node)
		    && TYPE_PRECISION (ptrdiff_type_node) >= 32)
		   || (TYPE_OVERFLOW_UNDEFINED (reduction_var_type)
		       && reduction_var_overflows_first (reduction_var_type, load_type))))
	generate_strlen_builtin_using_rawmemchr (loop, reduction_var,
						 load_iv.base,
						 load_type,
						 reduction_iv.base, loc);
      else
	return false;
      return true;
    }

  return false;
}
3760 1.1 mrg
3761 1.1 mrg /* Given innermost LOOP, return the outermost enclosing loop that forms a
3762 1.1 mrg perfect loop nest. */
3763 1.1 mrg
static class loop *
prepare_perfect_loop_nest (class loop *loop)
{
  class loop *outer = loop_outer (loop);
  tree niters = number_of_latch_executions (loop);

  /* TODO: We only support the innermost 3-level loop nest distribution
     because of compilation time issue for now.  This should be relaxed
     in the future.  Note we only allow 3-level loop nest distribution
     when parallelizing loops.  */
  /* Climb outward while the nest stays perfect: OUTER must contain
     exactly LOOP (no siblings), have a single exit, and have a
     computable latch execution count that does not use definitions
     from inside OUTER.  Note NITERS is updated to the outer loop's
     count by the condition itself on every successful step.  */
  while ((loop->inner == NULL
	  || (loop->inner->inner == NULL && flag_tree_parallelize_loops > 1))
	 && loop_outer (outer)
	 && outer->inner == loop && loop->next == NULL
	 && single_exit (outer)
	 && !chrec_contains_symbols_defined_in_loop (niters, outer->num)
	 && (niters = number_of_latch_executions (outer)) != NULL_TREE
	 && niters != chrec_dont_know)
    {
      loop = outer;
      outer = loop_outer (loop);
    }

  return loop;
}
3789 1.1 mrg
3790 1.1 mrg
/* Main driver of the pass over function FUN: distribute innermost loops
   and recognize builtin-like reduction loops.  Returns TODO_cleanup_cfg
   if any loop was changed, zero otherwise.  */
unsigned int
loop_distribution::execute (function *fun)
{
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;
  auto_vec<loop_p> loops_to_be_destroyed;

  /* Nothing to do without at least one real loop.  */
  if (number_of_loops (fun) <= 1)
    return 0;

  bb_top_order_init ();

  /* Invalidate all statement UIDs.  */
  FOR_ALL_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  for (auto loop : loops_list (cfun, LI_ONLY_INNERMOST))
    {
      /* Don't distribute multiple exit edges loop, or cold loop when
         not doing pattern detection.  */
      if (!single_exit (loop)
	  || (!flag_tree_loop_distribute_patterns
	      && !optimize_loop_for_speed_p (loop)))
	continue;

      /* If niters is unknown don't distribute loop but rather try to transform
	 it to a call to a builtin.  */
      tree niters = number_of_latch_executions (loop);
      if (niters == NULL_TREE || niters == chrec_dont_know)
	{
	  datarefs_vec.create (20);
	  if (flag_tree_loop_distribute_patterns
	      && transform_reduction_loop (loop))
	    {
	      changed = true;
	      loops_to_be_destroyed.safe_push (loop);
	      if (dump_enabled_p ())
		{
		  dump_user_location_t loc = find_loop_location (loop);
		  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
				   loc, "Loop %d transformed into a builtin.\n",
				   loop->num);
		}
	    }
	  free_data_refs (datarefs_vec);
	  continue;
	}

      /* Get the perfect loop nest for distribution.  */
      loop = prepare_perfect_loop_nest (loop);
      /* Try distributing the outermost qualifying nest first, then walk
	 inward until something is distributed or seeding fails.  */
      for (; loop; loop = loop->inner)
	{
	  auto_vec<gimple *> work_list;
	  if (!find_seed_stmts_for_distribution (loop, &work_list))
	    break;

	  const char *str = loop->inner ? " nest" : "";
	  dump_user_location_t loc = find_loop_location (loop);
	  /* Lazily compute control dependences once, shared by all loops.
	     Post-dominators are only needed to build them.  */
	  if (!cd)
	    {
	      calculate_dominance_info (CDI_DOMINATORS);
	      calculate_dominance_info (CDI_POST_DOMINATORS);
	      cd = new control_dependences ();
	      free_dominance_info (CDI_POST_DOMINATORS);
	    }

	  bool destroy_p;
	  int nb_generated_loops, nb_generated_calls;
	  nb_generated_loops
	    = distribute_loop (loop, work_list, cd, &nb_generated_calls,
			       &destroy_p, (!optimize_loop_for_speed_p (loop)
					    || !flag_tree_loop_distribution));
	  if (destroy_p)
	    loops_to_be_destroyed.safe_push (loop);

	  if (nb_generated_loops + nb_generated_calls > 0)
	    {
	      changed = true;
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
				 loc, "Loop%s %d distributed: split to %d loops "
				 "and %d library calls.\n", str, loop->num,
				 nb_generated_loops, nb_generated_calls);

	      break;
	    }

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Loop%s %d not distributed.\n", str, loop->num);
	}
    }

  if (cd)
    delete cd;

  if (bb_top_order_index != NULL)
    bb_top_order_destroy ();

  if (changed)
    {
      /* Destroy loop bodies that could not be reused.  Do this late as we
	 otherwise can end up refering to stale data in control dependences.  */
      unsigned i;
      class loop *loop;
      FOR_EACH_VEC_ELT (loops_to_be_destroyed, i, loop)
	destroy_loop (loop);

      /* Cached scalar evolutions now may refer to wrong or non-existing
	 loops.  */
      scev_reset ();
      mark_virtual_operands_for_renaming (fun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

  checking_verify_loop_structure ();

  return changed ? TODO_cleanup_cfg : 0;
}
3917 1.1 mrg
3918 1.1 mrg
3919 1.1 mrg /* Distribute all loops in the current function. */
3920 1.1 mrg
3921 1.1 mrg namespace {
3922 1.1 mrg
/* Static descriptor for the "ldist" GIMPLE pass.  Field order follows
   struct pass_data; it is handed to gimple_opt_pass's constructor by
   pass_loop_distribution below.  The pass requires a CFG and SSA form
   and sets no TODO flags itself (the execute function returns
   TODO_cleanup_cfg dynamically when it changed something).  */
const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
3935 1.1 mrg
3936 1.1 mrg class pass_loop_distribution : public gimple_opt_pass
3937 1.1 mrg {
3938 1.1 mrg public:
3939 1.1 mrg pass_loop_distribution (gcc::context *ctxt)
3940 1.1 mrg : gimple_opt_pass (pass_data_loop_distribution, ctxt)
3941 1.1 mrg {}
3942 1.1 mrg
3943 1.1 mrg /* opt_pass methods: */
3944 1.1 mrg virtual bool gate (function *)
3945 1.1 mrg {
3946 1.1 mrg return flag_tree_loop_distribution
3947 1.1 mrg || flag_tree_loop_distribute_patterns;
3948 1.1 mrg }
3949 1.1 mrg
3950 1.1 mrg virtual unsigned int execute (function *);
3951 1.1 mrg
3952 1.1 mrg }; // class pass_loop_distribution
3953 1.1 mrg
3954 1.1 mrg unsigned int
3955 1.1 mrg pass_loop_distribution::execute (function *fun)
3956 1.1 mrg {
3957 1.1 mrg return loop_distribution ().execute (fun);
3958 1.1 mrg }
3959 1.1 mrg
3960 1.1 mrg } // anon namespace
3961 1.1 mrg
3962 1.1 mrg gimple_opt_pass *
3963 1.1 mrg make_pass_loop_distribution (gcc::context *ctxt)
3964 1.1 mrg {
3965 1.1 mrg return new pass_loop_distribution (ctxt);
3966 1.1 mrg }
3967