tree-data-ref.cc revision 1.1 1 1.1 mrg /* Data references and dependences detectors.
2 1.1 mrg Copyright (C) 2003-2022 Free Software Foundation, Inc.
3 1.1 mrg Contributed by Sebastian Pop <pop (at) cri.ensmp.fr>
4 1.1 mrg
5 1.1 mrg This file is part of GCC.
6 1.1 mrg
7 1.1 mrg GCC is free software; you can redistribute it and/or modify it under
8 1.1 mrg the terms of the GNU General Public License as published by the Free
9 1.1 mrg Software Foundation; either version 3, or (at your option) any later
10 1.1 mrg version.
11 1.1 mrg
12 1.1 mrg GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 1.1 mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 1.1 mrg FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 1.1 mrg for more details.
16 1.1 mrg
17 1.1 mrg You should have received a copy of the GNU General Public License
18 1.1 mrg along with GCC; see the file COPYING3. If not see
19 1.1 mrg <http://www.gnu.org/licenses/>. */
20 1.1 mrg
21 1.1 mrg /* This pass walks a given loop structure searching for array
22 1.1 mrg references. The information about the array accesses is recorded
23 1.1 mrg in DATA_REFERENCE structures.
24 1.1 mrg
25 1.1 mrg The basic test for determining the dependences is:
26 1.1 mrg given two access functions chrec1 and chrec2 to a same array, and
27 1.1 mrg x and y two vectors from the iteration domain, the same element of
28 1.1 mrg the array is accessed twice at iterations x and y if and only if:
29 1.1 mrg | chrec1 (x) == chrec2 (y).
30 1.1 mrg
31 1.1 mrg The goals of this analysis are:
32 1.1 mrg
33 1.1 mrg - to determine the independence: the relation between two
34 1.1 mrg independent accesses is qualified with the chrec_known (this
35 1.1 mrg information allows a loop parallelization),
36 1.1 mrg
37 1.1 mrg - when two data references access the same data, to qualify the
38 1.1 mrg dependence relation with classic dependence representations:
39 1.1 mrg
40 1.1 mrg - distance vectors
41 1.1 mrg - direction vectors
42 1.1 mrg - loop carried level dependence
43 1.1 mrg - polyhedron dependence
44 1.1 mrg or with the chains of recurrences based representation,
45 1.1 mrg
46 1.1 mrg - to define a knowledge base for storing the data dependence
47 1.1 mrg information,
48 1.1 mrg
49 1.1 mrg - to define an interface to access this data.
50 1.1 mrg
51 1.1 mrg
52 1.1 mrg Definitions:
53 1.1 mrg
54 1.1 mrg - subscript: given two array accesses a subscript is the tuple
55 1.1 mrg composed of the access functions for a given dimension. Example:
56 1.1 mrg Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 1.1 mrg (f1, g1), (f2, g2), (f3, g3).
58 1.1 mrg
59 1.1 mrg - Diophantine equation: an equation whose coefficients and
60 1.1 mrg solutions are integer constants, for example the equation
61 1.1 mrg | 3*x + 2*y = 1
62 1.1 mrg has an integer solution x = 1 and y = -1.
63 1.1 mrg
64 1.1 mrg References:
65 1.1 mrg
66 1.1 mrg - "Advanced Compilation for High Performance Computing" by Randy
67 1.1 mrg Allen and Ken Kennedy.
68 1.1 mrg http://citeseer.ist.psu.edu/goff91practical.html
69 1.1 mrg
70 1.1 mrg - "Loop Transformations for Restructuring Compilers - The Foundations"
71 1.1 mrg by Utpal Banerjee.
72 1.1 mrg
73 1.1 mrg
74 1.1 mrg */
75 1.1 mrg
76 1.1 mrg #define INCLUDE_ALGORITHM
77 1.1 mrg #include "config.h"
78 1.1 mrg #include "system.h"
79 1.1 mrg #include "coretypes.h"
80 1.1 mrg #include "backend.h"
81 1.1 mrg #include "rtl.h"
82 1.1 mrg #include "tree.h"
83 1.1 mrg #include "gimple.h"
84 1.1 mrg #include "gimple-pretty-print.h"
85 1.1 mrg #include "alias.h"
86 1.1 mrg #include "fold-const.h"
87 1.1 mrg #include "expr.h"
88 1.1 mrg #include "gimple-iterator.h"
89 1.1 mrg #include "tree-ssa-loop-niter.h"
90 1.1 mrg #include "tree-ssa-loop.h"
91 1.1 mrg #include "tree-ssa.h"
92 1.1 mrg #include "cfgloop.h"
93 1.1 mrg #include "tree-data-ref.h"
94 1.1 mrg #include "tree-scalar-evolution.h"
95 1.1 mrg #include "dumpfile.h"
96 1.1 mrg #include "tree-affine.h"
97 1.1 mrg #include "builtins.h"
98 1.1 mrg #include "tree-eh.h"
99 1.1 mrg #include "ssa.h"
100 1.1 mrg #include "internal-fn.h"
101 1.1 mrg #include "vr-values.h"
102 1.1 mrg #include "range-op.h"
103 1.1 mrg #include "tree-ssa-loop-ivopts.h"
104 1.1 mrg
/* Counters describing the outcome of the dependence analyses performed
   during one invocation of this file's entry points; dumped for
   statistics.  */
static struct datadep_stats
{
  /* Whole data-dependence-relation tests and their outcomes.  */
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  /* Per-subscript tests (one subscript per array dimension).  */
  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  /* ZIV (zero index variable) subscript tests.  */
  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  /* SIV (single index variable) subscript tests.  */
  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  /* MIV (multiple index variable) subscript tests.  */
  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;
131 1.1 mrg
132 1.1 mrg static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
133 1.1 mrg unsigned int, unsigned int,
134 1.1 mrg class loop *);
135 1.1 mrg /* Returns true iff A divides B. */
136 1.1 mrg
137 1.1 mrg static inline bool
138 1.1 mrg tree_fold_divides_p (const_tree a, const_tree b)
139 1.1 mrg {
140 1.1 mrg gcc_assert (TREE_CODE (a) == INTEGER_CST);
141 1.1 mrg gcc_assert (TREE_CODE (b) == INTEGER_CST);
142 1.1 mrg return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
143 1.1 mrg }
144 1.1 mrg
145 1.1 mrg /* Returns true iff A divides B. */
146 1.1 mrg
147 1.1 mrg static inline bool
148 1.1 mrg int_divides_p (lambda_int a, lambda_int b)
149 1.1 mrg {
150 1.1 mrg return ((b % a) == 0);
151 1.1 mrg }
152 1.1 mrg
153 1.1 mrg /* Return true if reference REF contains a union access. */
154 1.1 mrg
155 1.1 mrg static bool
156 1.1 mrg ref_contains_union_access_p (tree ref)
157 1.1 mrg {
158 1.1 mrg while (handled_component_p (ref))
159 1.1 mrg {
160 1.1 mrg ref = TREE_OPERAND (ref, 0);
161 1.1 mrg if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
162 1.1 mrg || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
163 1.1 mrg return true;
164 1.1 mrg }
165 1.1 mrg return false;
166 1.1 mrg }
167 1.1 mrg
168 1.1 mrg
169 1.1 mrg
171 1.1 mrg /* Dump into FILE all the data references from DATAREFS. */
172 1.1 mrg
173 1.1 mrg static void
174 1.1 mrg dump_data_references (FILE *file, vec<data_reference_p> datarefs)
175 1.1 mrg {
176 1.1 mrg for (data_reference *dr : datarefs)
177 1.1 mrg dump_data_reference (file, dr);
178 1.1 mrg }
179 1.1 mrg
180 1.1 mrg /* Unified dump into FILE all the data references from DATAREFS. */
181 1.1 mrg
182 1.1 mrg DEBUG_FUNCTION void
183 1.1 mrg debug (vec<data_reference_p> &ref)
184 1.1 mrg {
185 1.1 mrg dump_data_references (stderr, ref);
186 1.1 mrg }
187 1.1 mrg
188 1.1 mrg DEBUG_FUNCTION void
189 1.1 mrg debug (vec<data_reference_p> *ptr)
190 1.1 mrg {
191 1.1 mrg if (ptr)
192 1.1 mrg debug (*ptr);
193 1.1 mrg else
194 1.1 mrg fprintf (stderr, "<nil>\n");
195 1.1 mrg }
196 1.1 mrg
197 1.1 mrg
198 1.1 mrg /* Dump into STDERR all the data references from DATAREFS. */
199 1.1 mrg
200 1.1 mrg DEBUG_FUNCTION void
201 1.1 mrg debug_data_references (vec<data_reference_p> datarefs)
202 1.1 mrg {
203 1.1 mrg dump_data_references (stderr, datarefs);
204 1.1 mrg }
205 1.1 mrg
206 1.1 mrg /* Print to STDERR the data_reference DR. */
207 1.1 mrg
208 1.1 mrg DEBUG_FUNCTION void
209 1.1 mrg debug_data_reference (struct data_reference *dr)
210 1.1 mrg {
211 1.1 mrg dump_data_reference (stderr, dr);
212 1.1 mrg }
213 1.1 mrg
214 1.1 mrg /* Dump function for a DATA_REFERENCE structure. */
215 1.1 mrg
216 1.1 mrg void
217 1.1 mrg dump_data_reference (FILE *outf,
218 1.1 mrg struct data_reference *dr)
219 1.1 mrg {
220 1.1 mrg unsigned int i;
221 1.1 mrg
222 1.1 mrg fprintf (outf, "#(Data Ref: \n");
223 1.1 mrg fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
224 1.1 mrg fprintf (outf, "# stmt: ");
225 1.1 mrg print_gimple_stmt (outf, DR_STMT (dr), 0);
226 1.1 mrg fprintf (outf, "# ref: ");
227 1.1 mrg print_generic_stmt (outf, DR_REF (dr));
228 1.1 mrg fprintf (outf, "# base_object: ");
229 1.1 mrg print_generic_stmt (outf, DR_BASE_OBJECT (dr));
230 1.1 mrg
231 1.1 mrg for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
232 1.1 mrg {
233 1.1 mrg fprintf (outf, "# Access function %d: ", i);
234 1.1 mrg print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
235 1.1 mrg }
236 1.1 mrg fprintf (outf, "#)\n");
237 1.1 mrg }
238 1.1 mrg
239 1.1 mrg /* Unified dump function for a DATA_REFERENCE structure. */
240 1.1 mrg
241 1.1 mrg DEBUG_FUNCTION void
242 1.1 mrg debug (data_reference &ref)
243 1.1 mrg {
244 1.1 mrg dump_data_reference (stderr, &ref);
245 1.1 mrg }
246 1.1 mrg
247 1.1 mrg DEBUG_FUNCTION void
248 1.1 mrg debug (data_reference *ptr)
249 1.1 mrg {
250 1.1 mrg if (ptr)
251 1.1 mrg debug (*ptr);
252 1.1 mrg else
253 1.1 mrg fprintf (stderr, "<nil>\n");
254 1.1 mrg }
255 1.1 mrg
256 1.1 mrg
257 1.1 mrg /* Dumps the affine function described by FN to the file OUTF. */
258 1.1 mrg
259 1.1 mrg DEBUG_FUNCTION void
260 1.1 mrg dump_affine_function (FILE *outf, affine_fn fn)
261 1.1 mrg {
262 1.1 mrg unsigned i;
263 1.1 mrg tree coef;
264 1.1 mrg
265 1.1 mrg print_generic_expr (outf, fn[0], TDF_SLIM);
266 1.1 mrg for (i = 1; fn.iterate (i, &coef); i++)
267 1.1 mrg {
268 1.1 mrg fprintf (outf, " + ");
269 1.1 mrg print_generic_expr (outf, coef, TDF_SLIM);
270 1.1 mrg fprintf (outf, " * x_%u", i);
271 1.1 mrg }
272 1.1 mrg }
273 1.1 mrg
274 1.1 mrg /* Dumps the conflict function CF to the file OUTF. */
275 1.1 mrg
276 1.1 mrg DEBUG_FUNCTION void
277 1.1 mrg dump_conflict_function (FILE *outf, conflict_function *cf)
278 1.1 mrg {
279 1.1 mrg unsigned i;
280 1.1 mrg
281 1.1 mrg if (cf->n == NO_DEPENDENCE)
282 1.1 mrg fprintf (outf, "no dependence");
283 1.1 mrg else if (cf->n == NOT_KNOWN)
284 1.1 mrg fprintf (outf, "not known");
285 1.1 mrg else
286 1.1 mrg {
287 1.1 mrg for (i = 0; i < cf->n; i++)
288 1.1 mrg {
289 1.1 mrg if (i != 0)
290 1.1 mrg fprintf (outf, " ");
291 1.1 mrg fprintf (outf, "[");
292 1.1 mrg dump_affine_function (outf, cf->fns[i]);
293 1.1 mrg fprintf (outf, "]");
294 1.1 mrg }
295 1.1 mrg }
296 1.1 mrg }
297 1.1 mrg
298 1.1 mrg /* Dump function for a SUBSCRIPT structure. */
299 1.1 mrg
300 1.1 mrg DEBUG_FUNCTION void
301 1.1 mrg dump_subscript (FILE *outf, struct subscript *subscript)
302 1.1 mrg {
303 1.1 mrg conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
304 1.1 mrg
305 1.1 mrg fprintf (outf, "\n (subscript \n");
306 1.1 mrg fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
307 1.1 mrg dump_conflict_function (outf, cf);
308 1.1 mrg if (CF_NONTRIVIAL_P (cf))
309 1.1 mrg {
310 1.1 mrg tree last_iteration = SUB_LAST_CONFLICT (subscript);
311 1.1 mrg fprintf (outf, "\n last_conflict: ");
312 1.1 mrg print_generic_expr (outf, last_iteration);
313 1.1 mrg }
314 1.1 mrg
315 1.1 mrg cf = SUB_CONFLICTS_IN_B (subscript);
316 1.1 mrg fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
317 1.1 mrg dump_conflict_function (outf, cf);
318 1.1 mrg if (CF_NONTRIVIAL_P (cf))
319 1.1 mrg {
320 1.1 mrg tree last_iteration = SUB_LAST_CONFLICT (subscript);
321 1.1 mrg fprintf (outf, "\n last_conflict: ");
322 1.1 mrg print_generic_expr (outf, last_iteration);
323 1.1 mrg }
324 1.1 mrg
325 1.1 mrg fprintf (outf, "\n (Subscript distance: ");
326 1.1 mrg print_generic_expr (outf, SUB_DISTANCE (subscript));
327 1.1 mrg fprintf (outf, " ))\n");
328 1.1 mrg }
329 1.1 mrg
330 1.1 mrg /* Print the classic direction vector DIRV to OUTF. */
331 1.1 mrg
332 1.1 mrg DEBUG_FUNCTION void
333 1.1 mrg print_direction_vector (FILE *outf,
334 1.1 mrg lambda_vector dirv,
335 1.1 mrg int length)
336 1.1 mrg {
337 1.1 mrg int eq;
338 1.1 mrg
339 1.1 mrg for (eq = 0; eq < length; eq++)
340 1.1 mrg {
341 1.1 mrg enum data_dependence_direction dir = ((enum data_dependence_direction)
342 1.1 mrg dirv[eq]);
343 1.1 mrg
344 1.1 mrg switch (dir)
345 1.1 mrg {
346 1.1 mrg case dir_positive:
347 1.1 mrg fprintf (outf, " +");
348 1.1 mrg break;
349 1.1 mrg case dir_negative:
350 1.1 mrg fprintf (outf, " -");
351 1.1 mrg break;
352 1.1 mrg case dir_equal:
353 1.1 mrg fprintf (outf, " =");
354 1.1 mrg break;
355 1.1 mrg case dir_positive_or_equal:
356 1.1 mrg fprintf (outf, " +=");
357 1.1 mrg break;
358 1.1 mrg case dir_positive_or_negative:
359 1.1 mrg fprintf (outf, " +-");
360 1.1 mrg break;
361 1.1 mrg case dir_negative_or_equal:
362 1.1 mrg fprintf (outf, " -=");
363 1.1 mrg break;
364 1.1 mrg case dir_star:
365 1.1 mrg fprintf (outf, " *");
366 1.1 mrg break;
367 1.1 mrg default:
368 1.1 mrg fprintf (outf, "indep");
369 1.1 mrg break;
370 1.1 mrg }
371 1.1 mrg }
372 1.1 mrg fprintf (outf, "\n");
373 1.1 mrg }
374 1.1 mrg
375 1.1 mrg /* Print a vector of direction vectors. */
376 1.1 mrg
377 1.1 mrg DEBUG_FUNCTION void
378 1.1 mrg print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
379 1.1 mrg int length)
380 1.1 mrg {
381 1.1 mrg for (lambda_vector v : dir_vects)
382 1.1 mrg print_direction_vector (outf, v, length);
383 1.1 mrg }
384 1.1 mrg
385 1.1 mrg /* Print out a vector VEC of length N to OUTFILE. */
386 1.1 mrg
387 1.1 mrg DEBUG_FUNCTION void
388 1.1 mrg print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
389 1.1 mrg {
390 1.1 mrg int i;
391 1.1 mrg
392 1.1 mrg for (i = 0; i < n; i++)
393 1.1 mrg fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]);
394 1.1 mrg fprintf (outfile, "\n");
395 1.1 mrg }
396 1.1 mrg
397 1.1 mrg /* Print a vector of distance vectors. */
398 1.1 mrg
399 1.1 mrg DEBUG_FUNCTION void
400 1.1 mrg print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
401 1.1 mrg int length)
402 1.1 mrg {
403 1.1 mrg for (lambda_vector v : dist_vects)
404 1.1 mrg print_lambda_vector (outf, v, length);
405 1.1 mrg }
406 1.1 mrg
/* Dump function for a DATA_DEPENDENCE_RELATION structure.  Prints the
   two data references of DDR and then, depending on the state of
   DDR_ARE_DEPENDENT: "(don't know)" for chrec_dont_know, "(no
   dependence)" for chrec_known (proven independent, see the file
   header comment), or the full subscript, loop-nest, distance- and
   direction-vector details when the relation is a known dependence
   (NULL_TREE).  */

DEBUG_FUNCTION void
dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
{
  struct data_reference *dra, *drb;

  fprintf (outf, "(Data Dep: \n");

  /* A null DDR or an undetermined relation: print whatever references
     are available and bail out.  */
  if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (ddr)
	{
	  dra = DDR_A (ddr);
	  drb = DDR_B (ddr);
	  if (dra)
	    dump_data_reference (outf, dra);
	  else
	    fprintf (outf, " (nil)\n");
	  if (drb)
	    dump_data_reference (outf, drb);
	  else
	    fprintf (outf, " (nil)\n");
	}
      fprintf (outf, " (don't know)\n)\n");
      return;
    }

  dra = DDR_A (ddr);
  drb = DDR_B (ddr);
  dump_data_reference (outf, dra);
  dump_data_reference (outf, drb);

  /* chrec_known marks references proven independent.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    fprintf (outf, " (no dependence)\n");

  /* NULL_TREE: a dependence exists; its details live in the
     subscripts and the classic distance/direction vectors.  */
  else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      unsigned int i;
      class loop *loopi;

      /* Each subscript pairs the access functions of A and B for one
	 array dimension.  */
      subscript *sub;
      FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
	{
	  fprintf (outf, " access_fn_A: ");
	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
	  fprintf (outf, " access_fn_B: ");
	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
	  dump_subscript (outf, sub);
	}

      fprintf (outf, " loop nest: (");
      FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
	fprintf (outf, "%d ", loopi->num);
      fprintf (outf, ")\n");

      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	{
	  fprintf (outf, " distance_vector: ");
	  print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
			       DDR_NB_LOOPS (ddr));
	}

      for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
	{
	  fprintf (outf, " direction_vector: ");
	  print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
				  DDR_NB_LOOPS (ddr));
	}
    }

  fprintf (outf, ")\n");
}
480 1.1 mrg
481 1.1 mrg /* Debug version. */
482 1.1 mrg
483 1.1 mrg DEBUG_FUNCTION void
484 1.1 mrg debug_data_dependence_relation (const struct data_dependence_relation *ddr)
485 1.1 mrg {
486 1.1 mrg dump_data_dependence_relation (stderr, ddr);
487 1.1 mrg }
488 1.1 mrg
489 1.1 mrg /* Dump into FILE all the dependence relations from DDRS. */
490 1.1 mrg
491 1.1 mrg DEBUG_FUNCTION void
492 1.1 mrg dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
493 1.1 mrg {
494 1.1 mrg for (auto ddr : ddrs)
495 1.1 mrg dump_data_dependence_relation (file, ddr);
496 1.1 mrg }
497 1.1 mrg
498 1.1 mrg DEBUG_FUNCTION void
499 1.1 mrg debug (vec<ddr_p> &ref)
500 1.1 mrg {
501 1.1 mrg dump_data_dependence_relations (stderr, ref);
502 1.1 mrg }
503 1.1 mrg
504 1.1 mrg DEBUG_FUNCTION void
505 1.1 mrg debug (vec<ddr_p> *ptr)
506 1.1 mrg {
507 1.1 mrg if (ptr)
508 1.1 mrg debug (*ptr);
509 1.1 mrg else
510 1.1 mrg fprintf (stderr, "<nil>\n");
511 1.1 mrg }
512 1.1 mrg
513 1.1 mrg
514 1.1 mrg /* Dump to STDERR all the dependence relations from DDRS. */
515 1.1 mrg
516 1.1 mrg DEBUG_FUNCTION void
517 1.1 mrg debug_data_dependence_relations (vec<ddr_p> ddrs)
518 1.1 mrg {
519 1.1 mrg dump_data_dependence_relations (stderr, ddrs);
520 1.1 mrg }
521 1.1 mrg
522 1.1 mrg /* Dumps the distance and direction vectors in FILE. DDRS contains
523 1.1 mrg the dependence relations, and VECT_SIZE is the size of the
524 1.1 mrg dependence vectors, or in other words the number of loops in the
525 1.1 mrg considered nest. */
526 1.1 mrg
527 1.1 mrg DEBUG_FUNCTION void
528 1.1 mrg dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
529 1.1 mrg {
530 1.1 mrg for (data_dependence_relation *ddr : ddrs)
531 1.1 mrg if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
532 1.1 mrg {
533 1.1 mrg for (lambda_vector v : DDR_DIST_VECTS (ddr))
534 1.1 mrg {
535 1.1 mrg fprintf (file, "DISTANCE_V (");
536 1.1 mrg print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
537 1.1 mrg fprintf (file, ")\n");
538 1.1 mrg }
539 1.1 mrg
540 1.1 mrg for (lambda_vector v : DDR_DIR_VECTS (ddr))
541 1.1 mrg {
542 1.1 mrg fprintf (file, "DIRECTION_V (");
543 1.1 mrg print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
544 1.1 mrg fprintf (file, ")\n");
545 1.1 mrg }
546 1.1 mrg }
547 1.1 mrg
548 1.1 mrg fprintf (file, "\n\n");
549 1.1 mrg }
550 1.1 mrg
551 1.1 mrg /* Dumps the data dependence relations DDRS in FILE. */
552 1.1 mrg
553 1.1 mrg DEBUG_FUNCTION void
554 1.1 mrg dump_ddrs (FILE *file, vec<ddr_p> ddrs)
555 1.1 mrg {
556 1.1 mrg for (data_dependence_relation *ddr : ddrs)
557 1.1 mrg dump_data_dependence_relation (file, ddr);
558 1.1 mrg
559 1.1 mrg fprintf (file, "\n\n");
560 1.1 mrg }
561 1.1 mrg
562 1.1 mrg DEBUG_FUNCTION void
563 1.1 mrg debug_ddrs (vec<ddr_p> ddrs)
564 1.1 mrg {
565 1.1 mrg dump_ddrs (stderr, ddrs);
566 1.1 mrg }
567 1.1 mrg
/* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
   OP0 CODE OP1, where:

   - OP0 CODE OP1 has integral type TYPE
   - the range of OP0 is given by OP0_RANGE and
   - the range of OP1 is given by OP1_RANGE.

   Independently of RESULT_RANGE, try to compute:

   DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
   - (sizetype) (OP0 CODE OP1)

   as a constant and subtract DELTA from the ssizetype constant in *OFF.
   Return true on success, or false if DELTA is not known at compile time.

   Truncation and sign changes are known to distribute over CODE, i.e.

   (itype) (A CODE B) == (itype) A CODE (itype) B

   for any integral type ITYPE whose precision is no greater than the
   precision of A and B.  */

static bool
compute_distributive_range (tree type, value_range &op0_range,
			    tree_code code, value_range &op1_range,
			    tree *off, value_range *result_range)
{
  /* Trapping-overflow types are rejected by the caller
     (split_constant_offset_1).  */
  gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
  if (result_range)
    {
      range_operator *op = range_op_handler (code, type);
      op->fold_range (*result_range, type, op0_range, op1_range);
    }

  /* The distributive property guarantees that if TYPE is no narrower
     than SIZETYPE,

     (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1

     and so we can treat DELTA as zero.  */
  if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
    return true;

  /* If overflow is undefined, we can assume that:

     X == (ssizetype) OP0 CODE (ssizetype) OP1

     is within the range of TYPE, i.e.:

     X == (ssizetype) (TYPE) X

     Distributing the (TYPE) truncation over X gives:

     X == (ssizetype) (OP0 CODE OP1)

     Casting both sides to sizetype and distributing the sizetype cast
     over X gives:

     (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)

     and so we can treat DELTA as zero.  */
  if (TYPE_OVERFLOW_UNDEFINED (type))
    return true;

  /* Compute the range of:

     (ssizetype) OP0 CODE (ssizetype) OP1

     The distributive property guarantees that this has the same bitpattern as:

     (sizetype) OP0 CODE (sizetype) OP1

     but its range is more conducive to analysis.  */
  range_cast (op0_range, ssizetype);
  range_cast (op1_range, ssizetype);
  value_range wide_range;
  range_operator *op = range_op_handler (code, ssizetype);
  /* Temporarily enable -fwrapv so that fold_range folds the ssizetype
     operation with wrapping semantics, then restore the user's flag.  */
  bool saved_flag_wrapv = flag_wrapv;
  flag_wrapv = 1;
  op->fold_range (wide_range, ssizetype, op0_range, op1_range);
  flag_wrapv = saved_flag_wrapv;
  /* Give up unless the result is a single constant-bounded range.  */
  if (wide_range.num_pairs () != 1 || !range_int_cst_p (&wide_range))
    return false;

  wide_int lb = wide_range.lower_bound ();
  wide_int ub = wide_range.upper_bound ();

  /* Calculate the number of times that each end of the range overflows or
     underflows TYPE.  We can only calculate DELTA if the numbers match.  */
  unsigned int precision = TYPE_PRECISION (type);
  if (!TYPE_UNSIGNED (type))
    {
      /* Bias signed bounds by TYPE's minimum value so that the
	 overflow count can be read from the bits above PRECISION,
	 as in the unsigned case.  */
      wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
      lb -= type_min;
      ub -= type_min;
    }
  wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
  lb &= upper_bits;
  ub &= upper_bits;
  if (lb != ub)
    return false;

  /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
     negative values indicating underflow.  The low PRECISION bits of LB
     are clear, so DELTA is therefore LB (== UB).  */
  *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
  return true;
}
676 1.1 mrg
677 1.1 mrg /* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
678 1.1 mrg given that OP has type FROM_TYPE and range RANGE. Both TO_TYPE and
679 1.1 mrg FROM_TYPE are integral types. */
680 1.1 mrg
681 1.1 mrg static bool
682 1.1 mrg nop_conversion_for_offset_p (tree to_type, tree from_type, value_range &range)
683 1.1 mrg {
684 1.1 mrg gcc_assert (INTEGRAL_TYPE_P (to_type)
685 1.1 mrg && INTEGRAL_TYPE_P (from_type)
686 1.1 mrg && !TYPE_OVERFLOW_TRAPS (to_type)
687 1.1 mrg && !TYPE_OVERFLOW_TRAPS (from_type));
688 1.1 mrg
689 1.1 mrg /* Converting to something no narrower than sizetype and then to sizetype
690 1.1 mrg is equivalent to converting directly to sizetype. */
691 1.1 mrg if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
692 1.1 mrg return true;
693 1.1 mrg
694 1.1 mrg /* Check whether TO_TYPE can represent all values that FROM_TYPE can. */
695 1.1 mrg if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
696 1.1 mrg && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
697 1.1 mrg return true;
698 1.1 mrg
699 1.1 mrg /* For narrowing conversions, we could in principle test whether
700 1.1 mrg the bits in FROM_TYPE but not in TO_TYPE have a fixed value
701 1.1 mrg and apply a constant adjustment.
702 1.1 mrg
703 1.1 mrg For other conversions (which involve a sign change) we could
704 1.1 mrg check that the signs are always equal, and apply a constant
705 1.1 mrg adjustment if the signs are negative.
706 1.1 mrg
707 1.1 mrg However, both cases should be rare. */
708 1.1 mrg return range_fits_type_p (&range, TYPE_PRECISION (to_type),
709 1.1 mrg TYPE_SIGN (to_type));
710 1.1 mrg }
711 1.1 mrg
712 1.1 mrg static void
713 1.1 mrg split_constant_offset (tree type, tree *var, tree *off,
714 1.1 mrg value_range *result_range,
715 1.1 mrg hash_map<tree, std::pair<tree, tree> > &cache,
716 1.1 mrg unsigned *limit);
717 1.1 mrg
718 1.1 mrg /* Helper function for split_constant_offset. If TYPE is a pointer type,
719 1.1 mrg try to express OP0 CODE OP1 as:
720 1.1 mrg
721 1.1 mrg POINTER_PLUS <*VAR, (sizetype) *OFF>
722 1.1 mrg
723 1.1 mrg where:
724 1.1 mrg
725 1.1 mrg - *VAR has type TYPE
726 1.1 mrg - *OFF is a constant of type ssizetype.
727 1.1 mrg
728 1.1 mrg If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:
729 1.1 mrg
730 1.1 mrg *VAR + (sizetype) *OFF
731 1.1 mrg
732 1.1 mrg where:
733 1.1 mrg
734 1.1 mrg - *VAR has type sizetype
735 1.1 mrg - *OFF is a constant of type ssizetype.
736 1.1 mrg
737 1.1 mrg In both cases, OP0 CODE OP1 has type TYPE.
738 1.1 mrg
739 1.1 mrg Return true on success. A false return value indicates that we can't
740 1.1 mrg do better than set *OFF to zero.
741 1.1 mrg
742 1.1 mrg When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
743 1.1 mrg if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.
744 1.1 mrg
745 1.1 mrg CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
746 1.1 mrg visited. LIMIT counts down the number of SSA names that we are
747 1.1 mrg allowed to process before giving up. */
748 1.1 mrg
749 1.1 mrg static bool
750 1.1 mrg split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
751 1.1 mrg tree *var, tree *off, value_range *result_range,
752 1.1 mrg hash_map<tree, std::pair<tree, tree> > &cache,
753 1.1 mrg unsigned *limit)
754 1.1 mrg {
755 1.1 mrg tree var0, var1;
756 1.1 mrg tree off0, off1;
757 1.1 mrg value_range op0_range, op1_range;
758 1.1 mrg
759 1.1 mrg *var = NULL_TREE;
760 1.1 mrg *off = NULL_TREE;
761 1.1 mrg
762 1.1 mrg if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
763 1.1 mrg return false;
764 1.1 mrg
765 1.1 mrg if (TREE_CODE (op0) == SSA_NAME
766 1.1 mrg && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
767 1.1 mrg return false;
768 1.1 mrg if (op1
769 1.1 mrg && TREE_CODE (op1) == SSA_NAME
770 1.1 mrg && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op1))
771 1.1 mrg return false;
772 1.1 mrg
773 1.1 mrg switch (code)
774 1.1 mrg {
775 1.1 mrg case INTEGER_CST:
776 1.1 mrg *var = size_int (0);
777 1.1 mrg *off = fold_convert (ssizetype, op0);
778 1.1 mrg if (result_range)
779 1.1 mrg result_range->set (op0, op0);
780 1.1 mrg return true;
781 1.1 mrg
782 1.1 mrg case POINTER_PLUS_EXPR:
783 1.1 mrg split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
784 1.1 mrg split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
785 1.1 mrg *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
786 1.1 mrg *off = size_binop (PLUS_EXPR, off0, off1);
787 1.1 mrg return true;
788 1.1 mrg
789 1.1 mrg case PLUS_EXPR:
790 1.1 mrg case MINUS_EXPR:
791 1.1 mrg split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
792 1.1 mrg split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
793 1.1 mrg *off = size_binop (code, off0, off1);
794 1.1 mrg if (!compute_distributive_range (type, op0_range, code, op1_range,
795 1.1 mrg off, result_range))
796 1.1 mrg return false;
797 1.1 mrg *var = fold_build2 (code, sizetype, var0, var1);
798 1.1 mrg return true;
799 1.1 mrg
800 1.1 mrg case MULT_EXPR:
801 1.1 mrg if (TREE_CODE (op1) != INTEGER_CST)
802 1.1 mrg return false;
803 1.1 mrg
804 1.1 mrg split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
805 1.1 mrg op1_range.set (op1, op1);
806 1.1 mrg *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
807 1.1 mrg if (!compute_distributive_range (type, op0_range, code, op1_range,
808 1.1 mrg off, result_range))
809 1.1 mrg return false;
810 1.1 mrg *var = fold_build2 (MULT_EXPR, sizetype, var0,
811 1.1 mrg fold_convert (sizetype, op1));
812 1.1 mrg return true;
813 1.1 mrg
814 1.1 mrg case ADDR_EXPR:
815 1.1 mrg {
816 1.1 mrg tree base, poffset;
817 1.1 mrg poly_int64 pbitsize, pbitpos, pbytepos;
818 1.1 mrg machine_mode pmode;
819 1.1 mrg int punsignedp, preversep, pvolatilep;
820 1.1 mrg
821 1.1 mrg op0 = TREE_OPERAND (op0, 0);
822 1.1 mrg base
823 1.1 mrg = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
824 1.1 mrg &punsignedp, &preversep, &pvolatilep);
825 1.1 mrg
826 1.1 mrg if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
827 1.1 mrg return false;
828 1.1 mrg base = build_fold_addr_expr (base);
829 1.1 mrg off0 = ssize_int (pbytepos);
830 1.1 mrg
831 1.1 mrg if (poffset)
832 1.1 mrg {
833 1.1 mrg split_constant_offset (poffset, &poffset, &off1, nullptr,
834 1.1 mrg cache, limit);
835 1.1 mrg off0 = size_binop (PLUS_EXPR, off0, off1);
836 1.1 mrg base = fold_build_pointer_plus (base, poffset);
837 1.1 mrg }
838 1.1 mrg
839 1.1 mrg var0 = fold_convert (type, base);
840 1.1 mrg
841 1.1 mrg /* If variable length types are involved, punt, otherwise casts
842 1.1 mrg might be converted into ARRAY_REFs in gimplify_conversion.
843 1.1 mrg To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
844 1.1 mrg possibly no longer appears in current GIMPLE, might resurface.
845 1.1 mrg This perhaps could run
846 1.1 mrg if (CONVERT_EXPR_P (var0))
847 1.1 mrg {
848 1.1 mrg gimplify_conversion (&var0);
849 1.1 mrg // Attempt to fill in any within var0 found ARRAY_REF's
850 1.1 mrg // element size from corresponding op embedded ARRAY_REF,
851 1.1 mrg // if unsuccessful, just punt.
852 1.1 mrg } */
853 1.1 mrg while (POINTER_TYPE_P (type))
854 1.1 mrg type = TREE_TYPE (type);
855 1.1 mrg if (int_size_in_bytes (type) < 0)
856 1.1 mrg return false;
857 1.1 mrg
858 1.1 mrg *var = var0;
859 1.1 mrg *off = off0;
860 1.1 mrg return true;
861 1.1 mrg }
862 1.1 mrg
863 1.1 mrg case SSA_NAME:
864 1.1 mrg {
865 1.1 mrg gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
866 1.1 mrg enum tree_code subcode;
867 1.1 mrg
868 1.1 mrg if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
869 1.1 mrg return false;
870 1.1 mrg
871 1.1 mrg subcode = gimple_assign_rhs_code (def_stmt);
872 1.1 mrg
873 1.1 mrg /* We are using a cache to avoid un-CSEing large amounts of code. */
874 1.1 mrg bool use_cache = false;
875 1.1 mrg if (!has_single_use (op0)
876 1.1 mrg && (subcode == POINTER_PLUS_EXPR
877 1.1 mrg || subcode == PLUS_EXPR
878 1.1 mrg || subcode == MINUS_EXPR
879 1.1 mrg || subcode == MULT_EXPR
880 1.1 mrg || subcode == ADDR_EXPR
881 1.1 mrg || CONVERT_EXPR_CODE_P (subcode)))
882 1.1 mrg {
883 1.1 mrg use_cache = true;
884 1.1 mrg bool existed;
885 1.1 mrg std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
886 1.1 mrg if (existed)
887 1.1 mrg {
888 1.1 mrg if (integer_zerop (e.second))
889 1.1 mrg return false;
890 1.1 mrg *var = e.first;
891 1.1 mrg *off = e.second;
892 1.1 mrg /* The caller sets the range in this case. */
893 1.1 mrg return true;
894 1.1 mrg }
895 1.1 mrg e = std::make_pair (op0, ssize_int (0));
896 1.1 mrg }
897 1.1 mrg
898 1.1 mrg if (*limit == 0)
899 1.1 mrg return false;
900 1.1 mrg --*limit;
901 1.1 mrg
902 1.1 mrg var0 = gimple_assign_rhs1 (def_stmt);
903 1.1 mrg var1 = gimple_assign_rhs2 (def_stmt);
904 1.1 mrg
905 1.1 mrg bool res = split_constant_offset_1 (type, var0, subcode, var1,
906 1.1 mrg var, off, nullptr, cache, limit);
907 1.1 mrg if (res && use_cache)
908 1.1 mrg *cache.get (op0) = std::make_pair (*var, *off);
909 1.1 mrg /* The caller sets the range in this case. */
910 1.1 mrg return res;
911 1.1 mrg }
912 1.1 mrg CASE_CONVERT:
913 1.1 mrg {
914 1.1 mrg /* We can only handle the following conversions:
915 1.1 mrg
916 1.1 mrg - Conversions from one pointer type to another pointer type.
917 1.1 mrg
918 1.1 mrg - Conversions from one non-trapping integral type to another
919 1.1 mrg non-trapping integral type. In this case, the recursive
920 1.1 mrg call makes sure that:
921 1.1 mrg
922 1.1 mrg (sizetype) OP0
923 1.1 mrg
924 1.1 mrg can be expressed as a sizetype operation involving VAR and OFF,
925 1.1 mrg and all we need to do is check whether:
926 1.1 mrg
927 1.1 mrg (sizetype) OP0 == (sizetype) (TYPE) OP0
928 1.1 mrg
929 1.1 mrg - Conversions from a non-trapping sizetype-size integral type to
930 1.1 mrg a like-sized pointer type. In this case, the recursive call
931 1.1 mrg makes sure that:
932 1.1 mrg
933 1.1 mrg (sizetype) OP0 == *VAR + (sizetype) *OFF
934 1.1 mrg
935 1.1 mrg and we can convert that to:
936 1.1 mrg
937 1.1 mrg POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>
938 1.1 mrg
939 1.1 mrg - Conversions from a sizetype-sized pointer type to a like-sized
940 1.1 mrg non-trapping integral type. In this case, the recursive call
941 1.1 mrg makes sure that:
942 1.1 mrg
943 1.1 mrg OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>
944 1.1 mrg
945 1.1 mrg where the POINTER_PLUS and *VAR have the same precision as
946 1.1 mrg TYPE (and the same precision as sizetype). Then:
947 1.1 mrg
948 1.1 mrg (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF. */
949 1.1 mrg tree itype = TREE_TYPE (op0);
950 1.1 mrg if ((POINTER_TYPE_P (itype)
951 1.1 mrg || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
952 1.1 mrg && (POINTER_TYPE_P (type)
953 1.1 mrg || (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
954 1.1 mrg && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
955 1.1 mrg || (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
956 1.1 mrg && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
957 1.1 mrg {
958 1.1 mrg if (POINTER_TYPE_P (type))
959 1.1 mrg {
960 1.1 mrg split_constant_offset (op0, var, off, nullptr, cache, limit);
961 1.1 mrg *var = fold_convert (type, *var);
962 1.1 mrg }
963 1.1 mrg else if (POINTER_TYPE_P (itype))
964 1.1 mrg {
965 1.1 mrg split_constant_offset (op0, var, off, nullptr, cache, limit);
966 1.1 mrg *var = fold_convert (sizetype, *var);
967 1.1 mrg }
968 1.1 mrg else
969 1.1 mrg {
970 1.1 mrg split_constant_offset (op0, var, off, &op0_range,
971 1.1 mrg cache, limit);
972 1.1 mrg if (!nop_conversion_for_offset_p (type, itype, op0_range))
973 1.1 mrg return false;
974 1.1 mrg if (result_range)
975 1.1 mrg {
976 1.1 mrg *result_range = op0_range;
977 1.1 mrg range_cast (*result_range, type);
978 1.1 mrg }
979 1.1 mrg }
980 1.1 mrg return true;
981 1.1 mrg }
982 1.1 mrg return false;
983 1.1 mrg }
984 1.1 mrg
985 1.1 mrg default:
986 1.1 mrg return false;
987 1.1 mrg }
988 1.1 mrg }
989 1.1 mrg
990 1.1 mrg /* If EXP has pointer type, try to express it as:
991 1.1 mrg
992 1.1 mrg POINTER_PLUS <*VAR, (sizetype) *OFF>
993 1.1 mrg
994 1.1 mrg where:
995 1.1 mrg
996 1.1 mrg - *VAR has the same type as EXP
997 1.1 mrg - *OFF is a constant of type ssizetype.
998 1.1 mrg
999 1.1 mrg If EXP has an integral type, try to express (sizetype) EXP as:
1000 1.1 mrg
1001 1.1 mrg *VAR + (sizetype) *OFF
1002 1.1 mrg
1003 1.1 mrg where:
1004 1.1 mrg
1005 1.1 mrg - *VAR has type sizetype
1006 1.1 mrg - *OFF is a constant of type ssizetype.
1007 1.1 mrg
1008 1.1 mrg If EXP_RANGE is nonnull, set it to the range of EXP.
1009 1.1 mrg
1010 1.1 mrg CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
1011 1.1 mrg visited. LIMIT counts down the number of SSA names that we are
1012 1.1 mrg allowed to process before giving up. */
1013 1.1 mrg
1014 1.1 mrg static void
1015 1.1 mrg split_constant_offset (tree exp, tree *var, tree *off, value_range *exp_range,
1016 1.1 mrg hash_map<tree, std::pair<tree, tree> > &cache,
1017 1.1 mrg unsigned *limit)
1018 1.1 mrg {
1019 1.1 mrg tree type = TREE_TYPE (exp), op0, op1;
1020 1.1 mrg enum tree_code code;
1021 1.1 mrg
1022 1.1 mrg code = TREE_CODE (exp);
1023 1.1 mrg if (exp_range)
1024 1.1 mrg {
1025 1.1 mrg *exp_range = type;
1026 1.1 mrg if (code == SSA_NAME)
1027 1.1 mrg {
1028 1.1 mrg value_range vr;
1029 1.1 mrg get_range_query (cfun)->range_of_expr (vr, exp);
1030 1.1 mrg if (vr.undefined_p ())
1031 1.1 mrg vr.set_varying (TREE_TYPE (exp));
1032 1.1 mrg wide_int var_min = wi::to_wide (vr.min ());
1033 1.1 mrg wide_int var_max = wi::to_wide (vr.max ());
1034 1.1 mrg value_range_kind vr_kind = vr.kind ();
1035 1.1 mrg wide_int var_nonzero = get_nonzero_bits (exp);
1036 1.1 mrg vr_kind = intersect_range_with_nonzero_bits (vr_kind,
1037 1.1 mrg &var_min, &var_max,
1038 1.1 mrg var_nonzero,
1039 1.1 mrg TYPE_SIGN (type));
1040 1.1 mrg /* This check for VR_VARYING is here because the old code
1041 1.1 mrg using get_range_info would return VR_RANGE for the entire
1042 1.1 mrg domain, instead of VR_VARYING. The new code normalizes
1043 1.1 mrg full-domain ranges to VR_VARYING. */
1044 1.1 mrg if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
1045 1.1 mrg *exp_range = value_range (type, var_min, var_max);
1046 1.1 mrg }
1047 1.1 mrg }
1048 1.1 mrg
1049 1.1 mrg if (!tree_is_chrec (exp)
1050 1.1 mrg && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
1051 1.1 mrg {
1052 1.1 mrg extract_ops_from_tree (exp, &code, &op0, &op1);
1053 1.1 mrg if (split_constant_offset_1 (type, op0, code, op1, var, off,
1054 1.1 mrg exp_range, cache, limit))
1055 1.1 mrg return;
1056 1.1 mrg }
1057 1.1 mrg
1058 1.1 mrg *var = exp;
1059 1.1 mrg if (INTEGRAL_TYPE_P (type))
1060 1.1 mrg *var = fold_convert (sizetype, *var);
1061 1.1 mrg *off = ssize_int (0);
1062 1.1 mrg
1063 1.1 mrg value_range r;
1064 1.1 mrg if (exp_range && code != SSA_NAME
1065 1.1 mrg && get_range_query (cfun)->range_of_expr (r, exp)
1066 1.1 mrg && !r.undefined_p ())
1067 1.1 mrg *exp_range = r;
1068 1.1 mrg }
1069 1.1 mrg
1070 1.1 mrg /* Expresses EXP as VAR + OFF, where OFF is a constant. VAR has the same
1071 1.1 mrg type as EXP while OFF has type ssizetype. */
1072 1.1 mrg
1073 1.1 mrg void
1074 1.1 mrg split_constant_offset (tree exp, tree *var, tree *off)
1075 1.1 mrg {
1076 1.1 mrg unsigned limit = param_ssa_name_def_chain_limit;
1077 1.1 mrg static hash_map<tree, std::pair<tree, tree> > *cache;
1078 1.1 mrg if (!cache)
1079 1.1 mrg cache = new hash_map<tree, std::pair<tree, tree> > (37);
1080 1.1 mrg split_constant_offset (exp, var, off, nullptr, *cache, &limit);
1081 1.1 mrg *var = fold_convert (TREE_TYPE (exp), *var);
1082 1.1 mrg cache->empty ();
1083 1.1 mrg }
1084 1.1 mrg
1085 1.1 mrg /* Returns the address ADDR of an object in a canonical shape (without nop
1086 1.1 mrg casts, and with type of pointer to the object). */
1087 1.1 mrg
1088 1.1 mrg static tree
1089 1.1 mrg canonicalize_base_object_address (tree addr)
1090 1.1 mrg {
1091 1.1 mrg tree orig = addr;
1092 1.1 mrg
1093 1.1 mrg STRIP_NOPS (addr);
1094 1.1 mrg
1095 1.1 mrg /* The base address may be obtained by casting from integer, in that case
1096 1.1 mrg keep the cast. */
1097 1.1 mrg if (!POINTER_TYPE_P (TREE_TYPE (addr)))
1098 1.1 mrg return orig;
1099 1.1 mrg
1100 1.1 mrg if (TREE_CODE (addr) != ADDR_EXPR)
1101 1.1 mrg return addr;
1102 1.1 mrg
1103 1.1 mrg return build_fold_addr_expr (TREE_OPERAND (addr, 0));
1104 1.1 mrg }
1105 1.1 mrg
1106 1.1 mrg /* Analyze the behavior of memory reference REF within STMT.
1107 1.1 mrg There are two modes:
1108 1.1 mrg
1109 1.1 mrg - BB analysis. In this case we simply split the address into base,
1110 1.1 mrg init and offset components, without reference to any containing loop.
1111 1.1 mrg The resulting base and offset are general expressions and they can
1112 1.1 mrg vary arbitrarily from one iteration of the containing loop to the next.
1113 1.1 mrg The step is always zero.
1114 1.1 mrg
1115 1.1 mrg - loop analysis. In this case we analyze the reference both wrt LOOP
1116 1.1 mrg and on the basis that the reference occurs (is "used") in LOOP;
1117 1.1 mrg see the comment above analyze_scalar_evolution_in_loop for more
1118 1.1 mrg information about this distinction. The base, init, offset and
1119 1.1 mrg step fields are all invariant in LOOP.
1120 1.1 mrg
1121 1.1 mrg Perform BB analysis if LOOP is null, or if LOOP is the function's
1122 1.1 mrg dummy outermost loop. In other cases perform loop analysis.
1123 1.1 mrg
1124 1.1 mrg Return true if the analysis succeeded and store the results in DRB if so.
1125 1.1 mrg BB analysis can only fail for bitfield or reversed-storage accesses. */
1126 1.1 mrg
1127 1.1 mrg opt_result
1128 1.1 mrg dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
1129 1.1 mrg class loop *loop, const gimple *stmt)
1130 1.1 mrg {
1131 1.1 mrg poly_int64 pbitsize, pbitpos;
1132 1.1 mrg tree base, poffset;
1133 1.1 mrg machine_mode pmode;
1134 1.1 mrg int punsignedp, preversep, pvolatilep;
1135 1.1 mrg affine_iv base_iv, offset_iv;
1136 1.1 mrg tree init, dinit, step;
1137 1.1 mrg bool in_loop = (loop && loop->num);
1138 1.1 mrg
1139 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
1140 1.1 mrg fprintf (dump_file, "analyze_innermost: ");
1141 1.1 mrg
1142 1.1 mrg base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
1143 1.1 mrg &punsignedp, &preversep, &pvolatilep);
1144 1.1 mrg gcc_assert (base != NULL_TREE);
1145 1.1 mrg
1146 1.1 mrg poly_int64 pbytepos;
1147 1.1 mrg if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
1148 1.1 mrg return opt_result::failure_at (stmt,
1149 1.1 mrg "failed: bit offset alignment.\n");
1150 1.1 mrg
1151 1.1 mrg if (preversep)
1152 1.1 mrg return opt_result::failure_at (stmt,
1153 1.1 mrg "failed: reverse storage order.\n");
1154 1.1 mrg
1155 1.1 mrg /* Calculate the alignment and misalignment for the inner reference. */
1156 1.1 mrg unsigned int HOST_WIDE_INT bit_base_misalignment;
1157 1.1 mrg unsigned int bit_base_alignment;
1158 1.1 mrg get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
1159 1.1 mrg
1160 1.1 mrg /* There are no bitfield references remaining in BASE, so the values
1161 1.1 mrg we got back must be whole bytes. */
1162 1.1 mrg gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
1163 1.1 mrg && bit_base_misalignment % BITS_PER_UNIT == 0);
1164 1.1 mrg unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
1165 1.1 mrg poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
1166 1.1 mrg
1167 1.1 mrg if (TREE_CODE (base) == MEM_REF)
1168 1.1 mrg {
1169 1.1 mrg if (!integer_zerop (TREE_OPERAND (base, 1)))
1170 1.1 mrg {
1171 1.1 mrg /* Subtract MOFF from the base and add it to POFFSET instead.
1172 1.1 mrg Adjust the misalignment to reflect the amount we subtracted. */
1173 1.1 mrg poly_offset_int moff = mem_ref_offset (base);
1174 1.1 mrg base_misalignment -= moff.force_shwi ();
1175 1.1 mrg tree mofft = wide_int_to_tree (sizetype, moff);
1176 1.1 mrg if (!poffset)
1177 1.1 mrg poffset = mofft;
1178 1.1 mrg else
1179 1.1 mrg poffset = size_binop (PLUS_EXPR, poffset, mofft);
1180 1.1 mrg }
1181 1.1 mrg base = TREE_OPERAND (base, 0);
1182 1.1 mrg }
1183 1.1 mrg else
1184 1.1 mrg base = build_fold_addr_expr (base);
1185 1.1 mrg
1186 1.1 mrg if (in_loop)
1187 1.1 mrg {
1188 1.1 mrg if (!simple_iv (loop, loop, base, &base_iv, true))
1189 1.1 mrg return opt_result::failure_at
1190 1.1 mrg (stmt, "failed: evolution of base is not affine.\n");
1191 1.1 mrg }
1192 1.1 mrg else
1193 1.1 mrg {
1194 1.1 mrg base_iv.base = base;
1195 1.1 mrg base_iv.step = ssize_int (0);
1196 1.1 mrg base_iv.no_overflow = true;
1197 1.1 mrg }
1198 1.1 mrg
1199 1.1 mrg if (!poffset)
1200 1.1 mrg {
1201 1.1 mrg offset_iv.base = ssize_int (0);
1202 1.1 mrg offset_iv.step = ssize_int (0);
1203 1.1 mrg }
1204 1.1 mrg else
1205 1.1 mrg {
1206 1.1 mrg if (!in_loop)
1207 1.1 mrg {
1208 1.1 mrg offset_iv.base = poffset;
1209 1.1 mrg offset_iv.step = ssize_int (0);
1210 1.1 mrg }
1211 1.1 mrg else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
1212 1.1 mrg return opt_result::failure_at
1213 1.1 mrg (stmt, "failed: evolution of offset is not affine.\n");
1214 1.1 mrg }
1215 1.1 mrg
1216 1.1 mrg init = ssize_int (pbytepos);
1217 1.1 mrg
1218 1.1 mrg /* Subtract any constant component from the base and add it to INIT instead.
1219 1.1 mrg Adjust the misalignment to reflect the amount we subtracted. */
1220 1.1 mrg split_constant_offset (base_iv.base, &base_iv.base, &dinit);
1221 1.1 mrg init = size_binop (PLUS_EXPR, init, dinit);
1222 1.1 mrg base_misalignment -= TREE_INT_CST_LOW (dinit);
1223 1.1 mrg
1224 1.1 mrg split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
1225 1.1 mrg init = size_binop (PLUS_EXPR, init, dinit);
1226 1.1 mrg
1227 1.1 mrg step = size_binop (PLUS_EXPR,
1228 1.1 mrg fold_convert (ssizetype, base_iv.step),
1229 1.1 mrg fold_convert (ssizetype, offset_iv.step));
1230 1.1 mrg
1231 1.1 mrg base = canonicalize_base_object_address (base_iv.base);
1232 1.1 mrg
1233 1.1 mrg /* See if get_pointer_alignment can guarantee a higher alignment than
1234 1.1 mrg the one we calculated above. */
1235 1.1 mrg unsigned int HOST_WIDE_INT alt_misalignment;
1236 1.1 mrg unsigned int alt_alignment;
1237 1.1 mrg get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
1238 1.1 mrg
1239 1.1 mrg /* As above, these values must be whole bytes. */
1240 1.1 mrg gcc_assert (alt_alignment % BITS_PER_UNIT == 0
1241 1.1 mrg && alt_misalignment % BITS_PER_UNIT == 0);
1242 1.1 mrg alt_alignment /= BITS_PER_UNIT;
1243 1.1 mrg alt_misalignment /= BITS_PER_UNIT;
1244 1.1 mrg
1245 1.1 mrg if (base_alignment < alt_alignment)
1246 1.1 mrg {
1247 1.1 mrg base_alignment = alt_alignment;
1248 1.1 mrg base_misalignment = alt_misalignment;
1249 1.1 mrg }
1250 1.1 mrg
1251 1.1 mrg drb->base_address = base;
1252 1.1 mrg drb->offset = fold_convert (ssizetype, offset_iv.base);
1253 1.1 mrg drb->init = init;
1254 1.1 mrg drb->step = step;
1255 1.1 mrg if (known_misalignment (base_misalignment, base_alignment,
1256 1.1 mrg &drb->base_misalignment))
1257 1.1 mrg drb->base_alignment = base_alignment;
1258 1.1 mrg else
1259 1.1 mrg {
1260 1.1 mrg drb->base_alignment = known_alignment (base_misalignment);
1261 1.1 mrg drb->base_misalignment = 0;
1262 1.1 mrg }
1263 1.1 mrg drb->offset_alignment = highest_pow2_factor (offset_iv.base);
1264 1.1 mrg drb->step_alignment = highest_pow2_factor (step);
1265 1.1 mrg
1266 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
1267 1.1 mrg fprintf (dump_file, "success.\n");
1268 1.1 mrg
1269 1.1 mrg return opt_result::success ();
1270 1.1 mrg }
1271 1.1 mrg
1272 1.1 mrg /* Return true if OP is a valid component reference for a DR access
1273 1.1 mrg function. This accepts a subset of what handled_component_p accepts. */
1274 1.1 mrg
1275 1.1 mrg static bool
1276 1.1 mrg access_fn_component_p (tree op)
1277 1.1 mrg {
1278 1.1 mrg switch (TREE_CODE (op))
1279 1.1 mrg {
1280 1.1 mrg case REALPART_EXPR:
1281 1.1 mrg case IMAGPART_EXPR:
1282 1.1 mrg case ARRAY_REF:
1283 1.1 mrg return true;
1284 1.1 mrg
1285 1.1 mrg case COMPONENT_REF:
1286 1.1 mrg return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1287 1.1 mrg
1288 1.1 mrg default:
1289 1.1 mrg return false;
1290 1.1 mrg }
1291 1.1 mrg }
1292 1.1 mrg
1293 1.1 mrg /* Returns whether BASE can have a access_fn_component_p with BASE
1294 1.1 mrg as base. */
1295 1.1 mrg
1296 1.1 mrg static bool
1297 1.1 mrg base_supports_access_fn_components_p (tree base)
1298 1.1 mrg {
1299 1.1 mrg switch (TREE_CODE (TREE_TYPE (base)))
1300 1.1 mrg {
1301 1.1 mrg case COMPLEX_TYPE:
1302 1.1 mrg case ARRAY_TYPE:
1303 1.1 mrg case RECORD_TYPE:
1304 1.1 mrg return true;
1305 1.1 mrg default:
1306 1.1 mrg return false;
1307 1.1 mrg }
1308 1.1 mrg }
1309 1.1 mrg
1310 1.1 mrg /* Determines the base object and the list of indices of memory reference
1311 1.1 mrg DR, analyzed in LOOP and instantiated before NEST. */
1312 1.1 mrg
1313 1.1 mrg static void
1314 1.1 mrg dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
1315 1.1 mrg {
1316 1.1 mrg /* If analyzing a basic-block there are no indices to analyze
1317 1.1 mrg and thus no access functions. */
1318 1.1 mrg if (!nest)
1319 1.1 mrg {
1320 1.1 mrg dri->base_object = ref;
1321 1.1 mrg dri->access_fns.create (0);
1322 1.1 mrg return;
1323 1.1 mrg }
1324 1.1 mrg
1325 1.1 mrg vec<tree> access_fns = vNULL;
1326 1.1 mrg
1327 1.1 mrg /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
1328 1.1 mrg into a two element array with a constant index. The base is
1329 1.1 mrg then just the immediate underlying object. */
1330 1.1 mrg if (TREE_CODE (ref) == REALPART_EXPR)
1331 1.1 mrg {
1332 1.1 mrg ref = TREE_OPERAND (ref, 0);
1333 1.1 mrg access_fns.safe_push (integer_zero_node);
1334 1.1 mrg }
1335 1.1 mrg else if (TREE_CODE (ref) == IMAGPART_EXPR)
1336 1.1 mrg {
1337 1.1 mrg ref = TREE_OPERAND (ref, 0);
1338 1.1 mrg access_fns.safe_push (integer_one_node);
1339 1.1 mrg }
1340 1.1 mrg
1341 1.1 mrg /* Analyze access functions of dimensions we know to be independent.
1342 1.1 mrg The list of component references handled here should be kept in
1343 1.1 mrg sync with access_fn_component_p. */
1344 1.1 mrg while (handled_component_p (ref))
1345 1.1 mrg {
1346 1.1 mrg if (TREE_CODE (ref) == ARRAY_REF)
1347 1.1 mrg {
1348 1.1 mrg tree op = TREE_OPERAND (ref, 1);
1349 1.1 mrg tree access_fn = analyze_scalar_evolution (loop, op);
1350 1.1 mrg access_fn = instantiate_scev (nest, loop, access_fn);
1351 1.1 mrg access_fns.safe_push (access_fn);
1352 1.1 mrg }
1353 1.1 mrg else if (TREE_CODE (ref) == COMPONENT_REF
1354 1.1 mrg && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1355 1.1 mrg {
1356 1.1 mrg /* For COMPONENT_REFs of records (but not unions!) use the
1357 1.1 mrg FIELD_DECL offset as constant access function so we can
1358 1.1 mrg disambiguate a[i].f1 and a[i].f2. */
1359 1.1 mrg tree off = component_ref_field_offset (ref);
1360 1.1 mrg off = size_binop (PLUS_EXPR,
1361 1.1 mrg size_binop (MULT_EXPR,
1362 1.1 mrg fold_convert (bitsizetype, off),
1363 1.1 mrg bitsize_int (BITS_PER_UNIT)),
1364 1.1 mrg DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1365 1.1 mrg access_fns.safe_push (off);
1366 1.1 mrg }
1367 1.1 mrg else
1368 1.1 mrg /* If we have an unhandled component we could not translate
1369 1.1 mrg to an access function stop analyzing. We have determined
1370 1.1 mrg our base object in this case. */
1371 1.1 mrg break;
1372 1.1 mrg
1373 1.1 mrg ref = TREE_OPERAND (ref, 0);
1374 1.1 mrg }
1375 1.1 mrg
1376 1.1 mrg /* If the address operand of a MEM_REF base has an evolution in the
1377 1.1 mrg analyzed nest, add it as an additional independent access-function. */
1378 1.1 mrg if (TREE_CODE (ref) == MEM_REF)
1379 1.1 mrg {
1380 1.1 mrg tree op = TREE_OPERAND (ref, 0);
1381 1.1 mrg tree access_fn = analyze_scalar_evolution (loop, op);
1382 1.1 mrg access_fn = instantiate_scev (nest, loop, access_fn);
1383 1.1 mrg STRIP_NOPS (access_fn);
1384 1.1 mrg if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1385 1.1 mrg {
1386 1.1 mrg tree memoff = TREE_OPERAND (ref, 1);
1387 1.1 mrg tree base = initial_condition (access_fn);
1388 1.1 mrg tree orig_type = TREE_TYPE (base);
1389 1.1 mrg STRIP_USELESS_TYPE_CONVERSION (base);
1390 1.1 mrg tree off;
1391 1.1 mrg split_constant_offset (base, &base, &off);
1392 1.1 mrg STRIP_USELESS_TYPE_CONVERSION (base);
1393 1.1 mrg /* Fold the MEM_REF offset into the evolutions initial
1394 1.1 mrg value to make more bases comparable. */
1395 1.1 mrg if (!integer_zerop (memoff))
1396 1.1 mrg {
1397 1.1 mrg off = size_binop (PLUS_EXPR, off,
1398 1.1 mrg fold_convert (ssizetype, memoff));
1399 1.1 mrg memoff = build_int_cst (TREE_TYPE (memoff), 0);
1400 1.1 mrg }
1401 1.1 mrg /* Adjust the offset so it is a multiple of the access type
1402 1.1 mrg size and thus we separate bases that can possibly be used
1403 1.1 mrg to produce partial overlaps (which the access_fn machinery
1404 1.1 mrg cannot handle). */
1405 1.1 mrg wide_int rem;
1406 1.1 mrg if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1407 1.1 mrg && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1408 1.1 mrg && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1409 1.1 mrg rem = wi::mod_trunc
1410 1.1 mrg (wi::to_wide (off),
1411 1.1 mrg wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1412 1.1 mrg SIGNED);
1413 1.1 mrg else
1414 1.1 mrg /* If we can't compute the remainder simply force the initial
1415 1.1 mrg condition to zero. */
1416 1.1 mrg rem = wi::to_wide (off);
1417 1.1 mrg off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1418 1.1 mrg memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1419 1.1 mrg /* And finally replace the initial condition. */
1420 1.1 mrg access_fn = chrec_replace_initial_condition
1421 1.1 mrg (access_fn, fold_convert (orig_type, off));
1422 1.1 mrg /* ??? This is still not a suitable base object for
1423 1.1 mrg dr_may_alias_p - the base object needs to be an
1424 1.1 mrg access that covers the object as whole. With
1425 1.1 mrg an evolution in the pointer this cannot be
1426 1.1 mrg guaranteed.
1427 1.1 mrg As a band-aid, mark the access so we can special-case
1428 1.1 mrg it in dr_may_alias_p. */
1429 1.1 mrg tree old = ref;
1430 1.1 mrg ref = fold_build2_loc (EXPR_LOCATION (ref),
1431 1.1 mrg MEM_REF, TREE_TYPE (ref),
1432 1.1 mrg base, memoff);
1433 1.1 mrg MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1434 1.1 mrg MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1435 1.1 mrg dri->unconstrained_base = true;
1436 1.1 mrg access_fns.safe_push (access_fn);
1437 1.1 mrg }
1438 1.1 mrg }
1439 1.1 mrg else if (DECL_P (ref))
1440 1.1 mrg {
1441 1.1 mrg /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
1442 1.1 mrg ref = build2 (MEM_REF, TREE_TYPE (ref),
1443 1.1 mrg build_fold_addr_expr (ref),
1444 1.1 mrg build_int_cst (reference_alias_ptr_type (ref), 0));
1445 1.1 mrg }
1446 1.1 mrg
1447 1.1 mrg dri->base_object = ref;
1448 1.1 mrg dri->access_fns = access_fns;
1449 1.1 mrg }
1450 1.1 mrg
1451 1.1 mrg /* Extracts the alias analysis information from the memory reference DR. */
1452 1.1 mrg
1453 1.1 mrg static void
1454 1.1 mrg dr_analyze_alias (struct data_reference *dr)
1455 1.1 mrg {
1456 1.1 mrg tree ref = DR_REF (dr);
1457 1.1 mrg tree base = get_base_address (ref), addr;
1458 1.1 mrg
1459 1.1 mrg if (INDIRECT_REF_P (base)
1460 1.1 mrg || TREE_CODE (base) == MEM_REF)
1461 1.1 mrg {
1462 1.1 mrg addr = TREE_OPERAND (base, 0);
1463 1.1 mrg if (TREE_CODE (addr) == SSA_NAME)
1464 1.1 mrg DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1465 1.1 mrg }
1466 1.1 mrg }
1467 1.1 mrg
1468 1.1 mrg /* Frees data reference DR. */
1469 1.1 mrg
1470 1.1 mrg void
1471 1.1 mrg free_data_ref (data_reference_p dr)
1472 1.1 mrg {
1473 1.1 mrg DR_ACCESS_FNS (dr).release ();
1474 1.1 mrg if (dr->alt_indices.base_object)
1475 1.1 mrg dr->alt_indices.access_fns.release ();
1476 1.1 mrg free (dr);
1477 1.1 mrg }
1478 1.1 mrg
1479 1.1 mrg /* Analyze memory reference MEMREF, which is accessed in STMT.
1480 1.1 mrg The reference is a read if IS_READ is true, otherwise it is a write.
1481 1.1 mrg IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1482 1.1 mrg within STMT, i.e. that it might not occur even if STMT is executed
1483 1.1 mrg and runs to completion.
1484 1.1 mrg
1485 1.1 mrg Return the data_reference description of MEMREF. NEST is the outermost
1486 1.1 mrg loop in which the reference should be instantiated, LOOP is the loop
1487 1.1 mrg in which the data reference should be analyzed. */
1488 1.1 mrg
1489 1.1 mrg struct data_reference *
1490 1.1 mrg create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1491 1.1 mrg bool is_read, bool is_conditional_in_stmt)
1492 1.1 mrg {
1493 1.1 mrg struct data_reference *dr;
1494 1.1 mrg
1495 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
1496 1.1 mrg {
1497 1.1 mrg fprintf (dump_file, "Creating dr for ");
1498 1.1 mrg print_generic_expr (dump_file, memref, TDF_SLIM);
1499 1.1 mrg fprintf (dump_file, "\n");
1500 1.1 mrg }
1501 1.1 mrg
1502 1.1 mrg dr = XCNEW (struct data_reference);
1503 1.1 mrg DR_STMT (dr) = stmt;
1504 1.1 mrg DR_REF (dr) = memref;
1505 1.1 mrg DR_IS_READ (dr) = is_read;
1506 1.1 mrg DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1507 1.1 mrg
1508 1.1 mrg dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1509 1.1 mrg nest != NULL ? loop : NULL, stmt);
1510 1.1 mrg dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
1511 1.1 mrg dr_analyze_alias (dr);
1512 1.1 mrg
1513 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
1514 1.1 mrg {
1515 1.1 mrg unsigned i;
1516 1.1 mrg fprintf (dump_file, "\tbase_address: ");
1517 1.1 mrg print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1518 1.1 mrg fprintf (dump_file, "\n\toffset from base address: ");
1519 1.1 mrg print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1520 1.1 mrg fprintf (dump_file, "\n\tconstant offset from base address: ");
1521 1.1 mrg print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1522 1.1 mrg fprintf (dump_file, "\n\tstep: ");
1523 1.1 mrg print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1524 1.1 mrg fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1525 1.1 mrg fprintf (dump_file, "\n\tbase misalignment: %d",
1526 1.1 mrg DR_BASE_MISALIGNMENT (dr));
1527 1.1 mrg fprintf (dump_file, "\n\toffset alignment: %d",
1528 1.1 mrg DR_OFFSET_ALIGNMENT (dr));
1529 1.1 mrg fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1530 1.1 mrg fprintf (dump_file, "\n\tbase_object: ");
1531 1.1 mrg print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1532 1.1 mrg fprintf (dump_file, "\n");
1533 1.1 mrg for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1534 1.1 mrg {
1535 1.1 mrg fprintf (dump_file, "\tAccess function %d: ", i);
1536 1.1 mrg print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1537 1.1 mrg }
1538 1.1 mrg }
1539 1.1 mrg
1540 1.1 mrg return dr;
1541 1.1 mrg }
1542 1.1 mrg
1543 1.1 mrg /* A helper function computes order between two tree expressions T1 and T2.
1544 1.1 mrg This is used in comparator functions sorting objects based on the order
1545 1.1 mrg of tree expressions. The function returns -1, 0, or 1. */
1546 1.1 mrg
1547 1.1 mrg int
1548 1.1 mrg data_ref_compare_tree (tree t1, tree t2)
1549 1.1 mrg {
1550 1.1 mrg int i, cmp;
1551 1.1 mrg enum tree_code code;
1552 1.1 mrg char tclass;
1553 1.1 mrg
1554 1.1 mrg if (t1 == t2)
1555 1.1 mrg return 0;
1556 1.1 mrg if (t1 == NULL)
1557 1.1 mrg return -1;
1558 1.1 mrg if (t2 == NULL)
1559 1.1 mrg return 1;
1560 1.1 mrg
1561 1.1 mrg STRIP_USELESS_TYPE_CONVERSION (t1);
1562 1.1 mrg STRIP_USELESS_TYPE_CONVERSION (t2);
1563 1.1 mrg if (t1 == t2)
1564 1.1 mrg return 0;
1565 1.1 mrg
1566 1.1 mrg if (TREE_CODE (t1) != TREE_CODE (t2)
1567 1.1 mrg && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1568 1.1 mrg return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1569 1.1 mrg
1570 1.1 mrg code = TREE_CODE (t1);
1571 1.1 mrg switch (code)
1572 1.1 mrg {
1573 1.1 mrg case INTEGER_CST:
1574 1.1 mrg return tree_int_cst_compare (t1, t2);
1575 1.1 mrg
1576 1.1 mrg case STRING_CST:
1577 1.1 mrg if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1578 1.1 mrg return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1579 1.1 mrg return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1580 1.1 mrg TREE_STRING_LENGTH (t1));
1581 1.1 mrg
1582 1.1 mrg case SSA_NAME:
1583 1.1 mrg if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1584 1.1 mrg return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1585 1.1 mrg break;
1586 1.1 mrg
1587 1.1 mrg default:
1588 1.1 mrg if (POLY_INT_CST_P (t1))
1589 1.1 mrg return compare_sizes_for_sort (wi::to_poly_widest (t1),
1590 1.1 mrg wi::to_poly_widest (t2));
1591 1.1 mrg
1592 1.1 mrg tclass = TREE_CODE_CLASS (code);
1593 1.1 mrg
1594 1.1 mrg /* For decls, compare their UIDs. */
1595 1.1 mrg if (tclass == tcc_declaration)
1596 1.1 mrg {
1597 1.1 mrg if (DECL_UID (t1) != DECL_UID (t2))
1598 1.1 mrg return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1599 1.1 mrg break;
1600 1.1 mrg }
1601 1.1 mrg /* For expressions, compare their operands recursively. */
1602 1.1 mrg else if (IS_EXPR_CODE_CLASS (tclass))
1603 1.1 mrg {
1604 1.1 mrg for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1605 1.1 mrg {
1606 1.1 mrg cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1607 1.1 mrg TREE_OPERAND (t2, i));
1608 1.1 mrg if (cmp != 0)
1609 1.1 mrg return cmp;
1610 1.1 mrg }
1611 1.1 mrg }
1612 1.1 mrg else
1613 1.1 mrg gcc_unreachable ();
1614 1.1 mrg }
1615 1.1 mrg
1616 1.1 mrg return 0;
1617 1.1 mrg }
1618 1.1 mrg
1619 1.1 mrg /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1620 1.1 mrg check. */
1621 1.1 mrg
1622 1.1 mrg opt_result
1623 1.1 mrg runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
1624 1.1 mrg {
1625 1.1 mrg if (dump_enabled_p ())
1626 1.1 mrg dump_printf (MSG_NOTE,
1627 1.1 mrg "consider run-time aliasing test between %T and %T\n",
1628 1.1 mrg DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
1629 1.1 mrg
1630 1.1 mrg if (!speed_p)
1631 1.1 mrg return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1632 1.1 mrg "runtime alias check not supported when"
1633 1.1 mrg " optimizing for size.\n");
1634 1.1 mrg
1635 1.1 mrg /* FORNOW: We don't support versioning with outer-loop in either
1636 1.1 mrg vectorization or loop distribution. */
1637 1.1 mrg if (loop != NULL && loop->inner != NULL)
1638 1.1 mrg return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1639 1.1 mrg "runtime alias check not supported for"
1640 1.1 mrg " outer loop.\n");
1641 1.1 mrg
1642 1.1 mrg /* FORNOW: We don't support handling different address spaces. */
1643 1.1 mrg if (TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_A (ddr)))))
1644 1.1 mrg != TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_B (ddr))))))
1645 1.1 mrg return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1646 1.1 mrg "runtime alias check between different "
1647 1.1 mrg "address spaces not supported.\n");
1648 1.1 mrg
1649 1.1 mrg return opt_result::success ();
1650 1.1 mrg }
1651 1.1 mrg
1652 1.1 mrg /* Operator == between two dr_with_seg_len objects.
1653 1.1 mrg
1654 1.1 mrg This equality operator is used to make sure two data refs
1655 1.1 mrg are the same one so that we will consider to combine the
1656 1.1 mrg aliasing checks of those two pairs of data dependent data
1657 1.1 mrg refs. */
1658 1.1 mrg
1659 1.1 mrg static bool
1660 1.1 mrg operator == (const dr_with_seg_len& d1,
1661 1.1 mrg const dr_with_seg_len& d2)
1662 1.1 mrg {
1663 1.1 mrg return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1664 1.1 mrg DR_BASE_ADDRESS (d2.dr), 0)
1665 1.1 mrg && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1666 1.1 mrg && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1667 1.1 mrg && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1668 1.1 mrg && known_eq (d1.access_size, d2.access_size)
1669 1.1 mrg && d1.align == d2.align);
1670 1.1 mrg }
1671 1.1 mrg
1672 1.1 mrg /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1673 1.1 mrg so that we can combine aliasing checks in one scan. */
1674 1.1 mrg
1675 1.1 mrg static int
1676 1.1 mrg comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1677 1.1 mrg {
1678 1.1 mrg const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1679 1.1 mrg const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1680 1.1 mrg const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1681 1.1 mrg const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1682 1.1 mrg
1683 1.1 mrg /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1684 1.1 mrg if a and c have the same basic address snd step, and b and d have the same
1685 1.1 mrg address and step. Therefore, if any a&c or b&d don't have the same address
1686 1.1 mrg and step, we don't care the order of those two pairs after sorting. */
1687 1.1 mrg int comp_res;
1688 1.1 mrg
1689 1.1 mrg if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1690 1.1 mrg DR_BASE_ADDRESS (b1.dr))) != 0)
1691 1.1 mrg return comp_res;
1692 1.1 mrg if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1693 1.1 mrg DR_BASE_ADDRESS (b2.dr))) != 0)
1694 1.1 mrg return comp_res;
1695 1.1 mrg if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1696 1.1 mrg DR_STEP (b1.dr))) != 0)
1697 1.1 mrg return comp_res;
1698 1.1 mrg if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1699 1.1 mrg DR_STEP (b2.dr))) != 0)
1700 1.1 mrg return comp_res;
1701 1.1 mrg if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1702 1.1 mrg DR_OFFSET (b1.dr))) != 0)
1703 1.1 mrg return comp_res;
1704 1.1 mrg if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1705 1.1 mrg DR_INIT (b1.dr))) != 0)
1706 1.1 mrg return comp_res;
1707 1.1 mrg if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1708 1.1 mrg DR_OFFSET (b2.dr))) != 0)
1709 1.1 mrg return comp_res;
1710 1.1 mrg if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1711 1.1 mrg DR_INIT (b2.dr))) != 0)
1712 1.1 mrg return comp_res;
1713 1.1 mrg
1714 1.1 mrg return 0;
1715 1.1 mrg }
1716 1.1 mrg
1717 1.1 mrg /* Dump information about ALIAS_PAIR, indenting each line by INDENT. */
1718 1.1 mrg
1719 1.1 mrg static void
1720 1.1 mrg dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
1721 1.1 mrg {
1722 1.1 mrg dump_printf (MSG_NOTE, "%sreference: %T vs. %T\n", indent,
1723 1.1 mrg DR_REF (alias_pair->first.dr),
1724 1.1 mrg DR_REF (alias_pair->second.dr));
1725 1.1 mrg
1726 1.1 mrg dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
1727 1.1 mrg alias_pair->first.seg_len);
1728 1.1 mrg if (!operand_equal_p (alias_pair->first.seg_len,
1729 1.1 mrg alias_pair->second.seg_len, 0))
1730 1.1 mrg dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);
1731 1.1 mrg
1732 1.1 mrg dump_printf (MSG_NOTE, "\n%saccess size: ", indent);
1733 1.1 mrg dump_dec (MSG_NOTE, alias_pair->first.access_size);
1734 1.1 mrg if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
1735 1.1 mrg {
1736 1.1 mrg dump_printf (MSG_NOTE, " vs. ");
1737 1.1 mrg dump_dec (MSG_NOTE, alias_pair->second.access_size);
1738 1.1 mrg }
1739 1.1 mrg
1740 1.1 mrg dump_printf (MSG_NOTE, "\n%salignment: %d", indent,
1741 1.1 mrg alias_pair->first.align);
1742 1.1 mrg if (alias_pair->first.align != alias_pair->second.align)
1743 1.1 mrg dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);
1744 1.1 mrg
1745 1.1 mrg dump_printf (MSG_NOTE, "\n%sflags: ", indent);
1746 1.1 mrg if (alias_pair->flags & DR_ALIAS_RAW)
1747 1.1 mrg dump_printf (MSG_NOTE, " RAW");
1748 1.1 mrg if (alias_pair->flags & DR_ALIAS_WAR)
1749 1.1 mrg dump_printf (MSG_NOTE, " WAR");
1750 1.1 mrg if (alias_pair->flags & DR_ALIAS_WAW)
1751 1.1 mrg dump_printf (MSG_NOTE, " WAW");
1752 1.1 mrg if (alias_pair->flags & DR_ALIAS_ARBITRARY)
1753 1.1 mrg dump_printf (MSG_NOTE, " ARBITRARY");
1754 1.1 mrg if (alias_pair->flags & DR_ALIAS_SWAPPED)
1755 1.1 mrg dump_printf (MSG_NOTE, " SWAPPED");
1756 1.1 mrg if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
1757 1.1 mrg dump_printf (MSG_NOTE, " UNSWAPPED");
1758 1.1 mrg if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
1759 1.1 mrg dump_printf (MSG_NOTE, " MIXED_STEPS");
1760 1.1 mrg if (alias_pair->flags == 0)
1761 1.1 mrg dump_printf (MSG_NOTE, " <none>");
1762 1.1 mrg dump_printf (MSG_NOTE, "\n");
1763 1.1 mrg }
1764 1.1 mrg
/* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
   The second (unnamed, currently unused) parameter is the number of
   iterations that each data reference is accessed, historically called
   FACTOR.

   Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
   we create an expression:

   ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
   || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))

   for aliasing checks.  However, in some cases we can decrease the number
   of checks by combining two checks into one.  For example, suppose we have
   another pair of data refs store_ptr_0 & load_ptr_1, and if the following
   condition is satisfied:

   load_ptr_0 < load_ptr_1  &&
   load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0

   (this condition means, in each iteration of vectorized loop, the accessed
   memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
   load_ptr_1.)

   we then can use only the following expression to finish the aliasing
   checks between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:

   ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
   || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))

   Note that we only consider that load_ptr_0 and load_ptr_1 have the same
   basic address.  */

void
prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
                               poly_uint64)
{
  if (alias_pairs->is_empty ())
    return;

  /* Canonicalize each pair so that the base components are ordered wrt
     data_ref_compare_tree.  This allows the loop below to merge more
     cases.  The SWAPPED/UNSWAPPED flags record which direction each
     pair went so that the final pass can restore the original order.  */
  unsigned int i;
  dr_with_seg_len_pair_t *alias_pair;
  FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
    {
      data_reference_p dr_a = alias_pair->first.dr;
      data_reference_p dr_b = alias_pair->second.dr;
      int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
                                            DR_BASE_ADDRESS (dr_b));
      if (comp_res == 0)
        comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
      if (comp_res == 0)
        comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
      if (comp_res > 0)
        {
          std::swap (alias_pair->first, alias_pair->second);
          alias_pair->flags |= DR_ALIAS_SWAPPED;
        }
      else
        alias_pair->flags |= DR_ALIAS_UNSWAPPED;
    }

  /* Sort the collected data ref pairs so that we can scan them once to
     combine all possible aliasing checks.  */
  alias_pairs->qsort (comp_dr_with_seg_len_pair);

  /* Scan the sorted dr pairs and check if we can combine alias checks
     of two neighboring dr pairs.  LAST is the index of the last pair
     kept so far; pairs are compacted in place as we go.  */
  unsigned int last = 0;
  for (i = 1; i < alias_pairs->length (); ++i)
    {
      /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
      dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
      dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];

      dr_with_seg_len *dr_a1 = &alias_pair1->first;
      dr_with_seg_len *dr_b1 = &alias_pair1->second;
      dr_with_seg_len *dr_a2 = &alias_pair2->first;
      dr_with_seg_len *dr_b2 = &alias_pair2->second;

      /* Remove duplicate data ref pairs, keeping the union of their
         dependence flags.  */
      if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
        {
          if (dump_enabled_p ())
            dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
                         DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
                         DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
          alias_pair1->flags |= alias_pair2->flags;
          continue;
        }

      /* Assume that we won't be able to merge the pairs, then correct
         if we do.  */
      last += 1;
      if (last != i)
        (*alias_pairs)[last] = (*alias_pairs)[i];

      if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
        {
          /* We consider the case that DR_B1 and DR_B2 are same memrefs,
             and DR_A1 and DR_A2 are two consecutive memrefs.  If instead
             the DR_As match, swap roles so the code below only has to
             handle one orientation.  */
          if (*dr_a1 == *dr_a2)
            {
              std::swap (dr_a1, dr_b1);
              std::swap (dr_a2, dr_b2);
            }

          poly_int64 init_a1, init_a2;
          /* Only consider cases in which the distance between the initial
             DR_A1 and the initial DR_A2 is known at compile time.  */
          if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
                                DR_BASE_ADDRESS (dr_a2->dr), 0)
              || !operand_equal_p (DR_OFFSET (dr_a1->dr),
                                   DR_OFFSET (dr_a2->dr), 0)
              || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
              || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
            continue;

          /* Don't combine if we can't tell which one comes first.  */
          if (!ordered_p (init_a1, init_a2))
            continue;

          /* Work out what the segment length would be if we did combine
             DR_A1 and DR_A2:

             - If DR_A1 and DR_A2 have equal lengths, that length is
               also the combined length.

             - If DR_A1 and DR_A2 both have negative "lengths", the combined
               length is the lower bound on those lengths.

             - If DR_A1 and DR_A2 both have positive lengths, the combined
               length is the upper bound on those lengths.

             Other cases are unlikely to give a useful combination.

             The lengths both have sizetype, so the sign is taken from
             the step instead.  */
          poly_uint64 new_seg_len = 0;
          bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
                                                 dr_a2->seg_len, 0);
          if (new_seg_len_p)
            {
              poly_uint64 seg_len_a1, seg_len_a2;
              if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
                  || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
                continue;

              tree indicator_a = dr_direction_indicator (dr_a1->dr);
              if (TREE_CODE (indicator_a) != INTEGER_CST)
                continue;

              tree indicator_b = dr_direction_indicator (dr_a2->dr);
              if (TREE_CODE (indicator_b) != INTEGER_CST)
                continue;

              int sign_a = tree_int_cst_sgn (indicator_a);
              int sign_b = tree_int_cst_sgn (indicator_b);

              if (sign_a <= 0 && sign_b <= 0)
                new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
              else if (sign_a >= 0 && sign_b >= 0)
                new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
              else
                continue;
            }
          /* At this point we're committed to merging the refs.  */

          /* Make sure dr_a1 starts left of dr_a2.  */
          if (maybe_gt (init_a1, init_a2))
            {
              std::swap (*dr_a1, *dr_a2);
              std::swap (init_a1, init_a2);
            }

          /* The DR_Bs are equal, so only the DR_As can introduce
             mixed steps.  */
          if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
            alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;

          if (new_seg_len_p)
            {
              dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
                                              new_seg_len);
              dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
            }

          /* This is always positive due to the swap above.  */
          poly_uint64 diff = init_a2 - init_a1;

          /* The new check will start at DR_A1.  Make sure that its access
             size encompasses the initial DR_A2.  */
          if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
            {
              dr_a1->access_size = upper_bound (dr_a1->access_size,
                                                diff + dr_a2->access_size);
              unsigned int new_align = known_alignment (dr_a1->access_size);
              dr_a1->align = MIN (dr_a1->align, new_align);
            }
          if (dump_enabled_p ())
            dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
                         DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
                         DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
          alias_pair1->flags |= alias_pair2->flags;
          /* The merged result stays at index LAST - 1; undo the
             provisional "keep" above.  */
          last -= 1;
        }
    }
  alias_pairs->truncate (last + 1);

  /* Try to restore the original dr_with_seg_len order within each
     dr_with_seg_len_pair_t.  If we ended up combining swapped and
     unswapped pairs into the same check, we have to invalidate any
     RAW, WAR and WAW information for it.  */
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "merged alias checks:\n");
  FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
    {
      unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
      unsigned int swapped = (alias_pair->flags & swap_mask);
      /* Both flags set means swapped and unswapped pairs were merged,
         so the original order is unrecoverable.  */
      if (swapped == DR_ALIAS_SWAPPED)
        std::swap (alias_pair->first, alias_pair->second);
      else if (swapped != DR_ALIAS_UNSWAPPED)
        alias_pair->flags |= DR_ALIAS_ARBITRARY;
      alias_pair->flags &= ~swap_mask;
      if (dump_enabled_p ())
        dump_alias_pair (alias_pair, "  ");
    }
}
1992 1.1 mrg
1993 1.1 mrg /* A subroutine of create_intersect_range_checks, with a subset of the
1994 1.1 mrg same arguments. Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
1995 1.1 mrg to optimize cases in which the references form a simple RAW, WAR or
1996 1.1 mrg WAR dependence. */
1997 1.1 mrg
1998 1.1 mrg static bool
1999 1.1 mrg create_ifn_alias_checks (tree *cond_expr,
2000 1.1 mrg const dr_with_seg_len_pair_t &alias_pair)
2001 1.1 mrg {
2002 1.1 mrg const dr_with_seg_len& dr_a = alias_pair.first;
2003 1.1 mrg const dr_with_seg_len& dr_b = alias_pair.second;
2004 1.1 mrg
2005 1.1 mrg /* Check for cases in which:
2006 1.1 mrg
2007 1.1 mrg (a) we have a known RAW, WAR or WAR dependence
2008 1.1 mrg (b) the accesses are well-ordered in both the original and new code
2009 1.1 mrg (see the comment above the DR_ALIAS_* flags for details); and
2010 1.1 mrg (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
2011 1.1 mrg if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
2012 1.1 mrg return false;
2013 1.1 mrg
2014 1.1 mrg /* Make sure that both DRs access the same pattern of bytes,
2015 1.1 mrg with a constant length and step. */
2016 1.1 mrg poly_uint64 seg_len;
2017 1.1 mrg if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
2018 1.1 mrg || !poly_int_tree_p (dr_a.seg_len, &seg_len)
2019 1.1 mrg || maybe_ne (dr_a.access_size, dr_b.access_size)
2020 1.1 mrg || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
2021 1.1 mrg || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
2022 1.1 mrg return false;
2023 1.1 mrg
2024 1.1 mrg unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
2025 1.1 mrg tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
2026 1.1 mrg tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
2027 1.1 mrg
2028 1.1 mrg /* See whether the target suports what we want to do. WAW checks are
2029 1.1 mrg equivalent to WAR checks here. */
2030 1.1 mrg internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
2031 1.1 mrg ? IFN_CHECK_RAW_PTRS
2032 1.1 mrg : IFN_CHECK_WAR_PTRS);
2033 1.1 mrg unsigned int align = MIN (dr_a.align, dr_b.align);
2034 1.1 mrg poly_uint64 full_length = seg_len + bytes;
2035 1.1 mrg if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2036 1.1 mrg full_length, align))
2037 1.1 mrg {
2038 1.1 mrg full_length = seg_len + dr_a.access_size;
2039 1.1 mrg if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2040 1.1 mrg full_length, align))
2041 1.1 mrg return false;
2042 1.1 mrg }
2043 1.1 mrg
2044 1.1 mrg /* Commit to using this form of test. */
2045 1.1 mrg addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
2046 1.1 mrg addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2047 1.1 mrg
2048 1.1 mrg addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
2049 1.1 mrg addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2050 1.1 mrg
2051 1.1 mrg *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
2052 1.1 mrg ifn, boolean_type_node,
2053 1.1 mrg 4, addr_a, addr_b,
2054 1.1 mrg size_int (full_length),
2055 1.1 mrg size_int (align));
2056 1.1 mrg
2057 1.1 mrg if (dump_enabled_p ())
2058 1.1 mrg {
2059 1.1 mrg if (ifn == IFN_CHECK_RAW_PTRS)
2060 1.1 mrg dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
2061 1.1 mrg else
2062 1.1 mrg dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
2063 1.1 mrg }
2064 1.1 mrg return true;
2065 1.1 mrg }
2066 1.1 mrg
/* Try to generate a runtime condition that is true if ALIAS_PAIR is
   free of aliases, using a condition based on index values instead
   of a condition based on addresses.  Return true on success,
   storing the condition in *COND_EXPR.

   This can only be done if the two data references in ALIAS_PAIR access
   the same array object and the index is the only difference.  For example,
   if the two data references are DR_A and DR_B:

                       DR_A                           DR_B
      data-ref         arr[i]                         arr[j]
      base_object      arr                            arr
      index            {i_0, +, 1}_loop               {j_0, +, 1}_loop

   The addresses and their index are like:

        |<- ADDR_A    ->|          |<- ADDR_B    ->|
     ------------------------------------------------------->
        |   |   |   |   |          |   |   |   |   |
     ------------------------------------------------------->
        i_0 ...         i_0+4      j_0 ...         j_0+4

   We can create expression based on index rather than address:

     (unsigned) (i_0 - j_0 + 3) <= 6

   i.e. the indices are less than 4 apart.

   Note evolution step of index needs to be considered in comparison.  */

static bool
create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
                                     const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len &dr_a = alias_pair.first;
  const dr_with_seg_len &dr_b = alias_pair.second;
  /* The index-based form cannot describe mixed steps, zero steps, or
     accesses with different dimensionality.  */
  if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
      || integer_zerop (DR_STEP (dr_a.dr))
      || integer_zerop (DR_STEP (dr_b.dr))
      || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
    return false;

  /* Both segment lengths must be compile-time constants.  */
  poly_uint64 seg_len1, seg_len2;
  if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
      || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
    return false;

  if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
    return false;

  /* Both references must be to the same array object.  */
  if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
    return false;

  /* ... and advance by the same step each iteration.  */
  if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
    return false;

  gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);

  bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
  unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
  if (neg_step)
    {
      /* Normalize to positive step and positive segment lengths.  */
      abs_step = -abs_step;
      seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
      seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
    }

  /* Infer the number of iterations with which the memory segment is accessed
     by DR.  In other words, alias is checked if memory segment accessed by
     DR_A in some iterations intersects with memory segment accessed by DR_B
     in the same amount of iterations.
     Note segment length is a linear function of number of iterations with
     DR_STEP as the coefficient.  */
  poly_uint64 niter_len1, niter_len2;
  if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
      || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
    return false;

  /* Divide each access size by the byte step, rounding up.  */
  poly_uint64 niter_access1, niter_access2;
  if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
                        abs_step, &niter_access1)
      || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
                           abs_step, &niter_access2))
    return false;

  /* True if the only dependences are WAR/WAW, which permits the
     cheaper one-sided test derived below.  */
  bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;

  /* Find the single dimension in which the two access functions differ;
     all other dimensions must be structurally equal.  */
  int found = -1;
  for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
    {
      tree access1 = DR_ACCESS_FN (dr_a.dr, i);
      tree access2 = DR_ACCESS_FN (dr_b.dr, i);
      /* Two indices must be the same if they are not scev, or not scev wrt
         current loop being vectorized.  */
      if (TREE_CODE (access1) != POLYNOMIAL_CHREC
          || TREE_CODE (access2) != POLYNOMIAL_CHREC
          || CHREC_VARIABLE (access1) != (unsigned)loop->num
          || CHREC_VARIABLE (access2) != (unsigned)loop->num)
        {
          if (operand_equal_p (access1, access2, 0))
            continue;

          return false;
        }
      if (found >= 0)
        return false;
      found = i;
    }

  /* Ought not to happen in practice, since if all accesses are equal then the
     alias should be decidable at compile time.  */
  if (found < 0)
    return false;

  /* The two indices must have the same step.  */
  tree access1 = DR_ACCESS_FN (dr_a.dr, found);
  tree access2 = DR_ACCESS_FN (dr_b.dr, found);
  if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
    return false;

  tree idx_step = CHREC_RIGHT (access1);
  /* Index must have const step, otherwise DR_STEP won't be constant.  */
  gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
  /* Index must evaluate in the same direction as DR.  */
  gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);

  tree min1 = CHREC_LEFT (access1);
  tree min2 = CHREC_LEFT (access2);
  if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
    return false;

  /* Ideally, alias can be checked against loop's control IV, but we
     need to prove linear mapping between control IV and reference
     index.  Although that should be true, we check against (array)
     index of data reference.  Like segment length, index length is
     linear function of the number of iterations with index_step as
     the coefficient, i.e, niter_len * idx_step.  */
  offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
                                              SIGNED);
  if (neg_step)
    abs_idx_step = -abs_idx_step;
  poly_offset_int idx_len1 = abs_idx_step * niter_len1;
  poly_offset_int idx_len2 = abs_idx_step * niter_len2;
  poly_offset_int idx_access1 = abs_idx_step * niter_access1;
  poly_offset_int idx_access2 = abs_idx_step * niter_access2;

  gcc_assert (known_ge (idx_len1, 0)
              && known_ge (idx_len2, 0)
              && known_ge (idx_access1, 0)
              && known_ge (idx_access2, 0));

  /* Each access has the following pattern, with lengths measured
     in units of INDEX:

              <-- idx_len -->
              <--- A: -ve step --->
     +-----+-------+-----+-------+-----+
     | n-1 | ..... |  0  | ..... | n-1 |
     +-----+-------+-----+-------+-----+
                   <--- B: +ve step --->
                   <-- idx_len -->
                   |
                  min

     where "n" is the number of scalar iterations covered by the segment
     and where each access spans idx_access units.

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     When checking for general overlap, we need to test whether
     the range:

       [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]

     overlaps:

       [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]

     where:

       low_offsetN = +ve step ? 0 : -idx_lenN;
       high_offsetN = +ve step ? idx_lenN : 0;

     This is equivalent to testing whether:

       min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
       && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1

     Converting this into a single test, there is an overlap if:

       0 <= min2 - min1 + bias <= limit

     where  bias = high_offset2 + idx_access2 - 1 - low_offset1
            limit = (high_offset1 - low_offset1 + idx_access1 - 1)
                  + (high_offset2 - low_offset2 + idx_access2 - 1)
      i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1

     Combining the tests requires limit to be computable in an unsigned
     form of the index type; if it isn't, we fall back to the usual
     pointer-based checks.

     We can do better if DR_B is a write and if DR_A and DR_B are
     well-ordered in both the original and the new code (see the
     comment above the DR_ALIAS_* flags for details).  In this case
     we know that for each i in [0, n-1], the write performed by
     access i of DR_B occurs after access numbers j<=i of DR_A in
     both the original and the new code.  Any write or anti
     dependencies wrt those DR_A accesses are therefore maintained.

     We just need to make sure that each individual write in DR_B does not
     overlap any higher-indexed access in DR_A; such DR_A accesses happen
     after the DR_B access in the original code but happen before it in
     the new code.

     We know the steps for both accesses are equal, so by induction, we
     just need to test whether the first write of DR_B overlaps a later
     access of DR_A.  In other words, we need to move min1 along by
     one iteration:

       min1' = min1 + idx_step

     and use the ranges:

       [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]

     and:

       [min2, min2 + idx_access2 - 1]

     where:

       low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
       high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0.  */
  if (waw_or_war_p)
    idx_len1 -= abs_idx_step;

  poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
  if (!waw_or_war_p)
    limit += idx_len2;

  tree utype = unsigned_type_for (TREE_TYPE (min1));
  if (!wi::fits_to_tree_p (limit, utype))
    return false;

  poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
  poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
  poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
  /* Equivalent to adding IDX_STEP to MIN1.  */
  if (waw_or_war_p)
    bias -= wi::to_offset (idx_step);

  /* Build (unsigned) (min2 - min1 + bias) > limit, which is true when
     the ranges do NOT overlap (see derivation above).  */
  tree subject = fold_build2 (MINUS_EXPR, utype,
                              fold_convert (utype, min2),
                              fold_convert (utype, min1));
  subject = fold_build2 (PLUS_EXPR, utype, subject,
                         wide_int_to_tree (utype, bias));
  tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
                                     wide_int_to_tree (utype, limit));
  if (*cond_expr)
    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
                              *cond_expr, part_cond_expr);
  else
    *cond_expr = part_cond_expr;
  if (dump_enabled_p ())
    {
      if (waw_or_war_p)
        dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
      else
        dump_printf (MSG_NOTE, "using an index-based overlap test\n");
    }
  return true;
}
2341 1.1 mrg
2342 1.1 mrg /* A subroutine of create_intersect_range_checks, with a subset of the
2343 1.1 mrg same arguments. Try to optimize cases in which the second access
2344 1.1 mrg is a write and in which some overlap is valid. */
2345 1.1 mrg
static bool
create_waw_or_war_checks (tree *cond_expr,
			  const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len& dr_a = alias_pair.first;
  const dr_with_seg_len& dr_b = alias_pair.second;

  /* Check for cases in which:

     (a) DR_B is always a write;
     (b) the accesses are well-ordered in both the original and new code
	 (see the comment above the DR_ALIAS_* flags for details); and
     (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
  if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
    return false;

  /* Check for equal (but possibly variable) steps.  */
  tree step = DR_STEP (dr_a.dr);
  if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
    return false;

  /* Make sure that we can operate on sizetype without loss of precision.
     The address arithmetic below is done in sizetype, so a narrower or
     wider pointer type would invalidate the overflow reasoning.  */
  tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
  if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
    return false;

  /* All addresses involved are known to have a common alignment ALIGN.
     We can therefore subtract ALIGN from an exclusive endpoint to get
     an inclusive endpoint.  In the best (and common) case, ALIGN is the
     same as the access sizes of both DRs, and so subtracting ALIGN
     cancels out the addition of an access size.  */
  unsigned int align = MIN (dr_a.align, dr_b.align);
  poly_uint64 last_chunk_a = dr_a.access_size - align;
  poly_uint64 last_chunk_b = dr_b.access_size - align;

  /* Get a boolean expression that is true when the step is negative.  */
  tree indicator = dr_direction_indicator (dr_a.dr);
  tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
			       fold_convert (ssizetype, indicator),
			       ssize_int (0));

  /* Get lengths in sizetype.  */
  tree seg_len_a
    = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
  step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));

  /* Each access has the following pattern:

	  <- |seg_len| ->
	  <--- A: -ve step --->
	  +-----+-------+-----+-------+-----+
	  | n-1 | ..... |  0  | ..... | n-1 |
	  +-----+-------+-----+-------+-----+
			<--- B: +ve step --->
			<- |seg_len| ->
			|
		   base address

     where "n" is the number of scalar iterations covered by the segment.

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     We know that DR_B is a write.  We also know (from checking that
     DR_A and DR_B are well-ordered) that for each i in [0, n-1],
     the write performed by access i of DR_B occurs after access numbers
     j<=i of DR_A in both the original and the new code.  Any write or
     anti dependencies wrt those DR_A accesses are therefore maintained.

     We just need to make sure that each individual write in DR_B does not
     overlap any higher-indexed access in DR_A; such DR_A accesses happen
     after the DR_B access in the original code but happen before it in
     the new code.

     We know the steps for both accesses are equal, so by induction, we
     just need to test whether the first write of DR_B overlaps a later
     access of DR_A.  In other words, we need to move addr_a along by
     one iteration:

       addr_a' = addr_a + step

     and check whether:

       [addr_b, addr_b + last_chunk_b]

     overlaps:

       [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]

     where [low_offset_a, high_offset_a] spans accesses [1, n-1].  I.e.:

       low_offset_a = +ve step ? 0 : seg_len_a - step
       high_offset_a = +ve step ? seg_len_a - step : 0

     This is equivalent to testing whether:

       addr_a' + low_offset_a <= addr_b + last_chunk_b
       && addr_b <= addr_a' + high_offset_a + last_chunk_a

     Converting this into a single test, there is an overlap if:

       0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit

     where  limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b

     If DR_A is performed, limit + |step| - last_chunk_b is known to be
     less than the size of the object underlying DR_A.  We also know
     that last_chunk_b <= |step|; this is checked elsewhere if it isn't
     guaranteed at compile time.  There can therefore be no overflow if
     "limit" is calculated in an unsigned type with pointer precision.  */
  tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
					 DR_OFFSET (dr_a.dr));
  addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));

  tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
					 DR_OFFSET (dr_b.dr));
  addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));

  /* Advance ADDR_A by one iteration and adjust the length to compensate.  */
  addr_a = fold_build_pointer_plus (addr_a, step);
  tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
					   seg_len_a, step);
  /* Wrap multiply-used non-constant subexpressions in SAVE_EXPRs so the
     generated condition evaluates them only once.  */
  if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
    seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);

  tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
				   seg_len_a_minus_step, size_zero_node);
  if (!CONSTANT_CLASS_P (low_offset_a))
    low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);

  /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
     but it's usually more efficient to reuse the LOW_OFFSET_A result.  */
  tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
				    low_offset_a);

  /* The amount added to addr_b - addr_a'.  */
  tree bias = fold_build2 (MINUS_EXPR, sizetype,
			   size_int (last_chunk_b), low_offset_a);

  tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
  limit = fold_build2 (PLUS_EXPR, sizetype, limit,
		       size_int (last_chunk_a + last_chunk_b));

  /* SUBJECT is the unsigned distance addr_b + last_chunk_b - addr_a'
     - low_offset_a; the accesses are independent iff SUBJECT > LIMIT
     (relying on unsigned wraparound for the 0 <= ... lower bound).  */
  tree subject = fold_build2 (MINUS_EXPR, sizetype,
			      fold_convert (sizetype, addr_b),
			      fold_convert (sizetype, addr_a));
  subject = fold_build2 (PLUS_EXPR, sizetype, subject, bias);

  *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
  return true;
}
2499 1.1 mrg
2500 1.1 mrg /* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for
2501 1.1 mrg every address ADDR accessed by D:
2502 1.1 mrg
2503 1.1 mrg *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEQ_MAX_OUT
2504 1.1 mrg
2505 1.1 mrg In this case, every element accessed by D is aligned to at least
2506 1.1 mrg ALIGN bytes.
2507 1.1 mrg
2508 1.1 mrg If ALIGN is zero then instead set *SEG_MAX_OUT so that:
2509 1.1 mrg
2510 1.1 mrg *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT. */
2511 1.1 mrg
2512 1.1 mrg static void
2513 1.1 mrg get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
2514 1.1 mrg tree *seg_max_out, HOST_WIDE_INT align)
2515 1.1 mrg {
2516 1.1 mrg /* Each access has the following pattern:
2517 1.1 mrg
2518 1.1 mrg <- |seg_len| ->
2519 1.1 mrg <--- A: -ve step --->
2520 1.1 mrg +-----+-------+-----+-------+-----+
2521 1.1 mrg | n-1 | ,.... | 0 | ..... | n-1 |
2522 1.1 mrg +-----+-------+-----+-------+-----+
2523 1.1 mrg <--- B: +ve step --->
2524 1.1 mrg <- |seg_len| ->
2525 1.1 mrg |
2526 1.1 mrg base address
2527 1.1 mrg
2528 1.1 mrg where "n" is the number of scalar iterations covered by the segment.
2529 1.1 mrg (This should be VF for a particular pair if we know that both steps
2530 1.1 mrg are the same, otherwise it will be the full number of scalar loop
2531 1.1 mrg iterations.)
2532 1.1 mrg
2533 1.1 mrg A is the range of bytes accessed when the step is negative,
2534 1.1 mrg B is the range when the step is positive.
2535 1.1 mrg
2536 1.1 mrg If the access size is "access_size" bytes, the lowest addressed byte is:
2537 1.1 mrg
2538 1.1 mrg base + (step < 0 ? seg_len : 0) [LB]
2539 1.1 mrg
2540 1.1 mrg and the highest addressed byte is always below:
2541 1.1 mrg
2542 1.1 mrg base + (step < 0 ? 0 : seg_len) + access_size [UB]
2543 1.1 mrg
2544 1.1 mrg Thus:
2545 1.1 mrg
2546 1.1 mrg LB <= ADDR < UB
2547 1.1 mrg
2548 1.1 mrg If ALIGN is nonzero, all three values are aligned to at least ALIGN
2549 1.1 mrg bytes, so:
2550 1.1 mrg
2551 1.1 mrg LB <= ADDR <= UB - ALIGN
2552 1.1 mrg
2553 1.1 mrg where "- ALIGN" folds naturally with the "+ access_size" and often
2554 1.1 mrg cancels it out.
2555 1.1 mrg
2556 1.1 mrg We don't try to simplify LB and UB beyond this (e.g. by using
2557 1.1 mrg MIN and MAX based on whether seg_len rather than the stride is
2558 1.1 mrg negative) because it is possible for the absolute size of the
2559 1.1 mrg segment to overflow the range of a ssize_t.
2560 1.1 mrg
2561 1.1 mrg Keeping the pointer_plus outside of the cond_expr should allow
2562 1.1 mrg the cond_exprs to be shared with other alias checks. */
2563 1.1 mrg tree indicator = dr_direction_indicator (d.dr);
2564 1.1 mrg tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2565 1.1 mrg fold_convert (ssizetype, indicator),
2566 1.1 mrg ssize_int (0));
2567 1.1 mrg tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
2568 1.1 mrg DR_OFFSET (d.dr));
2569 1.1 mrg addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
2570 1.1 mrg tree seg_len
2571 1.1 mrg = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
2572 1.1 mrg
2573 1.1 mrg tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2574 1.1 mrg seg_len, size_zero_node);
2575 1.1 mrg tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2576 1.1 mrg size_zero_node, seg_len);
2577 1.1 mrg max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
2578 1.1 mrg size_int (d.access_size - align));
2579 1.1 mrg
2580 1.1 mrg *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
2581 1.1 mrg *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
2582 1.1 mrg }
2583 1.1 mrg
2584 1.1 mrg /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
   storing the condition in *COND_EXPR.  The fallback is to generate
   a test that the two accesses do not overlap:
2587 1.1 mrg
2588 1.1 mrg end_a <= start_b || end_b <= start_a. */
2589 1.1 mrg
2590 1.1 mrg static void
2591 1.1 mrg create_intersect_range_checks (class loop *loop, tree *cond_expr,
2592 1.1 mrg const dr_with_seg_len_pair_t &alias_pair)
2593 1.1 mrg {
2594 1.1 mrg const dr_with_seg_len& dr_a = alias_pair.first;
2595 1.1 mrg const dr_with_seg_len& dr_b = alias_pair.second;
2596 1.1 mrg *cond_expr = NULL_TREE;
2597 1.1 mrg if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
2598 1.1 mrg return;
2599 1.1 mrg
2600 1.1 mrg if (create_ifn_alias_checks (cond_expr, alias_pair))
2601 1.1 mrg return;
2602 1.1 mrg
2603 1.1 mrg if (create_waw_or_war_checks (cond_expr, alias_pair))
2604 1.1 mrg return;
2605 1.1 mrg
2606 1.1 mrg unsigned HOST_WIDE_INT min_align;
2607 1.1 mrg tree_code cmp_code;
2608 1.1 mrg /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
2609 1.1 mrg are equivalent. This is just an optimization heuristic. */
2610 1.1 mrg if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
2611 1.1 mrg && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
2612 1.1 mrg {
2613 1.1 mrg /* In this case adding access_size to seg_len is likely to give
2614 1.1 mrg a simple X * step, where X is either the number of scalar
2615 1.1 mrg iterations or the vectorization factor. We're better off
2616 1.1 mrg keeping that, rather than subtracting an alignment from it.
2617 1.1 mrg
2618 1.1 mrg In this case the maximum values are exclusive and so there is
2619 1.1 mrg no alias if the maximum of one segment equals the minimum
2620 1.1 mrg of another. */
2621 1.1 mrg min_align = 0;
2622 1.1 mrg cmp_code = LE_EXPR;
2623 1.1 mrg }
2624 1.1 mrg else
2625 1.1 mrg {
2626 1.1 mrg /* Calculate the minimum alignment shared by all four pointers,
2627 1.1 mrg then arrange for this alignment to be subtracted from the
2628 1.1 mrg exclusive maximum values to get inclusive maximum values.
2629 1.1 mrg This "- min_align" is cumulative with a "+ access_size"
2630 1.1 mrg in the calculation of the maximum values. In the best
2631 1.1 mrg (and common) case, the two cancel each other out, leaving
2632 1.1 mrg us with an inclusive bound based only on seg_len. In the
2633 1.1 mrg worst case we're simply adding a smaller number than before.
2634 1.1 mrg
2635 1.1 mrg Because the maximum values are inclusive, there is an alias
2636 1.1 mrg if the maximum value of one segment is equal to the minimum
2637 1.1 mrg value of the other. */
2638 1.1 mrg min_align = std::min (dr_a.align, dr_b.align);
2639 1.1 mrg cmp_code = LT_EXPR;
2640 1.1 mrg }
2641 1.1 mrg
2642 1.1 mrg tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2643 1.1 mrg get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
2644 1.1 mrg get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
2645 1.1 mrg
2646 1.1 mrg *cond_expr
2647 1.1 mrg = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2648 1.1 mrg fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
2649 1.1 mrg fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
2650 1.1 mrg if (dump_enabled_p ())
2651 1.1 mrg dump_printf (MSG_NOTE, "using an address-based overlap test\n");
2652 1.1 mrg }
2653 1.1 mrg
2654 1.1 mrg /* Create a conditional expression that represents the run-time checks for
2655 1.1 mrg overlapping of address ranges represented by a list of data references
2656 1.1 mrg pairs passed in ALIAS_PAIRS. Data references are in LOOP. The returned
2657 1.1 mrg COND_EXPR is the conditional expression to be used in the if statement
2658 1.1 mrg that controls which version of the loop gets executed at runtime. */
2659 1.1 mrg
2660 1.1 mrg void
2661 1.1 mrg create_runtime_alias_checks (class loop *loop,
2662 1.1 mrg const vec<dr_with_seg_len_pair_t> *alias_pairs,
2663 1.1 mrg tree * cond_expr)
2664 1.1 mrg {
2665 1.1 mrg tree part_cond_expr;
2666 1.1 mrg
2667 1.1 mrg fold_defer_overflow_warnings ();
2668 1.1 mrg for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
2669 1.1 mrg {
2670 1.1 mrg gcc_assert (alias_pair.flags);
2671 1.1 mrg if (dump_enabled_p ())
2672 1.1 mrg dump_printf (MSG_NOTE,
2673 1.1 mrg "create runtime check for data references %T and %T\n",
2674 1.1 mrg DR_REF (alias_pair.first.dr),
2675 1.1 mrg DR_REF (alias_pair.second.dr));
2676 1.1 mrg
2677 1.1 mrg /* Create condition expression for each pair data references. */
2678 1.1 mrg create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
2679 1.1 mrg if (*cond_expr)
2680 1.1 mrg *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2681 1.1 mrg *cond_expr, part_cond_expr);
2682 1.1 mrg else
2683 1.1 mrg *cond_expr = part_cond_expr;
2684 1.1 mrg }
2685 1.1 mrg fold_undefer_and_ignore_overflow_warnings ();
2686 1.1 mrg }
2687 1.1 mrg
2688 1.1 mrg /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2689 1.1 mrg expressions. */
2690 1.1 mrg static bool
2691 1.1 mrg dr_equal_offsets_p1 (tree offset1, tree offset2)
2692 1.1 mrg {
2693 1.1 mrg bool res;
2694 1.1 mrg
2695 1.1 mrg STRIP_NOPS (offset1);
2696 1.1 mrg STRIP_NOPS (offset2);
2697 1.1 mrg
2698 1.1 mrg if (offset1 == offset2)
2699 1.1 mrg return true;
2700 1.1 mrg
2701 1.1 mrg if (TREE_CODE (offset1) != TREE_CODE (offset2)
2702 1.1 mrg || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
2703 1.1 mrg return false;
2704 1.1 mrg
2705 1.1 mrg res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
2706 1.1 mrg TREE_OPERAND (offset2, 0));
2707 1.1 mrg
2708 1.1 mrg if (!res || !BINARY_CLASS_P (offset1))
2709 1.1 mrg return res;
2710 1.1 mrg
2711 1.1 mrg res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2712 1.1 mrg TREE_OPERAND (offset2, 1));
2713 1.1 mrg
2714 1.1 mrg return res;
2715 1.1 mrg }
2716 1.1 mrg
2717 1.1 mrg /* Check if DRA and DRB have equal offsets. */
2718 1.1 mrg bool
2719 1.1 mrg dr_equal_offsets_p (struct data_reference *dra,
2720 1.1 mrg struct data_reference *drb)
2721 1.1 mrg {
2722 1.1 mrg tree offset1, offset2;
2723 1.1 mrg
2724 1.1 mrg offset1 = DR_OFFSET (dra);
2725 1.1 mrg offset2 = DR_OFFSET (drb);
2726 1.1 mrg
2727 1.1 mrg return dr_equal_offsets_p1 (offset1, offset2);
2728 1.1 mrg }
2729 1.1 mrg
2730 1.1 mrg /* Returns true if FNA == FNB. */
2731 1.1 mrg
2732 1.1 mrg static bool
2733 1.1 mrg affine_function_equal_p (affine_fn fna, affine_fn fnb)
2734 1.1 mrg {
2735 1.1 mrg unsigned i, n = fna.length ();
2736 1.1 mrg
2737 1.1 mrg if (n != fnb.length ())
2738 1.1 mrg return false;
2739 1.1 mrg
2740 1.1 mrg for (i = 0; i < n; i++)
2741 1.1 mrg if (!operand_equal_p (fna[i], fnb[i], 0))
2742 1.1 mrg return false;
2743 1.1 mrg
2744 1.1 mrg return true;
2745 1.1 mrg }
2746 1.1 mrg
2747 1.1 mrg /* If all the functions in CF are the same, returns one of them,
2748 1.1 mrg otherwise returns NULL. */
2749 1.1 mrg
2750 1.1 mrg static affine_fn
2751 1.1 mrg common_affine_function (conflict_function *cf)
2752 1.1 mrg {
2753 1.1 mrg unsigned i;
2754 1.1 mrg affine_fn comm;
2755 1.1 mrg
2756 1.1 mrg if (!CF_NONTRIVIAL_P (cf))
2757 1.1 mrg return affine_fn ();
2758 1.1 mrg
2759 1.1 mrg comm = cf->fns[0];
2760 1.1 mrg
2761 1.1 mrg for (i = 1; i < cf->n; i++)
2762 1.1 mrg if (!affine_function_equal_p (comm, cf->fns[i]))
2763 1.1 mrg return affine_fn ();
2764 1.1 mrg
2765 1.1 mrg return comm;
2766 1.1 mrg }
2767 1.1 mrg
2768 1.1 mrg /* Returns the base of the affine function FN. */
2769 1.1 mrg
static tree
affine_function_base (affine_fn fn)
{
  /* Element 0 holds the constant (base) term of the affine function.  */
  return fn[0];
}
2775 1.1 mrg
2776 1.1 mrg /* Returns true if FN is a constant. */
2777 1.1 mrg
2778 1.1 mrg static bool
2779 1.1 mrg affine_function_constant_p (affine_fn fn)
2780 1.1 mrg {
2781 1.1 mrg unsigned i;
2782 1.1 mrg tree coef;
2783 1.1 mrg
2784 1.1 mrg for (i = 1; fn.iterate (i, &coef); i++)
2785 1.1 mrg if (!integer_zerop (coef))
2786 1.1 mrg return false;
2787 1.1 mrg
2788 1.1 mrg return true;
2789 1.1 mrg }
2790 1.1 mrg
2791 1.1 mrg /* Returns true if FN is the zero constant function. */
2792 1.1 mrg
2793 1.1 mrg static bool
2794 1.1 mrg affine_function_zero_p (affine_fn fn)
2795 1.1 mrg {
2796 1.1 mrg return (integer_zerop (affine_function_base (fn))
2797 1.1 mrg && affine_function_constant_p (fn));
2798 1.1 mrg }
2799 1.1 mrg
2800 1.1 mrg /* Returns a signed integer type with the largest precision from TA
2801 1.1 mrg and TB. */
2802 1.1 mrg
2803 1.1 mrg static tree
2804 1.1 mrg signed_type_for_types (tree ta, tree tb)
2805 1.1 mrg {
2806 1.1 mrg if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2807 1.1 mrg return signed_type_for (ta);
2808 1.1 mrg else
2809 1.1 mrg return signed_type_for (tb);
2810 1.1 mrg }
2811 1.1 mrg
2812 1.1 mrg /* Applies operation OP on affine functions FNA and FNB, and returns the
2813 1.1 mrg result. */
2814 1.1 mrg
2815 1.1 mrg static affine_fn
2816 1.1 mrg affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2817 1.1 mrg {
2818 1.1 mrg unsigned i, n, m;
2819 1.1 mrg affine_fn ret;
2820 1.1 mrg tree coef;
2821 1.1 mrg
2822 1.1 mrg if (fnb.length () > fna.length ())
2823 1.1 mrg {
2824 1.1 mrg n = fna.length ();
2825 1.1 mrg m = fnb.length ();
2826 1.1 mrg }
2827 1.1 mrg else
2828 1.1 mrg {
2829 1.1 mrg n = fnb.length ();
2830 1.1 mrg m = fna.length ();
2831 1.1 mrg }
2832 1.1 mrg
2833 1.1 mrg ret.create (m);
2834 1.1 mrg for (i = 0; i < n; i++)
2835 1.1 mrg {
2836 1.1 mrg tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2837 1.1 mrg TREE_TYPE (fnb[i]));
2838 1.1 mrg ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2839 1.1 mrg }
2840 1.1 mrg
2841 1.1 mrg for (; fna.iterate (i, &coef); i++)
2842 1.1 mrg ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2843 1.1 mrg coef, integer_zero_node));
2844 1.1 mrg for (; fnb.iterate (i, &coef); i++)
2845 1.1 mrg ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2846 1.1 mrg integer_zero_node, coef));
2847 1.1 mrg
2848 1.1 mrg return ret;
2849 1.1 mrg }
2850 1.1 mrg
2851 1.1 mrg /* Returns the sum of affine functions FNA and FNB. */
2852 1.1 mrg
static affine_fn
affine_fn_plus (affine_fn fna, affine_fn fnb)
{
  /* Coefficient-wise addition; the caller owns the returned vector.  */
  return affine_fn_op (PLUS_EXPR, fna, fnb);
}
2858 1.1 mrg
2859 1.1 mrg /* Returns the difference of affine functions FNA and FNB. */
2860 1.1 mrg
static affine_fn
affine_fn_minus (affine_fn fna, affine_fn fnb)
{
  /* Coefficient-wise subtraction; the caller owns the returned vector.  */
  return affine_fn_op (MINUS_EXPR, fna, fnb);
}
2866 1.1 mrg
2867 1.1 mrg /* Frees affine function FN. */
2868 1.1 mrg
static void
affine_fn_free (affine_fn fn)
{
  /* Release the coefficient vector's storage.  */
  fn.release ();
}
2874 1.1 mrg
2875 1.1 mrg /* Determine for each subscript in the data dependence relation DDR
2876 1.1 mrg the distance. */
2877 1.1 mrg
2878 1.1 mrg static void
2879 1.1 mrg compute_subscript_distance (struct data_dependence_relation *ddr)
2880 1.1 mrg {
2881 1.1 mrg conflict_function *cf_a, *cf_b;
2882 1.1 mrg affine_fn fn_a, fn_b, diff;
2883 1.1 mrg
2884 1.1 mrg if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2885 1.1 mrg {
2886 1.1 mrg unsigned int i;
2887 1.1 mrg
2888 1.1 mrg for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2889 1.1 mrg {
2890 1.1 mrg struct subscript *subscript;
2891 1.1 mrg
2892 1.1 mrg subscript = DDR_SUBSCRIPT (ddr, i);
2893 1.1 mrg cf_a = SUB_CONFLICTS_IN_A (subscript);
2894 1.1 mrg cf_b = SUB_CONFLICTS_IN_B (subscript);
2895 1.1 mrg
2896 1.1 mrg fn_a = common_affine_function (cf_a);
2897 1.1 mrg fn_b = common_affine_function (cf_b);
2898 1.1 mrg if (!fn_a.exists () || !fn_b.exists ())
2899 1.1 mrg {
2900 1.1 mrg SUB_DISTANCE (subscript) = chrec_dont_know;
2901 1.1 mrg return;
2902 1.1 mrg }
2903 1.1 mrg diff = affine_fn_minus (fn_a, fn_b);
2904 1.1 mrg
2905 1.1 mrg if (affine_function_constant_p (diff))
2906 1.1 mrg SUB_DISTANCE (subscript) = affine_function_base (diff);
2907 1.1 mrg else
2908 1.1 mrg SUB_DISTANCE (subscript) = chrec_dont_know;
2909 1.1 mrg
2910 1.1 mrg affine_fn_free (diff);
2911 1.1 mrg }
2912 1.1 mrg }
2913 1.1 mrg }
2914 1.1 mrg
2915 1.1 mrg /* Returns the conflict function for "unknown". */
2916 1.1 mrg
static conflict_function *
conflict_fn_not_known (void)
{
  /* Zero-initialized allocation; only the N field needs setting.  */
  conflict_function *fn = XCNEW (conflict_function);
  fn->n = NOT_KNOWN;

  return fn;
}
2925 1.1 mrg
2926 1.1 mrg /* Returns the conflict function for "independent". */
2927 1.1 mrg
static conflict_function *
conflict_fn_no_dependence (void)
{
  /* Zero-initialized allocation; only the N field needs setting.  */
  conflict_function *fn = XCNEW (conflict_function);
  fn->n = NO_DEPENDENCE;

  return fn;
}
2936 1.1 mrg
2937 1.1 mrg /* Returns true if the address of OBJ is invariant in LOOP. */
2938 1.1 mrg
2939 1.1 mrg static bool
2940 1.1 mrg object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2941 1.1 mrg {
2942 1.1 mrg while (handled_component_p (obj))
2943 1.1 mrg {
2944 1.1 mrg if (TREE_CODE (obj) == ARRAY_REF)
2945 1.1 mrg {
2946 1.1 mrg for (int i = 1; i < 4; ++i)
2947 1.1 mrg if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2948 1.1 mrg loop->num))
2949 1.1 mrg return false;
2950 1.1 mrg }
2951 1.1 mrg else if (TREE_CODE (obj) == COMPONENT_REF)
2952 1.1 mrg {
2953 1.1 mrg if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2954 1.1 mrg loop->num))
2955 1.1 mrg return false;
2956 1.1 mrg }
2957 1.1 mrg obj = TREE_OPERAND (obj, 0);
2958 1.1 mrg }
2959 1.1 mrg
2960 1.1 mrg if (!INDIRECT_REF_P (obj)
2961 1.1 mrg && TREE_CODE (obj) != MEM_REF)
2962 1.1 mrg return true;
2963 1.1 mrg
2964 1.1 mrg return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2965 1.1 mrg loop->num);
2966 1.1 mrg }
2967 1.1 mrg
2968 1.1 mrg /* Returns false if we can prove that data references A and B do not alias,
2969 1.1 mrg true otherwise. If LOOP_NEST is false no cross-iteration aliases are
2970 1.1 mrg considered. */
2971 1.1 mrg
bool
dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
		class loop *loop_nest)
{
  tree addr_a = DR_BASE_OBJECT (a);
  tree addr_b = DR_BASE_OBJECT (b);

  /* If we are not processing a loop nest but scalar code we
     do not need to care about possible cross-iteration dependences
     and thus can process the full original reference.  Do so,
     similar to how loop invariant motion applies extra offset-based
     disambiguation.  */
  if (!loop_nest)
    {
      aff_tree off1, off2;
      poly_widest_int size1, size2;
      get_inner_reference_aff (DR_REF (a), &off1, &size1);
      get_inner_reference_aff (DR_REF (b), &off2, &size2);
      /* Compute off2 - off1 and test whether the two byte ranges can
	 overlap at all.  */
      aff_combination_scale (&off1, -1);
      aff_combination_add (&off2, &off1);
      if (aff_comb_cannot_overlap_p (&off2, size1, size2))
	return false;
    }

  /* Restrict-qualified (clique/base annotated) MEM_REFs in the same
     clique but with different bases are known not to alias.  */
  if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
      && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
      /* For cross-iteration dependences the cliques must be valid for the
	 whole loop, not just individual iterations.  */
      && (!loop_nest
	  || MR_DEPENDENCE_CLIQUE (addr_a) == 1
	  || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
      && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
      && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
    return false;

  /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
     do not know the size of the base-object.  So we cannot do any
     offset/overlap based analysis but have to rely on points-to
     information only.  */
  if (TREE_CODE (addr_a) == MEM_REF
      && (DR_UNCONSTRAINED_BASE (a)
	  || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_b) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       build_fold_addr_expr (addr_b));
    }
  else if (TREE_CODE (addr_b) == MEM_REF
	   && (DR_UNCONSTRAINED_BASE (b)
	       || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_a) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
				       TREE_OPERAND (addr_b, 0));
    }

  /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
     that is being subsetted in the loop nest.  Dispatch to the oracle
     query that matches the read/write kinds of the two refs.  */
  if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
    return refs_output_dependent_p (addr_a, addr_b);
  else if (DR_IS_READ (a) && DR_IS_WRITE (b))
    return refs_anti_dependent_p (addr_a, addr_b);
  return refs_may_alias_p (addr_a, addr_b);
}
3054 1.1 mrg
3055 1.1 mrg /* REF_A and REF_B both satisfy access_fn_component_p. Return true
3056 1.1 mrg if it is meaningful to compare their associated access functions
3057 1.1 mrg when checking for dependencies. */
3058 1.1 mrg
3059 1.1 mrg static bool
3060 1.1 mrg access_fn_components_comparable_p (tree ref_a, tree ref_b)
3061 1.1 mrg {
3062 1.1 mrg /* Allow pairs of component refs from the following sets:
3063 1.1 mrg
3064 1.1 mrg { REALPART_EXPR, IMAGPART_EXPR }
3065 1.1 mrg { COMPONENT_REF }
3066 1.1 mrg { ARRAY_REF }. */
3067 1.1 mrg tree_code code_a = TREE_CODE (ref_a);
3068 1.1 mrg tree_code code_b = TREE_CODE (ref_b);
3069 1.1 mrg if (code_a == IMAGPART_EXPR)
3070 1.1 mrg code_a = REALPART_EXPR;
3071 1.1 mrg if (code_b == IMAGPART_EXPR)
3072 1.1 mrg code_b = REALPART_EXPR;
3073 1.1 mrg if (code_a != code_b)
3074 1.1 mrg return false;
3075 1.1 mrg
3076 1.1 mrg if (TREE_CODE (ref_a) == COMPONENT_REF)
3077 1.1 mrg /* ??? We cannot simply use the type of operand #0 of the refs here as
3078 1.1 mrg the Fortran compiler smuggles type punning into COMPONENT_REFs.
3079 1.1 mrg Use the DECL_CONTEXT of the FIELD_DECLs instead. */
3080 1.1 mrg return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
3081 1.1 mrg == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
3082 1.1 mrg
3083 1.1 mrg return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
3084 1.1 mrg TREE_TYPE (TREE_OPERAND (ref_b, 0)));
3085 1.1 mrg }
3086 1.1 mrg
3087 1.1 mrg /* Initialize a data dependence relation RES in LOOP_NEST. USE_ALT_INDICES
3088 1.1 mrg is true when the main indices of A and B were not comparable so we try again
3089 1.1 mrg with alternate indices computed on an indirect reference. */
3090 1.1 mrg
3091 1.1 mrg struct data_dependence_relation *
3092 1.1 mrg initialize_data_dependence_relation (struct data_dependence_relation *res,
3093 1.1 mrg vec<loop_p> loop_nest,
3094 1.1 mrg bool use_alt_indices)
3095 1.1 mrg {
3096 1.1 mrg struct data_reference *a = DDR_A (res);
3097 1.1 mrg struct data_reference *b = DDR_B (res);
3098 1.1 mrg unsigned int i;
3099 1.1 mrg
3100 1.1 mrg struct indices *indices_a = &a->indices;
3101 1.1 mrg struct indices *indices_b = &b->indices;
3102 1.1 mrg if (use_alt_indices)
3103 1.1 mrg {
3104 1.1 mrg if (TREE_CODE (DR_REF (a)) != MEM_REF)
3105 1.1 mrg indices_a = &a->alt_indices;
3106 1.1 mrg if (TREE_CODE (DR_REF (b)) != MEM_REF)
3107 1.1 mrg indices_b = &b->alt_indices;
3108 1.1 mrg }
3109 1.1 mrg unsigned int num_dimensions_a = indices_a->access_fns.length ();
3110 1.1 mrg unsigned int num_dimensions_b = indices_b->access_fns.length ();
3111 1.1 mrg if (num_dimensions_a == 0 || num_dimensions_b == 0)
3112 1.1 mrg {
3113 1.1 mrg DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3114 1.1 mrg return res;
3115 1.1 mrg }
3116 1.1 mrg
3117 1.1 mrg /* For unconstrained bases, the root (highest-indexed) subscript
3118 1.1 mrg describes a variation in the base of the original DR_REF rather
3119 1.1 mrg than a component access. We have no type that accurately describes
3120 1.1 mrg the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
3121 1.1 mrg applying this subscript) so limit the search to the last real
3122 1.1 mrg component access.
3123 1.1 mrg
3124 1.1 mrg E.g. for:
3125 1.1 mrg
3126 1.1 mrg void
3127 1.1 mrg f (int a[][8], int b[][8])
3128 1.1 mrg {
3129 1.1 mrg for (int i = 0; i < 8; ++i)
3130 1.1 mrg a[i * 2][0] = b[i][0];
3131 1.1 mrg }
3132 1.1 mrg
3133 1.1 mrg the a and b accesses have a single ARRAY_REF component reference [0]
3134 1.1 mrg but have two subscripts. */
3135 1.1 mrg if (indices_a->unconstrained_base)
3136 1.1 mrg num_dimensions_a -= 1;
3137 1.1 mrg if (indices_b->unconstrained_base)
3138 1.1 mrg num_dimensions_b -= 1;
3139 1.1 mrg
3140 1.1 mrg /* These structures describe sequences of component references in
3141 1.1 mrg DR_REF (A) and DR_REF (B). Each component reference is tied to a
3142 1.1 mrg specific access function. */
3143 1.1 mrg struct {
3144 1.1 mrg /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
3145 1.1 mrg DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
3146 1.1 mrg indices. In C notation, these are the indices of the rightmost
3147 1.1 mrg component references; e.g. for a sequence .b.c.d, the start
3148 1.1 mrg index is for .d. */
3149 1.1 mrg unsigned int start_a;
3150 1.1 mrg unsigned int start_b;
3151 1.1 mrg
3152 1.1 mrg /* The sequence contains LENGTH consecutive access functions from
3153 1.1 mrg each DR. */
3154 1.1 mrg unsigned int length;
3155 1.1 mrg
3156 1.1 mrg /* The enclosing objects for the A and B sequences respectively,
3157 1.1 mrg i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
3158 1.1 mrg and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied. */
3159 1.1 mrg tree object_a;
3160 1.1 mrg tree object_b;
3161 1.1 mrg } full_seq = {}, struct_seq = {};
3162 1.1 mrg
3163 1.1 mrg /* Before each iteration of the loop:
3164 1.1 mrg
3165 1.1 mrg - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
3166 1.1 mrg - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B). */
3167 1.1 mrg unsigned int index_a = 0;
3168 1.1 mrg unsigned int index_b = 0;
3169 1.1 mrg tree ref_a = DR_REF (a);
3170 1.1 mrg tree ref_b = DR_REF (b);
3171 1.1 mrg
3172 1.1 mrg /* Now walk the component references from the final DR_REFs back up to
3173 1.1 mrg the enclosing base objects. Each component reference corresponds
3174 1.1 mrg to one access function in the DR, with access function 0 being for
3175 1.1 mrg the final DR_REF and the highest-indexed access function being the
3176 1.1 mrg one that is applied to the base of the DR.
3177 1.1 mrg
3178 1.1 mrg Look for a sequence of component references whose access functions
3179 1.1 mrg are comparable (see access_fn_components_comparable_p). If more
3180 1.1 mrg than one such sequence exists, pick the one nearest the base
3181 1.1 mrg (which is the leftmost sequence in C notation). Store this sequence
3182 1.1 mrg in FULL_SEQ.
3183 1.1 mrg
3184 1.1 mrg For example, if we have:
3185 1.1 mrg
3186 1.1 mrg struct foo { struct bar s; ... } (*a)[10], (*b)[10];
3187 1.1 mrg
3188 1.1 mrg A: a[0][i].s.c.d
3189 1.1 mrg B: __real b[0][i].s.e[i].f
3190 1.1 mrg
3191 1.1 mrg (where d is the same type as the real component of f) then the access
3192 1.1 mrg functions would be:
3193 1.1 mrg
3194 1.1 mrg 0 1 2 3
3195 1.1 mrg A: .d .c .s [i]
3196 1.1 mrg
3197 1.1 mrg 0 1 2 3 4 5
3198 1.1 mrg B: __real .f [i] .e .s [i]
3199 1.1 mrg
3200 1.1 mrg The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
3201 1.1 mrg and [i] is an ARRAY_REF. However, the A1/B3 column contains two
3202 1.1 mrg COMPONENT_REF accesses for struct bar, so is comparable. Likewise
3203 1.1 mrg the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
3204 1.1 mrg so is comparable. The A3/B5 column contains two ARRAY_REFs that
3205 1.1 mrg index foo[10] arrays, so is again comparable. The sequence is
3206 1.1 mrg therefore:
3207 1.1 mrg
3208 1.1 mrg A: [1, 3] (i.e. [i].s.c)
3209 1.1 mrg B: [3, 5] (i.e. [i].s.e)
3210 1.1 mrg
3211 1.1 mrg Also look for sequences of component references whose access
3212 1.1 mrg functions are comparable and whose enclosing objects have the same
3213 1.1 mrg RECORD_TYPE. Store this sequence in STRUCT_SEQ. In the above
3214 1.1 mrg example, STRUCT_SEQ would be:
3215 1.1 mrg
3216 1.1 mrg A: [1, 2] (i.e. s.c)
3217 1.1 mrg B: [3, 4] (i.e. s.e) */
3218 1.1 mrg while (index_a < num_dimensions_a && index_b < num_dimensions_b)
3219 1.1 mrg {
3220 1.1 mrg /* The alternate indices form always has a single dimension
3221 1.1 mrg with unconstrained base. */
3222 1.1 mrg gcc_assert (!use_alt_indices);
3223 1.1 mrg
3224 1.1 mrg /* REF_A and REF_B must be one of the component access types
3225 1.1 mrg allowed by dr_analyze_indices. */
3226 1.1 mrg gcc_checking_assert (access_fn_component_p (ref_a));
3227 1.1 mrg gcc_checking_assert (access_fn_component_p (ref_b));
3228 1.1 mrg
3229 1.1 mrg /* Get the immediately-enclosing objects for REF_A and REF_B,
3230 1.1 mrg i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
3231 1.1 mrg and DR_ACCESS_FN (B, INDEX_B). */
3232 1.1 mrg tree object_a = TREE_OPERAND (ref_a, 0);
3233 1.1 mrg tree object_b = TREE_OPERAND (ref_b, 0);
3234 1.1 mrg
3235 1.1 mrg tree type_a = TREE_TYPE (object_a);
3236 1.1 mrg tree type_b = TREE_TYPE (object_b);
3237 1.1 mrg if (access_fn_components_comparable_p (ref_a, ref_b))
3238 1.1 mrg {
3239 1.1 mrg /* This pair of component accesses is comparable for dependence
3240 1.1 mrg analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
3241 1.1 mrg DR_ACCESS_FN (B, INDEX_B) in the sequence. */
3242 1.1 mrg if (full_seq.start_a + full_seq.length != index_a
3243 1.1 mrg || full_seq.start_b + full_seq.length != index_b)
3244 1.1 mrg {
3245 1.1 mrg /* The accesses don't extend the current sequence,
3246 1.1 mrg so start a new one here. */
3247 1.1 mrg full_seq.start_a = index_a;
3248 1.1 mrg full_seq.start_b = index_b;
3249 1.1 mrg full_seq.length = 0;
3250 1.1 mrg }
3251 1.1 mrg
3252 1.1 mrg /* Add this pair of references to the sequence. */
3253 1.1 mrg full_seq.length += 1;
3254 1.1 mrg full_seq.object_a = object_a;
3255 1.1 mrg full_seq.object_b = object_b;
3256 1.1 mrg
3257 1.1 mrg /* If the enclosing objects are structures (and thus have the
3258 1.1 mrg same RECORD_TYPE), record the new sequence in STRUCT_SEQ. */
3259 1.1 mrg if (TREE_CODE (type_a) == RECORD_TYPE)
3260 1.1 mrg struct_seq = full_seq;
3261 1.1 mrg
3262 1.1 mrg /* Move to the next containing reference for both A and B. */
3263 1.1 mrg ref_a = object_a;
3264 1.1 mrg ref_b = object_b;
3265 1.1 mrg index_a += 1;
3266 1.1 mrg index_b += 1;
3267 1.1 mrg continue;
3268 1.1 mrg }
3269 1.1 mrg
3270 1.1 mrg /* Try to approach equal type sizes. */
3271 1.1 mrg if (!COMPLETE_TYPE_P (type_a)
3272 1.1 mrg || !COMPLETE_TYPE_P (type_b)
3273 1.1 mrg || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
3274 1.1 mrg || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
3275 1.1 mrg break;
3276 1.1 mrg
3277 1.1 mrg unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
3278 1.1 mrg unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
3279 1.1 mrg if (size_a <= size_b)
3280 1.1 mrg {
3281 1.1 mrg index_a += 1;
3282 1.1 mrg ref_a = object_a;
3283 1.1 mrg }
3284 1.1 mrg if (size_b <= size_a)
3285 1.1 mrg {
3286 1.1 mrg index_b += 1;
3287 1.1 mrg ref_b = object_b;
3288 1.1 mrg }
3289 1.1 mrg }
3290 1.1 mrg
3291 1.1 mrg /* See whether FULL_SEQ ends at the base and whether the two bases
3292 1.1 mrg are equal. We do not care about TBAA or alignment info so we can
3293 1.1 mrg use OEP_ADDRESS_OF to avoid false negatives. */
3294 1.1 mrg tree base_a = indices_a->base_object;
3295 1.1 mrg tree base_b = indices_b->base_object;
3296 1.1 mrg bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
3297 1.1 mrg && full_seq.start_b + full_seq.length == num_dimensions_b
3298 1.1 mrg && (indices_a->unconstrained_base
3299 1.1 mrg == indices_b->unconstrained_base)
3300 1.1 mrg && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
3301 1.1 mrg && (types_compatible_p (TREE_TYPE (base_a),
3302 1.1 mrg TREE_TYPE (base_b))
3303 1.1 mrg || (!base_supports_access_fn_components_p (base_a)
3304 1.1 mrg && !base_supports_access_fn_components_p (base_b)
3305 1.1 mrg && operand_equal_p
3306 1.1 mrg (TYPE_SIZE (TREE_TYPE (base_a)),
3307 1.1 mrg TYPE_SIZE (TREE_TYPE (base_b)), 0)))
3308 1.1 mrg && (!loop_nest.exists ()
3309 1.1 mrg || (object_address_invariant_in_loop_p
3310 1.1 mrg (loop_nest[0], base_a))));
3311 1.1 mrg
3312 1.1 mrg /* If the bases are the same, we can include the base variation too.
3313 1.1 mrg E.g. the b accesses in:
3314 1.1 mrg
3315 1.1 mrg for (int i = 0; i < n; ++i)
3316 1.1 mrg b[i + 4][0] = b[i][0];
3317 1.1 mrg
3318 1.1 mrg have a definite dependence distance of 4, while for:
3319 1.1 mrg
3320 1.1 mrg for (int i = 0; i < n; ++i)
3321 1.1 mrg a[i + 4][0] = b[i][0];
3322 1.1 mrg
3323 1.1 mrg the dependence distance depends on the gap between a and b.
3324 1.1 mrg
3325 1.1 mrg If the bases are different then we can only rely on the sequence
3326 1.1 mrg rooted at a structure access, since arrays are allowed to overlap
3327 1.1 mrg arbitrarily and change shape arbitrarily. E.g. we treat this as
3328 1.1 mrg valid code:
3329 1.1 mrg
3330 1.1 mrg int a[256];
3331 1.1 mrg ...
3332 1.1 mrg ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
3333 1.1 mrg
3334 1.1 mrg where two lvalues with the same int[4][3] type overlap, and where
3335 1.1 mrg both lvalues are distinct from the object's declared type. */
3336 1.1 mrg if (same_base_p)
3337 1.1 mrg {
3338 1.1 mrg if (indices_a->unconstrained_base)
3339 1.1 mrg full_seq.length += 1;
3340 1.1 mrg }
3341 1.1 mrg else
3342 1.1 mrg full_seq = struct_seq;
3343 1.1 mrg
3344 1.1 mrg /* Punt if we didn't find a suitable sequence. */
3345 1.1 mrg if (full_seq.length == 0)
3346 1.1 mrg {
3347 1.1 mrg if (use_alt_indices
3348 1.1 mrg || (TREE_CODE (DR_REF (a)) == MEM_REF
3349 1.1 mrg && TREE_CODE (DR_REF (b)) == MEM_REF)
3350 1.1 mrg || may_be_nonaddressable_p (DR_REF (a))
3351 1.1 mrg || may_be_nonaddressable_p (DR_REF (b)))
3352 1.1 mrg {
3353 1.1 mrg /* Fully exhausted possibilities. */
3354 1.1 mrg DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3355 1.1 mrg return res;
3356 1.1 mrg }
3357 1.1 mrg
3358 1.1 mrg /* Try evaluating both DRs as dereferences of pointers. */
3359 1.1 mrg if (!a->alt_indices.base_object
3360 1.1 mrg && TREE_CODE (DR_REF (a)) != MEM_REF)
3361 1.1 mrg {
3362 1.1 mrg tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
3363 1.1 mrg build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
3364 1.1 mrg build_int_cst
3365 1.1 mrg (reference_alias_ptr_type (DR_REF (a)), 0));
3366 1.1 mrg dr_analyze_indices (&a->alt_indices, alt_ref,
3367 1.1 mrg loop_preheader_edge (loop_nest[0]),
3368 1.1 mrg loop_containing_stmt (DR_STMT (a)));
3369 1.1 mrg }
3370 1.1 mrg if (!b->alt_indices.base_object
3371 1.1 mrg && TREE_CODE (DR_REF (b)) != MEM_REF)
3372 1.1 mrg {
3373 1.1 mrg tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
3374 1.1 mrg build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
3375 1.1 mrg build_int_cst
3376 1.1 mrg (reference_alias_ptr_type (DR_REF (b)), 0));
3377 1.1 mrg dr_analyze_indices (&b->alt_indices, alt_ref,
3378 1.1 mrg loop_preheader_edge (loop_nest[0]),
3379 1.1 mrg loop_containing_stmt (DR_STMT (b)));
3380 1.1 mrg }
3381 1.1 mrg return initialize_data_dependence_relation (res, loop_nest, true);
3382 1.1 mrg }
3383 1.1 mrg
3384 1.1 mrg if (!same_base_p)
3385 1.1 mrg {
3386 1.1 mrg /* Partial overlap is possible for different bases when strict aliasing
3387 1.1 mrg is not in effect. It's also possible if either base involves a union
3388 1.1 mrg access; e.g. for:
3389 1.1 mrg
3390 1.1 mrg struct s1 { int a[2]; };
3391 1.1 mrg struct s2 { struct s1 b; int c; };
3392 1.1 mrg struct s3 { int d; struct s1 e; };
3393 1.1 mrg union u { struct s2 f; struct s3 g; } *p, *q;
3394 1.1 mrg
3395 1.1 mrg the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
3396 1.1 mrg "p->g.e" (base "p->g") and might partially overlap the s1 at
3397 1.1 mrg "q->g.e" (base "q->g"). */
3398 1.1 mrg if (!flag_strict_aliasing
3399 1.1 mrg || ref_contains_union_access_p (full_seq.object_a)
3400 1.1 mrg || ref_contains_union_access_p (full_seq.object_b))
3401 1.1 mrg {
3402 1.1 mrg DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3403 1.1 mrg return res;
3404 1.1 mrg }
3405 1.1 mrg
3406 1.1 mrg DDR_COULD_BE_INDEPENDENT_P (res) = true;
3407 1.1 mrg if (!loop_nest.exists ()
3408 1.1 mrg || (object_address_invariant_in_loop_p (loop_nest[0],
3409 1.1 mrg full_seq.object_a)
3410 1.1 mrg && object_address_invariant_in_loop_p (loop_nest[0],
3411 1.1 mrg full_seq.object_b)))
3412 1.1 mrg {
3413 1.1 mrg DDR_OBJECT_A (res) = full_seq.object_a;
3414 1.1 mrg DDR_OBJECT_B (res) = full_seq.object_b;
3415 1.1 mrg }
3416 1.1 mrg }
3417 1.1 mrg
3418 1.1 mrg DDR_AFFINE_P (res) = true;
3419 1.1 mrg DDR_ARE_DEPENDENT (res) = NULL_TREE;
3420 1.1 mrg DDR_SUBSCRIPTS (res).create (full_seq.length);
3421 1.1 mrg DDR_LOOP_NEST (res) = loop_nest;
3422 1.1 mrg DDR_SELF_REFERENCE (res) = false;
3423 1.1 mrg
3424 1.1 mrg for (i = 0; i < full_seq.length; ++i)
3425 1.1 mrg {
3426 1.1 mrg struct subscript *subscript;
3427 1.1 mrg
3428 1.1 mrg subscript = XNEW (struct subscript);
3429 1.1 mrg SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i];
3430 1.1 mrg SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i];
3431 1.1 mrg SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
3432 1.1 mrg SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
3433 1.1 mrg SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3434 1.1 mrg SUB_DISTANCE (subscript) = chrec_dont_know;
3435 1.1 mrg DDR_SUBSCRIPTS (res).safe_push (subscript);
3436 1.1 mrg }
3437 1.1 mrg
3438 1.1 mrg return res;
3439 1.1 mrg }
3440 1.1 mrg
3441 1.1 mrg /* Initialize a data dependence relation between data accesses A and
3442 1.1 mrg B. NB_LOOPS is the number of loops surrounding the references: the
3443 1.1 mrg size of the classic distance/direction vectors. */
3444 1.1 mrg
3445 1.1 mrg struct data_dependence_relation *
3446 1.1 mrg initialize_data_dependence_relation (struct data_reference *a,
3447 1.1 mrg struct data_reference *b,
3448 1.1 mrg vec<loop_p> loop_nest)
3449 1.1 mrg {
3450 1.1 mrg data_dependence_relation *res = XCNEW (struct data_dependence_relation);
3451 1.1 mrg DDR_A (res) = a;
3452 1.1 mrg DDR_B (res) = b;
3453 1.1 mrg DDR_LOOP_NEST (res).create (0);
3454 1.1 mrg DDR_SUBSCRIPTS (res).create (0);
3455 1.1 mrg DDR_DIR_VECTS (res).create (0);
3456 1.1 mrg DDR_DIST_VECTS (res).create (0);
3457 1.1 mrg
3458 1.1 mrg if (a == NULL || b == NULL)
3459 1.1 mrg {
3460 1.1 mrg DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3461 1.1 mrg return res;
3462 1.1 mrg }
3463 1.1 mrg
3464 1.1 mrg /* If the data references do not alias, then they are independent. */
3465 1.1 mrg if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
3466 1.1 mrg {
3467 1.1 mrg DDR_ARE_DEPENDENT (res) = chrec_known;
3468 1.1 mrg return res;
3469 1.1 mrg }
3470 1.1 mrg
3471 1.1 mrg return initialize_data_dependence_relation (res, loop_nest, false);
3472 1.1 mrg }
3473 1.1 mrg
3474 1.1 mrg
3475 1.1 mrg /* Frees memory used by the conflict function F. */
3476 1.1 mrg
3477 1.1 mrg static void
3478 1.1 mrg free_conflict_function (conflict_function *f)
3479 1.1 mrg {
3480 1.1 mrg unsigned i;
3481 1.1 mrg
3482 1.1 mrg if (CF_NONTRIVIAL_P (f))
3483 1.1 mrg {
3484 1.1 mrg for (i = 0; i < f->n; i++)
3485 1.1 mrg affine_fn_free (f->fns[i]);
3486 1.1 mrg }
3487 1.1 mrg free (f);
3488 1.1 mrg }
3489 1.1 mrg
3490 1.1 mrg /* Frees memory used by SUBSCRIPTS. */
3491 1.1 mrg
3492 1.1 mrg static void
3493 1.1 mrg free_subscripts (vec<subscript_p> subscripts)
3494 1.1 mrg {
3495 1.1 mrg for (subscript_p s : subscripts)
3496 1.1 mrg {
3497 1.1 mrg free_conflict_function (s->conflicting_iterations_in_a);
3498 1.1 mrg free_conflict_function (s->conflicting_iterations_in_b);
3499 1.1 mrg free (s);
3500 1.1 mrg }
3501 1.1 mrg subscripts.release ();
3502 1.1 mrg }
3503 1.1 mrg
3504 1.1 mrg /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
3505 1.1 mrg description. */
3506 1.1 mrg
3507 1.1 mrg static inline void
3508 1.1 mrg finalize_ddr_dependent (struct data_dependence_relation *ddr,
3509 1.1 mrg tree chrec)
3510 1.1 mrg {
3511 1.1 mrg DDR_ARE_DEPENDENT (ddr) = chrec;
3512 1.1 mrg free_subscripts (DDR_SUBSCRIPTS (ddr));
3513 1.1 mrg DDR_SUBSCRIPTS (ddr).create (0);
3514 1.1 mrg }
3515 1.1 mrg
3516 1.1 mrg /* The dependence relation DDR cannot be represented by a distance
3517 1.1 mrg vector. */
3518 1.1 mrg
3519 1.1 mrg static inline void
3520 1.1 mrg non_affine_dependence_relation (struct data_dependence_relation *ddr)
3521 1.1 mrg {
3522 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
3523 1.1 mrg fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
3524 1.1 mrg
3525 1.1 mrg DDR_AFFINE_P (ddr) = false;
3526 1.1 mrg }
3527 1.1 mrg
3528 1.1 mrg
3529 1.1 mrg
3531 1.1 mrg /* This section contains the classic Banerjee tests. */
3532 1.1 mrg
3533 1.1 mrg /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
3534 1.1 mrg variables, i.e., if the ZIV (Zero Index Variable) test is true. */
3535 1.1 mrg
3536 1.1 mrg static inline bool
3537 1.1 mrg ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3538 1.1 mrg {
3539 1.1 mrg return (evolution_function_is_constant_p (chrec_a)
3540 1.1 mrg && evolution_function_is_constant_p (chrec_b));
3541 1.1 mrg }
3542 1.1 mrg
3543 1.1 mrg /* Returns true iff CHREC_A and CHREC_B are dependent on an index
3544 1.1 mrg variable, i.e., if the SIV (Single Index Variable) test is true. */
3545 1.1 mrg
3546 1.1 mrg static bool
3547 1.1 mrg siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3548 1.1 mrg {
3549 1.1 mrg if ((evolution_function_is_constant_p (chrec_a)
3550 1.1 mrg && evolution_function_is_univariate_p (chrec_b))
3551 1.1 mrg || (evolution_function_is_constant_p (chrec_b)
3552 1.1 mrg && evolution_function_is_univariate_p (chrec_a)))
3553 1.1 mrg return true;
3554 1.1 mrg
3555 1.1 mrg if (evolution_function_is_univariate_p (chrec_a)
3556 1.1 mrg && evolution_function_is_univariate_p (chrec_b))
3557 1.1 mrg {
3558 1.1 mrg switch (TREE_CODE (chrec_a))
3559 1.1 mrg {
3560 1.1 mrg case POLYNOMIAL_CHREC:
3561 1.1 mrg switch (TREE_CODE (chrec_b))
3562 1.1 mrg {
3563 1.1 mrg case POLYNOMIAL_CHREC:
3564 1.1 mrg if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
3565 1.1 mrg return false;
3566 1.1 mrg /* FALLTHRU */
3567 1.1 mrg
3568 1.1 mrg default:
3569 1.1 mrg return true;
3570 1.1 mrg }
3571 1.1 mrg
3572 1.1 mrg default:
3573 1.1 mrg return true;
3574 1.1 mrg }
3575 1.1 mrg }
3576 1.1 mrg
3577 1.1 mrg return false;
3578 1.1 mrg }
3579 1.1 mrg
3580 1.1 mrg /* Creates a conflict function with N dimensions. The affine functions
3581 1.1 mrg in each dimension follow. */
3582 1.1 mrg
3583 1.1 mrg static conflict_function *
3584 1.1 mrg conflict_fn (unsigned n, ...)
3585 1.1 mrg {
3586 1.1 mrg unsigned i;
3587 1.1 mrg conflict_function *ret = XCNEW (conflict_function);
3588 1.1 mrg va_list ap;
3589 1.1 mrg
3590 1.1 mrg gcc_assert (n > 0 && n <= MAX_DIM);
3591 1.1 mrg va_start (ap, n);
3592 1.1 mrg
3593 1.1 mrg ret->n = n;
3594 1.1 mrg for (i = 0; i < n; i++)
3595 1.1 mrg ret->fns[i] = va_arg (ap, affine_fn);
3596 1.1 mrg va_end (ap);
3597 1.1 mrg
3598 1.1 mrg return ret;
3599 1.1 mrg }
3600 1.1 mrg
3601 1.1 mrg /* Returns constant affine function with value CST. */
3602 1.1 mrg
3603 1.1 mrg static affine_fn
3604 1.1 mrg affine_fn_cst (tree cst)
3605 1.1 mrg {
3606 1.1 mrg affine_fn fn;
3607 1.1 mrg fn.create (1);
3608 1.1 mrg fn.quick_push (cst);
3609 1.1 mrg return fn;
3610 1.1 mrg }
3611 1.1 mrg
3612 1.1 mrg /* Returns affine function with single variable, CST + COEF * x_DIM. */
3613 1.1 mrg
3614 1.1 mrg static affine_fn
3615 1.1 mrg affine_fn_univar (tree cst, unsigned dim, tree coef)
3616 1.1 mrg {
3617 1.1 mrg affine_fn fn;
3618 1.1 mrg fn.create (dim + 1);
3619 1.1 mrg unsigned i;
3620 1.1 mrg
3621 1.1 mrg gcc_assert (dim > 0);
3622 1.1 mrg fn.quick_push (cst);
3623 1.1 mrg for (i = 1; i < dim; i++)
3624 1.1 mrg fn.quick_push (integer_zero_node);
3625 1.1 mrg fn.quick_push (coef);
3626 1.1 mrg return fn;
3627 1.1 mrg }
3628 1.1 mrg
3629 1.1 mrg /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
3630 1.1 mrg *OVERLAPS_B are initialized to the functions that describe the
3631 1.1 mrg relation between the elements accessed twice by CHREC_A and
3632 1.1 mrg CHREC_B. For k >= 0, the following property is verified:
3633 1.1 mrg
3634 1.1 mrg CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3635 1.1 mrg
3636 1.1 mrg static void
3637 1.1 mrg analyze_ziv_subscript (tree chrec_a,
3638 1.1 mrg tree chrec_b,
3639 1.1 mrg conflict_function **overlaps_a,
3640 1.1 mrg conflict_function **overlaps_b,
3641 1.1 mrg tree *last_conflicts)
3642 1.1 mrg {
3643 1.1 mrg tree type, difference;
3644 1.1 mrg dependence_stats.num_ziv++;
3645 1.1 mrg
3646 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
3647 1.1 mrg fprintf (dump_file, "(analyze_ziv_subscript \n");
3648 1.1 mrg
3649 1.1 mrg type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3650 1.1 mrg chrec_a = chrec_convert (type, chrec_a, NULL);
3651 1.1 mrg chrec_b = chrec_convert (type, chrec_b, NULL);
3652 1.1 mrg difference = chrec_fold_minus (type, chrec_a, chrec_b);
3653 1.1 mrg
3654 1.1 mrg switch (TREE_CODE (difference))
3655 1.1 mrg {
3656 1.1 mrg case INTEGER_CST:
3657 1.1 mrg if (integer_zerop (difference))
3658 1.1 mrg {
3659 1.1 mrg /* The difference is equal to zero: the accessed index
3660 1.1 mrg overlaps for each iteration in the loop. */
3661 1.1 mrg *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3662 1.1 mrg *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3663 1.1 mrg *last_conflicts = chrec_dont_know;
3664 1.1 mrg dependence_stats.num_ziv_dependent++;
3665 1.1 mrg }
3666 1.1 mrg else
3667 1.1 mrg {
3668 1.1 mrg /* The accesses do not overlap. */
3669 1.1 mrg *overlaps_a = conflict_fn_no_dependence ();
3670 1.1 mrg *overlaps_b = conflict_fn_no_dependence ();
3671 1.1 mrg *last_conflicts = integer_zero_node;
3672 1.1 mrg dependence_stats.num_ziv_independent++;
3673 1.1 mrg }
3674 1.1 mrg break;
3675 1.1 mrg
3676 1.1 mrg default:
3677 1.1 mrg /* We're not sure whether the indexes overlap. For the moment,
3678 1.1 mrg conservatively answer "don't know". */
3679 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
3680 1.1 mrg fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
3681 1.1 mrg
3682 1.1 mrg *overlaps_a = conflict_fn_not_known ();
3683 1.1 mrg *overlaps_b = conflict_fn_not_known ();
3684 1.1 mrg *last_conflicts = chrec_dont_know;
3685 1.1 mrg dependence_stats.num_ziv_unimplemented++;
3686 1.1 mrg break;
3687 1.1 mrg }
3688 1.1 mrg
3689 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
3690 1.1 mrg fprintf (dump_file, ")\n");
3691 1.1 mrg }
3692 1.1 mrg
3693 1.1 mrg /* Similar to max_stmt_executions_int, but returns the bound as a tree,
3694 1.1 mrg and only if it fits to the int type. If this is not the case, or the
3695 1.1 mrg bound on the number of iterations of LOOP could not be derived, returns
3696 1.1 mrg chrec_dont_know. */
3697 1.1 mrg
3698 1.1 mrg static tree
3699 1.1 mrg max_stmt_executions_tree (class loop *loop)
3700 1.1 mrg {
3701 1.1 mrg widest_int nit;
3702 1.1 mrg
3703 1.1 mrg if (!max_stmt_executions (loop, &nit))
3704 1.1 mrg return chrec_dont_know;
3705 1.1 mrg
3706 1.1 mrg if (!wi::fits_to_tree_p (nit, unsigned_type_node))
3707 1.1 mrg return chrec_dont_know;
3708 1.1 mrg
3709 1.1 mrg return wide_int_to_tree (unsigned_type_node, nit);
3710 1.1 mrg }
3711 1.1 mrg
3712 1.1 mrg /* Determine whether the CHREC is always positive/negative. If the expression
3713 1.1 mrg cannot be statically analyzed, return false, otherwise set the answer into
3714 1.1 mrg VALUE. */
3715 1.1 mrg
3716 1.1 mrg static bool
3717 1.1 mrg chrec_is_positive (tree chrec, bool *value)
3718 1.1 mrg {
3719 1.1 mrg bool value0, value1, value2;
3720 1.1 mrg tree end_value, nb_iter;
3721 1.1 mrg
3722 1.1 mrg switch (TREE_CODE (chrec))
3723 1.1 mrg {
3724 1.1 mrg case POLYNOMIAL_CHREC:
3725 1.1 mrg if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
3726 1.1 mrg || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
3727 1.1 mrg return false;
3728 1.1 mrg
3729 1.1 mrg /* FIXME -- overflows. */
3730 1.1 mrg if (value0 == value1)
3731 1.1 mrg {
3732 1.1 mrg *value = value0;
3733 1.1 mrg return true;
3734 1.1 mrg }
3735 1.1 mrg
3736 1.1 mrg /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
3737 1.1 mrg and the proof consists in showing that the sign never
3738 1.1 mrg changes during the execution of the loop, from 0 to
3739 1.1 mrg loop->nb_iterations. */
3740 1.1 mrg if (!evolution_function_is_affine_p (chrec))
3741 1.1 mrg return false;
3742 1.1 mrg
3743 1.1 mrg nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
3744 1.1 mrg if (chrec_contains_undetermined (nb_iter))
3745 1.1 mrg return false;
3746 1.1 mrg
3747 1.1 mrg #if 0
3748 1.1 mrg /* TODO -- If the test is after the exit, we may decrease the number of
3749 1.1 mrg iterations by one. */
3750 1.1 mrg if (after_exit)
3751 1.1 mrg nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
3752 1.1 mrg #endif
3753 1.1 mrg
3754 1.1 mrg end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
3755 1.1 mrg
3756 1.1 mrg if (!chrec_is_positive (end_value, &value2))
3757 1.1 mrg return false;
3758 1.1 mrg
3759 1.1 mrg *value = value0;
3760 1.1 mrg return value0 == value1;
3761 1.1 mrg
3762 1.1 mrg case INTEGER_CST:
3763 1.1 mrg switch (tree_int_cst_sgn (chrec))
3764 1.1 mrg {
3765 1.1 mrg case -1:
3766 1.1 mrg *value = false;
3767 1.1 mrg break;
3768 1.1 mrg case 1:
3769 1.1 mrg *value = true;
3770 1.1 mrg break;
3771 1.1 mrg default:
3772 1.1 mrg return false;
3773 1.1 mrg }
3774 1.1 mrg return true;
3775 1.1 mrg
3776 1.1 mrg default:
3777 1.1 mrg return false;
3778 1.1 mrg }
3779 1.1 mrg }
3780 1.1 mrg
3781 1.1 mrg
3782 1.1 mrg /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
3783 1.1 mrg constant, and CHREC_B is an affine function. *OVERLAPS_A and
3784 1.1 mrg *OVERLAPS_B are initialized to the functions that describe the
3785 1.1 mrg relation between the elements accessed twice by CHREC_A and
3786 1.1 mrg CHREC_B. For k >= 0, the following property is verified:
3787 1.1 mrg
3788 1.1 mrg CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3789 1.1 mrg
3790 1.1 mrg static void
3791 1.1 mrg analyze_siv_subscript_cst_affine (tree chrec_a,
3792 1.1 mrg tree chrec_b,
3793 1.1 mrg conflict_function **overlaps_a,
3794 1.1 mrg conflict_function **overlaps_b,
3795 1.1 mrg tree *last_conflicts)
3796 1.1 mrg {
3797 1.1 mrg bool value0, value1, value2;
3798 1.1 mrg tree type, difference, tmp;
3799 1.1 mrg
3800 1.1 mrg type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3801 1.1 mrg chrec_a = chrec_convert (type, chrec_a, NULL);
3802 1.1 mrg chrec_b = chrec_convert (type, chrec_b, NULL);
3803 1.1 mrg difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
3804 1.1 mrg
3805 1.1 mrg /* Special case overlap in the first iteration. */
3806 1.1 mrg if (integer_zerop (difference))
3807 1.1 mrg {
3808 1.1 mrg *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3809 1.1 mrg *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3810 1.1 mrg *last_conflicts = integer_one_node;
3811 1.1 mrg return;
3812 1.1 mrg }
3813 1.1 mrg
3814 1.1 mrg if (!chrec_is_positive (initial_condition (difference), &value0))
3815 1.1 mrg {
3816 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
3817 1.1 mrg fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3818 1.1 mrg
3819 1.1 mrg dependence_stats.num_siv_unimplemented++;
3820 1.1 mrg *overlaps_a = conflict_fn_not_known ();
3821 1.1 mrg *overlaps_b = conflict_fn_not_known ();
3822 1.1 mrg *last_conflicts = chrec_dont_know;
3823 1.1 mrg return;
3824 1.1 mrg }
3825 1.1 mrg else
3826 1.1 mrg {
3827 1.1 mrg if (value0 == false)
3828 1.1 mrg {
3829 1.1 mrg if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3830 1.1 mrg || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3831 1.1 mrg {
3832 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
3833 1.1 mrg fprintf (dump_file, "siv test failed: chrec not positive.\n");
3834 1.1 mrg
3835 1.1 mrg *overlaps_a = conflict_fn_not_known ();
3836 1.1 mrg *overlaps_b = conflict_fn_not_known ();
3837 1.1 mrg *last_conflicts = chrec_dont_know;
3838 1.1 mrg dependence_stats.num_siv_unimplemented++;
3839 1.1 mrg return;
3840 1.1 mrg }
3841 1.1 mrg else
3842 1.1 mrg {
3843 1.1 mrg if (value1 == true)
3844 1.1 mrg {
3845 1.1 mrg /* Example:
3846 1.1 mrg chrec_a = 12
3847 1.1 mrg chrec_b = {10, +, 1}
3848 1.1 mrg */
3849 1.1 mrg
3850 1.1 mrg if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3851 1.1 mrg {
3852 1.1 mrg HOST_WIDE_INT numiter;
3853 1.1 mrg class loop *loop = get_chrec_loop (chrec_b);
3854 1.1 mrg
3855 1.1 mrg *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3856 1.1 mrg tmp = fold_build2 (EXACT_DIV_EXPR, type,
3857 1.1 mrg fold_build1 (ABS_EXPR, type, difference),
3858 1.1 mrg CHREC_RIGHT (chrec_b));
3859 1.1 mrg *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3860 1.1 mrg *last_conflicts = integer_one_node;
3861 1.1 mrg
3862 1.1 mrg
3863 1.1 mrg /* Perform weak-zero siv test to see if overlap is
3864 1.1 mrg outside the loop bounds. */
3865 1.1 mrg numiter = max_stmt_executions_int (loop);
3866 1.1 mrg
3867 1.1 mrg if (numiter >= 0
3868 1.1 mrg && compare_tree_int (tmp, numiter) > 0)
3869 1.1 mrg {
3870 1.1 mrg free_conflict_function (*overlaps_a);
3871 1.1 mrg free_conflict_function (*overlaps_b);
3872 1.1 mrg *overlaps_a = conflict_fn_no_dependence ();
3873 1.1 mrg *overlaps_b = conflict_fn_no_dependence ();
3874 1.1 mrg *last_conflicts = integer_zero_node;
3875 1.1 mrg dependence_stats.num_siv_independent++;
3876 1.1 mrg return;
3877 1.1 mrg }
3878 1.1 mrg dependence_stats.num_siv_dependent++;
3879 1.1 mrg return;
3880 1.1 mrg }
3881 1.1 mrg
3882 1.1 mrg /* When the step does not divide the difference, there are
3883 1.1 mrg no overlaps. */
3884 1.1 mrg else
3885 1.1 mrg {
3886 1.1 mrg *overlaps_a = conflict_fn_no_dependence ();
3887 1.1 mrg *overlaps_b = conflict_fn_no_dependence ();
3888 1.1 mrg *last_conflicts = integer_zero_node;
3889 1.1 mrg dependence_stats.num_siv_independent++;
3890 1.1 mrg return;
3891 1.1 mrg }
3892 1.1 mrg }
3893 1.1 mrg
3894 1.1 mrg else
3895 1.1 mrg {
3896 1.1 mrg /* Example:
3897 1.1 mrg chrec_a = 12
3898 1.1 mrg chrec_b = {10, +, -1}
3899 1.1 mrg
3900 1.1 mrg In this case, chrec_a will not overlap with chrec_b. */
3901 1.1 mrg *overlaps_a = conflict_fn_no_dependence ();
3902 1.1 mrg *overlaps_b = conflict_fn_no_dependence ();
3903 1.1 mrg *last_conflicts = integer_zero_node;
3904 1.1 mrg dependence_stats.num_siv_independent++;
3905 1.1 mrg return;
3906 1.1 mrg }
3907 1.1 mrg }
3908 1.1 mrg }
3909 1.1 mrg else
3910 1.1 mrg {
3911 1.1 mrg if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3912 1.1 mrg || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3913 1.1 mrg {
3914 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
3915 1.1 mrg fprintf (dump_file, "siv test failed: chrec not positive.\n");
3916 1.1 mrg
3917 1.1 mrg *overlaps_a = conflict_fn_not_known ();
3918 1.1 mrg *overlaps_b = conflict_fn_not_known ();
3919 1.1 mrg *last_conflicts = chrec_dont_know;
3920 1.1 mrg dependence_stats.num_siv_unimplemented++;
3921 1.1 mrg return;
3922 1.1 mrg }
3923 1.1 mrg else
3924 1.1 mrg {
3925 1.1 mrg if (value2 == false)
3926 1.1 mrg {
3927 1.1 mrg /* Example:
3928 1.1 mrg chrec_a = 3
3929 1.1 mrg chrec_b = {10, +, -1}
3930 1.1 mrg */
3931 1.1 mrg if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3932 1.1 mrg {
3933 1.1 mrg HOST_WIDE_INT numiter;
3934 1.1 mrg class loop *loop = get_chrec_loop (chrec_b);
3935 1.1 mrg
3936 1.1 mrg *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3937 1.1 mrg tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3938 1.1 mrg CHREC_RIGHT (chrec_b));
3939 1.1 mrg *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3940 1.1 mrg *last_conflicts = integer_one_node;
3941 1.1 mrg
3942 1.1 mrg /* Perform weak-zero siv test to see if overlap is
3943 1.1 mrg outside the loop bounds. */
3944 1.1 mrg numiter = max_stmt_executions_int (loop);
3945 1.1 mrg
3946 1.1 mrg if (numiter >= 0
3947 1.1 mrg && compare_tree_int (tmp, numiter) > 0)
3948 1.1 mrg {
3949 1.1 mrg free_conflict_function (*overlaps_a);
3950 1.1 mrg free_conflict_function (*overlaps_b);
3951 1.1 mrg *overlaps_a = conflict_fn_no_dependence ();
3952 1.1 mrg *overlaps_b = conflict_fn_no_dependence ();
3953 1.1 mrg *last_conflicts = integer_zero_node;
3954 1.1 mrg dependence_stats.num_siv_independent++;
3955 1.1 mrg return;
3956 1.1 mrg }
3957 1.1 mrg dependence_stats.num_siv_dependent++;
3958 1.1 mrg return;
3959 1.1 mrg }
3960 1.1 mrg
3961 1.1 mrg /* When the step does not divide the difference, there
3962 1.1 mrg are no overlaps. */
3963 1.1 mrg else
3964 1.1 mrg {
3965 1.1 mrg *overlaps_a = conflict_fn_no_dependence ();
3966 1.1 mrg *overlaps_b = conflict_fn_no_dependence ();
3967 1.1 mrg *last_conflicts = integer_zero_node;
3968 1.1 mrg dependence_stats.num_siv_independent++;
3969 1.1 mrg return;
3970 1.1 mrg }
3971 1.1 mrg }
3972 1.1 mrg else
3973 1.1 mrg {
3974 1.1 mrg /* Example:
3975 1.1 mrg chrec_a = 3
3976 1.1 mrg chrec_b = {4, +, 1}
3977 1.1 mrg
3978 1.1 mrg In this case, chrec_a will not overlap with chrec_b. */
3979 1.1 mrg *overlaps_a = conflict_fn_no_dependence ();
3980 1.1 mrg *overlaps_b = conflict_fn_no_dependence ();
3981 1.1 mrg *last_conflicts = integer_zero_node;
3982 1.1 mrg dependence_stats.num_siv_independent++;
3983 1.1 mrg return;
3984 1.1 mrg }
3985 1.1 mrg }
3986 1.1 mrg }
3987 1.1 mrg }
3988 1.1 mrg }
3989 1.1 mrg
/* Helper recursive function for initializing the matrix A.  Returns
   the initial value of CHREC.

   A is a column vector: row INDEX receives MULT times the (constant)
   step of the outermost evolution of CHREC, row INDEX + 1 the step of
   the next inner evolution, and so on.  The callers pass MULT = 1 for
   one access function and MULT = -1 for the other, so that the rows
   encode the two sides of the dependence equation.  Returns
   chrec_dont_know when a step or an initial value cannot be
   represented as a HOST_WIDE_INT.  */

static tree
initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
{
  gcc_assert (chrec);

  switch (TREE_CODE (chrec))
    {
    case POLYNOMIAL_CHREC:
      HOST_WIDE_INT chrec_right;
      if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
	return chrec_dont_know;
      chrec_right = int_cst_value (CHREC_RIGHT (chrec));
      /* We want to be able to negate without overflow.  */
      if (chrec_right == HOST_WIDE_INT_MIN)
	return chrec_dont_know;
      A[index][0] = mult * chrec_right;
      /* Recurse on the initial value of the evolution, filling the
	 next row.  */
      return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);

    case PLUS_EXPR:
    case MULT_EXPR:
    case MINUS_EXPR:
      {
	/* Fold the two operands' initial values with the same
	   operation.  */
	tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);

	return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
      }

    CASE_CONVERT:
      {
	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	return chrec_convert (chrec_type (chrec), op, NULL);
      }

    case BIT_NOT_EXPR:
      {
	/* Handle ~X as -1 - X.  */
	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
			      build_int_cst (TREE_TYPE (chrec), -1), op);
      }

    case INTEGER_CST:
      /* Base case: the initial value itself, if representable.  */
      return cst_and_fits_in_hwi (chrec) ? chrec : chrec_dont_know;

    default:
      gcc_unreachable ();
      return NULL_TREE;
    }
}
4043 1.1 mrg
/* NOTE: this is plain C division, which truncates towards zero; it
   coincides with a mathematical floor only when the quotient is
   nonnegative (i.e. when both operands have the same sign).  */
#define FLOOR_DIV(x,y) ((x) / (y))
4045 1.1 mrg
4046 1.1 mrg /* Solves the special case of the Diophantine equation:
4047 1.1 mrg | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
4048 1.1 mrg
4049 1.1 mrg Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
4050 1.1 mrg number of iterations that loops X and Y run. The overlaps will be
4051 1.1 mrg constructed as evolutions in dimension DIM. */
4052 1.1 mrg
4053 1.1 mrg static void
4054 1.1 mrg compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
4055 1.1 mrg HOST_WIDE_INT step_a,
4056 1.1 mrg HOST_WIDE_INT step_b,
4057 1.1 mrg affine_fn *overlaps_a,
4058 1.1 mrg affine_fn *overlaps_b,
4059 1.1 mrg tree *last_conflicts, int dim)
4060 1.1 mrg {
4061 1.1 mrg if (((step_a > 0 && step_b > 0)
4062 1.1 mrg || (step_a < 0 && step_b < 0)))
4063 1.1 mrg {
4064 1.1 mrg HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
4065 1.1 mrg HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
4066 1.1 mrg
4067 1.1 mrg gcd_steps_a_b = gcd (step_a, step_b);
4068 1.1 mrg step_overlaps_a = step_b / gcd_steps_a_b;
4069 1.1 mrg step_overlaps_b = step_a / gcd_steps_a_b;
4070 1.1 mrg
4071 1.1 mrg if (niter > 0)
4072 1.1 mrg {
4073 1.1 mrg tau2 = FLOOR_DIV (niter, step_overlaps_a);
4074 1.1 mrg tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
4075 1.1 mrg last_conflict = tau2;
4076 1.1 mrg *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
4077 1.1 mrg }
4078 1.1 mrg else
4079 1.1 mrg *last_conflicts = chrec_dont_know;
4080 1.1 mrg
4081 1.1 mrg *overlaps_a = affine_fn_univar (integer_zero_node, dim,
4082 1.1 mrg build_int_cst (NULL_TREE,
4083 1.1 mrg step_overlaps_a));
4084 1.1 mrg *overlaps_b = affine_fn_univar (integer_zero_node, dim,
4085 1.1 mrg build_int_cst (NULL_TREE,
4086 1.1 mrg step_overlaps_b));
4087 1.1 mrg }
4088 1.1 mrg
4089 1.1 mrg else
4090 1.1 mrg {
4091 1.1 mrg *overlaps_a = affine_fn_cst (integer_zero_node);
4092 1.1 mrg *overlaps_b = affine_fn_cst (integer_zero_node);
4093 1.1 mrg *last_conflicts = integer_zero_node;
4094 1.1 mrg }
4095 1.1 mrg }
4096 1.1 mrg
/* Solves the special case of a Diophantine equation where CHREC_A is
   an affine bivariate function, and CHREC_B is an affine univariate
   function.  For example,

   | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z

   has the following overlapping functions:

   | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
   | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
   | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v

   FORNOW: This is a specialized implementation for a case occurring in
   a common benchmark.  Implement the general algorithm.  */

static void
compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
				      conflict_function **overlaps_a,
				      conflict_function **overlaps_b,
				      tree *last_conflicts)
{
  bool xz_p, yz_p, xyz_p;
  HOST_WIDE_INT step_x, step_y, step_z;
  HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
  affine_fn overlaps_a_xz, overlaps_b_xz;
  affine_fn overlaps_a_yz, overlaps_b_yz;
  affine_fn overlaps_a_xyz, overlaps_b_xyz;
  affine_fn ova1, ova2, ovb;
  tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;

  /* X is the outer and Y the inner dimension of the bivariate CHREC_A;
     Z is the (single) dimension of CHREC_B.  */
  step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
  step_y = int_cst_value (CHREC_RIGHT (chrec_a));
  step_z = int_cst_value (CHREC_RIGHT (chrec_b));

  niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
  niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
  niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));

  /* Without an iteration count for each loop we cannot bound the
     solutions; give up.  */
  if (niter_x < 0 || niter_y < 0 || niter_z < 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }

  /* Solve three univariate subproblems: X against Z, Y against Z, and
     the combined step X+Y against Z.  */
  niter = MIN (niter_x, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
					   &overlaps_a_xz,
					   &overlaps_b_xz,
					   &last_conflicts_xz, 1);
  niter = MIN (niter_y, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
					   &overlaps_a_yz,
					   &overlaps_b_yz,
					   &last_conflicts_yz, 2);
  niter = MIN (niter_x, niter_z);
  niter = MIN (niter_y, niter);
  compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
					   &overlaps_a_xyz,
					   &overlaps_b_xyz,
					   &last_conflicts_xyz, 3);

  /* A subproblem contributes only when it has at least one conflict.  */
  xz_p = !integer_zerop (last_conflicts_xz);
  yz_p = !integer_zerop (last_conflicts_yz);
  xyz_p = !integer_zerop (last_conflicts_xyz);

  if (xz_p || yz_p || xyz_p)
    {
      /* Accumulate the contributing overlap functions into OVA1/OVA2
	 (the two dimensions of CHREC_A) and OVB; each affine_fn_plus
	 allocates a new function, so the superseded ones are freed.
	 Note *LAST_CONFLICTS keeps the bound of the last contributing
	 subproblem.  */
      ova1 = affine_fn_cst (integer_zero_node);
      ova2 = affine_fn_cst (integer_zero_node);
      ovb = affine_fn_cst (integer_zero_node);
      if (xz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_xz;
	}
      if (yz_p)
	{
	  affine_fn t0 = ova2;
	  affine_fn t2 = ovb;

	  ova2 = affine_fn_plus (ova2, overlaps_a_yz);
	  ovb = affine_fn_plus (ovb, overlaps_b_yz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_yz;
	}
      if (xyz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ova2;
	  affine_fn t4 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
	  ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xyz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  affine_fn_free (t4);
	  *last_conflicts = last_conflicts_xyz;
	}
      *overlaps_a = conflict_fn (2, ova1, ova2);
      *overlaps_b = conflict_fn (1, ovb);
    }
  else
    {
      /* No subproblem has a conflict: independent accesses.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_zero_node;
    }

  /* Release the per-subproblem temporaries.  */
  affine_fn_free (overlaps_a_xz);
  affine_fn_free (overlaps_b_xz);
  affine_fn_free (overlaps_a_yz);
  affine_fn_free (overlaps_b_yz);
  affine_fn_free (overlaps_a_xyz);
  affine_fn_free (overlaps_b_xyz);
}
4225 1.1 mrg
4226 1.1 mrg /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
4227 1.1 mrg
4228 1.1 mrg static void
4229 1.1 mrg lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
4230 1.1 mrg int size)
4231 1.1 mrg {
4232 1.1 mrg memcpy (vec2, vec1, size * sizeof (*vec1));
4233 1.1 mrg }
4234 1.1 mrg
4235 1.1 mrg /* Copy the elements of M x N matrix MAT1 to MAT2. */
4236 1.1 mrg
4237 1.1 mrg static void
4238 1.1 mrg lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
4239 1.1 mrg int m, int n)
4240 1.1 mrg {
4241 1.1 mrg int i;
4242 1.1 mrg
4243 1.1 mrg for (i = 0; i < m; i++)
4244 1.1 mrg lambda_vector_copy (mat1[i], mat2[i], n);
4245 1.1 mrg }
4246 1.1 mrg
4247 1.1 mrg /* Store the N x N identity matrix in MAT. */
4248 1.1 mrg
4249 1.1 mrg static void
4250 1.1 mrg lambda_matrix_id (lambda_matrix mat, int size)
4251 1.1 mrg {
4252 1.1 mrg int i, j;
4253 1.1 mrg
4254 1.1 mrg for (i = 0; i < size; i++)
4255 1.1 mrg for (j = 0; j < size; j++)
4256 1.1 mrg mat[i][j] = (i == j) ? 1 : 0;
4257 1.1 mrg }
4258 1.1 mrg
4259 1.1 mrg /* Return the index of the first nonzero element of vector VEC1 between
4260 1.1 mrg START and N. We must have START <= N.
4261 1.1 mrg Returns N if VEC1 is the zero vector. */
4262 1.1 mrg
4263 1.1 mrg static int
4264 1.1 mrg lambda_vector_first_nz (lambda_vector vec1, int n, int start)
4265 1.1 mrg {
4266 1.1 mrg int j = start;
4267 1.1 mrg while (j < n && vec1[j] == 0)
4268 1.1 mrg j++;
4269 1.1 mrg return j;
4270 1.1 mrg }
4271 1.1 mrg
/* Add a multiple of row R1 of matrix MAT with N columns to row R2:
   R2 = R2 + CONST1 * R1.  Returns true on success; returns false when
   a multiplication or addition overflows a lambda_int (MAT may then be
   left partially updated).  */

static bool
lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
		       lambda_int const1)
{
  int i;

  /* Adding zero times R1 is a no-op.  */
  if (const1 == 0)
    return true;

  for (i = 0; i < n; i++)
    {
      bool ovf;
      lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
      if (ovf)
	return false;
      lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
      /* Also reject HOST_WIDE_INT_MIN, so the resulting entry can
	 later be negated without overflow (the same guard is used in
	 initialize_matrix_A).  */
      if (ovf || tem2 == HOST_WIDE_INT_MIN)
	return false;
      mat[r2][i] = tem2;
    }

  return true;
}
4298 1.1 mrg
4299 1.1 mrg /* Multiply vector VEC1 of length SIZE by a constant CONST1,
4300 1.1 mrg and store the result in VEC2. */
4301 1.1 mrg
4302 1.1 mrg static void
4303 1.1 mrg lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
4304 1.1 mrg int size, lambda_int const1)
4305 1.1 mrg {
4306 1.1 mrg int i;
4307 1.1 mrg
4308 1.1 mrg if (const1 == 0)
4309 1.1 mrg lambda_vector_clear (vec2, size);
4310 1.1 mrg else
4311 1.1 mrg for (i = 0; i < size; i++)
4312 1.1 mrg vec2[i] = const1 * vec1[i];
4313 1.1 mrg }
4314 1.1 mrg
4315 1.1 mrg /* Negate vector VEC1 with length SIZE and store it in VEC2. */
4316 1.1 mrg
4317 1.1 mrg static void
4318 1.1 mrg lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
4319 1.1 mrg int size)
4320 1.1 mrg {
4321 1.1 mrg lambda_vector_mult_const (vec1, vec2, size, -1);
4322 1.1 mrg }
4323 1.1 mrg
4324 1.1 mrg /* Negate row R1 of matrix MAT which has N columns. */
4325 1.1 mrg
4326 1.1 mrg static void
4327 1.1 mrg lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
4328 1.1 mrg {
4329 1.1 mrg lambda_vector_negate (mat[r1], mat[r1], n);
4330 1.1 mrg }
4331 1.1 mrg
4332 1.1 mrg /* Return true if two vectors are equal. */
4333 1.1 mrg
4334 1.1 mrg static bool
4335 1.1 mrg lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
4336 1.1 mrg {
4337 1.1 mrg int i;
4338 1.1 mrg for (i = 0; i < size; i++)
4339 1.1 mrg if (vec1[i] != vec2[i])
4340 1.1 mrg return false;
4341 1.1 mrg return true;
4342 1.1 mrg }
4343 1.1 mrg
/* Given an M x N integer matrix A, this function determines an M x
   M unimodular matrix U, and an M x N echelon matrix S such that
   "U.A = S".  This decomposition is also known as "right Hermite".

   Returns false when an intermediate row operation overflows (S and U
   are then left in an unspecified state).

   Ref: Algorithm 2.1 page 33 in "Loop Transformations for
   Restructuring Compilers" Utpal Banerjee.  */

static bool
lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
			     lambda_matrix S, lambda_matrix U)
{
  int i, j, i0 = 0;

  /* Start from S = A and U = identity; every row operation below is
     applied to both S and U, which maintains the invariant U.A = S.  */
  lambda_matrix_copy (A, S, m, n);
  lambda_matrix_id (U, m);

  for (j = 0; j < n; j++)
    {
      if (lambda_vector_first_nz (S[j], m, i0) < m)
	{
	  ++i0;
	  /* Reduce the entries below the pivot with a Euclidean-style
	     sequence of row operations and row swaps, until the lower
	     entry becomes zero.  */
	  for (i = m - 1; i >= i0; i--)
	    {
	      while (S[i][j] != 0)
		{
		  lambda_int factor, a, b;

		  a = S[i-1][j];
		  b = S[i][j];
		  /* Entries are kept away from HOST_WIDE_INT_MIN by
		     initialize_matrix_A and lambda_matrix_row_add, so
		     the division below cannot overflow.  */
		  gcc_assert (a != HOST_WIDE_INT_MIN);
		  factor = a / b;

		  if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
		    return false;
		  std::swap (S[i], S[i-1]);

		  if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
		    return false;
		  std::swap (U[i], U[i-1]);
		}
	    }
	}
    }

  return true;
}
4390 1.1 mrg
/* Determines the overlapping elements due to accesses CHREC_A and
   CHREC_B, that are affine functions.  This function cannot handle
   symbolic evolution functions, ie. when initial conditions are
   parameters, because it uses lambda matrices of integers.  */

static void
analyze_subscript_affine_affine (tree chrec_a,
				 tree chrec_b,
				 conflict_function **overlaps_a,
				 conflict_function **overlaps_b,
				 tree *last_conflicts)
{
  unsigned nb_vars_a, nb_vars_b, dim;
  lambda_int gamma, gcd_alpha_beta;
  lambda_matrix A, U, S;
  struct obstack scratch_obstack;

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* The accessed index overlaps for each iteration in the
	 loop.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
      return;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_subscript_affine_affine \n");

  /* For determining the initial intersection, we have to solve a
     Diophantine equation.  This is the most time consuming part.

     For answering to the question: "Is there a dependence?" we have
     to prove that there exists a solution to the Diophantine
     equation, and that the solution is in the iteration domain,
     i.e. the solution is positive or zero, and that the solution
     happens before the upper bound loop.nb_iterations.  Otherwise
     there is no dependence.  This function outputs a description of
     the iterations that hold the intersections.  */

  nb_vars_a = nb_vars_in_chrec (chrec_a);
  nb_vars_b = nb_vars_in_chrec (chrec_b);

  gcc_obstack_init (&scratch_obstack);

  dim = nb_vars_a + nb_vars_b;
  U = lambda_matrix_new (dim, dim, &scratch_obstack);
  A = lambda_matrix_new (dim, 1, &scratch_obstack);
  S = lambda_matrix_new (dim, 1, &scratch_obstack);

  /* A is a column vector: the first NB_VARS_A rows hold the steps of
     CHREC_A, the remaining rows hold the negated steps of CHREC_B
     (see initialize_matrix_A).  */
  tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
  tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
  if (init_a == chrec_dont_know
      || init_b == chrec_dont_know)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: "
		 "representation issue.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }
  /* GAMMA is the difference of the two initial values.  */
  gamma = int_cst_value (init_b) - int_cst_value (init_a);

  /* Don't do all the hard work of solving the Diophantine equation
     when we already know the solution: for example,
     | {3, +, 1}_1
     | {3, +, 4}_2
     | gamma = 3 - 3 = 0.
     Then the first overlap occurs during the first iterations:
     | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
  */
  if (gamma == 0)
    {
      if (nb_vars_a == 1 && nb_vars_b == 1)
	{
	  HOST_WIDE_INT step_a, step_b;
	  HOST_WIDE_INT niter, niter_a, niter_b;
	  affine_fn ova, ovb;

	  niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
	  niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
	  niter = MIN (niter_a, niter_b);
	  step_a = int_cst_value (CHREC_RIGHT (chrec_a));
	  step_b = int_cst_value (CHREC_RIGHT (chrec_b));

	  compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
						   &ova, &ovb,
						   last_conflicts, 1);
	  *overlaps_a = conflict_fn (1, ova);
	  *overlaps_b = conflict_fn (1, ovb);
	}

      else if (nb_vars_a == 2 && nb_vars_b == 1)
	compute_overlap_steps_for_affine_1_2
	  (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);

      else if (nb_vars_a == 1 && nb_vars_b == 2)
	compute_overlap_steps_for_affine_1_2
	  (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);

      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: too many variables.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
      goto end_analyze_subs_aa;
    }

  /* U.A = S */
  if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
    {
      /* The decomposition overflowed; answer conservatively.  */
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  /* Normalize the sign of the leading entry of S, adjusting U so that
     U.A = S still holds.  */
  if (S[0][0] < 0)
    {
      S[0][0] *= -1;
      lambda_matrix_row_negate (U, dim, 0);
    }
  gcd_alpha_beta = S[0][0];

  /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
     but that is a quite strange case.  Instead of ICEing, answer
     don't know.  */
  if (gcd_alpha_beta == 0)
    {
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  /* The classic "gcd-test".  */
  if (!int_divides_p (gcd_alpha_beta, gamma))
    {
      /* The "gcd-test" has determined that there is no integer
	 solution, i.e. there is no dependence.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
    }

  /* Both access functions are univariate.  This includes SIV and MIV cases.  */
  else if (nb_vars_a == 1 && nb_vars_b == 1)
    {
      /* Both functions should have the same evolution sign.  */
      if (((A[0][0] > 0 && -A[1][0] > 0)
	   || (A[0][0] < 0 && -A[1][0] < 0)))
	{
	  /* The solutions are given by:
	     |
	     | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
	     |                           [u21 u22]    [y0]

	     For a given integer t.  Using the following variables,

	     | i0 = u11 * gamma / gcd_alpha_beta
	     | j0 = u12 * gamma / gcd_alpha_beta
	     | i1 = u21
	     | j1 = u22

	     the solutions are:

	     | x0 = i0 + i1 * t,
	     | y0 = j0 + j1 * t.  */
	  HOST_WIDE_INT i0, j0, i1, j1;

	  /* The divisions are exact: the gcd-test above checked that
	     GAMMA is a multiple of GCD_ALPHA_BETA.  */
	  i0 = U[0][0] * gamma / gcd_alpha_beta;
	  j0 = U[0][1] * gamma / gcd_alpha_beta;
	  i1 = U[1][0];
	  j1 = U[1][1];

	  if ((i1 == 0 && i0 < 0)
	      || (j1 == 0 && j0 < 0))
	    {
	      /* There is no solution.
		 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
		 falls in here, but for the moment we don't look at the
		 upper bound of the iteration domain.  */
	      *overlaps_a = conflict_fn_no_dependence ();
	      *overlaps_b = conflict_fn_no_dependence ();
	      *last_conflicts = integer_zero_node;
	      goto end_analyze_subs_aa;
	    }

	  if (i1 > 0 && j1 > 0)
	    {
	      HOST_WIDE_INT niter_a
		= max_stmt_executions_int (get_chrec_loop (chrec_a));
	      HOST_WIDE_INT niter_b
		= max_stmt_executions_int (get_chrec_loop (chrec_b));
	      HOST_WIDE_INT niter = MIN (niter_a, niter_b);

	      /* (X0, Y0) is a solution of the Diophantine equation:
		 "chrec_a (X0) = chrec_b (Y0)".  */
	      HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
					CEIL (-j0, j1));
	      HOST_WIDE_INT x0 = i1 * tau1 + i0;
	      HOST_WIDE_INT y0 = j1 * tau1 + j0;

	      /* (X1, Y1) is the smallest positive solution of the eq
		 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
		 first conflict occurs.  */
	      HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
	      HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
	      HOST_WIDE_INT y1 = y0 - j1 * min_multiple;

	      if (niter > 0)
		{
		  /* If the overlap occurs outside of the bounds of the
		     loop, there is no dependence.  */
		  if (x1 >= niter_a || y1 >= niter_b)
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      goto end_analyze_subs_aa;
		    }

		  /* max stmt executions can get quite large, avoid
		     overflows by using wide ints here.  */
		  widest_int tau2
		    = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
				wi::sdiv_floor (wi::sub (niter_b, j0), j1));
		  widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
		  if (wi::min_precision (last_conflict, SIGNED)
		      <= TYPE_PRECISION (integer_type_node))
		    *last_conflicts
		       = build_int_cst (integer_type_node,
					last_conflict.to_shwi ());
		  else
		    *last_conflicts = chrec_dont_know;
		}
	      else
		*last_conflicts = chrec_dont_know;

	      /* The overlaps are the affine functions X1 + I1*t and
		 Y1 + J1*t.  */
	      *overlaps_a
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, x1),
						 1,
						 build_int_cst (NULL_TREE, i1)));
	      *overlaps_b
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, y1),
						 1,
						 build_int_cst (NULL_TREE, j1)));
	    }
	  else
	    {
	      /* FIXME: For the moment, the upper bound of the
		 iteration domain for i and j is not checked.  */
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	    }
	}
      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
    }

end_analyze_subs_aa:
  /* Single cleanup point for the scratch matrices.  */
  obstack_free (&scratch_obstack, NULL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  (overlaps_a = ");
      dump_conflict_function (dump_file, *overlaps_a);
      fprintf (dump_file, ")\n  (overlaps_b = ");
      dump_conflict_function (dump_file, *overlaps_b);
      fprintf (dump_file, "))\n");
    }
}
4686 1.1 mrg
4687 1.1 mrg /* Returns true when analyze_subscript_affine_affine can be used for
4688 1.1 mrg determining the dependence relation between chrec_a and chrec_b,
4689 1.1 mrg that contain symbols. This function modifies chrec_a and chrec_b
4690 1.1 mrg such that the analysis result is the same, and such that they don't
4691 1.1 mrg contain symbols, and then can safely be passed to the analyzer.
4692 1.1 mrg
4693 1.1 mrg Example: The analysis of the following tuples of evolutions produce
4694 1.1 mrg the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
4695 1.1 mrg vs. {0, +, 1}_1
4696 1.1 mrg
4697 1.1 mrg {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
4698 1.1 mrg {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
4699 1.1 mrg */
4700 1.1 mrg
4701 1.1 mrg static bool
4702 1.1 mrg can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
4703 1.1 mrg {
4704 1.1 mrg tree diff, type, left_a, left_b, right_b;
4705 1.1 mrg
4706 1.1 mrg if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
4707 1.1 mrg || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
4708 1.1 mrg /* FIXME: For the moment not handled. Might be refined later. */
4709 1.1 mrg return false;
4710 1.1 mrg
4711 1.1 mrg type = chrec_type (*chrec_a);
4712 1.1 mrg left_a = CHREC_LEFT (*chrec_a);
4713 1.1 mrg left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
4714 1.1 mrg diff = chrec_fold_minus (type, left_a, left_b);
4715 1.1 mrg
4716 1.1 mrg if (!evolution_function_is_constant_p (diff))
4717 1.1 mrg return false;
4718 1.1 mrg
4719 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
4720 1.1 mrg fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
4721 1.1 mrg
4722 1.1 mrg *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
4723 1.1 mrg diff, CHREC_RIGHT (*chrec_a));
4724 1.1 mrg right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
4725 1.1 mrg *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
4726 1.1 mrg build_int_cst (type, 0),
4727 1.1 mrg right_b);
4728 1.1 mrg return true;
4729 1.1 mrg }
4730 1.1 mrg
4731 1.1 mrg /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
4732 1.1 mrg *OVERLAPS_B are initialized to the functions that describe the
4733 1.1 mrg relation between the elements accessed twice by CHREC_A and
4734 1.1 mrg CHREC_B. For k >= 0, the following property is verified:
4735 1.1 mrg
4736 1.1 mrg CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
4737 1.1 mrg
4738 1.1 mrg static void
4739 1.1 mrg analyze_siv_subscript (tree chrec_a,
4740 1.1 mrg tree chrec_b,
4741 1.1 mrg conflict_function **overlaps_a,
4742 1.1 mrg conflict_function **overlaps_b,
4743 1.1 mrg tree *last_conflicts,
4744 1.1 mrg int loop_nest_num)
4745 1.1 mrg {
4746 1.1 mrg dependence_stats.num_siv++;
4747 1.1 mrg
4748 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
4749 1.1 mrg fprintf (dump_file, "(analyze_siv_subscript \n");
4750 1.1 mrg
4751 1.1 mrg if (evolution_function_is_constant_p (chrec_a)
4752 1.1 mrg && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4753 1.1 mrg analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
4754 1.1 mrg overlaps_a, overlaps_b, last_conflicts);
4755 1.1 mrg
4756 1.1 mrg else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4757 1.1 mrg && evolution_function_is_constant_p (chrec_b))
4758 1.1 mrg analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
4759 1.1 mrg overlaps_b, overlaps_a, last_conflicts);
4760 1.1 mrg
4761 1.1 mrg else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4762 1.1 mrg && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4763 1.1 mrg {
4764 1.1 mrg if (!chrec_contains_symbols (chrec_a)
4765 1.1 mrg && !chrec_contains_symbols (chrec_b))
4766 1.1 mrg {
4767 1.1 mrg analyze_subscript_affine_affine (chrec_a, chrec_b,
4768 1.1 mrg overlaps_a, overlaps_b,
4769 1.1 mrg last_conflicts);
4770 1.1 mrg
4771 1.1 mrg if (CF_NOT_KNOWN_P (*overlaps_a)
4772 1.1 mrg || CF_NOT_KNOWN_P (*overlaps_b))
4773 1.1 mrg dependence_stats.num_siv_unimplemented++;
4774 1.1 mrg else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4775 1.1 mrg || CF_NO_DEPENDENCE_P (*overlaps_b))
4776 1.1 mrg dependence_stats.num_siv_independent++;
4777 1.1 mrg else
4778 1.1 mrg dependence_stats.num_siv_dependent++;
4779 1.1 mrg }
4780 1.1 mrg else if (can_use_analyze_subscript_affine_affine (&chrec_a,
4781 1.1 mrg &chrec_b))
4782 1.1 mrg {
4783 1.1 mrg analyze_subscript_affine_affine (chrec_a, chrec_b,
4784 1.1 mrg overlaps_a, overlaps_b,
4785 1.1 mrg last_conflicts);
4786 1.1 mrg
4787 1.1 mrg if (CF_NOT_KNOWN_P (*overlaps_a)
4788 1.1 mrg || CF_NOT_KNOWN_P (*overlaps_b))
4789 1.1 mrg dependence_stats.num_siv_unimplemented++;
4790 1.1 mrg else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4791 1.1 mrg || CF_NO_DEPENDENCE_P (*overlaps_b))
4792 1.1 mrg dependence_stats.num_siv_independent++;
4793 1.1 mrg else
4794 1.1 mrg dependence_stats.num_siv_dependent++;
4795 1.1 mrg }
4796 1.1 mrg else
4797 1.1 mrg goto siv_subscript_dontknow;
4798 1.1 mrg }
4799 1.1 mrg
4800 1.1 mrg else
4801 1.1 mrg {
4802 1.1 mrg siv_subscript_dontknow:;
4803 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
4804 1.1 mrg fprintf (dump_file, " siv test failed: unimplemented");
4805 1.1 mrg *overlaps_a = conflict_fn_not_known ();
4806 1.1 mrg *overlaps_b = conflict_fn_not_known ();
4807 1.1 mrg *last_conflicts = chrec_dont_know;
4808 1.1 mrg dependence_stats.num_siv_unimplemented++;
4809 1.1 mrg }
4810 1.1 mrg
4811 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
4812 1.1 mrg fprintf (dump_file, ")\n");
4813 1.1 mrg }
4814 1.1 mrg
4815 1.1 mrg /* Returns false if we can prove that the greatest common divisor of the steps
4816 1.1 mrg of CHREC does not divide CST, false otherwise. */
4817 1.1 mrg
4818 1.1 mrg static bool
4819 1.1 mrg gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4820 1.1 mrg {
4821 1.1 mrg HOST_WIDE_INT cd = 0, val;
4822 1.1 mrg tree step;
4823 1.1 mrg
4824 1.1 mrg if (!tree_fits_shwi_p (cst))
4825 1.1 mrg return true;
4826 1.1 mrg val = tree_to_shwi (cst);
4827 1.1 mrg
4828 1.1 mrg while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4829 1.1 mrg {
4830 1.1 mrg step = CHREC_RIGHT (chrec);
4831 1.1 mrg if (!tree_fits_shwi_p (step))
4832 1.1 mrg return true;
4833 1.1 mrg cd = gcd (cd, tree_to_shwi (step));
4834 1.1 mrg chrec = CHREC_LEFT (chrec);
4835 1.1 mrg }
4836 1.1 mrg
4837 1.1 mrg return val % cd == 0;
4838 1.1 mrg }
4839 1.1 mrg
4840 1.1 mrg /* Analyze a MIV (Multiple Index Variable) subscript with respect to
4841 1.1 mrg LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
4842 1.1 mrg functions that describe the relation between the elements accessed
4843 1.1 mrg twice by CHREC_A and CHREC_B. For k >= 0, the following property
4844 1.1 mrg is verified:
4845 1.1 mrg
4846 1.1 mrg CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
4847 1.1 mrg
4848 1.1 mrg static void
4849 1.1 mrg analyze_miv_subscript (tree chrec_a,
4850 1.1 mrg tree chrec_b,
4851 1.1 mrg conflict_function **overlaps_a,
4852 1.1 mrg conflict_function **overlaps_b,
4853 1.1 mrg tree *last_conflicts,
4854 1.1 mrg class loop *loop_nest)
4855 1.1 mrg {
4856 1.1 mrg tree type, difference;
4857 1.1 mrg
4858 1.1 mrg dependence_stats.num_miv++;
4859 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
4860 1.1 mrg fprintf (dump_file, "(analyze_miv_subscript \n");
4861 1.1 mrg
4862 1.1 mrg type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
4863 1.1 mrg chrec_a = chrec_convert (type, chrec_a, NULL);
4864 1.1 mrg chrec_b = chrec_convert (type, chrec_b, NULL);
4865 1.1 mrg difference = chrec_fold_minus (type, chrec_a, chrec_b);
4866 1.1 mrg
4867 1.1 mrg if (eq_evolutions_p (chrec_a, chrec_b))
4868 1.1 mrg {
4869 1.1 mrg /* Access functions are the same: all the elements are accessed
4870 1.1 mrg in the same order. */
4871 1.1 mrg *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4872 1.1 mrg *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4873 1.1 mrg *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
4874 1.1 mrg dependence_stats.num_miv_dependent++;
4875 1.1 mrg }
4876 1.1 mrg
4877 1.1 mrg else if (evolution_function_is_constant_p (difference)
4878 1.1 mrg && evolution_function_is_affine_multivariate_p (chrec_a,
4879 1.1 mrg loop_nest->num)
4880 1.1 mrg && !gcd_of_steps_may_divide_p (chrec_a, difference))
4881 1.1 mrg {
4882 1.1 mrg /* testsuite/.../ssa-chrec-33.c
4883 1.1 mrg {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
4884 1.1 mrg
4885 1.1 mrg The difference is 1, and all the evolution steps are multiples
4886 1.1 mrg of 2, consequently there are no overlapping elements. */
4887 1.1 mrg *overlaps_a = conflict_fn_no_dependence ();
4888 1.1 mrg *overlaps_b = conflict_fn_no_dependence ();
4889 1.1 mrg *last_conflicts = integer_zero_node;
4890 1.1 mrg dependence_stats.num_miv_independent++;
4891 1.1 mrg }
4892 1.1 mrg
4893 1.1 mrg else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
4894 1.1 mrg && !chrec_contains_symbols (chrec_a, loop_nest)
4895 1.1 mrg && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
4896 1.1 mrg && !chrec_contains_symbols (chrec_b, loop_nest))
4897 1.1 mrg {
4898 1.1 mrg /* testsuite/.../ssa-chrec-35.c
4899 1.1 mrg {0, +, 1}_2 vs. {0, +, 1}_3
4900 1.1 mrg the overlapping elements are respectively located at iterations:
4901 1.1 mrg {0, +, 1}_x and {0, +, 1}_x,
4902 1.1 mrg in other words, we have the equality:
4903 1.1 mrg {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4904 1.1 mrg
4905 1.1 mrg Other examples:
4906 1.1 mrg {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4907 1.1 mrg {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4908 1.1 mrg
4909 1.1 mrg {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4910 1.1 mrg {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4911 1.1 mrg */
4912 1.1 mrg analyze_subscript_affine_affine (chrec_a, chrec_b,
4913 1.1 mrg overlaps_a, overlaps_b, last_conflicts);
4914 1.1 mrg
4915 1.1 mrg if (CF_NOT_KNOWN_P (*overlaps_a)
4916 1.1 mrg || CF_NOT_KNOWN_P (*overlaps_b))
4917 1.1 mrg dependence_stats.num_miv_unimplemented++;
4918 1.1 mrg else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4919 1.1 mrg || CF_NO_DEPENDENCE_P (*overlaps_b))
4920 1.1 mrg dependence_stats.num_miv_independent++;
4921 1.1 mrg else
4922 1.1 mrg dependence_stats.num_miv_dependent++;
4923 1.1 mrg }
4924 1.1 mrg
4925 1.1 mrg else
4926 1.1 mrg {
4927 1.1 mrg /* When the analysis is too difficult, answer "don't know". */
4928 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
4929 1.1 mrg fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
4930 1.1 mrg
4931 1.1 mrg *overlaps_a = conflict_fn_not_known ();
4932 1.1 mrg *overlaps_b = conflict_fn_not_known ();
4933 1.1 mrg *last_conflicts = chrec_dont_know;
4934 1.1 mrg dependence_stats.num_miv_unimplemented++;
4935 1.1 mrg }
4936 1.1 mrg
4937 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
4938 1.1 mrg fprintf (dump_file, ")\n");
4939 1.1 mrg }
4940 1.1 mrg
4941 1.1 mrg /* Determines the iterations for which CHREC_A is equal to CHREC_B in
4942 1.1 mrg with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
4943 1.1 mrg OVERLAP_ITERATIONS_B are initialized with two functions that
4944 1.1 mrg describe the iterations that contain conflicting elements.
4945 1.1 mrg
4946 1.1 mrg Remark: For an integer k >= 0, the following equality is true:
4947 1.1 mrg
4948 1.1 mrg CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
4949 1.1 mrg */
4950 1.1 mrg
4951 1.1 mrg static void
4952 1.1 mrg analyze_overlapping_iterations (tree chrec_a,
4953 1.1 mrg tree chrec_b,
4954 1.1 mrg conflict_function **overlap_iterations_a,
4955 1.1 mrg conflict_function **overlap_iterations_b,
4956 1.1 mrg tree *last_conflicts, class loop *loop_nest)
4957 1.1 mrg {
4958 1.1 mrg unsigned int lnn = loop_nest->num;
4959 1.1 mrg
4960 1.1 mrg dependence_stats.num_subscript_tests++;
4961 1.1 mrg
4962 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
4963 1.1 mrg {
4964 1.1 mrg fprintf (dump_file, "(analyze_overlapping_iterations \n");
4965 1.1 mrg fprintf (dump_file, " (chrec_a = ");
4966 1.1 mrg print_generic_expr (dump_file, chrec_a);
4967 1.1 mrg fprintf (dump_file, ")\n (chrec_b = ");
4968 1.1 mrg print_generic_expr (dump_file, chrec_b);
4969 1.1 mrg fprintf (dump_file, ")\n");
4970 1.1 mrg }
4971 1.1 mrg
4972 1.1 mrg if (chrec_a == NULL_TREE
4973 1.1 mrg || chrec_b == NULL_TREE
4974 1.1 mrg || chrec_contains_undetermined (chrec_a)
4975 1.1 mrg || chrec_contains_undetermined (chrec_b))
4976 1.1 mrg {
4977 1.1 mrg dependence_stats.num_subscript_undetermined++;
4978 1.1 mrg
4979 1.1 mrg *overlap_iterations_a = conflict_fn_not_known ();
4980 1.1 mrg *overlap_iterations_b = conflict_fn_not_known ();
4981 1.1 mrg }
4982 1.1 mrg
4983 1.1 mrg /* If they are the same chrec, and are affine, they overlap
4984 1.1 mrg on every iteration. */
4985 1.1 mrg else if (eq_evolutions_p (chrec_a, chrec_b)
4986 1.1 mrg && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4987 1.1 mrg || operand_equal_p (chrec_a, chrec_b, 0)))
4988 1.1 mrg {
4989 1.1 mrg dependence_stats.num_same_subscript_function++;
4990 1.1 mrg *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4991 1.1 mrg *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4992 1.1 mrg *last_conflicts = chrec_dont_know;
4993 1.1 mrg }
4994 1.1 mrg
4995 1.1 mrg /* If they aren't the same, and aren't affine, we can't do anything
4996 1.1 mrg yet. */
4997 1.1 mrg else if ((chrec_contains_symbols (chrec_a)
4998 1.1 mrg || chrec_contains_symbols (chrec_b))
4999 1.1 mrg && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
5000 1.1 mrg || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
5001 1.1 mrg {
5002 1.1 mrg dependence_stats.num_subscript_undetermined++;
5003 1.1 mrg *overlap_iterations_a = conflict_fn_not_known ();
5004 1.1 mrg *overlap_iterations_b = conflict_fn_not_known ();
5005 1.1 mrg }
5006 1.1 mrg
5007 1.1 mrg else if (ziv_subscript_p (chrec_a, chrec_b))
5008 1.1 mrg analyze_ziv_subscript (chrec_a, chrec_b,
5009 1.1 mrg overlap_iterations_a, overlap_iterations_b,
5010 1.1 mrg last_conflicts);
5011 1.1 mrg
5012 1.1 mrg else if (siv_subscript_p (chrec_a, chrec_b))
5013 1.1 mrg analyze_siv_subscript (chrec_a, chrec_b,
5014 1.1 mrg overlap_iterations_a, overlap_iterations_b,
5015 1.1 mrg last_conflicts, lnn);
5016 1.1 mrg
5017 1.1 mrg else
5018 1.1 mrg analyze_miv_subscript (chrec_a, chrec_b,
5019 1.1 mrg overlap_iterations_a, overlap_iterations_b,
5020 1.1 mrg last_conflicts, loop_nest);
5021 1.1 mrg
5022 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
5023 1.1 mrg {
5024 1.1 mrg fprintf (dump_file, " (overlap_iterations_a = ");
5025 1.1 mrg dump_conflict_function (dump_file, *overlap_iterations_a);
5026 1.1 mrg fprintf (dump_file, ")\n (overlap_iterations_b = ");
5027 1.1 mrg dump_conflict_function (dump_file, *overlap_iterations_b);
5028 1.1 mrg fprintf (dump_file, "))\n");
5029 1.1 mrg }
5030 1.1 mrg }
5031 1.1 mrg
5032 1.1 mrg /* Helper function for uniquely inserting distance vectors. */
5033 1.1 mrg
5034 1.1 mrg static void
5035 1.1 mrg save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
5036 1.1 mrg {
5037 1.1 mrg for (lambda_vector v : DDR_DIST_VECTS (ddr))
5038 1.1 mrg if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
5039 1.1 mrg return;
5040 1.1 mrg
5041 1.1 mrg DDR_DIST_VECTS (ddr).safe_push (dist_v);
5042 1.1 mrg }
5043 1.1 mrg
5044 1.1 mrg /* Helper function for uniquely inserting direction vectors. */
5045 1.1 mrg
5046 1.1 mrg static void
5047 1.1 mrg save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
5048 1.1 mrg {
5049 1.1 mrg for (lambda_vector v : DDR_DIR_VECTS (ddr))
5050 1.1 mrg if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
5051 1.1 mrg return;
5052 1.1 mrg
5053 1.1 mrg DDR_DIR_VECTS (ddr).safe_push (dir_v);
5054 1.1 mrg }
5055 1.1 mrg
5056 1.1 mrg /* Add a distance of 1 on all the loops outer than INDEX. If we
5057 1.1 mrg haven't yet determined a distance for this outer loop, push a new
5058 1.1 mrg distance vector composed of the previous distance, and a distance
5059 1.1 mrg of 1 for this outer loop. Example:
5060 1.1 mrg
5061 1.1 mrg | loop_1
5062 1.1 mrg | loop_2
5063 1.1 mrg | A[10]
5064 1.1 mrg | endloop_2
5065 1.1 mrg | endloop_1
5066 1.1 mrg
5067 1.1 mrg Saved vectors are of the form (dist_in_1, dist_in_2). First, we
5068 1.1 mrg save (0, 1), then we have to save (1, 0). */
5069 1.1 mrg
5070 1.1 mrg static void
5071 1.1 mrg add_outer_distances (struct data_dependence_relation *ddr,
5072 1.1 mrg lambda_vector dist_v, int index)
5073 1.1 mrg {
5074 1.1 mrg /* For each outer loop where init_v is not set, the accesses are
5075 1.1 mrg in dependence of distance 1 in the loop. */
5076 1.1 mrg while (--index >= 0)
5077 1.1 mrg {
5078 1.1 mrg lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5079 1.1 mrg lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5080 1.1 mrg save_v[index] = 1;
5081 1.1 mrg save_dist_v (ddr, save_v);
5082 1.1 mrg }
5083 1.1 mrg }
5084 1.1 mrg
5085 1.1 mrg /* Return false when fail to represent the data dependence as a
5086 1.1 mrg distance vector. A_INDEX is the index of the first reference
5087 1.1 mrg (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
5088 1.1 mrg second reference. INIT_B is set to true when a component has been
5089 1.1 mrg added to the distance vector DIST_V. INDEX_CARRY is then set to
5090 1.1 mrg the index in DIST_V that carries the dependence. */
5091 1.1 mrg
5092 1.1 mrg static bool
5093 1.1 mrg build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
5094 1.1 mrg unsigned int a_index, unsigned int b_index,
5095 1.1 mrg lambda_vector dist_v, bool *init_b,
5096 1.1 mrg int *index_carry)
5097 1.1 mrg {
5098 1.1 mrg unsigned i;
5099 1.1 mrg lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5100 1.1 mrg class loop *loop = DDR_LOOP_NEST (ddr)[0];
5101 1.1 mrg
5102 1.1 mrg for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5103 1.1 mrg {
5104 1.1 mrg tree access_fn_a, access_fn_b;
5105 1.1 mrg struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
5106 1.1 mrg
5107 1.1 mrg if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5108 1.1 mrg {
5109 1.1 mrg non_affine_dependence_relation (ddr);
5110 1.1 mrg return false;
5111 1.1 mrg }
5112 1.1 mrg
5113 1.1 mrg access_fn_a = SUB_ACCESS_FN (subscript, a_index);
5114 1.1 mrg access_fn_b = SUB_ACCESS_FN (subscript, b_index);
5115 1.1 mrg
5116 1.1 mrg if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
5117 1.1 mrg && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
5118 1.1 mrg {
5119 1.1 mrg HOST_WIDE_INT dist;
5120 1.1 mrg int index;
5121 1.1 mrg int var_a = CHREC_VARIABLE (access_fn_a);
5122 1.1 mrg int var_b = CHREC_VARIABLE (access_fn_b);
5123 1.1 mrg
5124 1.1 mrg if (var_a != var_b
5125 1.1 mrg || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5126 1.1 mrg {
5127 1.1 mrg non_affine_dependence_relation (ddr);
5128 1.1 mrg return false;
5129 1.1 mrg }
5130 1.1 mrg
5131 1.1 mrg /* When data references are collected in a loop while data
5132 1.1 mrg dependences are analyzed in loop nest nested in the loop, we
5133 1.1 mrg would have more number of access functions than number of
5134 1.1 mrg loops. Skip access functions of loops not in the loop nest.
5135 1.1 mrg
5136 1.1 mrg See PR89725 for more information. */
5137 1.1 mrg if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
5138 1.1 mrg continue;
5139 1.1 mrg
5140 1.1 mrg dist = int_cst_value (SUB_DISTANCE (subscript));
5141 1.1 mrg index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
5142 1.1 mrg *index_carry = MIN (index, *index_carry);
5143 1.1 mrg
5144 1.1 mrg /* This is the subscript coupling test. If we have already
5145 1.1 mrg recorded a distance for this loop (a distance coming from
5146 1.1 mrg another subscript), it should be the same. For example,
5147 1.1 mrg in the following code, there is no dependence:
5148 1.1 mrg
5149 1.1 mrg | loop i = 0, N, 1
5150 1.1 mrg | T[i+1][i] = ...
5151 1.1 mrg | ... = T[i][i]
5152 1.1 mrg | endloop
5153 1.1 mrg */
5154 1.1 mrg if (init_v[index] != 0 && dist_v[index] != dist)
5155 1.1 mrg {
5156 1.1 mrg finalize_ddr_dependent (ddr, chrec_known);
5157 1.1 mrg return false;
5158 1.1 mrg }
5159 1.1 mrg
5160 1.1 mrg dist_v[index] = dist;
5161 1.1 mrg init_v[index] = 1;
5162 1.1 mrg *init_b = true;
5163 1.1 mrg }
5164 1.1 mrg else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
5165 1.1 mrg {
5166 1.1 mrg /* This can be for example an affine vs. constant dependence
5167 1.1 mrg (T[i] vs. T[3]) that is not an affine dependence and is
5168 1.1 mrg not representable as a distance vector. */
5169 1.1 mrg non_affine_dependence_relation (ddr);
5170 1.1 mrg return false;
5171 1.1 mrg }
5172 1.1 mrg }
5173 1.1 mrg
5174 1.1 mrg return true;
5175 1.1 mrg }
5176 1.1 mrg
5177 1.1 mrg /* Return true when the DDR contains only invariant access functions wrto. loop
5178 1.1 mrg number LNUM. */
5179 1.1 mrg
5180 1.1 mrg static bool
5181 1.1 mrg invariant_access_functions (const struct data_dependence_relation *ddr,
5182 1.1 mrg int lnum)
5183 1.1 mrg {
5184 1.1 mrg for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5185 1.1 mrg if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
5186 1.1 mrg || !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
5187 1.1 mrg return false;
5188 1.1 mrg
5189 1.1 mrg return true;
5190 1.1 mrg }
5191 1.1 mrg
5192 1.1 mrg /* Helper function for the case where DDR_A and DDR_B are the same
5193 1.1 mrg multivariate access function with a constant step. For an example
5194 1.1 mrg see pr34635-1.c. */
5195 1.1 mrg
5196 1.1 mrg static void
5197 1.1 mrg add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
5198 1.1 mrg {
5199 1.1 mrg int x_1, x_2;
5200 1.1 mrg tree c_1 = CHREC_LEFT (c_2);
5201 1.1 mrg tree c_0 = CHREC_LEFT (c_1);
5202 1.1 mrg lambda_vector dist_v;
5203 1.1 mrg HOST_WIDE_INT v1, v2, cd;
5204 1.1 mrg
5205 1.1 mrg /* Polynomials with more than 2 variables are not handled yet. When
5206 1.1 mrg the evolution steps are parameters, it is not possible to
5207 1.1 mrg represent the dependence using classical distance vectors. */
5208 1.1 mrg if (TREE_CODE (c_0) != INTEGER_CST
5209 1.1 mrg || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
5210 1.1 mrg || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
5211 1.1 mrg {
5212 1.1 mrg DDR_AFFINE_P (ddr) = false;
5213 1.1 mrg return;
5214 1.1 mrg }
5215 1.1 mrg
5216 1.1 mrg x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
5217 1.1 mrg x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
5218 1.1 mrg
5219 1.1 mrg /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
5220 1.1 mrg dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5221 1.1 mrg v1 = int_cst_value (CHREC_RIGHT (c_1));
5222 1.1 mrg v2 = int_cst_value (CHREC_RIGHT (c_2));
5223 1.1 mrg cd = gcd (v1, v2);
5224 1.1 mrg v1 /= cd;
5225 1.1 mrg v2 /= cd;
5226 1.1 mrg
5227 1.1 mrg if (v2 < 0)
5228 1.1 mrg {
5229 1.1 mrg v2 = -v2;
5230 1.1 mrg v1 = -v1;
5231 1.1 mrg }
5232 1.1 mrg
5233 1.1 mrg dist_v[x_1] = v2;
5234 1.1 mrg dist_v[x_2] = -v1;
5235 1.1 mrg save_dist_v (ddr, dist_v);
5236 1.1 mrg
5237 1.1 mrg add_outer_distances (ddr, dist_v, x_1);
5238 1.1 mrg }
5239 1.1 mrg
5240 1.1 mrg /* Helper function for the case where DDR_A and DDR_B are the same
5241 1.1 mrg access functions. */
5242 1.1 mrg
5243 1.1 mrg static void
5244 1.1 mrg add_other_self_distances (struct data_dependence_relation *ddr)
5245 1.1 mrg {
5246 1.1 mrg lambda_vector dist_v;
5247 1.1 mrg unsigned i;
5248 1.1 mrg int index_carry = DDR_NB_LOOPS (ddr);
5249 1.1 mrg subscript *sub;
5250 1.1 mrg class loop *loop = DDR_LOOP_NEST (ddr)[0];
5251 1.1 mrg
5252 1.1 mrg FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
5253 1.1 mrg {
5254 1.1 mrg tree access_fun = SUB_ACCESS_FN (sub, 0);
5255 1.1 mrg
5256 1.1 mrg if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
5257 1.1 mrg {
5258 1.1 mrg if (!evolution_function_is_univariate_p (access_fun, loop->num))
5259 1.1 mrg {
5260 1.1 mrg if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
5261 1.1 mrg {
5262 1.1 mrg DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
5263 1.1 mrg return;
5264 1.1 mrg }
5265 1.1 mrg
5266 1.1 mrg access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
5267 1.1 mrg
5268 1.1 mrg if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
5269 1.1 mrg add_multivariate_self_dist (ddr, access_fun);
5270 1.1 mrg else
5271 1.1 mrg /* The evolution step is not constant: it varies in
5272 1.1 mrg the outer loop, so this cannot be represented by a
5273 1.1 mrg distance vector. For example in pr34635.c the
5274 1.1 mrg evolution is {0, +, {0, +, 4}_1}_2. */
5275 1.1 mrg DDR_AFFINE_P (ddr) = false;
5276 1.1 mrg
5277 1.1 mrg return;
5278 1.1 mrg }
5279 1.1 mrg
5280 1.1 mrg /* When data references are collected in a loop while data
5281 1.1 mrg dependences are analyzed in loop nest nested in the loop, we
5282 1.1 mrg would have more number of access functions than number of
5283 1.1 mrg loops. Skip access functions of loops not in the loop nest.
5284 1.1 mrg
5285 1.1 mrg See PR89725 for more information. */
5286 1.1 mrg if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
5287 1.1 mrg loop))
5288 1.1 mrg continue;
5289 1.1 mrg
5290 1.1 mrg index_carry = MIN (index_carry,
5291 1.1 mrg index_in_loop_nest (CHREC_VARIABLE (access_fun),
5292 1.1 mrg DDR_LOOP_NEST (ddr)));
5293 1.1 mrg }
5294 1.1 mrg }
5295 1.1 mrg
5296 1.1 mrg dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5297 1.1 mrg add_outer_distances (ddr, dist_v, index_carry);
5298 1.1 mrg }
5299 1.1 mrg
5300 1.1 mrg static void
5301 1.1 mrg insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
5302 1.1 mrg {
5303 1.1 mrg lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5304 1.1 mrg
5305 1.1 mrg dist_v[0] = 1;
5306 1.1 mrg save_dist_v (ddr, dist_v);
5307 1.1 mrg }
5308 1.1 mrg
5309 1.1 mrg /* Adds a unit distance vector to DDR when there is a 0 overlap. This
5310 1.1 mrg is the case for example when access functions are the same and
5311 1.1 mrg equal to a constant, as in:
5312 1.1 mrg
5313 1.1 mrg | loop_1
5314 1.1 mrg | A[3] = ...
5315 1.1 mrg | ... = A[3]
5316 1.1 mrg | endloop_1
5317 1.1 mrg
5318 1.1 mrg in which case the distance vectors are (0) and (1). */
5319 1.1 mrg
5320 1.1 mrg static void
5321 1.1 mrg add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
5322 1.1 mrg {
5323 1.1 mrg unsigned i, j;
5324 1.1 mrg
5325 1.1 mrg for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5326 1.1 mrg {
5327 1.1 mrg subscript_p sub = DDR_SUBSCRIPT (ddr, i);
5328 1.1 mrg conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
5329 1.1 mrg conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
5330 1.1 mrg
5331 1.1 mrg for (j = 0; j < ca->n; j++)
5332 1.1 mrg if (affine_function_zero_p (ca->fns[j]))
5333 1.1 mrg {
5334 1.1 mrg insert_innermost_unit_dist_vector (ddr);
5335 1.1 mrg return;
5336 1.1 mrg }
5337 1.1 mrg
5338 1.1 mrg for (j = 0; j < cb->n; j++)
5339 1.1 mrg if (affine_function_zero_p (cb->fns[j]))
5340 1.1 mrg {
5341 1.1 mrg insert_innermost_unit_dist_vector (ddr);
5342 1.1 mrg return;
5343 1.1 mrg }
5344 1.1 mrg }
5345 1.1 mrg }
5346 1.1 mrg
5347 1.1 mrg /* Return true when the DDR contains two data references that have the
5348 1.1 mrg same access functions. */
5349 1.1 mrg
5350 1.1 mrg static inline bool
5351 1.1 mrg same_access_functions (const struct data_dependence_relation *ddr)
5352 1.1 mrg {
5353 1.1 mrg for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5354 1.1 mrg if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
5355 1.1 mrg SUB_ACCESS_FN (sub, 1)))
5356 1.1 mrg return false;
5357 1.1 mrg
5358 1.1 mrg return true;
5359 1.1 mrg }
5360 1.1 mrg
5361 1.1 mrg /* Compute the classic per loop distance vector. DDR is the data
5362 1.1 mrg dependence relation to build a vector from. Return false when fail
5363 1.1 mrg to represent the data dependence as a distance vector. */
5364 1.1 mrg
5365 1.1 mrg static bool
5366 1.1 mrg build_classic_dist_vector (struct data_dependence_relation *ddr,
5367 1.1 mrg class loop *loop_nest)
5368 1.1 mrg {
5369 1.1 mrg bool init_b = false;
5370 1.1 mrg int index_carry = DDR_NB_LOOPS (ddr);
5371 1.1 mrg lambda_vector dist_v;
5372 1.1 mrg
5373 1.1 mrg if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
5374 1.1 mrg return false;
5375 1.1 mrg
5376 1.1 mrg if (same_access_functions (ddr))
5377 1.1 mrg {
5378 1.1 mrg /* Save the 0 vector. */
5379 1.1 mrg dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5380 1.1 mrg save_dist_v (ddr, dist_v);
5381 1.1 mrg
5382 1.1 mrg if (invariant_access_functions (ddr, loop_nest->num))
5383 1.1 mrg add_distance_for_zero_overlaps (ddr);
5384 1.1 mrg
5385 1.1 mrg if (DDR_NB_LOOPS (ddr) > 1)
5386 1.1 mrg add_other_self_distances (ddr);
5387 1.1 mrg
5388 1.1 mrg return true;
5389 1.1 mrg }
5390 1.1 mrg
5391 1.1 mrg dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5392 1.1 mrg if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
5393 1.1 mrg return false;
5394 1.1 mrg
5395 1.1 mrg /* Save the distance vector if we initialized one. */
5396 1.1 mrg if (init_b)
5397 1.1 mrg {
5398 1.1 mrg /* Verify a basic constraint: classic distance vectors should
5399 1.1 mrg always be lexicographically positive.
5400 1.1 mrg
5401 1.1 mrg Data references are collected in the order of execution of
5402 1.1 mrg the program, thus for the following loop
5403 1.1 mrg
5404 1.1 mrg | for (i = 1; i < 100; i++)
5405 1.1 mrg | for (j = 1; j < 100; j++)
5406 1.1 mrg | {
5407 1.1 mrg | t = T[j+1][i-1]; // A
5408 1.1 mrg | T[j][i] = t + 2; // B
5409 1.1 mrg | }
5410 1.1 mrg
5411 1.1 mrg references are collected following the direction of the wind:
5412 1.1 mrg A then B. The data dependence tests are performed also
5413 1.1 mrg following this order, such that we're looking at the distance
5414 1.1 mrg separating the elements accessed by A from the elements later
5415 1.1 mrg accessed by B. But in this example, the distance returned by
5416 1.1 mrg test_dep (A, B) is lexicographically negative (-1, 1), that
5417 1.1 mrg means that the access A occurs later than B with respect to
5418 1.1 mrg the outer loop, ie. we're actually looking upwind. In this
5419 1.1 mrg case we solve test_dep (B, A) looking downwind to the
5420 1.1 mrg lexicographically positive solution, that returns the
5421 1.1 mrg distance vector (1, -1). */
5422 1.1 mrg if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
5423 1.1 mrg {
5424 1.1 mrg lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5425 1.1 mrg if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5426 1.1 mrg return false;
5427 1.1 mrg compute_subscript_distance (ddr);
5428 1.1 mrg if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
5429 1.1 mrg &index_carry))
5430 1.1 mrg return false;
5431 1.1 mrg save_dist_v (ddr, save_v);
5432 1.1 mrg DDR_REVERSED_P (ddr) = true;
5433 1.1 mrg
5434 1.1 mrg /* In this case there is a dependence forward for all the
5435 1.1 mrg outer loops:
5436 1.1 mrg
5437 1.1 mrg | for (k = 1; k < 100; k++)
5438 1.1 mrg | for (i = 1; i < 100; i++)
5439 1.1 mrg | for (j = 1; j < 100; j++)
5440 1.1 mrg | {
5441 1.1 mrg | t = T[j+1][i-1]; // A
5442 1.1 mrg | T[j][i] = t + 2; // B
5443 1.1 mrg | }
5444 1.1 mrg
5445 1.1 mrg the vectors are:
5446 1.1 mrg (0, 1, -1)
5447 1.1 mrg (1, 1, -1)
5448 1.1 mrg (1, -1, 1)
5449 1.1 mrg */
5450 1.1 mrg if (DDR_NB_LOOPS (ddr) > 1)
5451 1.1 mrg {
5452 1.1 mrg add_outer_distances (ddr, save_v, index_carry);
5453 1.1 mrg add_outer_distances (ddr, dist_v, index_carry);
5454 1.1 mrg }
5455 1.1 mrg }
5456 1.1 mrg else
5457 1.1 mrg {
5458 1.1 mrg lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5459 1.1 mrg lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5460 1.1 mrg
5461 1.1 mrg if (DDR_NB_LOOPS (ddr) > 1)
5462 1.1 mrg {
5463 1.1 mrg lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5464 1.1 mrg
5465 1.1 mrg if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5466 1.1 mrg return false;
5467 1.1 mrg compute_subscript_distance (ddr);
5468 1.1 mrg if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
5469 1.1 mrg &index_carry))
5470 1.1 mrg return false;
5471 1.1 mrg
5472 1.1 mrg save_dist_v (ddr, save_v);
5473 1.1 mrg add_outer_distances (ddr, dist_v, index_carry);
5474 1.1 mrg add_outer_distances (ddr, opposite_v, index_carry);
5475 1.1 mrg }
5476 1.1 mrg else
5477 1.1 mrg save_dist_v (ddr, save_v);
5478 1.1 mrg }
5479 1.1 mrg }
5480 1.1 mrg else
5481 1.1 mrg {
5482 1.1 mrg /* There is a distance of 1 on all the outer loops: Example:
5483 1.1 mrg there is a dependence of distance 1 on loop_1 for the array A.
5484 1.1 mrg
5485 1.1 mrg | loop_1
5486 1.1 mrg | A[5] = ...
5487 1.1 mrg | endloop
5488 1.1 mrg */
5489 1.1 mrg add_outer_distances (ddr, dist_v,
5490 1.1 mrg lambda_vector_first_nz (dist_v,
5491 1.1 mrg DDR_NB_LOOPS (ddr), 0));
5492 1.1 mrg }
5493 1.1 mrg
5494 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS))
5495 1.1 mrg {
5496 1.1 mrg unsigned i;
5497 1.1 mrg
5498 1.1 mrg fprintf (dump_file, "(build_classic_dist_vector\n");
5499 1.1 mrg for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
5500 1.1 mrg {
5501 1.1 mrg fprintf (dump_file, " dist_vector = (");
5502 1.1 mrg print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
5503 1.1 mrg DDR_NB_LOOPS (ddr));
5504 1.1 mrg fprintf (dump_file, " )\n");
5505 1.1 mrg }
5506 1.1 mrg fprintf (dump_file, ")\n");
5507 1.1 mrg }
5508 1.1 mrg
5509 1.1 mrg return true;
5510 1.1 mrg }
5511 1.1 mrg
5512 1.1 mrg /* Return the direction for a given distance.
5513 1.1 mrg FIXME: Computing dir this way is suboptimal, since dir can catch
5514 1.1 mrg cases that dist is unable to represent. */
5515 1.1 mrg
5516 1.1 mrg static inline enum data_dependence_direction
5517 1.1 mrg dir_from_dist (int dist)
5518 1.1 mrg {
5519 1.1 mrg if (dist > 0)
5520 1.1 mrg return dir_positive;
5521 1.1 mrg else if (dist < 0)
5522 1.1 mrg return dir_negative;
5523 1.1 mrg else
5524 1.1 mrg return dir_equal;
5525 1.1 mrg }
5526 1.1 mrg
5527 1.1 mrg /* Compute the classic per loop direction vector. DDR is the data
5528 1.1 mrg dependence relation to build a vector from. */
5529 1.1 mrg
5530 1.1 mrg static void
5531 1.1 mrg build_classic_dir_vector (struct data_dependence_relation *ddr)
5532 1.1 mrg {
5533 1.1 mrg unsigned i, j;
5534 1.1 mrg lambda_vector dist_v;
5535 1.1 mrg
5536 1.1 mrg FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
5537 1.1 mrg {
5538 1.1 mrg lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5539 1.1 mrg
5540 1.1 mrg for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
5541 1.1 mrg dir_v[j] = dir_from_dist (dist_v[j]);
5542 1.1 mrg
5543 1.1 mrg save_dir_v (ddr, dir_v);
5544 1.1 mrg }
5545 1.1 mrg }
5546 1.1 mrg
/* Helper function.  Returns true when there is a dependence between the
   data references.  A_INDEX is the index of the first reference (0 for
   DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.
   On return false, DDR has been finalized as either independent
   (chrec_known) or undetermined (chrec_dont_know).  */

static bool
subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
			       unsigned int a_index, unsigned int b_index,
			       class loop *loop_nest)
{
  unsigned int i;
  tree last_conflicts;
  struct subscript *subscript;
  /* Tri-state result: NULL_TREE = dependent so far, chrec_known =
     proved independent, chrec_dont_know = undetermined.  */
  tree res = NULL_TREE;

  for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
    {
      conflict_function *overlaps_a, *overlaps_b;

      analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
				      SUB_ACCESS_FN (subscript, b_index),
				      &overlaps_a, &overlaps_b,
				      &last_conflicts, loop_nest);

      /* Release conflict functions left over from a previous analysis
	 of this subscript before installing the new ones.  */
      if (SUB_CONFLICTS_IN_A (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
      if (SUB_CONFLICTS_IN_B (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));

      SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
      SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
      SUB_LAST_CONFLICT (subscript) = last_conflicts;

      /* If there is any undetermined conflict function we have to
	 give a conservative answer in case we cannot prove that
	 no dependence exists when analyzing another subscript.
	 Keep scanning: a later subscript may still prove independence.  */
      if (CF_NOT_KNOWN_P (overlaps_a)
	  || CF_NOT_KNOWN_P (overlaps_b))
	{
	  res = chrec_dont_know;
	  continue;
	}

      /* When there is a subscript with no dependence we can stop.  */
      else if (CF_NO_DEPENDENCE_P (overlaps_a)
	       || CF_NO_DEPENDENCE_P (overlaps_b))
	{
	  res = chrec_known;
	  break;
	}
    }

  /* All subscripts may conflict: the references are dependent.  */
  if (res == NULL_TREE)
    return true;

  if (res == chrec_known)
    dependence_stats.num_dependence_independent++;
  else
    dependence_stats.num_dependence_undetermined++;
  finalize_ddr_dependent (ddr, res);
  return false;
}
5608 1.1 mrg
5609 1.1 mrg /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
5610 1.1 mrg
5611 1.1 mrg static void
5612 1.1 mrg subscript_dependence_tester (struct data_dependence_relation *ddr,
5613 1.1 mrg class loop *loop_nest)
5614 1.1 mrg {
5615 1.1 mrg if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
5616 1.1 mrg dependence_stats.num_dependence_dependent++;
5617 1.1 mrg
5618 1.1 mrg compute_subscript_distance (ddr);
5619 1.1 mrg if (build_classic_dist_vector (ddr, loop_nest))
5620 1.1 mrg build_classic_dir_vector (ddr);
5621 1.1 mrg }
5622 1.1 mrg
5623 1.1 mrg /* Returns true when all the access functions of A are affine or
5624 1.1 mrg constant with respect to LOOP_NEST. */
5625 1.1 mrg
5626 1.1 mrg static bool
5627 1.1 mrg access_functions_are_affine_or_constant_p (const struct data_reference *a,
5628 1.1 mrg const class loop *loop_nest)
5629 1.1 mrg {
5630 1.1 mrg vec<tree> fns = DR_ACCESS_FNS (a);
5631 1.1 mrg for (tree t : fns)
5632 1.1 mrg if (!evolution_function_is_invariant_p (t, loop_nest->num)
5633 1.1 mrg && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
5634 1.1 mrg return false;
5635 1.1 mrg
5636 1.1 mrg return true;
5637 1.1 mrg }
5638 1.1 mrg
/* This computes the affine dependence relation between A and B with
   respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
   independence between two accesses, while CHREC_DONT_KNOW is used
   for representing the unknown relation.

   Note that it is possible to stop the computation of the dependence
   relation the first time we detect a CHREC_KNOWN element for a given
   subscript.  */

void
compute_affine_dependence (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(compute_affine_dependence\n");
      fprintf (dump_file, "  ref_a: ");
      print_generic_expr (dump_file, DR_REF (dra));
      fprintf (dump_file, ", stmt_a: ");
      print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
      fprintf (dump_file, "  ref_b: ");
      print_generic_expr (dump_file, DR_REF (drb));
      fprintf (dump_file, ", stmt_b: ");
      print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
    }

  /* Analyze only when the dependence relation is not yet known.  */
  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      dependence_stats.num_dependence_tests++;

      /* The subscript machinery only handles access functions that are
	 invariant or affine in the nest.  */
      if (access_functions_are_affine_or_constant_p (dra, loop_nest)
	  && access_functions_are_affine_or_constant_p (drb, loop_nest))
	subscript_dependence_tester (ddr, loop_nest);

      /* As a last case, if the dependence cannot be determined, or if
	 the dependence is considered too difficult to determine, answer
	 "don't know".  */
      else
	{
	  dependence_stats.num_dependence_undetermined++;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Data ref a:\n");
	      dump_data_reference (dump_file, dra);
	      fprintf (dump_file, "Data ref b:\n");
	      dump_data_reference (dump_file, drb);
	      fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
	    }
	  finalize_ddr_dependent (ddr, chrec_dont_know);
	}
    }

  /* Close the dump scope opened above, summarizing the outcome.  */
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	fprintf (dump_file, ") -> no dependence\n");
      else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	fprintf (dump_file, ") -> dependence analysis failed\n");
      else
	fprintf (dump_file, ")\n");
    }
}
5706 1.1 mrg
5707 1.1 mrg /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
5708 1.1 mrg the data references in DATAREFS, in the LOOP_NEST. When
5709 1.1 mrg COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
5710 1.1 mrg relations. Return true when successful, i.e. data references number
5711 1.1 mrg is small enough to be handled. */
5712 1.1 mrg
5713 1.1 mrg bool
5714 1.1 mrg compute_all_dependences (const vec<data_reference_p> &datarefs,
5715 1.1 mrg vec<ddr_p> *dependence_relations,
5716 1.1 mrg const vec<loop_p> &loop_nest,
5717 1.1 mrg bool compute_self_and_rr)
5718 1.1 mrg {
5719 1.1 mrg struct data_dependence_relation *ddr;
5720 1.1 mrg struct data_reference *a, *b;
5721 1.1 mrg unsigned int i, j;
5722 1.1 mrg
5723 1.1 mrg if ((int) datarefs.length ()
5724 1.1 mrg > param_loop_max_datarefs_for_datadeps)
5725 1.1 mrg {
5726 1.1 mrg struct data_dependence_relation *ddr;
5727 1.1 mrg
5728 1.1 mrg /* Insert a single relation into dependence_relations:
5729 1.1 mrg chrec_dont_know. */
5730 1.1 mrg ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
5731 1.1 mrg dependence_relations->safe_push (ddr);
5732 1.1 mrg return false;
5733 1.1 mrg }
5734 1.1 mrg
5735 1.1 mrg FOR_EACH_VEC_ELT (datarefs, i, a)
5736 1.1 mrg for (j = i + 1; datarefs.iterate (j, &b); j++)
5737 1.1 mrg if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
5738 1.1 mrg {
5739 1.1 mrg ddr = initialize_data_dependence_relation (a, b, loop_nest);
5740 1.1 mrg dependence_relations->safe_push (ddr);
5741 1.1 mrg if (loop_nest.exists ())
5742 1.1 mrg compute_affine_dependence (ddr, loop_nest[0]);
5743 1.1 mrg }
5744 1.1 mrg
5745 1.1 mrg if (compute_self_and_rr)
5746 1.1 mrg FOR_EACH_VEC_ELT (datarefs, i, a)
5747 1.1 mrg {
5748 1.1 mrg ddr = initialize_data_dependence_relation (a, a, loop_nest);
5749 1.1 mrg dependence_relations->safe_push (ddr);
5750 1.1 mrg if (loop_nest.exists ())
5751 1.1 mrg compute_affine_dependence (ddr, loop_nest[0]);
5752 1.1 mrg }
5753 1.1 mrg
5754 1.1 mrg return true;
5755 1.1 mrg }
5756 1.1 mrg
/* Describes a location of a memory reference.  A short-lived record
   produced by get_references_in_stmt and consumed when building
   data_reference objects.  */

struct data_ref_loc
{
  /* The memory reference.  */
  tree ref;

  /* True if the memory reference is read.  */
  bool is_read;

  /* True if the data reference is conditional within the containing
     statement, i.e. if it might not occur even when the statement
     is executed and runs to completion.  */
  bool is_conditional_in_stmt;
};
5772 1.1 mrg
5773 1.1 mrg
/* Stores the locations of memory references in STMT to REFERENCES.  Returns
   true if STMT clobbers memory, false otherwise.  */

static bool
get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
{
  bool clobbers_memory = false;
  data_ref_loc ref;
  tree op0, op1;
  enum gimple_code stmt_code = gimple_code (stmt);

  /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
     As we cannot model data-references to not spelled out
     accesses give up if they may occur.  */
  if (stmt_code == GIMPLE_CALL
      && !(gimple_call_flags (stmt) & ECF_CONST))
    {
      /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_SIMD_LANE:
	    {
	      class loop *loop = gimple_bb (stmt)->loop_father;
	      tree uid = gimple_call_arg (stmt, 0);
	      gcc_assert (TREE_CODE (uid) == SSA_NAME);
	      /* Only harmless when the simduid matches the loop it
		 sits in; otherwise treat it as a clobber.  */
	      if (loop == NULL
		  || loop->simduid != SSA_NAME_VAR (uid))
		clobbers_memory = true;
	      break;
	    }
	  case IFN_MASK_LOAD:
	  case IFN_MASK_STORE:
	    /* Handled explicitly below as analyzable references.  */
	    break;
	  default:
	    clobbers_memory = true;
	    break;
	  }
      else
	clobbers_memory = true;
    }
  else if (stmt_code == GIMPLE_ASM
	   && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
	       || gimple_vuse (stmt)))
    clobbers_memory = true;

  /* Without a virtual operand the statement touches no memory that we
     need to record.  */
  if (!gimple_vuse (stmt))
    return clobbers_memory;

  if (stmt_code == GIMPLE_ASSIGN)
    {
      tree base;
      op0 = gimple_assign_lhs (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      /* Record the RHS as a read when it is a decl or a reference whose
	 base is a genuine memory object (not an SSA name or constant).  */
      if (DECL_P (op1)
	  || (REFERENCE_CLASS_P (op1)
	      && (base = get_base_address (op1))
	      && TREE_CODE (base) != SSA_NAME
	      && !is_gimple_min_invariant (base)))
	{
	  ref.ref = op1;
	  ref.is_read = true;
	  ref.is_conditional_in_stmt = false;
	  references->safe_push (ref);
	}
    }
  else if (stmt_code == GIMPLE_CALL)
    {
      unsigned i, n;
      tree ptr, type;
      unsigned int align;

      ref.is_read = false;
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_LOAD:
	    if (gimple_call_lhs (stmt) == NULL_TREE)
	      break;
	    ref.is_read = true;
	    /* FALLTHRU */
	  case IFN_MASK_STORE:
	    /* Rebuild a MEM_REF from the pointer argument (arg 0) and
	       the alignment argument (arg 1) of the masked access.  */
	    ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
	    align = tree_to_shwi (gimple_call_arg (stmt, 1));
	    if (ref.is_read)
	      type = TREE_TYPE (gimple_call_lhs (stmt));
	    else
	      type = TREE_TYPE (gimple_call_arg (stmt, 3));
	    if (TYPE_ALIGN (type) != align)
	      type = build_aligned_type (type, align);
	    /* Masked accesses may not happen at runtime.  */
	    ref.is_conditional_in_stmt = true;
	    ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
				   ptr);
	    references->safe_push (ref);
	    return false;
	  default:
	    break;
	  }

      /* Record decl / memory-reference arguments as reads.  */
      op0 = gimple_call_lhs (stmt);
      n = gimple_call_num_args (stmt);
      for (i = 0; i < n; i++)
	{
	  op1 = gimple_call_arg (stmt, i);

	  if (DECL_P (op1)
	      || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
	    {
	      ref.ref = op1;
	      ref.is_read = true;
	      ref.is_conditional_in_stmt = false;
	      references->safe_push (ref);
	    }
	}
    }
  else
    return clobbers_memory;

  /* Record the LHS (assignment or call result) as a write.  */
  if (op0
      && (DECL_P (op0)
	  || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
    {
      ref.ref = op0;
      ref.is_read = false;
      ref.is_conditional_in_stmt = false;
      references->safe_push (ref);
    }
  return clobbers_memory;
}
5904 1.1 mrg
5905 1.1 mrg
5906 1.1 mrg /* Returns true if the loop-nest has any data reference. */
5907 1.1 mrg
5908 1.1 mrg bool
5909 1.1 mrg loop_nest_has_data_refs (loop_p loop)
5910 1.1 mrg {
5911 1.1 mrg basic_block *bbs = get_loop_body (loop);
5912 1.1 mrg auto_vec<data_ref_loc, 3> references;
5913 1.1 mrg
5914 1.1 mrg for (unsigned i = 0; i < loop->num_nodes; i++)
5915 1.1 mrg {
5916 1.1 mrg basic_block bb = bbs[i];
5917 1.1 mrg gimple_stmt_iterator bsi;
5918 1.1 mrg
5919 1.1 mrg for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5920 1.1 mrg {
5921 1.1 mrg gimple *stmt = gsi_stmt (bsi);
5922 1.1 mrg get_references_in_stmt (stmt, &references);
5923 1.1 mrg if (references.length ())
5924 1.1 mrg {
5925 1.1 mrg free (bbs);
5926 1.1 mrg return true;
5927 1.1 mrg }
5928 1.1 mrg }
5929 1.1 mrg }
5930 1.1 mrg free (bbs);
5931 1.1 mrg return false;
5932 1.1 mrg }
5933 1.1 mrg
5934 1.1 mrg /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
5935 1.1 mrg reference, returns false, otherwise returns true. NEST is the outermost
5936 1.1 mrg loop of the loop nest in which the references should be analyzed. */
5937 1.1 mrg
5938 1.1 mrg opt_result
5939 1.1 mrg find_data_references_in_stmt (class loop *nest, gimple *stmt,
5940 1.1 mrg vec<data_reference_p> *datarefs)
5941 1.1 mrg {
5942 1.1 mrg auto_vec<data_ref_loc, 2> references;
5943 1.1 mrg data_reference_p dr;
5944 1.1 mrg
5945 1.1 mrg if (get_references_in_stmt (stmt, &references))
5946 1.1 mrg return opt_result::failure_at (stmt, "statement clobbers memory: %G",
5947 1.1 mrg stmt);
5948 1.1 mrg
5949 1.1 mrg for (const data_ref_loc &ref : references)
5950 1.1 mrg {
5951 1.1 mrg dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
5952 1.1 mrg loop_containing_stmt (stmt), ref.ref,
5953 1.1 mrg stmt, ref.is_read, ref.is_conditional_in_stmt);
5954 1.1 mrg gcc_assert (dr != NULL);
5955 1.1 mrg datarefs->safe_push (dr);
5956 1.1 mrg }
5957 1.1 mrg
5958 1.1 mrg return opt_result::success ();
5959 1.1 mrg }
5960 1.1 mrg
5961 1.1 mrg /* Stores the data references in STMT to DATAREFS. If there is an
5962 1.1 mrg unanalyzable reference, returns false, otherwise returns true.
5963 1.1 mrg NEST is the outermost loop of the loop nest in which the references
5964 1.1 mrg should be instantiated, LOOP is the loop in which the references
5965 1.1 mrg should be analyzed. */
5966 1.1 mrg
5967 1.1 mrg bool
5968 1.1 mrg graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5969 1.1 mrg vec<data_reference_p> *datarefs)
5970 1.1 mrg {
5971 1.1 mrg auto_vec<data_ref_loc, 2> references;
5972 1.1 mrg bool ret = true;
5973 1.1 mrg data_reference_p dr;
5974 1.1 mrg
5975 1.1 mrg if (get_references_in_stmt (stmt, &references))
5976 1.1 mrg return false;
5977 1.1 mrg
5978 1.1 mrg for (const data_ref_loc &ref : references)
5979 1.1 mrg {
5980 1.1 mrg dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
5981 1.1 mrg ref.is_conditional_in_stmt);
5982 1.1 mrg gcc_assert (dr != NULL);
5983 1.1 mrg datarefs->safe_push (dr);
5984 1.1 mrg }
5985 1.1 mrg
5986 1.1 mrg return ret;
5987 1.1 mrg }
5988 1.1 mrg
5989 1.1 mrg /* Search the data references in LOOP, and record the information into
5990 1.1 mrg DATAREFS. Returns chrec_dont_know when failing to analyze a
5991 1.1 mrg difficult case, returns NULL_TREE otherwise. */
5992 1.1 mrg
5993 1.1 mrg tree
5994 1.1 mrg find_data_references_in_bb (class loop *loop, basic_block bb,
5995 1.1 mrg vec<data_reference_p> *datarefs)
5996 1.1 mrg {
5997 1.1 mrg gimple_stmt_iterator bsi;
5998 1.1 mrg
5999 1.1 mrg for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
6000 1.1 mrg {
6001 1.1 mrg gimple *stmt = gsi_stmt (bsi);
6002 1.1 mrg
6003 1.1 mrg if (!find_data_references_in_stmt (loop, stmt, datarefs))
6004 1.1 mrg {
6005 1.1 mrg struct data_reference *res;
6006 1.1 mrg res = XCNEW (struct data_reference);
6007 1.1 mrg datarefs->safe_push (res);
6008 1.1 mrg
6009 1.1 mrg return chrec_dont_know;
6010 1.1 mrg }
6011 1.1 mrg }
6012 1.1 mrg
6013 1.1 mrg return NULL_TREE;
6014 1.1 mrg }
6015 1.1 mrg
6016 1.1 mrg /* Search the data references in LOOP, and record the information into
6017 1.1 mrg DATAREFS. Returns chrec_dont_know when failing to analyze a
6018 1.1 mrg difficult case, returns NULL_TREE otherwise.
6019 1.1 mrg
6020 1.1 mrg TODO: This function should be made smarter so that it can handle address
6021 1.1 mrg arithmetic as if they were array accesses, etc. */
6022 1.1 mrg
6023 1.1 mrg tree
6024 1.1 mrg find_data_references_in_loop (class loop *loop,
6025 1.1 mrg vec<data_reference_p> *datarefs)
6026 1.1 mrg {
6027 1.1 mrg basic_block bb, *bbs;
6028 1.1 mrg unsigned int i;
6029 1.1 mrg
6030 1.1 mrg bbs = get_loop_body_in_dom_order (loop);
6031 1.1 mrg
6032 1.1 mrg for (i = 0; i < loop->num_nodes; i++)
6033 1.1 mrg {
6034 1.1 mrg bb = bbs[i];
6035 1.1 mrg
6036 1.1 mrg if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
6037 1.1 mrg {
6038 1.1 mrg free (bbs);
6039 1.1 mrg return chrec_dont_know;
6040 1.1 mrg }
6041 1.1 mrg }
6042 1.1 mrg free (bbs);
6043 1.1 mrg
6044 1.1 mrg return NULL_TREE;
6045 1.1 mrg }
6046 1.1 mrg
6047 1.1 mrg /* Return the alignment in bytes that DRB is guaranteed to have at all
6048 1.1 mrg times. */
6049 1.1 mrg
6050 1.1 mrg unsigned int
6051 1.1 mrg dr_alignment (innermost_loop_behavior *drb)
6052 1.1 mrg {
6053 1.1 mrg /* Get the alignment of BASE_ADDRESS + INIT. */
6054 1.1 mrg unsigned int alignment = drb->base_alignment;
6055 1.1 mrg unsigned int misalignment = (drb->base_misalignment
6056 1.1 mrg + TREE_INT_CST_LOW (drb->init));
6057 1.1 mrg if (misalignment != 0)
6058 1.1 mrg alignment = MIN (alignment, misalignment & -misalignment);
6059 1.1 mrg
6060 1.1 mrg /* Cap it to the alignment of OFFSET. */
6061 1.1 mrg if (!integer_zerop (drb->offset))
6062 1.1 mrg alignment = MIN (alignment, drb->offset_alignment);
6063 1.1 mrg
6064 1.1 mrg /* Cap it to the alignment of STEP. */
6065 1.1 mrg if (!integer_zerop (drb->step))
6066 1.1 mrg alignment = MIN (alignment, drb->step_alignment);
6067 1.1 mrg
6068 1.1 mrg return alignment;
6069 1.1 mrg }
6070 1.1 mrg
/* If BASE is a pointer-typed SSA name, try to find the object that it
   is based on.  Return this object X on success and store the alignment
   in bytes of BASE - &X in *ALIGNMENT_OUT.  */

static tree
get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
{
  /* Only pointer-typed SSA names can be traced back to an object.  */
  if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  gimple *def = SSA_NAME_DEF_STMT (base);
  base = analyze_scalar_evolution (loop_containing_stmt (def), base);

  /* Peel chrecs and record the minimum alignment preserved by
     all steps.  */
  unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  while (TREE_CODE (base) == POLYNOMIAL_CHREC)
    {
      unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
      alignment = MIN (alignment, step_alignment);
      base = CHREC_LEFT (base);
    }

  /* Punt if the expression is too complicated to handle.  */
  if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  /* The only useful cases are those for which a dereference folds to something
     other than an INDIRECT_REF.  */
  tree ref_type = TREE_TYPE (TREE_TYPE (base));
  tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
  if (!ref)
    return NULL_TREE;

  /* Analyze the base to which the steps we peeled were applied.  */
  poly_int64 bitsize, bitpos, bytepos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset;
  base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
			      &unsignedp, &reversep, &volatilep);
  /* Give up when the bit position is not a whole number of bytes.  */
  if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
    return NULL_TREE;

  /* Restrict the alignment to that guaranteed by the offsets.  */
  unsigned int bytepos_alignment = known_alignment (bytepos);
  if (bytepos_alignment != 0)
    alignment = MIN (alignment, bytepos_alignment);
  if (offset)
    {
      unsigned int offset_alignment = highest_pow2_factor (offset);
      alignment = MIN (alignment, offset_alignment);
    }

  *alignment_out = alignment;
  return base;
}
6128 1.1 mrg
6129 1.1 mrg /* Return the object whose alignment would need to be changed in order
6130 1.1 mrg to increase the alignment of ADDR. Store the maximum achievable
6131 1.1 mrg alignment in *MAX_ALIGNMENT. */
6132 1.1 mrg
6133 1.1 mrg tree
6134 1.1 mrg get_base_for_alignment (tree addr, unsigned int *max_alignment)
6135 1.1 mrg {
6136 1.1 mrg tree base = get_base_for_alignment_1 (addr, max_alignment);
6137 1.1 mrg if (base)
6138 1.1 mrg return base;
6139 1.1 mrg
6140 1.1 mrg if (TREE_CODE (addr) == ADDR_EXPR)
6141 1.1 mrg addr = TREE_OPERAND (addr, 0);
6142 1.1 mrg *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6143 1.1 mrg return addr;
6144 1.1 mrg }
6145 1.1 mrg
6146 1.1 mrg /* Recursive helper function. */
6147 1.1 mrg
6148 1.1 mrg static bool
6149 1.1 mrg find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
6150 1.1 mrg {
6151 1.1 mrg /* Inner loops of the nest should not contain siblings. Example:
6152 1.1 mrg when there are two consecutive loops,
6153 1.1 mrg
6154 1.1 mrg | loop_0
6155 1.1 mrg | loop_1
6156 1.1 mrg | A[{0, +, 1}_1]
6157 1.1 mrg | endloop_1
6158 1.1 mrg | loop_2
6159 1.1 mrg | A[{0, +, 1}_2]
6160 1.1 mrg | endloop_2
6161 1.1 mrg | endloop_0
6162 1.1 mrg
6163 1.1 mrg the dependence relation cannot be captured by the distance
6164 1.1 mrg abstraction. */
6165 1.1 mrg if (loop->next)
6166 1.1 mrg return false;
6167 1.1 mrg
6168 1.1 mrg loop_nest->safe_push (loop);
6169 1.1 mrg if (loop->inner)
6170 1.1 mrg return find_loop_nest_1 (loop->inner, loop_nest);
6171 1.1 mrg return true;
6172 1.1 mrg }
6173 1.1 mrg
6174 1.1 mrg /* Return false when the LOOP is not well nested. Otherwise return
6175 1.1 mrg true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
6176 1.1 mrg contain the loops from the outermost to the innermost, as they will
6177 1.1 mrg appear in the classic distance vector. */
6178 1.1 mrg
6179 1.1 mrg bool
6180 1.1 mrg find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
6181 1.1 mrg {
6182 1.1 mrg loop_nest->safe_push (loop);
6183 1.1 mrg if (loop->inner)
6184 1.1 mrg return find_loop_nest_1 (loop->inner, loop_nest);
6185 1.1 mrg return true;
6186 1.1 mrg }
6187 1.1 mrg
/* Returns true when the data dependences have been computed, false otherwise.
   Given a loop nest LOOP, the following vectors are returned:
   DATAREFS is initialized to all the array elements contained in this loop,
   DEPENDENCE_RELATIONS contains the relations between the data references.
   Compute read-read and self relations if
   COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */

bool
compute_data_dependences_for_loop (class loop *loop,
				   bool compute_self_and_read_read_dependences,
				   vec<loop_p> *loop_nest,
				   vec<data_reference_p> *datarefs,
				   vec<ddr_p> *dependence_relations)
{
  bool res = true;

  /* Reset the global statistics counters for this analysis run.  */
  memset (&dependence_stats, 0, sizeof (dependence_stats));

  /* If the loop nest is not well formed, or one of the data references
     is not computable, give up without spending time to compute other
     dependences.  */
  if (!loop
      || !find_loop_nest (loop, loop_nest)
      || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
      || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
				   compute_self_and_read_read_dependences))
    res = false;

  /* Dump the accumulated statistics even on failure, when requested.  */
  if (dump_file && (dump_flags & TDF_STATS))
    {
      fprintf (dump_file, "Dependence tester statistics:\n");

      fprintf (dump_file, "Number of dependence tests: %d\n",
	       dependence_stats.num_dependence_tests);
      fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
	       dependence_stats.num_dependence_dependent);
      fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
	       dependence_stats.num_dependence_independent);
      fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
	       dependence_stats.num_dependence_undetermined);

      fprintf (dump_file, "Number of subscript tests: %d\n",
	       dependence_stats.num_subscript_tests);
      fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
	       dependence_stats.num_subscript_undetermined);
      fprintf (dump_file, "Number of same subscript function: %d\n",
	       dependence_stats.num_same_subscript_function);

      fprintf (dump_file, "Number of ziv tests: %d\n",
	       dependence_stats.num_ziv);
      fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
	       dependence_stats.num_ziv_dependent);
      fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
	       dependence_stats.num_ziv_independent);
      fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
	       dependence_stats.num_ziv_unimplemented);

      fprintf (dump_file, "Number of siv tests: %d\n",
	       dependence_stats.num_siv);
      fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
	       dependence_stats.num_siv_dependent);
      fprintf (dump_file, "Number of siv tests returning independent: %d\n",
	       dependence_stats.num_siv_independent);
      fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
	       dependence_stats.num_siv_unimplemented);

      fprintf (dump_file, "Number of miv tests: %d\n",
	       dependence_stats.num_miv);
      fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
	       dependence_stats.num_miv_dependent);
      fprintf (dump_file, "Number of miv tests returning independent: %d\n",
	       dependence_stats.num_miv_independent);
      fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
	       dependence_stats.num_miv_unimplemented);
    }

  return res;
}
6266 1.1 mrg
6267 1.1 mrg /* Free the memory used by a data dependence relation DDR. */
6268 1.1 mrg
6269 1.1 mrg void
6270 1.1 mrg free_dependence_relation (struct data_dependence_relation *ddr)
6271 1.1 mrg {
6272 1.1 mrg if (ddr == NULL)
6273 1.1 mrg return;
6274 1.1 mrg
6275 1.1 mrg if (DDR_SUBSCRIPTS (ddr).exists ())
6276 1.1 mrg free_subscripts (DDR_SUBSCRIPTS (ddr));
6277 1.1 mrg DDR_DIST_VECTS (ddr).release ();
6278 1.1 mrg DDR_DIR_VECTS (ddr).release ();
6279 1.1 mrg
6280 1.1 mrg free (ddr);
6281 1.1 mrg }
6282 1.1 mrg
6283 1.1 mrg /* Free the memory used by the data dependence relations from
6284 1.1 mrg DEPENDENCE_RELATIONS. */
6285 1.1 mrg
6286 1.1 mrg void
6287 1.1 mrg free_dependence_relations (vec<ddr_p>& dependence_relations)
6288 1.1 mrg {
6289 1.1 mrg for (data_dependence_relation *ddr : dependence_relations)
6290 1.1 mrg if (ddr)
6291 1.1 mrg free_dependence_relation (ddr);
6292 1.1 mrg
6293 1.1 mrg dependence_relations.release ();
6294 1.1 mrg }
6295 1.1 mrg
6296 1.1 mrg /* Free the memory used by the data references from DATAREFS. */
6297 1.1 mrg
6298 1.1 mrg void
6299 1.1 mrg free_data_refs (vec<data_reference_p>& datarefs)
6300 1.1 mrg {
6301 1.1 mrg for (data_reference *dr : datarefs)
6302 1.1 mrg free_data_ref (dr);
6303 1.1 mrg datarefs.release ();
6304 1.1 mrg }
6305 1.1 mrg
/* Common routine implementing both dr_direction_indicator and
   dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
   to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
   Return the step as the indicator otherwise.  */

static tree
dr_step_indicator (struct data_reference *dr, int useful_min)
{
  tree step = DR_STEP (dr);
  if (!step)
    return NULL_TREE;
  STRIP_NOPS (step);
  /* Look for cases where the step is scaled by a positive constant
     integer, which will often be the access size.  If the multiplication
     doesn't change the sign (due to overflow effects) then we can
     test the unscaled value instead.  */
  if (TREE_CODE (step) == MULT_EXPR
      && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
      && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
    {
      tree factor = TREE_OPERAND (step, 1);
      step = TREE_OPERAND (step, 0);

      /* Strip widening and truncating conversions as well as nops.  */
      if (CONVERT_EXPR_P (step)
	  && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
	step = TREE_OPERAND (step, 0);
      tree type = TREE_TYPE (step);

      /* Get the range of step values that would not cause overflow.  */
      widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
			 / wi::to_widest (factor));
      widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
			 / wi::to_widest (factor));

      /* Get the range of values that the unconverted step actually has.
	 Fall back to the full type range when no range info is known.  */
      wide_int step_min, step_max;
      value_range vr;
      if (TREE_CODE (step) != SSA_NAME
	  || !get_range_query (cfun)->range_of_expr (vr, step)
	  || vr.kind () != VR_RANGE)
	{
	  step_min = wi::to_wide (TYPE_MIN_VALUE (type));
	  step_max = wi::to_wide (TYPE_MAX_VALUE (type));
	}
      else
	{
	  step_min = vr.lower_bound ();
	  step_max = vr.upper_bound ();
	}

      /* Check whether the unconverted step has an acceptable range,
	 i.e. the multiplication by FACTOR cannot overflow ssizetype.  */
      signop sgn = TYPE_SIGN (type);
      if (wi::les_p (minv, widest_int::from (step_min, sgn))
	  && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
	{
	  if (wi::ge_p (step_min, useful_min, sgn))
	    return ssize_int (useful_min);
	  else if (wi::lt_p (step_max, 0, sgn))
	    return ssize_int (-1);
	  else
	    return fold_convert (ssizetype, step);
	}
    }
  /* No usable scaled form: the step itself is the indicator.  */
  return DR_STEP (dr);
}
6372 1.1 mrg
6373 1.1 mrg /* Return a value that is negative iff DR has a negative step. */
6374 1.1 mrg
6375 1.1 mrg tree
6376 1.1 mrg dr_direction_indicator (struct data_reference *dr)
6377 1.1 mrg {
6378 1.1 mrg return dr_step_indicator (dr, 0);
6379 1.1 mrg }
6380 1.1 mrg
6381 1.1 mrg /* Return a value that is zero iff DR has a zero step. */
6382 1.1 mrg
6383 1.1 mrg tree
6384 1.1 mrg dr_zero_step_indicator (struct data_reference *dr)
6385 1.1 mrg {
6386 1.1 mrg return dr_step_indicator (dr, 1);
6387 1.1 mrg }
6388 1.1 mrg
6389 1.1 mrg /* Return true if DR is known to have a nonnegative (but possibly zero)
6390 1.1 mrg step. */
6391 1.1 mrg
6392 1.1 mrg bool
6393 1.1 mrg dr_known_forward_stride_p (struct data_reference *dr)
6394 1.1 mrg {
6395 1.1 mrg tree indicator = dr_direction_indicator (dr);
6396 1.1 mrg tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
6397 1.1 mrg fold_convert (ssizetype, indicator),
6398 1.1 mrg ssize_int (0));
6399 return neg_step_val && integer_zerop (neg_step_val);
6400 }
6401