/* Data references and dependences detectors.
2 Copyright (C) 2003-2022 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop (at) cri.ensmp.fr>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
24
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
30
31 The goals of this analysis are:
32
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
36
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
39
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
45
46 - to define a knowledge base for storing the data dependence
47 information,
48
49 - to define an interface to access this data.
50
51
52 Definitions:
53
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
58
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
63
64 References:
65
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
69
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
71 by Utpal Banerjee.
72
73
74 */
75
76 #define INCLUDE_ALGORITHM
77 #include "config.h"
78 #include "system.h"
79 #include "coretypes.h"
80 #include "backend.h"
81 #include "rtl.h"
82 #include "tree.h"
83 #include "gimple.h"
84 #include "gimple-pretty-print.h"
85 #include "alias.h"
86 #include "fold-const.h"
87 #include "expr.h"
88 #include "gimple-iterator.h"
89 #include "tree-ssa-loop-niter.h"
90 #include "tree-ssa-loop.h"
91 #include "tree-ssa.h"
92 #include "cfgloop.h"
93 #include "tree-data-ref.h"
94 #include "tree-scalar-evolution.h"
95 #include "dumpfile.h"
96 #include "tree-affine.h"
97 #include "builtins.h"
98 #include "tree-eh.h"
99 #include "ssa.h"
100 #include "internal-fn.h"
101 #include "vr-values.h"
102 #include "range-op.h"
103 #include "tree-ssa-loop-ivopts.h"
104
/* Statistics collected while running the dependence tests: counters
   for the overall relation tests, for the per-subscript tests, and
   for each subscript class -- ZIV/SIV/MIV (zero/single/multiple
   index variable) in classic dependence-analysis terminology.  */

static struct datadep_stats
{
  /* Whole-relation outcomes.  */
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  /* Per-subscript outcomes.  */
  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  /* ZIV subscript tests.  */
  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  /* SIV subscript tests.  */
  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  /* MIV subscript tests.  */
  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;
131
132 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
133 unsigned int, unsigned int,
134 class loop *);
135 /* Returns true iff A divides B. */
136
137 static inline bool
138 tree_fold_divides_p (const_tree a, const_tree b)
139 {
140 gcc_assert (TREE_CODE (a) == INTEGER_CST);
141 gcc_assert (TREE_CODE (b) == INTEGER_CST);
142 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
143 }
144
145 /* Returns true iff A divides B. */
146
147 static inline bool
148 int_divides_p (lambda_int a, lambda_int b)
149 {
150 return ((b % a) == 0);
151 }
152
153 /* Return true if reference REF contains a union access. */
154
155 static bool
156 ref_contains_union_access_p (tree ref)
157 {
158 while (handled_component_p (ref))
159 {
160 ref = TREE_OPERAND (ref, 0);
161 if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
162 || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
163 return true;
164 }
165 return false;
166 }
167
168
169
171 /* Dump into FILE all the data references from DATAREFS. */
172
173 static void
174 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
175 {
176 for (data_reference *dr : datarefs)
177 dump_data_reference (file, dr);
178 }
179
/* Unified dump to stderr of all the data references in REF.  This
   overload lets "debug (x)" be called from the debugger on a vector
   taken by reference.  */

DEBUG_FUNCTION void
debug (vec<data_reference_p> &ref)
{
  dump_data_references (stderr, ref);
}
187
188 DEBUG_FUNCTION void
189 debug (vec<data_reference_p> *ptr)
190 {
191 if (ptr)
192 debug (*ptr);
193 else
194 fprintf (stderr, "<nil>\n");
195 }
196
197
/* Dump into STDERR all the data references from DATAREFS.
   Debugger-friendly wrapper around dump_data_references.  */

DEBUG_FUNCTION void
debug_data_references (vec<data_reference_p> datarefs)
{
  dump_data_references (stderr, datarefs);
}
205
/* Print to STDERR the data_reference DR.  Debugger-friendly wrapper
   around dump_data_reference.  */

DEBUG_FUNCTION void
debug_data_reference (struct data_reference *dr)
{
  dump_data_reference (stderr, dr);
}
213
214 /* Dump function for a DATA_REFERENCE structure. */
215
216 void
217 dump_data_reference (FILE *outf,
218 struct data_reference *dr)
219 {
220 unsigned int i;
221
222 fprintf (outf, "#(Data Ref: \n");
223 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
224 fprintf (outf, "# stmt: ");
225 print_gimple_stmt (outf, DR_STMT (dr), 0);
226 fprintf (outf, "# ref: ");
227 print_generic_stmt (outf, DR_REF (dr));
228 fprintf (outf, "# base_object: ");
229 print_generic_stmt (outf, DR_BASE_OBJECT (dr));
230
231 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
232 {
233 fprintf (outf, "# Access function %d: ", i);
234 print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
235 }
236 fprintf (outf, "#)\n");
237 }
238
/* Unified dump function for a DATA_REFERENCE structure: dump REF to
   stderr.  Reference overload for debugger use.  */

DEBUG_FUNCTION void
debug (data_reference &ref)
{
  dump_data_reference (stderr, &ref);
}
246
247 DEBUG_FUNCTION void
248 debug (data_reference *ptr)
249 {
250 if (ptr)
251 debug (*ptr);
252 else
253 fprintf (stderr, "<nil>\n");
254 }
255
256
257 /* Dumps the affine function described by FN to the file OUTF. */
258
259 DEBUG_FUNCTION void
260 dump_affine_function (FILE *outf, affine_fn fn)
261 {
262 unsigned i;
263 tree coef;
264
265 print_generic_expr (outf, fn[0], TDF_SLIM);
266 for (i = 1; fn.iterate (i, &coef); i++)
267 {
268 fprintf (outf, " + ");
269 print_generic_expr (outf, coef, TDF_SLIM);
270 fprintf (outf, " * x_%u", i);
271 }
272 }
273
274 /* Dumps the conflict function CF to the file OUTF. */
275
276 DEBUG_FUNCTION void
277 dump_conflict_function (FILE *outf, conflict_function *cf)
278 {
279 unsigned i;
280
281 if (cf->n == NO_DEPENDENCE)
282 fprintf (outf, "no dependence");
283 else if (cf->n == NOT_KNOWN)
284 fprintf (outf, "not known");
285 else
286 {
287 for (i = 0; i < cf->n; i++)
288 {
289 if (i != 0)
290 fprintf (outf, " ");
291 fprintf (outf, "[");
292 dump_affine_function (outf, cf->fns[i]);
293 fprintf (outf, "]");
294 }
295 }
296 }
297
298 /* Dump function for a SUBSCRIPT structure. */
299
300 DEBUG_FUNCTION void
301 dump_subscript (FILE *outf, struct subscript *subscript)
302 {
303 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
304
305 fprintf (outf, "\n (subscript \n");
306 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
307 dump_conflict_function (outf, cf);
308 if (CF_NONTRIVIAL_P (cf))
309 {
310 tree last_iteration = SUB_LAST_CONFLICT (subscript);
311 fprintf (outf, "\n last_conflict: ");
312 print_generic_expr (outf, last_iteration);
313 }
314
315 cf = SUB_CONFLICTS_IN_B (subscript);
316 fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
317 dump_conflict_function (outf, cf);
318 if (CF_NONTRIVIAL_P (cf))
319 {
320 tree last_iteration = SUB_LAST_CONFLICT (subscript);
321 fprintf (outf, "\n last_conflict: ");
322 print_generic_expr (outf, last_iteration);
323 }
324
325 fprintf (outf, "\n (Subscript distance: ");
326 print_generic_expr (outf, SUB_DISTANCE (subscript));
327 fprintf (outf, " ))\n");
328 }
329
330 /* Print the classic direction vector DIRV to OUTF. */
331
332 DEBUG_FUNCTION void
333 print_direction_vector (FILE *outf,
334 lambda_vector dirv,
335 int length)
336 {
337 int eq;
338
339 for (eq = 0; eq < length; eq++)
340 {
341 enum data_dependence_direction dir = ((enum data_dependence_direction)
342 dirv[eq]);
343
344 switch (dir)
345 {
346 case dir_positive:
347 fprintf (outf, " +");
348 break;
349 case dir_negative:
350 fprintf (outf, " -");
351 break;
352 case dir_equal:
353 fprintf (outf, " =");
354 break;
355 case dir_positive_or_equal:
356 fprintf (outf, " +=");
357 break;
358 case dir_positive_or_negative:
359 fprintf (outf, " +-");
360 break;
361 case dir_negative_or_equal:
362 fprintf (outf, " -=");
363 break;
364 case dir_star:
365 fprintf (outf, " *");
366 break;
367 default:
368 fprintf (outf, "indep");
369 break;
370 }
371 }
372 fprintf (outf, "\n");
373 }
374
375 /* Print a vector of direction vectors. */
376
377 DEBUG_FUNCTION void
378 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
379 int length)
380 {
381 for (lambda_vector v : dir_vects)
382 print_direction_vector (outf, v, length);
383 }
384
385 /* Print out a vector VEC of length N to OUTFILE. */
386
387 DEBUG_FUNCTION void
388 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
389 {
390 int i;
391
392 for (i = 0; i < n; i++)
393 fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]);
394 fprintf (outfile, "\n");
395 }
396
397 /* Print a vector of distance vectors. */
398
399 DEBUG_FUNCTION void
400 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
401 int length)
402 {
403 for (lambda_vector v : dist_vects)
404 print_lambda_vector (outf, v, length);
405 }
406
407 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
408
409 DEBUG_FUNCTION void
410 dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
411 {
412 struct data_reference *dra, *drb;
413
414 fprintf (outf, "(Data Dep: \n");
415
416 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
417 {
418 if (ddr)
419 {
420 dra = DDR_A (ddr);
421 drb = DDR_B (ddr);
422 if (dra)
423 dump_data_reference (outf, dra);
424 else
425 fprintf (outf, " (nil)\n");
426 if (drb)
427 dump_data_reference (outf, drb);
428 else
429 fprintf (outf, " (nil)\n");
430 }
431 fprintf (outf, " (don't know)\n)\n");
432 return;
433 }
434
435 dra = DDR_A (ddr);
436 drb = DDR_B (ddr);
437 dump_data_reference (outf, dra);
438 dump_data_reference (outf, drb);
439
440 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
441 fprintf (outf, " (no dependence)\n");
442
443 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
444 {
445 unsigned int i;
446 class loop *loopi;
447
448 subscript *sub;
449 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
450 {
451 fprintf (outf, " access_fn_A: ");
452 print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
453 fprintf (outf, " access_fn_B: ");
454 print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
455 dump_subscript (outf, sub);
456 }
457
458 fprintf (outf, " loop nest: (");
459 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
460 fprintf (outf, "%d ", loopi->num);
461 fprintf (outf, ")\n");
462
463 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
464 {
465 fprintf (outf, " distance_vector: ");
466 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
467 DDR_NB_LOOPS (ddr));
468 }
469
470 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
471 {
472 fprintf (outf, " direction_vector: ");
473 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
474 DDR_NB_LOOPS (ddr));
475 }
476 }
477
478 fprintf (outf, ")\n");
479 }
480
/* Debug version: dump DDR to stderr.  */

DEBUG_FUNCTION void
debug_data_dependence_relation (const struct data_dependence_relation *ddr)
{
  dump_data_dependence_relation (stderr, ddr);
}
488
489 /* Dump into FILE all the dependence relations from DDRS. */
490
491 DEBUG_FUNCTION void
492 dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
493 {
494 for (auto ddr : ddrs)
495 dump_data_dependence_relation (file, ddr);
496 }
497
/* Unified dump to stderr of all the dependence relations in REF;
   reference overload for debugger use.  */

DEBUG_FUNCTION void
debug (vec<ddr_p> &ref)
{
  dump_data_dependence_relations (stderr, ref);
}
503
504 DEBUG_FUNCTION void
505 debug (vec<ddr_p> *ptr)
506 {
507 if (ptr)
508 debug (*ptr);
509 else
510 fprintf (stderr, "<nil>\n");
511 }
512
513
/* Dump to STDERR all the dependence relations from DDRS.
   Debugger-friendly wrapper around dump_data_dependence_relations.  */

DEBUG_FUNCTION void
debug_data_dependence_relations (vec<ddr_p> ddrs)
{
  dump_data_dependence_relations (stderr, ddrs);
}
521
522 /* Dumps the distance and direction vectors in FILE. DDRS contains
523 the dependence relations, and VECT_SIZE is the size of the
524 dependence vectors, or in other words the number of loops in the
525 considered nest. */
526
527 DEBUG_FUNCTION void
528 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
529 {
530 for (data_dependence_relation *ddr : ddrs)
531 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
532 {
533 for (lambda_vector v : DDR_DIST_VECTS (ddr))
534 {
535 fprintf (file, "DISTANCE_V (");
536 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
537 fprintf (file, ")\n");
538 }
539
540 for (lambda_vector v : DDR_DIR_VECTS (ddr))
541 {
542 fprintf (file, "DIRECTION_V (");
543 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
544 fprintf (file, ")\n");
545 }
546 }
547
548 fprintf (file, "\n\n");
549 }
550
551 /* Dumps the data dependence relations DDRS in FILE. */
552
553 DEBUG_FUNCTION void
554 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
555 {
556 for (data_dependence_relation *ddr : ddrs)
557 dump_data_dependence_relation (file, ddr);
558
559 fprintf (file, "\n\n");
560 }
561
/* Dump to stderr the data dependence relations DDRS.  */

DEBUG_FUNCTION void
debug_ddrs (vec<ddr_p> ddrs)
{
  dump_ddrs (stderr, ddrs);
}
567
/* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
   OP0 CODE OP1, where:

   - OP0 CODE OP1 has integral type TYPE
   - the range of OP0 is given by OP0_RANGE and
   - the range of OP1 is given by OP1_RANGE.

   Independently of RESULT_RANGE, try to compute:

     DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
	     - (sizetype) (OP0 CODE OP1)

   as a constant and subtract DELTA from the ssizetype constant in *OFF.
   Return true on success, or false if DELTA is not known at compile time.

   Truncation and sign changes are known to distribute over CODE, i.e.

     (itype) (A CODE B) == (itype) A CODE (itype) B

   for any integral type ITYPE whose precision is no greater than the
   precision of A and B.  */

static bool
compute_distributive_range (tree type, value_range &op0_range,
			    tree_code code, value_range &op1_range,
			    tree *off, value_range *result_range)
{
  /* Trapping-overflow types are rejected by the caller
     (split_constant_offset_1).  */
  gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
  if (result_range)
    {
      range_operator *op = range_op_handler (code, type);
      op->fold_range (*result_range, type, op0_range, op1_range);
    }

  /* The distributive property guarantees that if TYPE is no narrower
     than SIZETYPE,

       (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1

     and so we can treat DELTA as zero.  */
  if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
    return true;

  /* If overflow is undefined, we can assume that:

       X == (ssizetype) OP0 CODE (ssizetype) OP1

     is within the range of TYPE, i.e.:

       X == (ssizetype) (TYPE) X

     Distributing the (TYPE) truncation over X gives:

       X == (ssizetype) (OP0 CODE OP1)

     Casting both sides to sizetype and distributing the sizetype cast
     over X gives:

       (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)

     and so we can treat DELTA as zero.  */
  if (TYPE_OVERFLOW_UNDEFINED (type))
    return true;

  /* Compute the range of:

       (ssizetype) OP0 CODE (ssizetype) OP1

     The distributive property guarantees that this has the same bitpattern as:

       (sizetype) OP0 CODE (sizetype) OP1

     but its range is more conducive to analysis.  */
  range_cast (op0_range, ssizetype);
  range_cast (op1_range, ssizetype);
  value_range wide_range;
  range_operator *op = range_op_handler (code, ssizetype);
  /* Fold under wrapping semantics so that the result describes the
     bitpattern rather than triggering undefined-overflow folding;
     restore flag_wrapv immediately afterwards.  */
  bool saved_flag_wrapv = flag_wrapv;
  flag_wrapv = 1;
  op->fold_range (wide_range, ssizetype, op0_range, op1_range);
  flag_wrapv = saved_flag_wrapv;
  /* Punt unless the folded result is a single constant range.  */
  if (wide_range.num_pairs () != 1 || !range_int_cst_p (&wide_range))
    return false;

  wide_int lb = wide_range.lower_bound ();
  wide_int ub = wide_range.upper_bound ();

  /* Calculate the number of times that each end of the range overflows or
     underflows TYPE.  We can only calculate DELTA if the numbers match.  */
  unsigned int precision = TYPE_PRECISION (type);
  if (!TYPE_UNSIGNED (type))
    {
      /* Bias signed bounds so that the overflow count appears in the
	 bits above PRECISION, as in the unsigned case.  */
      wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
      lb -= type_min;
      ub -= type_min;
    }
  wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
  lb &= upper_bits;
  ub &= upper_bits;
  if (lb != ub)
    return false;

  /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
     negative values indicating underflow.  The low PRECISION bits of LB
     are clear, so DELTA is therefore LB (== UB).  */
  *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
  return true;
}
676
/* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
   given that OP has type FROM_TYPE and range RANGE.  Both TO_TYPE and
   FROM_TYPE are integral types.  In other words, return true if the
   intermediate conversion to TO_TYPE does not change the value that
   ends up in sizetype.  */

static bool
nop_conversion_for_offset_p (tree to_type, tree from_type, value_range &range)
{
  /* Trapping-overflow types are filtered out by the callers.  */
  gcc_assert (INTEGRAL_TYPE_P (to_type)
	      && INTEGRAL_TYPE_P (from_type)
	      && !TYPE_OVERFLOW_TRAPS (to_type)
	      && !TYPE_OVERFLOW_TRAPS (from_type));

  /* Converting to something no narrower than sizetype and then to sizetype
     is equivalent to converting directly to sizetype.  */
  if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
    return true;

  /* Check whether TO_TYPE can represent all values that FROM_TYPE can.  */
  if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
      && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
    return true;

  /* For narrowing conversions, we could in principle test whether
     the bits in FROM_TYPE but not in TO_TYPE have a fixed value
     and apply a constant adjustment.

     For other conversions (which involve a sign change) we could
     check that the signs are always equal, and apply a constant
     adjustment if the signs are negative.

     However, both cases should be rare.  */
  return range_fits_type_p (&range, TYPE_PRECISION (to_type),
			    TYPE_SIGN (to_type));
}
711
712 static void
713 split_constant_offset (tree type, tree *var, tree *off,
714 value_range *result_range,
715 hash_map<tree, std::pair<tree, tree> > &cache,
716 unsigned *limit);
717
/* Helper function for split_constant_offset.  If TYPE is a pointer type,
   try to express OP0 CODE OP1 as:

     POINTER_PLUS <*VAR, (sizetype) *OFF>

   where:

   - *VAR has type TYPE
   - *OFF is a constant of type ssizetype.

   If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:

     *VAR + (sizetype) *OFF

   where:

   - *VAR has type sizetype
   - *OFF is a constant of type ssizetype.

   In both cases, OP0 CODE OP1 has type TYPE.

   Return true on success.  A false return value indicates that we can't
   do better than set *OFF to zero.

   When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
   if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.

   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
   visited.  LIMIT counts down the number of SSA names that we are
   allowed to process before giving up.  */

static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
			 tree *var, tree *off, value_range *result_range,
			 hash_map<tree, std::pair<tree, tree> > &cache,
			 unsigned *limit)
{
  tree var0, var1;
  tree off0, off1;
  value_range op0_range, op1_range;

  *var = NULL_TREE;
  *off = NULL_TREE;

  /* Trapping overflow invalidates the rearrangements below.  */
  if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
    return false;

  /* Do not look through SSA names that occur in abnormal PHIs.  */
  if (TREE_CODE (op0) == SSA_NAME
      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
    return false;
  if (op1
      && TREE_CODE (op1) == SSA_NAME
      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op1))
    return false;

  switch (code)
    {
    /* A constant is all offset, with a zero variable part.  */
    case INTEGER_CST:
      *var = size_int (0);
      *off = fold_convert (ssizetype, op0);
      if (result_range)
	result_range->set (op0, op0);
      return true;

    /* Split both operands and recombine: the variable parts are
       re-added as a POINTER_PLUS, the constant offsets simply add.  */
    case POINTER_PLUS_EXPR:
      split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
      split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
      *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
      *off = size_binop (PLUS_EXPR, off0, off1);
      return true;

    /* Distribute the split over addition/subtraction, correcting *OFF
       for any overflow via compute_distributive_range.  */
    case PLUS_EXPR:
    case MINUS_EXPR:
      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
      *off = size_binop (code, off0, off1);
      if (!compute_distributive_range (type, op0_range, code, op1_range,
				       off, result_range))
	return false;
      *var = fold_build2 (code, sizetype, var0, var1);
      return true;

    /* Only multiplication by a constant is handled; distribute it over
       the split of OP0.  */
    case MULT_EXPR:
      if (TREE_CODE (op1) != INTEGER_CST)
	return false;

      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      op1_range.set (op1, op1);
      *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
      if (!compute_distributive_range (type, op0_range, code, op1_range,
				       off, result_range))
	return false;
      *var = fold_build2 (MULT_EXPR, sizetype, var0,
			  fold_convert (sizetype, op1));
      return true;

    /* Peel the address down to its inner base and accumulate the
       byte position and any variable offset component.  */
    case ADDR_EXPR:
      {
	tree base, poffset;
	poly_int64 pbitsize, pbitpos, pbytepos;
	machine_mode pmode;
	int punsignedp, preversep, pvolatilep;

	op0 = TREE_OPERAND (op0, 0);
	base
	  = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
				 &punsignedp, &preversep, &pvolatilep);

	/* Punt on bit positions that are not a whole number of bytes.  */
	if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
	  return false;
	base = build_fold_addr_expr (base);
	off0 = ssize_int (pbytepos);

	if (poffset)
	  {
	    split_constant_offset (poffset, &poffset, &off1, nullptr,
				   cache, limit);
	    off0 = size_binop (PLUS_EXPR, off0, off1);
	    base = fold_build_pointer_plus (base, poffset);
	  }

	var0 = fold_convert (type, base);

	/* If variable length types are involved, punt, otherwise casts
	   might be converted into ARRAY_REFs in gimplify_conversion.
	   To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
	   possibly no longer appears in current GIMPLE, might resurface.
	   This perhaps could run
	   if (CONVERT_EXPR_P (var0))
	     {
	       gimplify_conversion (&var0);
	       // Attempt to fill in any within var0 found ARRAY_REF's
	       // element size from corresponding op embedded ARRAY_REF,
	       // if unsuccessful, just punt.
	     } */
	while (POINTER_TYPE_P (type))
	  type = TREE_TYPE (type);
	if (int_size_in_bytes (type) < 0)
	  return false;

	*var = var0;
	*off = off0;
	return true;
      }

    /* Recurse into the defining statement of the SSA name, caching the
       result for multi-use names to avoid un-CSEing large amounts of
       code.  */
    case SSA_NAME:
      {
	gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
	enum tree_code subcode;

	if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
	  return false;

	subcode = gimple_assign_rhs_code (def_stmt);

	/* We are using a cache to avoid un-CSEing large amounts of code.  */
	bool use_cache = false;
	if (!has_single_use (op0)
	    && (subcode == POINTER_PLUS_EXPR
		|| subcode == PLUS_EXPR
		|| subcode == MINUS_EXPR
		|| subcode == MULT_EXPR
		|| subcode == ADDR_EXPR
		|| CONVERT_EXPR_CODE_P (subcode)))
	  {
	    use_cache = true;
	    bool existed;
	    std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
	    if (existed)
	      {
		/* A cached {OP0, 0} entry marks a name currently being
		   processed (see below): fail to break the recursion.  */
		if (integer_zerop (e.second))
		  return false;
		*var = e.first;
		*off = e.second;
		/* The caller sets the range in this case.  */
		return true;
	      }
	    e = std::make_pair (op0, ssize_int (0));
	  }

	/* Respect the SSA def-chain walk budget.  */
	if (*limit == 0)
	  return false;
	--*limit;

	var0 = gimple_assign_rhs1 (def_stmt);
	var1 = gimple_assign_rhs2 (def_stmt);

	bool res = split_constant_offset_1 (type, var0, subcode, var1,
					    var, off, nullptr, cache, limit);
	if (res && use_cache)
	  *cache.get (op0) = std::make_pair (*var, *off);
	/* The caller sets the range in this case.  */
	return res;
      }
    CASE_CONVERT:
      {
	/* We can only handle the following conversions:

	   - Conversions from one pointer type to another pointer type.

	   - Conversions from one non-trapping integral type to another
	     non-trapping integral type.  In this case, the recursive
	     call makes sure that:

	       (sizetype) OP0

	     can be expressed as a sizetype operation involving VAR and OFF,
	     and all we need to do is check whether:

	       (sizetype) OP0 == (sizetype) (TYPE) OP0

	   - Conversions from a non-trapping sizetype-size integral type to
	     a like-sized pointer type.  In this case, the recursive call
	     makes sure that:

	       (sizetype) OP0 == *VAR + (sizetype) *OFF

	     and we can convert that to:

	       POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>

	   - Conversions from a sizetype-sized pointer type to a like-sized
	     non-trapping integral type.  In this case, the recursive call
	     makes sure that:

	       OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>

	     where the POINTER_PLUS and *VAR have the same precision as
	     TYPE (and the same precision as sizetype).  Then:

	       (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF.  */
	tree itype = TREE_TYPE (op0);
	if ((POINTER_TYPE_P (itype)
	     || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
	    && (POINTER_TYPE_P (type)
		|| (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
	    && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
		|| (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
		    && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
	  {
	    if (POINTER_TYPE_P (type))
	      {
		split_constant_offset (op0, var, off, nullptr, cache, limit);
		*var = fold_convert (type, *var);
	      }
	    else if (POINTER_TYPE_P (itype))
	      {
		split_constant_offset (op0, var, off, nullptr, cache, limit);
		*var = fold_convert (sizetype, *var);
	      }
	    else
	      {
		split_constant_offset (op0, var, off, &op0_range,
				       cache, limit);
		if (!nop_conversion_for_offset_p (type, itype, op0_range))
		  return false;
		if (result_range)
		  {
		    *result_range = op0_range;
		    range_cast (*result_range, type);
		  }
	      }
	    return true;
	  }
	return false;
      }

    default:
      return false;
    }
}
989
/* If EXP has pointer type, try to express it as:

     POINTER_PLUS <*VAR, (sizetype) *OFF>

   where:

   - *VAR has the same type as EXP
   - *OFF is a constant of type ssizetype.

   If EXP has an integral type, try to express (sizetype) EXP as:

     *VAR + (sizetype) *OFF

   where:

   - *VAR has type sizetype
   - *OFF is a constant of type ssizetype.

   If EXP_RANGE is nonnull, set it to the range of EXP.

   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
   visited.  LIMIT counts down the number of SSA names that we are
   allowed to process before giving up.  */

static void
split_constant_offset (tree exp, tree *var, tree *off, value_range *exp_range,
		       hash_map<tree, std::pair<tree, tree> > &cache,
		       unsigned *limit)
{
  tree type = TREE_TYPE (exp), op0, op1;
  enum tree_code code;

  code = TREE_CODE (exp);
  if (exp_range)
    {
      /* Default to the full range of TYPE; refine it for SSA names
	 from the range query and the known nonzero bits.  */
      *exp_range = type;
      if (code == SSA_NAME)
	{
	  value_range vr;
	  get_range_query (cfun)->range_of_expr (vr, exp);
	  if (vr.undefined_p ())
	    vr.set_varying (TREE_TYPE (exp));
	  wide_int var_min = wi::to_wide (vr.min ());
	  wide_int var_max = wi::to_wide (vr.max ());
	  value_range_kind vr_kind = vr.kind ();
	  wide_int var_nonzero = get_nonzero_bits (exp);
	  vr_kind = intersect_range_with_nonzero_bits (vr_kind,
						       &var_min, &var_max,
						       var_nonzero,
						       TYPE_SIGN (type));
	  /* This check for VR_VARYING is here because the old code
	     using get_range_info would return VR_RANGE for the entire
	     domain, instead of VR_VARYING.  The new code normalizes
	     full-domain ranges to VR_VARYING.  */
	  if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
	    *exp_range = value_range (type, var_min, var_max);
	}
    }

  if (!tree_is_chrec (exp)
      && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
    {
      extract_ops_from_tree (exp, &code, &op0, &op1);
      if (split_constant_offset_1 (type, op0, code, op1, var, off,
				   exp_range, cache, limit))
	return;
    }

  /* Fallback: EXP itself is the whole variable part, with a zero
     offset.  */
  *var = exp;
  if (INTEGRAL_TYPE_P (type))
    *var = fold_convert (sizetype, *var);
  *off = ssize_int (0);

  value_range r;
  if (exp_range && code != SSA_NAME
      && get_range_query (cfun)->range_of_expr (r, exp)
      && !r.undefined_p ())
    *exp_range = r;
}
1069
/* Expresses EXP as VAR + OFF, where OFF is a constant.  VAR has the same
   type as EXP while OFF has type ssizetype.  Public entry point for the
   recursive worker above.  */

void
split_constant_offset (tree exp, tree *var, tree *off)
{
  /* Budget of SSA names we are willing to follow.  */
  unsigned limit = param_ssa_name_def_chain_limit;
  /* The cache is allocated lazily and kept across calls; it is emptied
     (but not freed) before returning, so no entries survive from one
     expression to the next.  */
  static hash_map<tree, std::pair<tree, tree> > *cache;
  if (!cache)
    cache = new hash_map<tree, std::pair<tree, tree> > (37);
  split_constant_offset (exp, var, off, nullptr, *cache, &limit);
  *var = fold_convert (TREE_TYPE (exp), *var);
  cache->empty ();
}
1084
1085 /* Returns the address ADDR of an object in a canonical shape (without nop
1086 casts, and with type of pointer to the object). */
1087
1088 static tree
1089 canonicalize_base_object_address (tree addr)
1090 {
1091 tree orig = addr;
1092
1093 STRIP_NOPS (addr);
1094
1095 /* The base address may be obtained by casting from integer, in that case
1096 keep the cast. */
1097 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
1098 return orig;
1099
1100 if (TREE_CODE (addr) != ADDR_EXPR)
1101 return addr;
1102
1103 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
1104 }
1105
1106 /* Analyze the behavior of memory reference REF within STMT.
1107 There are two modes:
1108
1109 - BB analysis. In this case we simply split the address into base,
1110 init and offset components, without reference to any containing loop.
1111 The resulting base and offset are general expressions and they can
1112 vary arbitrarily from one iteration of the containing loop to the next.
1113 The step is always zero.
1114
1115 - loop analysis. In this case we analyze the reference both wrt LOOP
1116 and on the basis that the reference occurs (is "used") in LOOP;
1117 see the comment above analyze_scalar_evolution_in_loop for more
1118 information about this distinction. The base, init, offset and
1119 step fields are all invariant in LOOP.
1120
1121 Perform BB analysis if LOOP is null, or if LOOP is the function's
1122 dummy outermost loop. In other cases perform loop analysis.
1123
1124 Return true if the analysis succeeded and store the results in DRB if so.
1125 BB analysis can only fail for bitfield or reversed-storage accesses. */
1126
1127 opt_result
1128 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
1129 class loop *loop, const gimple *stmt)
1130 {
1131 poly_int64 pbitsize, pbitpos;
1132 tree base, poffset;
1133 machine_mode pmode;
1134 int punsignedp, preversep, pvolatilep;
1135 affine_iv base_iv, offset_iv;
1136 tree init, dinit, step;
1137 bool in_loop = (loop && loop->num);
1138
1139 if (dump_file && (dump_flags & TDF_DETAILS))
1140 fprintf (dump_file, "analyze_innermost: ");
1141
1142 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
1143 &punsignedp, &preversep, &pvolatilep);
1144 gcc_assert (base != NULL_TREE);
1145
1146 poly_int64 pbytepos;
1147 if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
1148 return opt_result::failure_at (stmt,
1149 "failed: bit offset alignment.\n");
1150
1151 if (preversep)
1152 return opt_result::failure_at (stmt,
1153 "failed: reverse storage order.\n");
1154
1155 /* Calculate the alignment and misalignment for the inner reference. */
1156 unsigned int HOST_WIDE_INT bit_base_misalignment;
1157 unsigned int bit_base_alignment;
1158 get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
1159
1160 /* There are no bitfield references remaining in BASE, so the values
1161 we got back must be whole bytes. */
1162 gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
1163 && bit_base_misalignment % BITS_PER_UNIT == 0);
1164 unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
1165 poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
1166
1167 if (TREE_CODE (base) == MEM_REF)
1168 {
1169 if (!integer_zerop (TREE_OPERAND (base, 1)))
1170 {
1171 /* Subtract MOFF from the base and add it to POFFSET instead.
1172 Adjust the misalignment to reflect the amount we subtracted. */
1173 poly_offset_int moff = mem_ref_offset (base);
1174 base_misalignment -= moff.force_shwi ();
1175 tree mofft = wide_int_to_tree (sizetype, moff);
1176 if (!poffset)
1177 poffset = mofft;
1178 else
1179 poffset = size_binop (PLUS_EXPR, poffset, mofft);
1180 }
1181 base = TREE_OPERAND (base, 0);
1182 }
1183 else
1184 base = build_fold_addr_expr (base);
1185
1186 if (in_loop)
1187 {
1188 if (!simple_iv (loop, loop, base, &base_iv, true))
1189 return opt_result::failure_at
1190 (stmt, "failed: evolution of base is not affine.\n");
1191 }
1192 else
1193 {
1194 base_iv.base = base;
1195 base_iv.step = ssize_int (0);
1196 base_iv.no_overflow = true;
1197 }
1198
1199 if (!poffset)
1200 {
1201 offset_iv.base = ssize_int (0);
1202 offset_iv.step = ssize_int (0);
1203 }
1204 else
1205 {
1206 if (!in_loop)
1207 {
1208 offset_iv.base = poffset;
1209 offset_iv.step = ssize_int (0);
1210 }
1211 else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
1212 return opt_result::failure_at
1213 (stmt, "failed: evolution of offset is not affine.\n");
1214 }
1215
1216 init = ssize_int (pbytepos);
1217
1218 /* Subtract any constant component from the base and add it to INIT instead.
1219 Adjust the misalignment to reflect the amount we subtracted. */
1220 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
1221 init = size_binop (PLUS_EXPR, init, dinit);
1222 base_misalignment -= TREE_INT_CST_LOW (dinit);
1223
1224 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
1225 init = size_binop (PLUS_EXPR, init, dinit);
1226
1227 step = size_binop (PLUS_EXPR,
1228 fold_convert (ssizetype, base_iv.step),
1229 fold_convert (ssizetype, offset_iv.step));
1230
1231 base = canonicalize_base_object_address (base_iv.base);
1232
1233 /* See if get_pointer_alignment can guarantee a higher alignment than
1234 the one we calculated above. */
1235 unsigned int HOST_WIDE_INT alt_misalignment;
1236 unsigned int alt_alignment;
1237 get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
1238
1239 /* As above, these values must be whole bytes. */
1240 gcc_assert (alt_alignment % BITS_PER_UNIT == 0
1241 && alt_misalignment % BITS_PER_UNIT == 0);
1242 alt_alignment /= BITS_PER_UNIT;
1243 alt_misalignment /= BITS_PER_UNIT;
1244
1245 if (base_alignment < alt_alignment)
1246 {
1247 base_alignment = alt_alignment;
1248 base_misalignment = alt_misalignment;
1249 }
1250
1251 drb->base_address = base;
1252 drb->offset = fold_convert (ssizetype, offset_iv.base);
1253 drb->init = init;
1254 drb->step = step;
1255 if (known_misalignment (base_misalignment, base_alignment,
1256 &drb->base_misalignment))
1257 drb->base_alignment = base_alignment;
1258 else
1259 {
1260 drb->base_alignment = known_alignment (base_misalignment);
1261 drb->base_misalignment = 0;
1262 }
1263 drb->offset_alignment = highest_pow2_factor (offset_iv.base);
1264 drb->step_alignment = highest_pow2_factor (step);
1265
1266 if (dump_file && (dump_flags & TDF_DETAILS))
1267 fprintf (dump_file, "success.\n");
1268
1269 return opt_result::success ();
1270 }
1271
1272 /* Return true if OP is a valid component reference for a DR access
1273 function. This accepts a subset of what handled_component_p accepts. */
1274
1275 static bool
1276 access_fn_component_p (tree op)
1277 {
1278 switch (TREE_CODE (op))
1279 {
1280 case REALPART_EXPR:
1281 case IMAGPART_EXPR:
1282 case ARRAY_REF:
1283 return true;
1284
1285 case COMPONENT_REF:
1286 return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1287
1288 default:
1289 return false;
1290 }
1291 }
1292
/* Returns whether BASE can be the innermost operand of a component
   reference that satisfies access_fn_component_p.  */
1295
1296 static bool
1297 base_supports_access_fn_components_p (tree base)
1298 {
1299 switch (TREE_CODE (TREE_TYPE (base)))
1300 {
1301 case COMPLEX_TYPE:
1302 case ARRAY_TYPE:
1303 case RECORD_TYPE:
1304 return true;
1305 default:
1306 return false;
1307 }
1308 }
1309
1310 /* Determines the base object and the list of indices of memory reference
1311 DR, analyzed in LOOP and instantiated before NEST. */
1312
static void
dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
{
  /* If analyzing a basic-block there are no indices to analyze
     and thus no access functions.  */
  if (!nest)
    {
      dri->base_object = ref;
      dri->access_fns.create (0);
      return;
    }

  vec<tree> access_fns = vNULL;

  /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
     into a two element array with a constant index.  The base is
     then just the immediate underlying object.  */
  if (TREE_CODE (ref) == REALPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_zero_node);
    }
  else if (TREE_CODE (ref) == IMAGPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_one_node);
    }

  /* Analyze access functions of dimensions we know to be independent.
     The list of component references handled here should be kept in
     sync with access_fn_component_p.
     We peel REF from the outside in, so the access functions end up
     pushed innermost dimension first.  */
  while (handled_component_p (ref))
    {
      if (TREE_CODE (ref) == ARRAY_REF)
	{
	  tree op = TREE_OPERAND (ref, 1);
	  tree access_fn = analyze_scalar_evolution (loop, op);
	  access_fn = instantiate_scev (nest, loop, access_fn);
	  access_fns.safe_push (access_fn);
	}
      else if (TREE_CODE (ref) == COMPONENT_REF
	       && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
	{
	  /* For COMPONENT_REFs of records (but not unions!) use the
	     FIELD_DECL offset as constant access function so we can
	     disambiguate a[i].f1 and a[i].f2.  The offset is computed
	     in bits: byte offset * BITS_PER_UNIT + bit offset.  */
	  tree off = component_ref_field_offset (ref);
	  off = size_binop (PLUS_EXPR,
			    size_binop (MULT_EXPR,
					fold_convert (bitsizetype, off),
					bitsize_int (BITS_PER_UNIT)),
			    DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
	  access_fns.safe_push (off);
	}
      else
	/* If we have an unhandled component we could not translate
	   to an access function stop analyzing.  We have determined
	   our base object in this case.  */
	break;

      ref = TREE_OPERAND (ref, 0);
    }

  /* If the address operand of a MEM_REF base has an evolution in the
     analyzed nest, add it as an additional independent access-function.  */
  if (TREE_CODE (ref) == MEM_REF)
    {
      tree op = TREE_OPERAND (ref, 0);
      tree access_fn = analyze_scalar_evolution (loop, op);
      access_fn = instantiate_scev (nest, loop, access_fn);
      STRIP_NOPS (access_fn);
      if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
	{
	  tree memoff = TREE_OPERAND (ref, 1);
	  tree base = initial_condition (access_fn);
	  tree orig_type = TREE_TYPE (base);
	  STRIP_USELESS_TYPE_CONVERSION (base);
	  tree off;
	  split_constant_offset (base, &base, &off);
	  STRIP_USELESS_TYPE_CONVERSION (base);
	  /* Fold the MEM_REF offset into the evolutions initial
	     value to make more bases comparable.  */
	  if (!integer_zerop (memoff))
	    {
	      off = size_binop (PLUS_EXPR, off,
				fold_convert (ssizetype, memoff));
	      memoff = build_int_cst (TREE_TYPE (memoff), 0);
	    }
	  /* Adjust the offset so it is a multiple of the access type
	     size and thus we separate bases that can possibly be used
	     to produce partial overlaps (which the access_fn machinery
	     cannot handle).  */
	  wide_int rem;
	  if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
	      && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
	      && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
	    rem = wi::mod_trunc
	      (wi::to_wide (off),
	       wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
	       SIGNED);
	  else
	    /* If we can't compute the remainder simply force the initial
	       condition to zero.  (REM == OFF makes OFF - REM zero.)  */
	    rem = wi::to_wide (off);
	  off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
	  memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
	  /* And finally replace the initial condition.  */
	  access_fn = chrec_replace_initial_condition
	    (access_fn, fold_convert (orig_type, off));
	  /* ??? This is still not a suitable base object for
	     dr_may_alias_p - the base object needs to be an
	     access that covers the object as whole.  With
	     an evolution in the pointer this cannot be
	     guaranteed.
	     As a band-aid, mark the access so we can special-case
	     it in dr_may_alias_p.  */
	  tree old = ref;
	  ref = fold_build2_loc (EXPR_LOCATION (ref),
				 MEM_REF, TREE_TYPE (ref),
				 base, memoff);
	  /* Preserve the dependence clique/base info of the original
	     MEM_REF on the rebuilt one.  */
	  MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
	  MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
	  dri->unconstrained_base = true;
	  access_fns.safe_push (access_fn);
	}
    }
  else if (DECL_P (ref))
    {
      /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
      ref = build2 (MEM_REF, TREE_TYPE (ref),
		    build_fold_addr_expr (ref),
		    build_int_cst (reference_alias_ptr_type (ref), 0));
    }

  dri->base_object = ref;
  dri->access_fns = access_fns;
}
1450
1451 /* Extracts the alias analysis information from the memory reference DR. */
1452
1453 static void
1454 dr_analyze_alias (struct data_reference *dr)
1455 {
1456 tree ref = DR_REF (dr);
1457 tree base = get_base_address (ref), addr;
1458
1459 if (INDIRECT_REF_P (base)
1460 || TREE_CODE (base) == MEM_REF)
1461 {
1462 addr = TREE_OPERAND (base, 0);
1463 if (TREE_CODE (addr) == SSA_NAME)
1464 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1465 }
1466 }
1467
1468 /* Frees data reference DR. */
1469
1470 void
1471 free_data_ref (data_reference_p dr)
1472 {
1473 DR_ACCESS_FNS (dr).release ();
1474 if (dr->alt_indices.base_object)
1475 dr->alt_indices.access_fns.release ();
1476 free (dr);
1477 }
1478
1479 /* Analyze memory reference MEMREF, which is accessed in STMT.
1480 The reference is a read if IS_READ is true, otherwise it is a write.
1481 IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1482 within STMT, i.e. that it might not occur even if STMT is executed
1483 and runs to completion.
1484
1485 Return the data_reference description of MEMREF. NEST is the outermost
1486 loop in which the reference should be instantiated, LOOP is the loop
1487 in which the data reference should be analyzed. */
1488
struct data_reference *
create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
		 bool is_read, bool is_conditional_in_stmt)
{
  struct data_reference *dr;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Creating dr for ");
      print_generic_expr (dump_file, memref, TDF_SLIM);
      fprintf (dump_file, "\n");
    }

  /* XCNEW zero-initializes, so any field not assigned below is clear.  */
  dr = XCNEW (struct data_reference);
  DR_STMT (dr) = stmt;
  DR_REF (dr) = memref;
  DR_IS_READ (dr) = is_read;
  DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;

  /* With a null NEST only basic-block analysis is wanted, so pass a
     null loop to dr_analyze_innermost as well.  */
  dr_analyze_innermost (&DR_INNERMOST (dr), memref,
			nest != NULL ? loop : NULL, stmt);
  dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
  dr_analyze_alias (dr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;
      fprintf (dump_file, "\tbase_address: ");
      print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
      fprintf (dump_file, "\n\toffset from base address: ");
      print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tconstant offset from base address: ");
      print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tstep: ");
      print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase misalignment: %d",
	       DR_BASE_MISALIGNMENT (dr));
      fprintf (dump_file, "\n\toffset alignment: %d",
	       DR_OFFSET_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase_object: ");
      print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
      fprintf (dump_file, "\n");
      for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
	{
	  fprintf (dump_file, "\tAccess function %d: ", i);
	  print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
	}
    }

  return dr;
}
1542
1543 /* A helper function computes order between two tree expressions T1 and T2.
1544 This is used in comparator functions sorting objects based on the order
1545 of tree expressions. The function returns -1, 0, or 1. */
1546
int
data_ref_compare_tree (tree t1, tree t2)
{
  int i, cmp;
  enum tree_code code;
  char tclass;

  /* Identical trees compare equal; a null tree sorts before any
     non-null tree.  */
  if (t1 == t2)
    return 0;
  if (t1 == NULL)
    return -1;
  if (t2 == NULL)
    return 1;

  STRIP_USELESS_TYPE_CONVERSION (t1);
  STRIP_USELESS_TYPE_CONVERSION (t2);
  if (t1 == t2)
    return 0;

  /* Order primarily by tree code; all conversion codes are treated as
     one equivalence class so wrapped operands still get compared.  */
  if (TREE_CODE (t1) != TREE_CODE (t2)
      && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
    return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;

  code = TREE_CODE (t1);
  switch (code)
    {
    case INTEGER_CST:
      return tree_int_cst_compare (t1, t2);

    case STRING_CST:
      /* Shorter strings first; equal-length strings compare bytewise.  */
      if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
	return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
      return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
		     TREE_STRING_LENGTH (t1));

    case SSA_NAME:
      if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
	return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
      /* Equal versions: fall out of the switch and return 0.  */
      break;

    default:
      if (POLY_INT_CST_P (t1))
	return compare_sizes_for_sort (wi::to_poly_widest (t1),
				       wi::to_poly_widest (t2));

      tclass = TREE_CODE_CLASS (code);

      /* For decls, compare their UIDs.  */
      if (tclass == tcc_declaration)
	{
	  if (DECL_UID (t1) != DECL_UID (t2))
	    return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
	  break;
	}
      /* For expressions, compare their operands recursively.  */
      else if (IS_EXPR_CODE_CLASS (tclass))
	{
	  for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
	    {
	      cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
					   TREE_OPERAND (t2, i));
	      if (cmp != 0)
		return cmp;
	    }
	}
      else
	gcc_unreachable ();
    }

  return 0;
}
1618
/* Return TRUE if it's possible to resolve data dependence DDR by runtime
   alias check.  */
1621
opt_result
runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
{
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
		 "consider run-time aliasing test between %T and %T\n",
		 DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));

  /* Versioning for alias trades extra code size for speed, so reject
     it when optimizing for size.  */
  if (!speed_p)
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
				   "runtime alias check not supported when"
				   " optimizing for size.\n");

  /* FORNOW: We don't support versioning with outer-loop in either
     vectorization or loop distribution.  */
  if (loop != NULL && loop->inner != NULL)
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
				   "runtime alias check not supported for"
				   " outer loop.\n");

  /* FORNOW: We don't support handling different address spaces.  */
  if (TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_A (ddr)))))
      != TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_B (ddr))))))
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
				   "runtime alias check between different "
				   "address spaces not supported.\n");

  return opt_result::success ();
}
1651
1652 /* Operator == between two dr_with_seg_len objects.
1653
1654 This equality operator is used to make sure two data refs
1655 are the same one so that we will consider to combine the
1656 aliasing checks of those two pairs of data dependent data
1657 refs. */
1658
1659 static bool
1660 operator == (const dr_with_seg_len& d1,
1661 const dr_with_seg_len& d2)
1662 {
1663 return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1664 DR_BASE_ADDRESS (d2.dr), 0)
1665 && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1666 && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1667 && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1668 && known_eq (d1.access_size, d2.access_size)
1669 && d1.align == d2.align);
1670 }
1671
1672 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1673 so that we can combine aliasing checks in one scan. */
1674
1675 static int
1676 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1677 {
1678 const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1679 const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1680 const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1681 const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1682
1683 /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1684 if a and c have the same basic address snd step, and b and d have the same
1685 address and step. Therefore, if any a&c or b&d don't have the same address
1686 and step, we don't care the order of those two pairs after sorting. */
1687 int comp_res;
1688
1689 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1690 DR_BASE_ADDRESS (b1.dr))) != 0)
1691 return comp_res;
1692 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1693 DR_BASE_ADDRESS (b2.dr))) != 0)
1694 return comp_res;
1695 if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1696 DR_STEP (b1.dr))) != 0)
1697 return comp_res;
1698 if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1699 DR_STEP (b2.dr))) != 0)
1700 return comp_res;
1701 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1702 DR_OFFSET (b1.dr))) != 0)
1703 return comp_res;
1704 if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1705 DR_INIT (b1.dr))) != 0)
1706 return comp_res;
1707 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1708 DR_OFFSET (b2.dr))) != 0)
1709 return comp_res;
1710 if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1711 DR_INIT (b2.dr))) != 0)
1712 return comp_res;
1713
1714 return 0;
1715 }
1716
1717 /* Dump information about ALIAS_PAIR, indenting each line by INDENT. */
1718
static void
dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
{
  dump_printf (MSG_NOTE, "%sreference: %T vs. %T\n", indent,
	       DR_REF (alias_pair->first.dr),
	       DR_REF (alias_pair->second.dr));

  /* For each property below, print the second reference's value only
     when it differs from the first's.  */
  dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
	       alias_pair->first.seg_len);
  if (!operand_equal_p (alias_pair->first.seg_len,
			alias_pair->second.seg_len, 0))
    dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);

  dump_printf (MSG_NOTE, "\n%saccess size: ", indent);
  dump_dec (MSG_NOTE, alias_pair->first.access_size);
  if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
    {
      dump_printf (MSG_NOTE, " vs. ");
      dump_dec (MSG_NOTE, alias_pair->second.access_size);
    }

  dump_printf (MSG_NOTE, "\n%salignment: %d", indent,
	       alias_pair->first.align);
  if (alias_pair->first.align != alias_pair->second.align)
    dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);

  /* Decode the DR_ALIAS_* flag bits by name.  */
  dump_printf (MSG_NOTE, "\n%sflags: ", indent);
  if (alias_pair->flags & DR_ALIAS_RAW)
    dump_printf (MSG_NOTE, " RAW");
  if (alias_pair->flags & DR_ALIAS_WAR)
    dump_printf (MSG_NOTE, " WAR");
  if (alias_pair->flags & DR_ALIAS_WAW)
    dump_printf (MSG_NOTE, " WAW");
  if (alias_pair->flags & DR_ALIAS_ARBITRARY)
    dump_printf (MSG_NOTE, " ARBITRARY");
  if (alias_pair->flags & DR_ALIAS_SWAPPED)
    dump_printf (MSG_NOTE, " SWAPPED");
  if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
    dump_printf (MSG_NOTE, " UNSWAPPED");
  if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
    dump_printf (MSG_NOTE, " MIXED_STEPS");
  if (alias_pair->flags == 0)
    dump_printf (MSG_NOTE, " <none>");
  dump_printf (MSG_NOTE, "\n");
}
1764
1765 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1766 FACTOR is number of iterations that each data reference is accessed.
1767
1768 Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1769 we create an expression:
1770
1771 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1772 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1773
1774 for aliasing checks. However, in some cases we can decrease the number
1775 of checks by combining two checks into one. For example, suppose we have
1776 another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1777 condition is satisfied:
1778
1779 load_ptr_0 < load_ptr_1 &&
1780 load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1781
1782 (this condition means, in each iteration of vectorized loop, the accessed
1783 memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1784 load_ptr_1.)
1785
   we then can use only the following expression to finish the aliasing checks
   between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1788
1789 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1790 || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1791
1792 Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1793 basic address. */
1794
void
prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
			       poly_uint64)
{
  /* NOTE(review): the unnamed poly_uint64 parameter (the iteration
     FACTOR described in the comment above) is unused here; it is kept
     for interface compatibility with callers.  */
  if (alias_pairs->is_empty ())
    return;

  /* Canonicalize each pair so that the base components are ordered wrt
     data_ref_compare_tree.  This allows the loop below to merge more
     cases.  */
  unsigned int i;
  dr_with_seg_len_pair_t *alias_pair;
  FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
    {
      data_reference_p dr_a = alias_pair->first.dr;
      data_reference_p dr_b = alias_pair->second.dr;
      int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
					    DR_BASE_ADDRESS (dr_b));
      if (comp_res == 0)
	comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
      if (comp_res == 0)
	comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
      if (comp_res > 0)
	{
	  std::swap (alias_pair->first, alias_pair->second);
	  alias_pair->flags |= DR_ALIAS_SWAPPED;
	}
      else
	alias_pair->flags |= DR_ALIAS_UNSWAPPED;
    }

  /* Sort the collected data ref pairs so that we can scan them once to
     combine all possible aliasing checks.  */
  alias_pairs->qsort (comp_dr_with_seg_len_pair);

  /* Scan the sorted dr pairs and check if we can combine alias checks
     of two neighboring dr pairs.  LAST is the index of the last pair
     kept so far; pairs are compacted in place.  */
  unsigned int last = 0;
  for (i = 1; i < alias_pairs->length (); ++i)
    {
      /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
      dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
      dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];

      dr_with_seg_len *dr_a1 = &alias_pair1->first;
      dr_with_seg_len *dr_b1 = &alias_pair1->second;
      dr_with_seg_len *dr_a2 = &alias_pair2->first;
      dr_with_seg_len *dr_b2 = &alias_pair2->second;

      /* Remove duplicate data ref pairs.  */
      if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
	{
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
	  alias_pair1->flags |= alias_pair2->flags;
	  continue;
	}

      /* Assume that we won't be able to merge the pairs, then correct
	 if we do.  */
      last += 1;
      if (last != i)
	(*alias_pairs)[last] = (*alias_pairs)[i];

      if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
	{
	  /* We consider the case that DR_B1 and DR_B2 are same memrefs,
	     and DR_A1 and DR_A2 are two consecutive memrefs.  */
	  if (*dr_a1 == *dr_a2)
	    {
	      std::swap (dr_a1, dr_b1);
	      std::swap (dr_a2, dr_b2);
	    }

	  poly_int64 init_a1, init_a2;
	  /* Only consider cases in which the distance between the initial
	     DR_A1 and the initial DR_A2 is known at compile time.  */
	  if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
				DR_BASE_ADDRESS (dr_a2->dr), 0)
	      || !operand_equal_p (DR_OFFSET (dr_a1->dr),
				   DR_OFFSET (dr_a2->dr), 0)
	      || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
	      || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
	    continue;

	  /* Don't combine if we can't tell which one comes first.  */
	  if (!ordered_p (init_a1, init_a2))
	    continue;

	  /* Work out what the segment length would be if we did combine
	     DR_A1 and DR_A2:

	     - If DR_A1 and DR_A2 have equal lengths, that length is
	       also the combined length.

	     - If DR_A1 and DR_A2 both have negative "lengths", the combined
	       length is the lower bound on those lengths.

	     - If DR_A1 and DR_A2 both have positive lengths, the combined
	       length is the upper bound on those lengths.

	     Other cases are unlikely to give a useful combination.

	     The lengths both have sizetype, so the sign is taken from
	     the step instead.  */
	  poly_uint64 new_seg_len = 0;
	  bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
						 dr_a2->seg_len, 0);
	  if (new_seg_len_p)
	    {
	      poly_uint64 seg_len_a1, seg_len_a2;
	      if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
		  || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
		continue;

	      tree indicator_a = dr_direction_indicator (dr_a1->dr);
	      if (TREE_CODE (indicator_a) != INTEGER_CST)
		continue;

	      tree indicator_b = dr_direction_indicator (dr_a2->dr);
	      if (TREE_CODE (indicator_b) != INTEGER_CST)
		continue;

	      int sign_a = tree_int_cst_sgn (indicator_a);
	      int sign_b = tree_int_cst_sgn (indicator_b);

	      if (sign_a <= 0 && sign_b <= 0)
		new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
	      else if (sign_a >= 0 && sign_b >= 0)
		new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
	      else
		continue;
	    }
	  /* At this point we're committed to merging the refs.  */

	  /* Make sure dr_a1 starts left of dr_a2.  */
	  if (maybe_gt (init_a1, init_a2))
	    {
	      std::swap (*dr_a1, *dr_a2);
	      std::swap (init_a1, init_a2);
	    }

	  /* The DR_Bs are equal, so only the DR_As can introduce
	     mixed steps.  */
	  if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
	    alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;

	  if (new_seg_len_p)
	    {
	      dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
					      new_seg_len);
	      dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
	    }

	  /* This is always positive due to the swap above.  */
	  poly_uint64 diff = init_a2 - init_a1;

	  /* The new check will start at DR_A1.  Make sure that its access
	     size encompasses the initial DR_A2.  */
	  if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
	    {
	      dr_a1->access_size = upper_bound (dr_a1->access_size,
						diff + dr_a2->access_size);
	      unsigned int new_align = known_alignment (dr_a1->access_size);
	      dr_a1->align = MIN (dr_a1->align, new_align);
	    }
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
	  alias_pair1->flags |= alias_pair2->flags;
	  /* Undo the tentative "keep" above: the merged pair replaces
	     both inputs.  */
	  last -= 1;
	}
    }
  alias_pairs->truncate (last + 1);

  /* Try to restore the original dr_with_seg_len order within each
     dr_with_seg_len_pair_t.  If we ended up combining swapped and
     unswapped pairs into the same check, we have to invalidate any
     RAW, WAR and WAW information for it.  */
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "merged alias checks:\n");
  FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
    {
      unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
      unsigned int swapped = (alias_pair->flags & swap_mask);
      if (swapped == DR_ALIAS_SWAPPED)
	std::swap (alias_pair->first, alias_pair->second);
      else if (swapped != DR_ALIAS_UNSWAPPED)
	alias_pair->flags |= DR_ALIAS_ARBITRARY;
      alias_pair->flags &= ~swap_mask;
      if (dump_enabled_p ())
	dump_alias_pair (alias_pair, " ");
    }
}
1992
/* A subroutine of create_intersect_range_checks, with a subset of the
   same arguments.  Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
   to optimize cases in which the references form a simple RAW, WAR or
   WAW dependence.  */
1997
static bool
create_ifn_alias_checks (tree *cond_expr,
			 const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len& dr_a = alias_pair.first;
  const dr_with_seg_len& dr_b = alias_pair.second;

  /* Check for cases in which:

     (a) we have a known RAW, WAR or WAW dependence;
     (b) the accesses are well-ordered in both the original and new code
	 (see the comment above the DR_ALIAS_* flags for details); and
     (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
  if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
    return false;

  /* Make sure that both DRs access the same pattern of bytes,
     with a constant length and step.  */
  poly_uint64 seg_len;
  if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
      || !poly_int_tree_p (dr_a.seg_len, &seg_len)
      || maybe_ne (dr_a.access_size, dr_b.access_size)
      || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
      || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
    return false;

  unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
  tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
  tree addr_b = DR_BASE_ADDRESS (dr_b.dr);

  /* See whether the target supports what we want to do.  WAW checks are
     equivalent to WAR checks here.  */
  internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
		     ? IFN_CHECK_RAW_PTRS
		     : IFN_CHECK_WAR_PTRS);
  unsigned int align = MIN (dr_a.align, dr_b.align);
  poly_uint64 full_length = seg_len + bytes;
  if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
					   full_length, align))
    {
      /* Retry with the tighter length based on the access size.  */
      full_length = seg_len + dr_a.access_size;
      if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
					       full_length, align))
	return false;
    }

  /* Commit to using this form of test.  Fold each DR's offset and init
     into its base address so the IFN operates on the true start.  */
  addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
  addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));

  addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
  addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));

  *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
					     ifn, boolean_type_node,
					     4, addr_a, addr_b,
					     size_int (full_length),
					     size_int (align));

  if (dump_enabled_p ())
    {
      if (ifn == IFN_CHECK_RAW_PTRS)
	dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
      else
	dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
    }
  return true;
}
2066
/* Try to generate a runtime condition that is true if ALIAS_PAIR is
   free of aliases, using a condition based on index values instead
   of a condition based on addresses.  Return true on success,
   storing the condition in *COND_EXPR.

   This can only be done if the two data references in ALIAS_PAIR access
   the same array object and the index is the only difference.  For example,
   if the two data references are DR_A and DR_B:

                       DR_A                           DR_B
      data-ref         arr[i]                         arr[j]
      base_object      arr                            arr
      index            {i_0, +, 1}_loop               {j_0, +, 1}_loop

   The addresses and their index are like:

        |<- ADDR_A    ->|          |<- ADDR_B    ->|
     ------------------------------------------------------->
        |   |   |   |   |          |   |   |   |   |
     ------------------------------------------------------->
        i_0 ...         i_0+4      j_0 ...         j_0+4

   We can create expression based on index rather than address:

     (unsigned) (i_0 - j_0 + 3) <= 6

   i.e. the indices are less than 4 apart.

   Note evolution step of index needs to be considered in comparison.  */

static bool
create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
				     const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len &dr_a = alias_pair.first;
  const dr_with_seg_len &dr_b = alias_pair.second;
  if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
      || integer_zerop (DR_STEP (dr_a.dr))
      || integer_zerop (DR_STEP (dr_b.dr))
      || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
    return false;

  poly_uint64 seg_len1, seg_len2;
  if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
      || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
    return false;

  if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
    return false;

  if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
    return false;

  if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
    return false;

  /* Guaranteed by the tree_fits_shwi_p check above.  */
  gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);

  bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
  unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
  if (neg_step)
    {
      abs_step = -abs_step;
      seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
      seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
    }

  /* Infer the number of iterations with which the memory segment is accessed
     by DR.  In other words, alias is checked if memory segment accessed by
     DR_A in some iterations intersect with memory segment accessed by DR_B
     in the same number of iterations.
     Note segment length is a linear function of number of iterations with
     DR_STEP as the coefficient.  */
  poly_uint64 niter_len1, niter_len2;
  if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
      || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
    return false;

  /* Divide each access size by the byte step, rounding up.  */
  poly_uint64 niter_access1, niter_access2;
  if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
			abs_step, &niter_access1)
      || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
			   abs_step, &niter_access2))
    return false;

  bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;

  /* Find the single dimension in which the access functions differ;
     all other dimensions must be identical.  */
  int found = -1;
  for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
    {
      tree access1 = DR_ACCESS_FN (dr_a.dr, i);
      tree access2 = DR_ACCESS_FN (dr_b.dr, i);
      /* Two indices must be the same if they are not scev, or not scev wrt
	 current loop being vectorized.  */
      if (TREE_CODE (access1) != POLYNOMIAL_CHREC
	  || TREE_CODE (access2) != POLYNOMIAL_CHREC
	  || CHREC_VARIABLE (access1) != (unsigned)loop->num
	  || CHREC_VARIABLE (access2) != (unsigned)loop->num)
	{
	  if (operand_equal_p (access1, access2, 0))
	    continue;

	  return false;
	}
      if (found >= 0)
	return false;
      found = i;
    }

  /* Ought not to happen in practice, since if all accesses are equal then the
     alias should be decidable at compile time.  */
  if (found < 0)
    return false;

  /* The two indices must have the same step.  */
  tree access1 = DR_ACCESS_FN (dr_a.dr, found);
  tree access2 = DR_ACCESS_FN (dr_b.dr, found);
  if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
    return false;

  tree idx_step = CHREC_RIGHT (access1);
  /* Index must have const step, otherwise DR_STEP won't be constant.  */
  gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
  /* Index must evaluate in the same direction as DR.  */
  gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);

  tree min1 = CHREC_LEFT (access1);
  tree min2 = CHREC_LEFT (access2);
  if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
    return false;

  /* Ideally, alias can be checked against loop's control IV, but we
     need to prove linear mapping between control IV and reference
     index.  Although that should be true, we check against (array)
     index of data reference.  Like segment length, index length is
     linear function of the number of iterations with index_step as
     the coefficient, i.e, niter_len * idx_step.  */
  offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
					      SIGNED);
  if (neg_step)
    abs_idx_step = -abs_idx_step;
  poly_offset_int idx_len1 = abs_idx_step * niter_len1;
  poly_offset_int idx_len2 = abs_idx_step * niter_len2;
  poly_offset_int idx_access1 = abs_idx_step * niter_access1;
  poly_offset_int idx_access2 = abs_idx_step * niter_access2;

  gcc_assert (known_ge (idx_len1, 0)
	      && known_ge (idx_len2, 0)
	      && known_ge (idx_access1, 0)
	      && known_ge (idx_access2, 0));

  /* Each access has the following pattern, with lengths measured
     in units of INDEX:

	  <-- idx_len -->
	  <--- A: -ve step --->
	  +-----+-------+-----+-------+-----+
	  | n-1 | ..... |  0  | ..... | n-1 |
	  +-----+-------+-----+-------+-----+
			<--- B: +ve step --->
			<-- idx_len -->
			|
			min

     where "n" is the number of scalar iterations covered by the segment
     and where each access spans idx_access units.

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     When checking for general overlap, we need to test whether
     the range:

       [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]

     overlaps:

       [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]

     where:

       low_offsetN = +ve step ? 0 : -idx_lenN;
       high_offsetN = +ve step ? idx_lenN : 0;

     This is equivalent to testing whether:

       min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
       && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1

     Converting this into a single test, there is an overlap if:

       0 <= min2 - min1 + bias <= limit

     where  bias = high_offset2 + idx_access2 - 1 - low_offset1
	    limit = (high_offset1 - low_offset1 + idx_access1 - 1)
		  + (high_offset2 - low_offset2 + idx_access2 - 1)
      i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1

     Combining the tests requires limit to be computable in an unsigned
     form of the index type; if it isn't, we fall back to the usual
     pointer-based checks.

     We can do better if DR_B is a write and if DR_A and DR_B are
     well-ordered in both the original and the new code (see the
     comment above the DR_ALIAS_* flags for details).  In this case
     we know that for each i in [0, n-1], the write performed by
     access i of DR_B occurs after access numbers j<=i of DR_A in
     both the original and the new code.  Any write or anti
     dependencies wrt those DR_A accesses are therefore maintained.

     We just need to make sure that each individual write in DR_B does not
     overlap any higher-indexed access in DR_A; such DR_A accesses happen
     after the DR_B access in the original code but happen before it in
     the new code.

     We know the steps for both accesses are equal, so by induction, we
     just need to test whether the first write of DR_B overlaps a later
     access of DR_A.  In other words, we need to move min1 along by
     one iteration:

       min1' = min1 + idx_step

     and use the ranges:

       [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]

     and:

       [min2, min2 + idx_access2 - 1]

     where:

       low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
       high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0.  */
  if (waw_or_war_p)
    idx_len1 -= abs_idx_step;

  poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
  if (!waw_or_war_p)
    limit += idx_len2;

  tree utype = unsigned_type_for (TREE_TYPE (min1));
  if (!wi::fits_to_tree_p (limit, utype))
    return false;

  poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
  poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
  poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
  /* Equivalent to adding IDX_STEP to MIN1.  */
  if (waw_or_war_p)
    bias -= wi::to_offset (idx_step);

  tree subject = fold_build2 (MINUS_EXPR, utype,
			      fold_convert (utype, min2),
			      fold_convert (utype, min1));
  subject = fold_build2 (PLUS_EXPR, utype, subject,
			 wide_int_to_tree (utype, bias));
  tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
				     wide_int_to_tree (utype, limit));
  if (*cond_expr)
    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
			      *cond_expr, part_cond_expr);
  else
    *cond_expr = part_cond_expr;
  if (dump_enabled_p ())
    {
      if (waw_or_war_p)
	dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
      else
	dump_printf (MSG_NOTE, "using an index-based overlap test\n");
    }
  return true;
}
2341
/* A subroutine of create_intersect_range_checks, with a subset of the
   same arguments.  Try to optimize cases in which the second access
   is a write and in which some overlap is valid.  Return true on
   success, storing the condition in *COND_EXPR.  */

static bool
create_waw_or_war_checks (tree *cond_expr,
			  const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len& dr_a = alias_pair.first;
  const dr_with_seg_len& dr_b = alias_pair.second;

  /* Check for cases in which:

     (a) DR_B is always a write;
     (b) the accesses are well-ordered in both the original and new code
	 (see the comment above the DR_ALIAS_* flags for details); and
     (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
  if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
    return false;

  /* Check for equal (but possibly variable) steps.  */
  tree step = DR_STEP (dr_a.dr);
  if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
    return false;

  /* Make sure that we can operate on sizetype without loss of precision.  */
  tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
  if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
    return false;

  /* All addresses involved are known to have a common alignment ALIGN.
     We can therefore subtract ALIGN from an exclusive endpoint to get
     an inclusive endpoint.  In the best (and common) case, ALIGN is the
     same as the access sizes of both DRs, and so subtracting ALIGN
     cancels out the addition of an access size.  */
  unsigned int align = MIN (dr_a.align, dr_b.align);
  poly_uint64 last_chunk_a = dr_a.access_size - align;
  poly_uint64 last_chunk_b = dr_b.access_size - align;

  /* Get a boolean expression that is true when the step is negative.  */
  tree indicator = dr_direction_indicator (dr_a.dr);
  tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
			       fold_convert (ssizetype, indicator),
			       ssize_int (0));

  /* Get lengths in sizetype.  */
  tree seg_len_a
    = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
  step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));

  /* Each access has the following pattern:

	  <- |seg_len| ->
	  <--- A: -ve step --->
	  +-----+-------+-----+-------+-----+
	  | n-1 | ..... |  0  | ..... | n-1 |
	  +-----+-------+-----+-------+-----+
			<--- B: +ve step --->
			<- |seg_len| ->
			|
		   base address

     where "n" is the number of scalar iterations covered by the segment.

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     We know that DR_B is a write.  We also know (from checking that
     DR_A and DR_B are well-ordered) that for each i in [0, n-1],
     the write performed by access i of DR_B occurs after access numbers
     j<=i of DR_A in both the original and the new code.  Any write or
     anti dependencies wrt those DR_A accesses are therefore maintained.

     We just need to make sure that each individual write in DR_B does not
     overlap any higher-indexed access in DR_A; such DR_A accesses happen
     after the DR_B access in the original code but happen before it in
     the new code.

     We know the steps for both accesses are equal, so by induction, we
     just need to test whether the first write of DR_B overlaps a later
     access of DR_A.  In other words, we need to move addr_a along by
     one iteration:

       addr_a' = addr_a + step

     and check whether:

       [addr_b, addr_b + last_chunk_b]

     overlaps:

       [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]

     where [low_offset_a, high_offset_a] spans accesses [1, n-1].  I.e.:

       low_offset_a = +ve step ? 0 : seg_len_a - step
       high_offset_a = +ve step ? seg_len_a - step : 0

     This is equivalent to testing whether:

       addr_a' + low_offset_a <= addr_b + last_chunk_b
       && addr_b <= addr_a' + high_offset_a + last_chunk_a

     Converting this into a single test, there is an overlap if:

       0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit

     where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b

     If DR_A is performed, limit + |step| - last_chunk_b is known to be
     less than the size of the object underlying DR_A.  We also know
     that last_chunk_b <= |step|; this is checked elsewhere if it isn't
     guaranteed at compile time.  There can therefore be no overflow if
     "limit" is calculated in an unsigned type with pointer precision.  */
  tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
					 DR_OFFSET (dr_a.dr));
  addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));

  tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
					 DR_OFFSET (dr_b.dr));
  addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));

  /* Advance ADDR_A by one iteration and adjust the length to compensate.  */
  addr_a = fold_build_pointer_plus (addr_a, step);
  tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
					   seg_len_a, step);
  if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
    seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);

  tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
				   seg_len_a_minus_step, size_zero_node);
  if (!CONSTANT_CLASS_P (low_offset_a))
    low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);

  /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
     but it's usually more efficient to reuse the LOW_OFFSET_A result.  */
  tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
				    low_offset_a);

  /* The amount added to addr_b - addr_a'.  */
  tree bias = fold_build2 (MINUS_EXPR, sizetype,
			   size_int (last_chunk_b), low_offset_a);

  tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
  limit = fold_build2 (PLUS_EXPR, sizetype, limit,
		       size_int (last_chunk_a + last_chunk_b));

  tree subject = fold_build2 (MINUS_EXPR, sizetype,
			      fold_convert (sizetype, addr_b),
			      fold_convert (sizetype, addr_a));
  subject = fold_build2 (PLUS_EXPR, sizetype, subject, bias);

  *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
  return true;
}
2499
/* If ALIGN is nonzero, set up *SEG_MIN_OUT and *SEG_MAX_OUT so that for
   every address ADDR accessed by D:

     *SEG_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEG_MAX_OUT

   In this case, every element accessed by D is aligned to at least
   ALIGN bytes.

   If ALIGN is zero then instead set *SEG_MAX_OUT so that:

     *SEG_MIN_OUT <= ADDR < *SEG_MAX_OUT.  */

static void
get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
		     tree *seg_max_out, HOST_WIDE_INT align)
{
  /* Each access has the following pattern:

	  <- |seg_len| ->
	  <--- A: -ve step --->
	  +-----+-------+-----+-------+-----+
	  | n-1 | ..... |  0  | ..... | n-1 |
	  +-----+-------+-----+-------+-----+
			<--- B: +ve step --->
			<- |seg_len| ->
			|
		   base address

     where "n" is the number of scalar iterations covered by the segment.
     (This should be VF for a particular pair if we know that both steps
     are the same, otherwise it will be the full number of scalar loop
     iterations.)

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     If the access size is "access_size" bytes, the lowest addressed byte is:

	 base + (step < 0 ? seg_len : 0)   [LB]

     and the highest addressed byte is always below:

	 base + (step < 0 ? 0 : seg_len) + access_size   [UB]

     Thus:

	 LB <= ADDR < UB

     If ALIGN is nonzero, all three values are aligned to at least ALIGN
     bytes, so:

	 LB <= ADDR <= UB - ALIGN

     where "- ALIGN" folds naturally with the "+ access_size" and often
     cancels it out.

     We don't try to simplify LB and UB beyond this (e.g. by using
     MIN and MAX based on whether seg_len rather than the stride is
     negative) because it is possible for the absolute size of the
     segment to overflow the range of a ssize_t.

     Keeping the pointer_plus outside of the cond_expr should allow
     the cond_exprs to be shared with other alias checks.  */
  tree indicator = dr_direction_indicator (d.dr);
  tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
			       fold_convert (ssizetype, indicator),
			       ssize_int (0));
  tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
					    DR_OFFSET (d.dr));
  addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
  tree seg_len
    = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));

  tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
				seg_len, size_zero_node);
  tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
				size_zero_node, seg_len);
  max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
			   size_int (d.access_size - align));

  *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
  *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
}
2583
/* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
   storing the condition in *COND_EXPR.  The fallback is to generate a
   test that the two accesses do not overlap:

     end_a <= start_b || end_b <= start_a.  */

static void
create_intersect_range_checks (class loop *loop, tree *cond_expr,
			       const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len& dr_a = alias_pair.first;
  const dr_with_seg_len& dr_b = alias_pair.second;
  *cond_expr = NULL_TREE;
  /* Try the cheaper specialized checks first, most specific first.  */
  if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
    return;

  if (create_ifn_alias_checks (cond_expr, alias_pair))
    return;

  if (create_waw_or_war_checks (cond_expr, alias_pair))
    return;

  unsigned HOST_WIDE_INT min_align;
  tree_code cmp_code;
  /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
     are equivalent.  This is just an optimization heuristic.  */
  if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
      && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
    {
      /* In this case adding access_size to seg_len is likely to give
	 a simple X * step, where X is either the number of scalar
	 iterations or the vectorization factor.  We're better off
	 keeping that, rather than subtracting an alignment from it.

	 In this case the maximum values are exclusive and so there is
	 no alias if the maximum of one segment equals the minimum
	 of another.  */
      min_align = 0;
      cmp_code = LE_EXPR;
    }
  else
    {
      /* Calculate the minimum alignment shared by all four pointers,
	 then arrange for this alignment to be subtracted from the
	 exclusive maximum values to get inclusive maximum values.
	 This "- min_align" is cumulative with a "+ access_size"
	 in the calculation of the maximum values.  In the best
	 (and common) case, the two cancel each other out, leaving
	 us with an inclusive bound based only on seg_len.  In the
	 worst case we're simply adding a smaller number than before.

	 Because the maximum values are inclusive, there is an alias
	 if the maximum value of one segment is equal to the minimum
	 value of the other.  */
      min_align = std::min (dr_a.align, dr_b.align);
      cmp_code = LT_EXPR;
    }

  tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
  get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
  get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);

  *cond_expr
    = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
	fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
	fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "using an address-based overlap test\n");
}
2653
/* Create a conditional expression that represents the run-time checks for
   overlapping of address ranges represented by a list of data references
   pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
   COND_EXPR is the conditional expression to be used in the if statement
   that controls which version of the loop gets executed at runtime.

   NOTE(review): each pair's check is ANDed onto the existing *COND_EXPR,
   so the caller appears to be responsible for initializing *COND_EXPR
   (presumably to NULL_TREE) before the call — confirm against callers.  */

void
create_runtime_alias_checks (class loop *loop,
			     const vec<dr_with_seg_len_pair_t> *alias_pairs,
			     tree * cond_expr)
{
  tree part_cond_expr;

  /* Defer overflow warnings: the generated checks are artificial and
     may fold in ways that would otherwise trigger spurious warnings.  */
  fold_defer_overflow_warnings ();
  for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
    {
      gcc_assert (alias_pair.flags);
      if (dump_enabled_p ())
	dump_printf (MSG_NOTE,
		     "create runtime check for data references %T and %T\n",
		     DR_REF (alias_pair.first.dr),
		     DR_REF (alias_pair.second.dr));

      /* Create condition expression for each pair data references.  */
      create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
      if (*cond_expr)
	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
				  *cond_expr, part_cond_expr);
      else
	*cond_expr = part_cond_expr;
    }
  fold_undefer_and_ignore_overflow_warnings ();
}
2687
2688 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2689 expressions. */
2690 static bool
2691 dr_equal_offsets_p1 (tree offset1, tree offset2)
2692 {
2693 bool res;
2694
2695 STRIP_NOPS (offset1);
2696 STRIP_NOPS (offset2);
2697
2698 if (offset1 == offset2)
2699 return true;
2700
2701 if (TREE_CODE (offset1) != TREE_CODE (offset2)
2702 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
2703 return false;
2704
2705 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
2706 TREE_OPERAND (offset2, 0));
2707
2708 if (!res || !BINARY_CLASS_P (offset1))
2709 return res;
2710
2711 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2712 TREE_OPERAND (offset2, 1));
2713
2714 return res;
2715 }
2716
2717 /* Check if DRA and DRB have equal offsets. */
2718 bool
2719 dr_equal_offsets_p (struct data_reference *dra,
2720 struct data_reference *drb)
2721 {
2722 tree offset1, offset2;
2723
2724 offset1 = DR_OFFSET (dra);
2725 offset2 = DR_OFFSET (drb);
2726
2727 return dr_equal_offsets_p1 (offset1, offset2);
2728 }
2729
2730 /* Returns true if FNA == FNB. */
2731
2732 static bool
2733 affine_function_equal_p (affine_fn fna, affine_fn fnb)
2734 {
2735 unsigned i, n = fna.length ();
2736
2737 if (n != fnb.length ())
2738 return false;
2739
2740 for (i = 0; i < n; i++)
2741 if (!operand_equal_p (fna[i], fnb[i], 0))
2742 return false;
2743
2744 return true;
2745 }
2746
2747 /* If all the functions in CF are the same, returns one of them,
2748 otherwise returns NULL. */
2749
2750 static affine_fn
2751 common_affine_function (conflict_function *cf)
2752 {
2753 unsigned i;
2754 affine_fn comm;
2755
2756 if (!CF_NONTRIVIAL_P (cf))
2757 return affine_fn ();
2758
2759 comm = cf->fns[0];
2760
2761 for (i = 1; i < cf->n; i++)
2762 if (!affine_function_equal_p (comm, cf->fns[i]))
2763 return affine_fn ();
2764
2765 return comm;
2766 }
2767
/* Returns the base of the affine function FN, i.e. its element 0.  */

static tree
affine_function_base (affine_fn fn)
{
  return fn[0];
}
2775
2776 /* Returns true if FN is a constant. */
2777
2778 static bool
2779 affine_function_constant_p (affine_fn fn)
2780 {
2781 unsigned i;
2782 tree coef;
2783
2784 for (i = 1; fn.iterate (i, &coef); i++)
2785 if (!integer_zerop (coef))
2786 return false;
2787
2788 return true;
2789 }
2790
2791 /* Returns true if FN is the zero constant function. */
2792
2793 static bool
2794 affine_function_zero_p (affine_fn fn)
2795 {
2796 return (integer_zerop (affine_function_base (fn))
2797 && affine_function_constant_p (fn));
2798 }
2799
2800 /* Returns a signed integer type with the largest precision from TA
2801 and TB. */
2802
2803 static tree
2804 signed_type_for_types (tree ta, tree tb)
2805 {
2806 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2807 return signed_type_for (ta);
2808 else
2809 return signed_type_for (tb);
2810 }
2811
2812 /* Applies operation OP on affine functions FNA and FNB, and returns the
2813 result. */
2814
2815 static affine_fn
2816 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2817 {
2818 unsigned i, n, m;
2819 affine_fn ret;
2820 tree coef;
2821
2822 if (fnb.length () > fna.length ())
2823 {
2824 n = fna.length ();
2825 m = fnb.length ();
2826 }
2827 else
2828 {
2829 n = fnb.length ();
2830 m = fna.length ();
2831 }
2832
2833 ret.create (m);
2834 for (i = 0; i < n; i++)
2835 {
2836 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2837 TREE_TYPE (fnb[i]));
2838 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2839 }
2840
2841 for (; fna.iterate (i, &coef); i++)
2842 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2843 coef, integer_zero_node));
2844 for (; fnb.iterate (i, &coef); i++)
2845 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2846 integer_zero_node, coef));
2847
2848 return ret;
2849 }
2850
/* Returns the sum of affine functions FNA and FNB (elementwise
   PLUS_EXPR; the caller owns the returned vector).  */

static affine_fn
affine_fn_plus (affine_fn fna, affine_fn fnb)
{
  return affine_fn_op (PLUS_EXPR, fna, fnb);
}
2858
/* Returns the difference of affine functions FNA and FNB (elementwise
   MINUS_EXPR; the caller owns the returned vector).  */

static affine_fn
affine_fn_minus (affine_fn fna, affine_fn fnb)
{
  return affine_fn_op (MINUS_EXPR, fna, fnb);
}
2866
/* Frees the coefficient vector of affine function FN.  */

static void
affine_fn_free (affine_fn fn)
{
  fn.release ();
}
2874
2875 /* Determine for each subscript in the data dependence relation DDR
2876 the distance. */
2877
2878 static void
2879 compute_subscript_distance (struct data_dependence_relation *ddr)
2880 {
2881 conflict_function *cf_a, *cf_b;
2882 affine_fn fn_a, fn_b, diff;
2883
2884 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2885 {
2886 unsigned int i;
2887
2888 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2889 {
2890 struct subscript *subscript;
2891
2892 subscript = DDR_SUBSCRIPT (ddr, i);
2893 cf_a = SUB_CONFLICTS_IN_A (subscript);
2894 cf_b = SUB_CONFLICTS_IN_B (subscript);
2895
2896 fn_a = common_affine_function (cf_a);
2897 fn_b = common_affine_function (cf_b);
2898 if (!fn_a.exists () || !fn_b.exists ())
2899 {
2900 SUB_DISTANCE (subscript) = chrec_dont_know;
2901 return;
2902 }
2903 diff = affine_fn_minus (fn_a, fn_b);
2904
2905 if (affine_function_constant_p (diff))
2906 SUB_DISTANCE (subscript) = affine_function_base (diff);
2907 else
2908 SUB_DISTANCE (subscript) = chrec_dont_know;
2909
2910 affine_fn_free (diff);
2911 }
2912 }
2913 }
2914
/* Returns a freshly allocated conflict function for "unknown"
   (n == NOT_KNOWN).  */

static conflict_function *
conflict_fn_not_known (void)
{
  conflict_function *fn = XCNEW (conflict_function);
  fn->n = NOT_KNOWN;

  return fn;
}
2925
/* Returns a freshly allocated conflict function for "independent"
   (n == NO_DEPENDENCE).  */

static conflict_function *
conflict_fn_no_dependence (void)
{
  conflict_function *fn = XCNEW (conflict_function);
  fn->n = NO_DEPENDENCE;

  return fn;
}
2936
/* Returns true if the address of OBJ is invariant in LOOP.  */

static bool
object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
{
  /* Walk down the component references; each index or offset operand
     must itself be loop-invariant for the address to be invariant.  */
  while (handled_component_p (obj))
    {
      if (TREE_CODE (obj) == ARRAY_REF)
	{
	  /* Operands 1 to 3 of an ARRAY_REF are the index, the lower
	     bound and the element size.  */
	  for (int i = 1; i < 4; ++i)
	    if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
							loop->num))
	      return false;
	}
      else if (TREE_CODE (obj) == COMPONENT_REF)
	{
	  /* Operand 2 of a COMPONENT_REF is the field offset.  */
	  if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
						      loop->num))
	    return false;
	}
      obj = TREE_OPERAND (obj, 0);
    }

  /* A non-memory base (e.g. a DECL) always has an invariant address.  */
  if (!INDIRECT_REF_P (obj)
      && TREE_CODE (obj) != MEM_REF)
    return true;

  /* The innermost reference is through a pointer: the address is
     invariant iff that pointer is not defined inside LOOP.  */
  return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
						  loop->num);
}
2967
/* Returns false if we can prove that data references A and B do not alias,
   true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
   considered.  */

bool
dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
		class loop *loop_nest)
{
  tree addr_a = DR_BASE_OBJECT (a);
  tree addr_b = DR_BASE_OBJECT (b);

  /* If we are not processing a loop nest but scalar code we
     do not need to care about possible cross-iteration dependences
     and thus can process the full original reference.  Do so,
     similar to how loop invariant motion applies extra offset-based
     disambiguation.  */
  if (!loop_nest)
    {
      aff_tree off1, off2;
      poly_widest_int size1, size2;
      get_inner_reference_aff (DR_REF (a), &off1, &size1);
      get_inner_reference_aff (DR_REF (b), &off2, &size2);
      /* Form OFF2 - OFF1 and ask whether the two access ranges can
	 overlap; if they provably cannot, the refs do not alias.  */
      aff_combination_scale (&off1, -1);
      aff_combination_add (&off2, &off1);
      if (aff_comb_cannot_overlap_p (&off2, size1, size2))
	return false;
    }

  /* Disambiguate using the dependence clique/base info recorded on
     MEM_REFs: two refs in the same clique but with different bases
     are known not to alias.  */
  if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
      && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
      /* For cross-iteration dependences the cliques must be valid for the
	 whole loop, not just individual iterations.  */
      && (!loop_nest
	  || MR_DEPENDENCE_CLIQUE (addr_a) == 1
	  || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
      && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
      && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
    return false;

  /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
     do not know the size of the base-object.  So we cannot do any
     offset/overlap based analysis but have to rely on points-to
     information only.  */
  if (TREE_CODE (addr_a) == MEM_REF
      && (DR_UNCONSTRAINED_BASE (a)
	  || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      /* Compare the two base pointers; wrap a non-MEM_REF base in an
	 ADDR_EXPR so both sides are pointers.  */
      if (TREE_CODE (addr_b) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       build_fold_addr_expr (addr_b));
    }
  else if (TREE_CODE (addr_b) == MEM_REF
	   && (DR_UNCONSTRAINED_BASE (b)
	       || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      /* Mirror image of the case above.  */
      if (TREE_CODE (addr_a) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
				       TREE_OPERAND (addr_b, 0));
    }

  /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
     that is being subsetted in the loop nest.  Pick the oracle query
     that matches the read/write kinds of the two references.  */
  if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
    return refs_output_dependent_p (addr_a, addr_b);
  else if (DR_IS_READ (a) && DR_IS_WRITE (b))
    return refs_anti_dependent_p (addr_a, addr_b);
  return refs_may_alias_p (addr_a, addr_b);
}
3054
3055 /* REF_A and REF_B both satisfy access_fn_component_p. Return true
3056 if it is meaningful to compare their associated access functions
3057 when checking for dependencies. */
3058
3059 static bool
3060 access_fn_components_comparable_p (tree ref_a, tree ref_b)
3061 {
3062 /* Allow pairs of component refs from the following sets:
3063
3064 { REALPART_EXPR, IMAGPART_EXPR }
3065 { COMPONENT_REF }
3066 { ARRAY_REF }. */
3067 tree_code code_a = TREE_CODE (ref_a);
3068 tree_code code_b = TREE_CODE (ref_b);
3069 if (code_a == IMAGPART_EXPR)
3070 code_a = REALPART_EXPR;
3071 if (code_b == IMAGPART_EXPR)
3072 code_b = REALPART_EXPR;
3073 if (code_a != code_b)
3074 return false;
3075
3076 if (TREE_CODE (ref_a) == COMPONENT_REF)
3077 /* ??? We cannot simply use the type of operand #0 of the refs here as
3078 the Fortran compiler smuggles type punning into COMPONENT_REFs.
3079 Use the DECL_CONTEXT of the FIELD_DECLs instead. */
3080 return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
3081 == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
3082
3083 return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
3084 TREE_TYPE (TREE_OPERAND (ref_b, 0)));
3085 }
3086
/* Initialize a data dependence relation RES in LOOP_NEST.  USE_ALT_INDICES
   is true when the main indices of A and B were not comparable so we try again
   with alternate indices computed on an indirect reference.  */

struct data_dependence_relation *
initialize_data_dependence_relation (struct data_dependence_relation *res,
				     vec<loop_p> loop_nest,
				     bool use_alt_indices)
{
  struct data_reference *a = DDR_A (res);
  struct data_reference *b = DDR_B (res);
  unsigned int i;

  /* Select which set of access functions to analyze: the primary
     indices, or (on retry) the alternate, pointer-based ones computed
     at the bottom of this function.  */
  struct indices *indices_a = &a->indices;
  struct indices *indices_b = &b->indices;
  if (use_alt_indices)
    {
      if (TREE_CODE (DR_REF (a)) != MEM_REF)
	indices_a = &a->alt_indices;
      if (TREE_CODE (DR_REF (b)) != MEM_REF)
	indices_b = &b->alt_indices;
    }
  unsigned int num_dimensions_a = indices_a->access_fns.length ();
  unsigned int num_dimensions_b = indices_b->access_fns.length ();
  if (num_dimensions_a == 0 || num_dimensions_b == 0)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* For unconstrained bases, the root (highest-indexed) subscript
     describes a variation in the base of the original DR_REF rather
     than a component access.  We have no type that accurately describes
     the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
     applying this subscript) so limit the search to the last real
     component access.

     E.g. for:

       void
       f (int a[][8], int b[][8])
       {
	 for (int i = 0; i < 8; ++i)
	   a[i * 2][0] = b[i][0];
       }

     the a and b accesses have a single ARRAY_REF component reference [0]
     but have two subscripts.  */
  if (indices_a->unconstrained_base)
    num_dimensions_a -= 1;
  if (indices_b->unconstrained_base)
    num_dimensions_b -= 1;

  /* These structures describe sequences of component references in
     DR_REF (A) and DR_REF (B).  Each component reference is tied to a
     specific access function.  */
  struct {
    /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
       DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
       indices.  In C notation, these are the indices of the rightmost
       component references; e.g. for a sequence .b.c.d, the start
       index is for .d.  */
    unsigned int start_a;
    unsigned int start_b;

    /* The sequence contains LENGTH consecutive access functions from
       each DR.  */
    unsigned int length;

    /* The enclosing objects for the A and B sequences respectively,
       i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
       and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
    tree object_a;
    tree object_b;
  } full_seq = {}, struct_seq = {};

  /* Before each iteration of the loop:

     - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
     - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
  unsigned int index_a = 0;
  unsigned int index_b = 0;
  tree ref_a = DR_REF (a);
  tree ref_b = DR_REF (b);

  /* Now walk the component references from the final DR_REFs back up to
     the enclosing base objects.  Each component reference corresponds
     to one access function in the DR, with access function 0 being for
     the final DR_REF and the highest-indexed access function being the
     one that is applied to the base of the DR.

     Look for a sequence of component references whose access functions
     are comparable (see access_fn_components_comparable_p).  If more
     than one such sequence exists, pick the one nearest the base
     (which is the leftmost sequence in C notation).  Store this sequence
     in FULL_SEQ.

     For example, if we have:

	struct foo { struct bar s; ... } (*a)[10], (*b)[10];

	A: a[0][i].s.c.d
	B: __real b[0][i].s.e[i].f

     (where d is the same type as the real component of f) then the access
     functions would be:

	 0   1   2   3
     A:  .d  .c  .s  [i]

	 0   1   2   3   4   5
     B:  __real  .f  [i]  .e  .s  [i]

     The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
     and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
     COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
     the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
     so is comparable.  The A3/B5 column contains two ARRAY_REFs that
     index foo[10] arrays, so is again comparable.  The sequence is
     therefore:

	A: [1, 3]  (i.e. [i].s.c)
	B: [3, 5]  (i.e. [i].s.e)

     Also look for sequences of component references whose access
     functions are comparable and whose enclosing objects have the same
     RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
     example, STRUCT_SEQ would be:

	A: [1, 2]  (i.e. s.c)
	B: [3, 4]  (i.e. s.e)  */
  while (index_a < num_dimensions_a && index_b < num_dimensions_b)
    {
      /* The alternate indices form always has a single dimension
	 with unconstrained base.  */
      gcc_assert (!use_alt_indices);

      /* REF_A and REF_B must be one of the component access types
	 allowed by dr_analyze_indices.  */
      gcc_checking_assert (access_fn_component_p (ref_a));
      gcc_checking_assert (access_fn_component_p (ref_b));

      /* Get the immediately-enclosing objects for REF_A and REF_B,
	 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
	 and DR_ACCESS_FN (B, INDEX_B).  */
      tree object_a = TREE_OPERAND (ref_a, 0);
      tree object_b = TREE_OPERAND (ref_b, 0);

      tree type_a = TREE_TYPE (object_a);
      tree type_b = TREE_TYPE (object_b);
      if (access_fn_components_comparable_p (ref_a, ref_b))
	{
	  /* This pair of component accesses is comparable for dependence
	     analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
	     DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
	  if (full_seq.start_a + full_seq.length != index_a
	      || full_seq.start_b + full_seq.length != index_b)
	    {
	      /* The accesses don't extend the current sequence,
		 so start a new one here.  */
	      full_seq.start_a = index_a;
	      full_seq.start_b = index_b;
	      full_seq.length = 0;
	    }

	  /* Add this pair of references to the sequence.  */
	  full_seq.length += 1;
	  full_seq.object_a = object_a;
	  full_seq.object_b = object_b;

	  /* If the enclosing objects are structures (and thus have the
	     same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
	  if (TREE_CODE (type_a) == RECORD_TYPE)
	    struct_seq = full_seq;

	  /* Move to the next containing reference for both A and B.  */
	  ref_a = object_a;
	  ref_b = object_b;
	  index_a += 1;
	  index_b += 1;
	  continue;
	}

      /* Try to approach equal type sizes.  */
      if (!COMPLETE_TYPE_P (type_a)
	  || !COMPLETE_TYPE_P (type_b)
	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
	break;

      /* Step the side whose enclosing type is smaller out to its
	 containing object (both sides when the sizes are equal), so
	 the next comparison is between objects of similar size.  */
      unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
      unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
      if (size_a <= size_b)
	{
	  index_a += 1;
	  ref_a = object_a;
	}
      if (size_b <= size_a)
	{
	  index_b += 1;
	  ref_b = object_b;
	}
    }

  /* See whether FULL_SEQ ends at the base and whether the two bases
     are equal.  We do not care about TBAA or alignment info so we can
     use OEP_ADDRESS_OF to avoid false negatives.  */
  tree base_a = indices_a->base_object;
  tree base_b = indices_b->base_object;
  bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
		      && full_seq.start_b + full_seq.length == num_dimensions_b
		      && (indices_a->unconstrained_base
			  == indices_b->unconstrained_base)
		      && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
		      && (types_compatible_p (TREE_TYPE (base_a),
					      TREE_TYPE (base_b))
			  || (!base_supports_access_fn_components_p (base_a)
			      && !base_supports_access_fn_components_p (base_b)
			      && operand_equal_p
				   (TYPE_SIZE (TREE_TYPE (base_a)),
				    TYPE_SIZE (TREE_TYPE (base_b)), 0)))
		      && (!loop_nest.exists ()
			  || (object_address_invariant_in_loop_p
			      (loop_nest[0], base_a))));

  /* If the bases are the same, we can include the base variation too.
     E.g. the b accesses in:

       for (int i = 0; i < n; ++i)
	 b[i + 4][0] = b[i][0];

     have a definite dependence distance of 4, while for:

       for (int i = 0; i < n; ++i)
	 a[i + 4][0] = b[i][0];

     the dependence distance depends on the gap between a and b.

     If the bases are different then we can only rely on the sequence
     rooted at a structure access, since arrays are allowed to overlap
     arbitrarily and change shape arbitrarily.  E.g. we treat this as
     valid code:

       int a[256];
       ...
       ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];

     where two lvalues with the same int[4][3] type overlap, and where
     both lvalues are distinct from the object's declared type.  */
  if (same_base_p)
    {
      if (indices_a->unconstrained_base)
	full_seq.length += 1;
    }
  else
    full_seq = struct_seq;

  /* Punt if we didn't find a suitable sequence.  */
  if (full_seq.length == 0)
    {
      if (use_alt_indices
	  || (TREE_CODE (DR_REF (a)) == MEM_REF
	      && TREE_CODE (DR_REF (b)) == MEM_REF)
	  || may_be_nonaddressable_p (DR_REF (a))
	  || may_be_nonaddressable_p (DR_REF (b)))
	{
	  /* Fully exhausted possibilities.  */
	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
	  return res;
	}

      /* Try evaluating both DRs as dereferences of pointers.  */
      if (!a->alt_indices.base_object
	  && TREE_CODE (DR_REF (a)) != MEM_REF)
	{
	  tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
				 build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
				 build_int_cst
				   (reference_alias_ptr_type (DR_REF (a)), 0));
	  dr_analyze_indices (&a->alt_indices, alt_ref,
			      loop_preheader_edge (loop_nest[0]),
			      loop_containing_stmt (DR_STMT (a)));
	}
      if (!b->alt_indices.base_object
	  && TREE_CODE (DR_REF (b)) != MEM_REF)
	{
	  tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
				 build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
				 build_int_cst
				   (reference_alias_ptr_type (DR_REF (b)), 0));
	  dr_analyze_indices (&b->alt_indices, alt_ref,
			      loop_preheader_edge (loop_nest[0]),
			      loop_containing_stmt (DR_STMT (b)));
	}
      /* Retry the analysis with the just-computed alternate indices.  */
      return initialize_data_dependence_relation (res, loop_nest, true);
    }

  if (!same_base_p)
    {
      /* Partial overlap is possible for different bases when strict aliasing
	 is not in effect.  It's also possible if either base involves a union
	 access; e.g. for:

	   struct s1 { int a[2]; };
	   struct s2 { struct s1 b; int c; };
	   struct s3 { int d; struct s1 e; };
	   union u { struct s2 f; struct s3 g; } *p, *q;

	 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
	 "p->g.e" (base "p->g") and might partially overlap the s1 at
	 "q->g.e" (base "q->g").  */
      if (!flag_strict_aliasing
	  || ref_contains_union_access_p (full_seq.object_a)
	  || ref_contains_union_access_p (full_seq.object_b))
	{
	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
	  return res;
	}

      DDR_COULD_BE_INDEPENDENT_P (res) = true;
      if (!loop_nest.exists ()
	  || (object_address_invariant_in_loop_p (loop_nest[0],
						  full_seq.object_a)
	      && object_address_invariant_in_loop_p (loop_nest[0],
						     full_seq.object_b)))
	{
	  DDR_OBJECT_A (res) = full_seq.object_a;
	  DDR_OBJECT_B (res) = full_seq.object_b;
	}
    }

  DDR_AFFINE_P (res) = true;
  DDR_ARE_DEPENDENT (res) = NULL_TREE;
  DDR_SUBSCRIPTS (res).create (full_seq.length);
  DDR_LOOP_NEST (res) = loop_nest;
  DDR_SELF_REFERENCE (res) = false;

  /* Create one subscript per pair of comparable access functions,
     initially with unknown conflict information.  */
  for (i = 0; i < full_seq.length; ++i)
    {
      struct subscript *subscript;

      subscript = XNEW (struct subscript);
      SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i];
      SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i];
      SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
      SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
      SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
      SUB_DISTANCE (subscript) = chrec_dont_know;
      DDR_SUBSCRIPTS (res).safe_push (subscript);
    }

  return res;
}
3440
3441 /* Initialize a data dependence relation between data accesses A and
3442 B. NB_LOOPS is the number of loops surrounding the references: the
3443 size of the classic distance/direction vectors. */
3444
3445 struct data_dependence_relation *
3446 initialize_data_dependence_relation (struct data_reference *a,
3447 struct data_reference *b,
3448 vec<loop_p> loop_nest)
3449 {
3450 data_dependence_relation *res = XCNEW (struct data_dependence_relation);
3451 DDR_A (res) = a;
3452 DDR_B (res) = b;
3453 DDR_LOOP_NEST (res).create (0);
3454 DDR_SUBSCRIPTS (res).create (0);
3455 DDR_DIR_VECTS (res).create (0);
3456 DDR_DIST_VECTS (res).create (0);
3457
3458 if (a == NULL || b == NULL)
3459 {
3460 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3461 return res;
3462 }
3463
3464 /* If the data references do not alias, then they are independent. */
3465 if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
3466 {
3467 DDR_ARE_DEPENDENT (res) = chrec_known;
3468 return res;
3469 }
3470
3471 return initialize_data_dependence_relation (res, loop_nest, false);
3472 }
3473
3474
3475 /* Frees memory used by the conflict function F. */
3476
3477 static void
3478 free_conflict_function (conflict_function *f)
3479 {
3480 unsigned i;
3481
3482 if (CF_NONTRIVIAL_P (f))
3483 {
3484 for (i = 0; i < f->n; i++)
3485 affine_fn_free (f->fns[i]);
3486 }
3487 free (f);
3488 }
3489
3490 /* Frees memory used by SUBSCRIPTS. */
3491
3492 static void
3493 free_subscripts (vec<subscript_p> subscripts)
3494 {
3495 for (subscript_p s : subscripts)
3496 {
3497 free_conflict_function (s->conflicting_iterations_in_a);
3498 free_conflict_function (s->conflicting_iterations_in_b);
3499 free (s);
3500 }
3501 subscripts.release ();
3502 }
3503
3504 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
3505 description. */
3506
3507 static inline void
3508 finalize_ddr_dependent (struct data_dependence_relation *ddr,
3509 tree chrec)
3510 {
3511 DDR_ARE_DEPENDENT (ddr) = chrec;
3512 free_subscripts (DDR_SUBSCRIPTS (ddr));
3513 DDR_SUBSCRIPTS (ddr).create (0);
3514 }
3515
3516 /* The dependence relation DDR cannot be represented by a distance
3517 vector. */
3518
3519 static inline void
3520 non_affine_dependence_relation (struct data_dependence_relation *ddr)
3521 {
3522 if (dump_file && (dump_flags & TDF_DETAILS))
3523 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
3524
3525 DDR_AFFINE_P (ddr) = false;
3526 }
3527
3528
3529
3531 /* This section contains the classic Banerjee tests. */
3532
3533 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
3534 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
3535
3536 static inline bool
3537 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3538 {
3539 return (evolution_function_is_constant_p (chrec_a)
3540 && evolution_function_is_constant_p (chrec_b));
3541 }
3542
3543 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
3544 variable, i.e., if the SIV (Single Index Variable) test is true. */
3545
3546 static bool
3547 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3548 {
3549 if ((evolution_function_is_constant_p (chrec_a)
3550 && evolution_function_is_univariate_p (chrec_b))
3551 || (evolution_function_is_constant_p (chrec_b)
3552 && evolution_function_is_univariate_p (chrec_a)))
3553 return true;
3554
3555 if (evolution_function_is_univariate_p (chrec_a)
3556 && evolution_function_is_univariate_p (chrec_b))
3557 {
3558 switch (TREE_CODE (chrec_a))
3559 {
3560 case POLYNOMIAL_CHREC:
3561 switch (TREE_CODE (chrec_b))
3562 {
3563 case POLYNOMIAL_CHREC:
3564 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
3565 return false;
3566 /* FALLTHRU */
3567
3568 default:
3569 return true;
3570 }
3571
3572 default:
3573 return true;
3574 }
3575 }
3576
3577 return false;
3578 }
3579
3580 /* Creates a conflict function with N dimensions. The affine functions
3581 in each dimension follow. */
3582
3583 static conflict_function *
3584 conflict_fn (unsigned n, ...)
3585 {
3586 unsigned i;
3587 conflict_function *ret = XCNEW (conflict_function);
3588 va_list ap;
3589
3590 gcc_assert (n > 0 && n <= MAX_DIM);
3591 va_start (ap, n);
3592
3593 ret->n = n;
3594 for (i = 0; i < n; i++)
3595 ret->fns[i] = va_arg (ap, affine_fn);
3596 va_end (ap);
3597
3598 return ret;
3599 }
3600
3601 /* Returns constant affine function with value CST. */
3602
3603 static affine_fn
3604 affine_fn_cst (tree cst)
3605 {
3606 affine_fn fn;
3607 fn.create (1);
3608 fn.quick_push (cst);
3609 return fn;
3610 }
3611
3612 /* Returns affine function with single variable, CST + COEF * x_DIM. */
3613
3614 static affine_fn
3615 affine_fn_univar (tree cst, unsigned dim, tree coef)
3616 {
3617 affine_fn fn;
3618 fn.create (dim + 1);
3619 unsigned i;
3620
3621 gcc_assert (dim > 0);
3622 fn.quick_push (cst);
3623 for (i = 1; i < dim; i++)
3624 fn.quick_push (integer_zero_node);
3625 fn.quick_push (coef);
3626 return fn;
3627 }
3628
3629 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
3630 *OVERLAPS_B are initialized to the functions that describe the
3631 relation between the elements accessed twice by CHREC_A and
3632 CHREC_B. For k >= 0, the following property is verified:
3633
3634 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3635
3636 static void
3637 analyze_ziv_subscript (tree chrec_a,
3638 tree chrec_b,
3639 conflict_function **overlaps_a,
3640 conflict_function **overlaps_b,
3641 tree *last_conflicts)
3642 {
3643 tree type, difference;
3644 dependence_stats.num_ziv++;
3645
3646 if (dump_file && (dump_flags & TDF_DETAILS))
3647 fprintf (dump_file, "(analyze_ziv_subscript \n");
3648
3649 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3650 chrec_a = chrec_convert (type, chrec_a, NULL);
3651 chrec_b = chrec_convert (type, chrec_b, NULL);
3652 difference = chrec_fold_minus (type, chrec_a, chrec_b);
3653
3654 switch (TREE_CODE (difference))
3655 {
3656 case INTEGER_CST:
3657 if (integer_zerop (difference))
3658 {
3659 /* The difference is equal to zero: the accessed index
3660 overlaps for each iteration in the loop. */
3661 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3662 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3663 *last_conflicts = chrec_dont_know;
3664 dependence_stats.num_ziv_dependent++;
3665 }
3666 else
3667 {
3668 /* The accesses do not overlap. */
3669 *overlaps_a = conflict_fn_no_dependence ();
3670 *overlaps_b = conflict_fn_no_dependence ();
3671 *last_conflicts = integer_zero_node;
3672 dependence_stats.num_ziv_independent++;
3673 }
3674 break;
3675
3676 default:
3677 /* We're not sure whether the indexes overlap. For the moment,
3678 conservatively answer "don't know". */
3679 if (dump_file && (dump_flags & TDF_DETAILS))
3680 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
3681
3682 *overlaps_a = conflict_fn_not_known ();
3683 *overlaps_b = conflict_fn_not_known ();
3684 *last_conflicts = chrec_dont_know;
3685 dependence_stats.num_ziv_unimplemented++;
3686 break;
3687 }
3688
3689 if (dump_file && (dump_flags & TDF_DETAILS))
3690 fprintf (dump_file, ")\n");
3691 }
3692
3693 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
3694 and only if it fits to the int type. If this is not the case, or the
3695 bound on the number of iterations of LOOP could not be derived, returns
3696 chrec_dont_know. */
3697
3698 static tree
3699 max_stmt_executions_tree (class loop *loop)
3700 {
3701 widest_int nit;
3702
3703 if (!max_stmt_executions (loop, &nit))
3704 return chrec_dont_know;
3705
3706 if (!wi::fits_to_tree_p (nit, unsigned_type_node))
3707 return chrec_dont_know;
3708
3709 return wide_int_to_tree (unsigned_type_node, nit);
3710 }
3711
3712 /* Determine whether the CHREC is always positive/negative. If the expression
3713 cannot be statically analyzed, return false, otherwise set the answer into
3714 VALUE. */
3715
3716 static bool
3717 chrec_is_positive (tree chrec, bool *value)
3718 {
3719 bool value0, value1, value2;
3720 tree end_value, nb_iter;
3721
3722 switch (TREE_CODE (chrec))
3723 {
3724 case POLYNOMIAL_CHREC:
3725 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
3726 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
3727 return false;
3728
3729 /* FIXME -- overflows. */
3730 if (value0 == value1)
3731 {
3732 *value = value0;
3733 return true;
3734 }
3735
3736 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
3737 and the proof consists in showing that the sign never
3738 changes during the execution of the loop, from 0 to
3739 loop->nb_iterations. */
3740 if (!evolution_function_is_affine_p (chrec))
3741 return false;
3742
3743 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
3744 if (chrec_contains_undetermined (nb_iter))
3745 return false;
3746
3747 #if 0
3748 /* TODO -- If the test is after the exit, we may decrease the number of
3749 iterations by one. */
3750 if (after_exit)
3751 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
3752 #endif
3753
3754 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
3755
3756 if (!chrec_is_positive (end_value, &value2))
3757 return false;
3758
3759 *value = value0;
3760 return value0 == value1;
3761
3762 case INTEGER_CST:
3763 switch (tree_int_cst_sgn (chrec))
3764 {
3765 case -1:
3766 *value = false;
3767 break;
3768 case 1:
3769 *value = true;
3770 break;
3771 default:
3772 return false;
3773 }
3774 return true;
3775
3776 default:
3777 return false;
3778 }
3779 }
3780
3781
/* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
   constant, and CHREC_B is an affine function.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_siv_subscript_cst_affine (tree chrec_a,
				  tree chrec_b,
				  conflict_function **overlaps_a,
				  conflict_function **overlaps_b,
				  tree *last_conflicts)
{
  bool value0, value1, value2;
  tree type, difference, tmp;

  /* Work in a common signed type; DIFFERENCE is CHREC_B's initial
     value minus the constant CHREC_A.  */
  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);

  /* Special case overlap in the first iteration.  */
  if (integer_zerop (difference))
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_one_node;
      return;
    }

  /* VALUE0: whether CHREC_B starts above CHREC_A.  */
  if (!chrec_is_positive (initial_condition (difference), &value0))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "siv test failed: chrec is not positive.\n");

      dependence_stats.num_siv_unimplemented++;
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }
  else
    {
      if (value0 == false)
	{
	  /* CHREC_B starts below CHREC_A; VALUE1 is the sign of
	     CHREC_B's step.  */
	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value1 == true)
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, 1}
		  */

		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
		    {
		      HOST_WIDE_INT numiter;
		      class loop *loop = get_chrec_loop (chrec_b);

		      /* CHREC_B reaches CHREC_A after exactly
			 |difference| / step iterations.  */
		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
		      tmp = fold_build2 (EXACT_DIV_EXPR, type,
					 fold_build1 (ABS_EXPR, type, difference),
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
		      *last_conflicts = integer_one_node;


		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  free_conflict_function (*overlaps_a);
			  free_conflict_function (*overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there are
		     no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}

	      else
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, -1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
      else
	{
	  /* CHREC_B starts above CHREC_A; VALUE2 is the sign of
	     CHREC_B's step.  This is the mirror image of the case
	     above.  */
	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value2 == false)
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {10, +, -1}
		  */
		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
		    {
		      HOST_WIDE_INT numiter;
		      class loop *loop = get_chrec_loop (chrec_b);

		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
		      tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
		      *last_conflicts = integer_one_node;

		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  free_conflict_function (*overlaps_a);
			  free_conflict_function (*overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there
		     are no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}
	      else
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {4, +, 1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
    }
}
3989
3990 /* Helper recursive function for initializing the matrix A. Returns
3991 the initial value of CHREC. */
3992
3993 static tree
3994 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
3995 {
3996 gcc_assert (chrec);
3997
3998 switch (TREE_CODE (chrec))
3999 {
4000 case POLYNOMIAL_CHREC:
4001 HOST_WIDE_INT chrec_right;
4002 if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
4003 return chrec_dont_know;
4004 chrec_right = int_cst_value (CHREC_RIGHT (chrec));
4005 /* We want to be able to negate without overflow. */
4006 if (chrec_right == HOST_WIDE_INT_MIN)
4007 return chrec_dont_know;
4008 A[index][0] = mult * chrec_right;
4009 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
4010
4011 case PLUS_EXPR:
4012 case MULT_EXPR:
4013 case MINUS_EXPR:
4014 {
4015 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4016 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
4017
4018 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
4019 }
4020
4021 CASE_CONVERT:
4022 {
4023 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4024 return chrec_convert (chrec_type (chrec), op, NULL);
4025 }
4026
4027 case BIT_NOT_EXPR:
4028 {
4029 /* Handle ~X as -1 - X. */
4030 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4031 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
4032 build_int_cst (TREE_TYPE (chrec), -1), op);
4033 }
4034
4035 case INTEGER_CST:
4036 return cst_and_fits_in_hwi (chrec) ? chrec : chrec_dont_know;
4037
4038 default:
4039 gcc_unreachable ();
4040 return NULL_TREE;
4041 }
4042 }
4043
4044 #define FLOOR_DIV(x,y) ((x) / (y))
4045
4046 /* Solves the special case of the Diophantine equation:
4047 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
4048
4049 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
4050 number of iterations that loops X and Y run. The overlaps will be
4051 constructed as evolutions in dimension DIM. */
4052
4053 static void
4054 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
4055 HOST_WIDE_INT step_a,
4056 HOST_WIDE_INT step_b,
4057 affine_fn *overlaps_a,
4058 affine_fn *overlaps_b,
4059 tree *last_conflicts, int dim)
4060 {
4061 if (((step_a > 0 && step_b > 0)
4062 || (step_a < 0 && step_b < 0)))
4063 {
4064 HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
4065 HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
4066
4067 gcd_steps_a_b = gcd (step_a, step_b);
4068 step_overlaps_a = step_b / gcd_steps_a_b;
4069 step_overlaps_b = step_a / gcd_steps_a_b;
4070
4071 if (niter > 0)
4072 {
4073 tau2 = FLOOR_DIV (niter, step_overlaps_a);
4074 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
4075 last_conflict = tau2;
4076 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
4077 }
4078 else
4079 *last_conflicts = chrec_dont_know;
4080
4081 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
4082 build_int_cst (NULL_TREE,
4083 step_overlaps_a));
4084 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
4085 build_int_cst (NULL_TREE,
4086 step_overlaps_b));
4087 }
4088
4089 else
4090 {
4091 *overlaps_a = affine_fn_cst (integer_zero_node);
4092 *overlaps_b = affine_fn_cst (integer_zero_node);
4093 *last_conflicts = integer_zero_node;
4094 }
4095 }
4096
/* Solves the special case of a Diophantine equation where CHREC_A is
   an affine bivariate function, and CHREC_B is an affine univariate
   function.  For example,

   | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z

   has the following overlapping functions:

   | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
   | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
   | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v

   FORNOW: This is a specialized implementation for a case occurring in
   a common benchmark.  Implement the general algorithm.  */

static void
compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
				      conflict_function **overlaps_a,
				      conflict_function **overlaps_b,
				      tree *last_conflicts)
{
  bool xz_p, yz_p, xyz_p;
  HOST_WIDE_INT step_x, step_y, step_z;
  HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
  affine_fn overlaps_a_xz, overlaps_b_xz;
  affine_fn overlaps_a_yz, overlaps_b_yz;
  affine_fn overlaps_a_xyz, overlaps_b_xyz;
  affine_fn ova1, ova2, ovb;
  tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;

  /* X is the inner dimension of CHREC_A, Y its outer dimension, and Z
     the single dimension of CHREC_B.  */
  step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
  step_y = int_cst_value (CHREC_RIGHT (chrec_a));
  step_z = int_cst_value (CHREC_RIGHT (chrec_b));

  niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
  niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
  niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));

  /* A negative count means the iteration bound is unknown; without
     bounds this specialized test cannot conclude anything.  */
  if (niter_x < 0 || niter_y < 0 || niter_z < 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }

  /* Solve the three univariate sub-problems: x against z, y against z,
     and the combined step x+y against z, each in its own dimension.  */
  niter = MIN (niter_x, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
					   &overlaps_a_xz,
					   &overlaps_b_xz,
					   &last_conflicts_xz, 1);
  niter = MIN (niter_y, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
					   &overlaps_a_yz,
					   &overlaps_b_yz,
					   &last_conflicts_yz, 2);
  niter = MIN (niter_x, niter_z);
  niter = MIN (niter_y, niter);
  compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
					   &overlaps_a_xyz,
					   &overlaps_b_xyz,
					   &last_conflicts_xyz, 3);

  /* A zero last-conflict count means the corresponding sub-problem
     found no overlap.  */
  xz_p = !integer_zerop (last_conflicts_xz);
  yz_p = !integer_zerop (last_conflicts_yz);
  xyz_p = !integer_zerop (last_conflicts_xyz);

  if (xz_p || yz_p || xyz_p)
    {
      /* Accumulate the contributing sub-solutions; the old affine_fn
	 is freed after each affine_fn_plus since plus allocates a new
	 one.  */
      ova1 = affine_fn_cst (integer_zero_node);
      ova2 = affine_fn_cst (integer_zero_node);
      ovb = affine_fn_cst (integer_zero_node);
      if (xz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_xz;
	}
      if (yz_p)
	{
	  affine_fn t0 = ova2;
	  affine_fn t2 = ovb;

	  ova2 = affine_fn_plus (ova2, overlaps_a_yz);
	  ovb = affine_fn_plus (ovb, overlaps_b_yz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_yz;
	}
      if (xyz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ova2;
	  affine_fn t4 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
	  ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xyz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  affine_fn_free (t4);
	  *last_conflicts = last_conflicts_xyz;
	}
      /* Ownership of ova1/ova2/ovb transfers to the conflict
	 functions.  */
      *overlaps_a = conflict_fn (2, ova1, ova2);
      *overlaps_b = conflict_fn (1, ovb);
    }
  else
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_zero_node;
    }

  /* The intermediate sub-solutions were copied by affine_fn_plus
     above; release them.  */
  affine_fn_free (overlaps_a_xz);
  affine_fn_free (overlaps_b_xz);
  affine_fn_free (overlaps_a_yz);
  affine_fn_free (overlaps_b_yz);
  affine_fn_free (overlaps_a_xyz);
  affine_fn_free (overlaps_b_xyz);
}
4225
4226 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
4227
4228 static void
4229 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
4230 int size)
4231 {
4232 memcpy (vec2, vec1, size * sizeof (*vec1));
4233 }
4234
4235 /* Copy the elements of M x N matrix MAT1 to MAT2. */
4236
4237 static void
4238 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
4239 int m, int n)
4240 {
4241 int i;
4242
4243 for (i = 0; i < m; i++)
4244 lambda_vector_copy (mat1[i], mat2[i], n);
4245 }
4246
4247 /* Store the N x N identity matrix in MAT. */
4248
4249 static void
4250 lambda_matrix_id (lambda_matrix mat, int size)
4251 {
4252 int i, j;
4253
4254 for (i = 0; i < size; i++)
4255 for (j = 0; j < size; j++)
4256 mat[i][j] = (i == j) ? 1 : 0;
4257 }
4258
4259 /* Return the index of the first nonzero element of vector VEC1 between
4260 START and N. We must have START <= N.
4261 Returns N if VEC1 is the zero vector. */
4262
4263 static int
4264 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
4265 {
4266 int j = start;
4267 while (j < n && vec1[j] == 0)
4268 j++;
4269 return j;
4270 }
4271
4272 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
4273 R2 = R2 + CONST1 * R1. */
4274
4275 static bool
4276 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
4277 lambda_int const1)
4278 {
4279 int i;
4280
4281 if (const1 == 0)
4282 return true;
4283
4284 for (i = 0; i < n; i++)
4285 {
4286 bool ovf;
4287 lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
4288 if (ovf)
4289 return false;
4290 lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
4291 if (ovf || tem2 == HOST_WIDE_INT_MIN)
4292 return false;
4293 mat[r2][i] = tem2;
4294 }
4295
4296 return true;
4297 }
4298
4299 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
4300 and store the result in VEC2. */
4301
4302 static void
4303 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
4304 int size, lambda_int const1)
4305 {
4306 int i;
4307
4308 if (const1 == 0)
4309 lambda_vector_clear (vec2, size);
4310 else
4311 for (i = 0; i < size; i++)
4312 vec2[i] = const1 * vec1[i];
4313 }
4314
4315 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
4316
4317 static void
4318 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
4319 int size)
4320 {
4321 lambda_vector_mult_const (vec1, vec2, size, -1);
4322 }
4323
4324 /* Negate row R1 of matrix MAT which has N columns. */
4325
4326 static void
4327 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
4328 {
4329 lambda_vector_negate (mat[r1], mat[r1], n);
4330 }
4331
4332 /* Return true if two vectors are equal. */
4333
4334 static bool
4335 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
4336 {
4337 int i;
4338 for (i = 0; i < size; i++)
4339 if (vec1[i] != vec2[i])
4340 return false;
4341 return true;
4342 }
4343
/* Given an M x N integer matrix A, this function determines an M x
   M unimodular matrix U, and an M x N echelon matrix S such that
   "U.A = S".  This decomposition is also known as "right Hermite".

   Returns false if the elimination overflows, true on success.

   Ref: Algorithm 2.1 page 33 in "Loop Transformations for
   Restructuring Compilers" Utpal Banerjee.  */

static bool
lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
			     lambda_matrix S, lambda_matrix U)
{
  int i, j, i0 = 0;

  /* Start with S = A and U = identity; every row operation applied to
     S below is mirrored on U, which maintains the invariant
     U.A = S.  */
  lambda_matrix_copy (A, S, m, n);
  lambda_matrix_id (U, m);

  for (j = 0; j < n; j++)
    {
      if (lambda_vector_first_nz (S[j], m, i0) < m)
	{
	  ++i0;
	  /* Reduce the entries below the pivot to zero with a
	     Euclid-like sequence of row combinations and swaps.  */
	  for (i = m - 1; i >= i0; i--)
	    {
	      while (S[i][j] != 0)
		{
		  lambda_int factor, a, b;

		  a = S[i-1][j];
		  b = S[i][j];
		  /* a == HOST_WIDE_INT_MIN together with b == -1 would
		     overflow in the division below; the row operations
		     never produce HOST_WIDE_INT_MIN (see
		     lambda_matrix_row_add), hence the assert.  */
		  gcc_assert (a != HOST_WIDE_INT_MIN);
		  factor = a / b;

		  /* Row additions can overflow; propagate the failure
		     so the caller can give up.  */
		  if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
		    return false;
		  std::swap (S[i], S[i-1]);

		  if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
		    return false;
		  std::swap (U[i], U[i-1]);
		}
	    }
	}
    }

  return true;
}
4390
/* Determines the overlapping elements due to accesses CHREC_A and
   CHREC_B, that are affine functions.  This function cannot handle
   symbolic evolution functions, ie. when initial conditions are
   parameters, because it uses lambda matrices of integers.  */

static void
analyze_subscript_affine_affine (tree chrec_a,
				 tree chrec_b,
				 conflict_function **overlaps_a,
				 conflict_function **overlaps_b,
				 tree *last_conflicts)
{
  unsigned nb_vars_a, nb_vars_b, dim;
  lambda_int gamma, gcd_alpha_beta;
  lambda_matrix A, U, S;
  struct obstack scratch_obstack;

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* The accessed index overlaps for each iteration in the
	 loop.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
      return;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_subscript_affine_affine \n");

  /* For determining the initial intersection, we have to solve a
     Diophantine equation.  This is the most time consuming part.

     For answering to the question: "Is there a dependence?" we have
     to prove that there exists a solution to the Diophantine
     equation, and that the solution is in the iteration domain,
     i.e. the solution is positive or zero, and that the solution
     happens before the upper bound loop.nb_iterations.  Otherwise
     there is no dependence.  This function outputs a description of
     the iterations that hold the intersections.  */

  nb_vars_a = nb_vars_in_chrec (chrec_a);
  nb_vars_b = nb_vars_in_chrec (chrec_b);

  /* All the matrices live on a local obstack released at
     end_analyze_subs_aa.  */
  gcc_obstack_init (&scratch_obstack);

  dim = nb_vars_a + nb_vars_b;
  U = lambda_matrix_new (dim, dim, &scratch_obstack);
  A = lambda_matrix_new (dim, 1, &scratch_obstack);
  S = lambda_matrix_new (dim, 1, &scratch_obstack);

  /* Fill A with the steps of CHREC_A and, negated, of CHREC_B (note
     the mult arguments 1 and -1).  The return values are the initial
     values of the chrecs.  */
  tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
  tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
  if (init_a == chrec_dont_know
      || init_b == chrec_dont_know)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: "
		 "representation issue.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }
  /* GAMMA is the constant part of the Diophantine equation: the
     difference of the initial values.  */
  gamma = int_cst_value (init_b) - int_cst_value (init_a);

  /* Don't do all the hard work of solving the Diophantine equation
     when we already know the solution: for example,
     | {3, +, 1}_1
     | {3, +, 4}_2
     | gamma = 3 - 3 = 0.
     Then the first overlap occurs during the first iterations:
     | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
  */
  if (gamma == 0)
    {
      if (nb_vars_a == 1 && nb_vars_b == 1)
	{
	  HOST_WIDE_INT step_a, step_b;
	  HOST_WIDE_INT niter, niter_a, niter_b;
	  affine_fn ova, ovb;

	  niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
	  niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
	  niter = MIN (niter_a, niter_b);
	  step_a = int_cst_value (CHREC_RIGHT (chrec_a));
	  step_b = int_cst_value (CHREC_RIGHT (chrec_b));

	  compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
						   &ova, &ovb,
						   last_conflicts, 1);
	  *overlaps_a = conflict_fn (1, ova);
	  *overlaps_b = conflict_fn (1, ovb);
	}

      /* Bivariate vs. univariate: handled by the specialized 1_2
	 solver, in either argument order.  */
      else if (nb_vars_a == 2 && nb_vars_b == 1)
	compute_overlap_steps_for_affine_1_2
	  (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);

      else if (nb_vars_a == 1 && nb_vars_b == 2)
	compute_overlap_steps_for_affine_1_2
	  (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);

      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: too many variables.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
      goto end_analyze_subs_aa;
    }

  /* U.A = S */
  if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
    {
      /* The Hermite decomposition overflowed.  */
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  /* Normalize the pivot to be positive, mirroring the sign change on
     U to keep U.A = S.  */
  if (S[0][0] < 0)
    {
      S[0][0] *= -1;
      lambda_matrix_row_negate (U, dim, 0);
    }
  gcd_alpha_beta = S[0][0];

  /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
     but that is a quite strange case.  Instead of ICEing, answer
     don't know.  */
  if (gcd_alpha_beta == 0)
    {
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  /* The classic "gcd-test".  */
  if (!int_divides_p (gcd_alpha_beta, gamma))
    {
      /* The "gcd-test" has determined that there is no integer
	 solution, i.e. there is no dependence.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
    }

  /* Both access functions are univariate.  This includes SIV and MIV cases.  */
  else if (nb_vars_a == 1 && nb_vars_b == 1)
    {
      /* Both functions should have the same evolution sign.  */
      if (((A[0][0] > 0 && -A[1][0] > 0)
	   || (A[0][0] < 0 && -A[1][0] < 0)))
	{
	  /* The solutions are given by:
	     |
	     | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
	     |                           [u21 u22]    [y0]

	     For a given integer t.  Using the following variables,

	     | i0 = u11 * gamma / gcd_alpha_beta
	     | j0 = u12 * gamma / gcd_alpha_beta
	     | i1 = u21
	     | j1 = u22

	     the solutions are:

	     | x0 = i0 + i1 * t,
	     | y0 = j0 + j1 * t.  */
	  HOST_WIDE_INT i0, j0, i1, j1;

	  i0 = U[0][0] * gamma / gcd_alpha_beta;
	  j0 = U[0][1] * gamma / gcd_alpha_beta;
	  i1 = U[1][0];
	  j1 = U[1][1];

	  if ((i1 == 0 && i0 < 0)
	      || (j1 == 0 && j0 < 0))
	    {
	      /* There is no solution.
		 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
		 falls in here, but for the moment we don't look at the
		 upper bound of the iteration domain.  */
	      *overlaps_a = conflict_fn_no_dependence ();
	      *overlaps_b = conflict_fn_no_dependence ();
	      *last_conflicts = integer_zero_node;
	      goto end_analyze_subs_aa;
	    }

	  if (i1 > 0 && j1 > 0)
	    {
	      HOST_WIDE_INT niter_a
		= max_stmt_executions_int (get_chrec_loop (chrec_a));
	      HOST_WIDE_INT niter_b
		= max_stmt_executions_int (get_chrec_loop (chrec_b));
	      HOST_WIDE_INT niter = MIN (niter_a, niter_b);

	      /* (X0, Y0) is a solution of the Diophantine equation:
		 "chrec_a (X0) = chrec_b (Y0)".  */
	      HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
					CEIL (-j0, j1));
	      HOST_WIDE_INT x0 = i1 * tau1 + i0;
	      HOST_WIDE_INT y0 = j1 * tau1 + j0;

	      /* (X1, Y1) is the smallest positive solution of the eq
		 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
		 first conflict occurs.  */
	      HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
	      HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
	      HOST_WIDE_INT y1 = y0 - j1 * min_multiple;

	      if (niter > 0)
		{
		  /* If the overlap occurs outside of the bounds of the
		     loop, there is no dependence.  */
		  if (x1 >= niter_a || y1 >= niter_b)
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      goto end_analyze_subs_aa;
		    }

		  /* max stmt executions can get quite large, avoid
		     overflows by using wide ints here.  */
		  widest_int tau2
		    = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
				wi::sdiv_floor (wi::sub (niter_b, j0), j1));
		  widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
		  if (wi::min_precision (last_conflict, SIGNED)
		      <= TYPE_PRECISION (integer_type_node))
		    *last_conflicts
		       = build_int_cst (integer_type_node,
					last_conflict.to_shwi ());
		  else
		    *last_conflicts = chrec_dont_know;
		}
	      else
		*last_conflicts = chrec_dont_know;

	      /* The conflicting iterations are X1 + I1*k for CHREC_A
		 and Y1 + J1*k for CHREC_B.  */
	      *overlaps_a
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, x1),
						 1,
						 build_int_cst (NULL_TREE, i1)));
	      *overlaps_b
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, y1),
						 1,
						 build_int_cst (NULL_TREE, j1)));
	    }
	  else
	    {
	      /* FIXME: For the moment, the upper bound of the
		 iteration domain for i and j is not checked.  */
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	    }
	}
      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
    }

 end_analyze_subs_aa:
  obstack_free (&scratch_obstack, NULL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  (overlaps_a = ");
      dump_conflict_function (dump_file, *overlaps_a);
      fprintf (dump_file, ")\n  (overlaps_b = ");
      dump_conflict_function (dump_file, *overlaps_b);
      fprintf (dump_file, "))\n");
    }
}
4686
4687 /* Returns true when analyze_subscript_affine_affine can be used for
4688 determining the dependence relation between chrec_a and chrec_b,
4689 that contain symbols. This function modifies chrec_a and chrec_b
4690 such that the analysis result is the same, and such that they don't
4691 contain symbols, and then can safely be passed to the analyzer.
4692
4693 Example: The analysis of the following tuples of evolutions produce
4694 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
4695 vs. {0, +, 1}_1
4696
4697 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
4698 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
4699 */
4700
4701 static bool
4702 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
4703 {
4704 tree diff, type, left_a, left_b, right_b;
4705
4706 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
4707 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
4708 /* FIXME: For the moment not handled. Might be refined later. */
4709 return false;
4710
4711 type = chrec_type (*chrec_a);
4712 left_a = CHREC_LEFT (*chrec_a);
4713 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
4714 diff = chrec_fold_minus (type, left_a, left_b);
4715
4716 if (!evolution_function_is_constant_p (diff))
4717 return false;
4718
4719 if (dump_file && (dump_flags & TDF_DETAILS))
4720 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
4721
4722 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
4723 diff, CHREC_RIGHT (*chrec_a));
4724 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
4725 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
4726 build_int_cst (type, 0),
4727 right_b);
4728 return true;
4729 }
4730
/* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_siv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       int loop_nest_num)
{
  dependence_stats.num_siv++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_siv_subscript \n");

  /* Constant vs. affine: use the cst-affine specialization; when the
     constant is CHREC_B, swap the arguments and the corresponding
     output conflict functions.  */
  if (evolution_function_is_constant_p (chrec_a)
      && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
    analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
				      overlaps_a, overlaps_b, last_conflicts);

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
	   && evolution_function_is_constant_p (chrec_b))
    analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
				      overlaps_b, overlaps_a, last_conflicts);

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
    {
      if (!chrec_contains_symbols (chrec_a)
	  && !chrec_contains_symbols (chrec_b))
	{
	  /* Affine vs. affine without symbols: use the lambda-matrix
	     based solver directly, then classify the result for the
	     statistics.  */
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      else if (can_use_analyze_subscript_affine_affine (&chrec_a,
							&chrec_b))
	{
	  /* The symbolic initial conditions have been rewritten away
	     (CHREC_A and CHREC_B were modified in place by the call
	     above), so the affine-affine solver applies.  */
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      else
	/* Jumps into the "don't know" handling in the final else
	   branch below.  */
	goto siv_subscript_dontknow;
    }

  else
    {
      siv_subscript_dontknow:;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "siv test failed: unimplemented");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_siv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
4814
4815 /* Returns false if we can prove that the greatest common divisor of the steps
4816 of CHREC does not divide CST, false otherwise. */
4817
4818 static bool
4819 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4820 {
4821 HOST_WIDE_INT cd = 0, val;
4822 tree step;
4823
4824 if (!tree_fits_shwi_p (cst))
4825 return true;
4826 val = tree_to_shwi (cst);
4827
4828 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4829 {
4830 step = CHREC_RIGHT (chrec);
4831 if (!tree_fits_shwi_p (step))
4832 return true;
4833 cd = gcd (cd, tree_to_shwi (step));
4834 chrec = CHREC_LEFT (chrec);
4835 }
4836
4837 return val % cd == 0;
4838 }
4839
/* Analyze a MIV (Multiple Index Variable) subscript with respect to
   LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
   functions that describe the relation between the elements accessed
   twice by CHREC_A and CHREC_B.  For k >= 0, the following property
   is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_miv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       class loop *loop_nest)
{
  tree type, difference;

  dependence_stats.num_miv++;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_miv_subscript \n");

  /* Work in a common signed type so the difference below folds.  */
  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, chrec_a, chrec_b);

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* Access functions are the same: all the elements are accessed
	 in the same order.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
      dependence_stats.num_miv_dependent++;
    }

  else if (evolution_function_is_constant_p (difference)
	   && evolution_function_is_affine_multivariate_p (chrec_a,
							   loop_nest->num)
	   && !gcd_of_steps_may_divide_p (chrec_a, difference))
    {
      /* testsuite/.../ssa-chrec-33.c
	 {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2

	 The difference is 1, and all the evolution steps are multiples
	 of 2, consequently there are no overlapping elements.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
      dependence_stats.num_miv_independent++;
    }

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
	   && !chrec_contains_symbols (chrec_a, loop_nest)
	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
	   && !chrec_contains_symbols (chrec_b, loop_nest))
    {
      /* testsuite/.../ssa-chrec-35.c
	 {0, +, 1}_2  vs.  {0, +, 1}_3
	 the overlapping elements are respectively located at iterations:
	 {0, +, 1}_x and {0, +, 1}_x,
	 in other words, we have the equality:
	 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)

	 Other examples:
	 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
	 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)

	 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
	 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
      */
      analyze_subscript_affine_affine (chrec_a, chrec_b,
				       overlaps_a, overlaps_b, last_conflicts);

      /* Classify the solver's answer for the statistics.  */
      if (CF_NOT_KNOWN_P (*overlaps_a)
	  || CF_NOT_KNOWN_P (*overlaps_b))
	dependence_stats.num_miv_unimplemented++;
      else if (CF_NO_DEPENDENCE_P (*overlaps_a)
	       || CF_NO_DEPENDENCE_P (*overlaps_b))
	dependence_stats.num_miv_independent++;
      else
	dependence_stats.num_miv_dependent++;
    }

  else
    {
      /* When the analysis is too difficult, answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_miv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
4940
/* Determines the iterations for which CHREC_A is equal to CHREC_B
   with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
   OVERLAP_ITERATIONS_B are initialized with two functions that
   describe the iterations that contain conflicting elements.

   Remark: For an integer k >= 0, the following equality is true:

   CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).

   LAST_CONFLICTS receives an upper bound on the number of conflicting
   iterations, or chrec_dont_know when no bound is computed.  */

static void
analyze_overlapping_iterations (tree chrec_a,
				tree chrec_b,
				conflict_function **overlap_iterations_a,
				conflict_function **overlap_iterations_b,
				tree *last_conflicts, class loop *loop_nest)
{
  unsigned int lnn = loop_nest->num;

  dependence_stats.num_subscript_tests++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(analyze_overlapping_iterations \n");
      fprintf (dump_file, "  (chrec_a = ");
      print_generic_expr (dump_file, chrec_a);
      fprintf (dump_file, ")\n  (chrec_b = ");
      print_generic_expr (dump_file, chrec_b);
      fprintf (dump_file, ")\n");
    }

  /* Missing or undetermined chrecs: answer conservatively "not
     known".  Note that *last_conflicts is deliberately left untouched
     on this path.  */
  if (chrec_a == NULL_TREE
      || chrec_b == NULL_TREE
      || chrec_contains_undetermined (chrec_a)
      || chrec_contains_undetermined (chrec_b))
    {
      dependence_stats.num_subscript_undetermined++;

      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  /* If they are the same chrec, and are affine, they overlap
     on every iteration.  */
  else if (eq_evolutions_p (chrec_a, chrec_b)
	   && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || operand_equal_p (chrec_a, chrec_b, 0)))
    {
      dependence_stats.num_same_subscript_function++;
      *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
    }

  /* If they aren't the same, and aren't affine, we can't do anything
     yet.  */
  else if ((chrec_contains_symbols (chrec_a)
	    || chrec_contains_symbols (chrec_b))
	   && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
    {
      dependence_stats.num_subscript_undetermined++;
      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  /* Dispatch on the ZIV/SIV/MIV classification of the subscript pair.
     The order of the preceding tests is significant: the same-function
     and symbolic non-affine cases must be filtered out first.  */
  else if (ziv_subscript_p (chrec_a, chrec_b))
    analyze_ziv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts);

  else if (siv_subscript_p (chrec_a, chrec_b))
    analyze_siv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts, lnn);

  else
    analyze_miv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts, loop_nest);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  (overlap_iterations_a = ");
      dump_conflict_function (dump_file, *overlap_iterations_a);
      fprintf (dump_file, ")\n  (overlap_iterations_b = ");
      dump_conflict_function (dump_file, *overlap_iterations_b);
      fprintf (dump_file, "))\n");
    }
}
5031
5032 /* Helper function for uniquely inserting distance vectors. */
5033
5034 static void
5035 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
5036 {
5037 for (lambda_vector v : DDR_DIST_VECTS (ddr))
5038 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
5039 return;
5040
5041 DDR_DIST_VECTS (ddr).safe_push (dist_v);
5042 }
5043
5044 /* Helper function for uniquely inserting direction vectors. */
5045
5046 static void
5047 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
5048 {
5049 for (lambda_vector v : DDR_DIR_VECTS (ddr))
5050 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
5051 return;
5052
5053 DDR_DIR_VECTS (ddr).safe_push (dir_v);
5054 }
5055
5056 /* Add a distance of 1 on all the loops outer than INDEX. If we
5057 haven't yet determined a distance for this outer loop, push a new
5058 distance vector composed of the previous distance, and a distance
5059 of 1 for this outer loop. Example:
5060
5061 | loop_1
5062 | loop_2
5063 | A[10]
5064 | endloop_2
5065 | endloop_1
5066
5067 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
5068 save (0, 1), then we have to save (1, 0). */
5069
5070 static void
5071 add_outer_distances (struct data_dependence_relation *ddr,
5072 lambda_vector dist_v, int index)
5073 {
5074 /* For each outer loop where init_v is not set, the accesses are
5075 in dependence of distance 1 in the loop. */
5076 while (--index >= 0)
5077 {
5078 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5079 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5080 save_v[index] = 1;
5081 save_dist_v (ddr, save_v);
5082 }
5083 }
5084
/* Return false when fail to represent the data dependence as a
   distance vector.  A_INDEX is the index of the first reference
   (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
   second reference.  INIT_B is set to true when a component has been
   added to the distance vector DIST_V.  INDEX_CARRY is then set to
   the index in DIST_V that carries the dependence.  */

static bool
build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
			     unsigned int a_index, unsigned int b_index,
			     lambda_vector dist_v, bool *init_b,
			     int *index_carry)
{
  unsigned i;
  /* init_v[k] records whether dist_v[k] has been set by some
     subscript, for the coupling test below.  */
  lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
    {
      tree access_fn_a, access_fn_b;
      struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);

      /* An unknown distance on any subscript means the dependence is
	 not representable as a distance vector.  */
      if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	{
	  non_affine_dependence_relation (ddr);
	  return false;
	}

      access_fn_a = SUB_ACCESS_FN (subscript, a_index);
      access_fn_b = SUB_ACCESS_FN (subscript, b_index);

      if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
	  && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
	{
	  HOST_WIDE_INT dist;
	  int index;
	  int var_a = CHREC_VARIABLE (access_fn_a);
	  int var_b = CHREC_VARIABLE (access_fn_b);

	  /* Both access functions must evolve in the same loop.
	     NOTE(review): the second chrec_contains_undetermined test
	     here looks redundant with the one at the top of the loop
	     body — kept as defensive code.  */
	  if (var_a != var_b
	      || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	    {
	      non_affine_dependence_relation (ddr);
	      return false;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in loop nest nested in the loop, we
	     would have more number of access functions than number of
	     loops.  Skip access functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
	    continue;

	  dist = int_cst_value (SUB_DISTANCE (subscript));
	  index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
	  *index_carry = MIN (index, *index_carry);

	  /* This is the subscript coupling test.  If we have already
	     recorded a distance for this loop (a distance coming from
	     another subscript), it should be the same.  For example,
	     in the following code, there is no dependence:

	     | loop i = 0, N, 1
	     |   T[i+1][i] = ...
	     |   ... = T[i][i]
	     | endloop
	  */
	  if (init_v[index] != 0 && dist_v[index] != dist)
	    {
	      finalize_ddr_dependent (ddr, chrec_known);
	      return false;
	    }

	  dist_v[index] = dist;
	  init_v[index] = 1;
	  *init_b = true;
	}
      else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
	{
	  /* This can be for example an affine vs. constant dependence
	     (T[i] vs. T[3]) that is not an affine dependence and is
	     not representable as a distance vector.  */
	  non_affine_dependence_relation (ddr);
	  return false;
	}
    }

  return true;
}
5176
5177 /* Return true when the DDR contains only invariant access functions wrto. loop
5178 number LNUM. */
5179
5180 static bool
5181 invariant_access_functions (const struct data_dependence_relation *ddr,
5182 int lnum)
5183 {
5184 for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5185 if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
5186 || !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
5187 return false;
5188
5189 return true;
5190 }
5191
/* Helper function for the case where DDR_A and DDR_B are the same
   multivariate access function with a constant step.  C_2 is the
   two-level polynomial chrec {{c_0, +, step_1}_x1, +, step_2}_x2.
   For an example see pr34635-1.c.  */

static void
add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
{
  int x_1, x_2;
  tree c_1 = CHREC_LEFT (c_2);
  tree c_0 = CHREC_LEFT (c_1);
  lambda_vector dist_v;
  HOST_WIDE_INT v1, v2, cd;

  /* Polynomials with more than 2 variables are not handled yet.  When
     the evolution steps are parameters, it is not possible to
     represent the dependence using classical distance vectors.  */
  if (TREE_CODE (c_0) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
    {
      DDR_AFFINE_P (ddr) = false;
      return;
    }

  /* Map the chrec variables of both levels onto positions in the
     DDR's loop nest.  */
  x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
  x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));

  /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  v1 = int_cst_value (CHREC_RIGHT (c_1));
  v2 = int_cst_value (CHREC_RIGHT (c_2));
  /* Divide out the GCD so the components are the smallest integers
     with the same ratio.  */
  cd = gcd (v1, v2);
  v1 /= cd;
  v2 /= cd;

  /* Negate both components if needed so the component stored at
     position x_1 (that is, v2) is non-negative.  */
  if (v2 < 0)
    {
      v2 = -v2;
      v1 = -v1;
    }

  dist_v[x_1] = v2;
  dist_v[x_2] = -v1;
  save_dist_v (ddr, dist_v);

  add_outer_distances (ddr, dist_v, x_1);
}
5239
/* Helper function for the case where DDR_A and DDR_B are the same
   access functions.  Records the self-distances for loops outer than
   the innermost loop carried by any polynomial access function.  */

static void
add_other_self_distances (struct data_dependence_relation *ddr)
{
  lambda_vector dist_v;
  unsigned i;
  int index_carry = DDR_NB_LOOPS (ddr);
  subscript *sub;
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
    {
      /* Both sides have the same access function; inspect side 0.  */
      tree access_fun = SUB_ACCESS_FN (sub, 0);

      if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
	{
	  if (!evolution_function_is_univariate_p (access_fun, loop->num))
	    {
	      /* Multivariate self-dependences are only handled for a
		 single subscript; give up otherwise.  */
	      if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
		{
		  DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
		  return;
		}

	      access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);

	      if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
		add_multivariate_self_dist (ddr, access_fun);
	      else
		/* The evolution step is not constant: it varies in
		   the outer loop, so this cannot be represented by a
		   distance vector.  For example in pr34635.c the
		   evolution is {0, +, {0, +, 4}_1}_2.  */
		DDR_AFFINE_P (ddr) = false;

	      return;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in loop nest nested in the loop, we
	     would have more number of access functions than number of
	     loops.  Skip access functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
				  loop))
	    continue;

	  index_carry = MIN (index_carry,
			     index_in_loop_nest (CHREC_VARIABLE (access_fun),
						 DDR_LOOP_NEST (ddr)));
	}
    }

  /* All the loops outer than INDEX_CARRY carry the dependence with
     a distance of 1.  */
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  add_outer_distances (ddr, dist_v, index_carry);
}
5299
5300 static void
5301 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
5302 {
5303 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5304
5305 dist_v[0] = 1;
5306 save_dist_v (ddr, dist_v);
5307 }
5308
5309 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
5310 is the case for example when access functions are the same and
5311 equal to a constant, as in:
5312
5313 | loop_1
5314 | A[3] = ...
5315 | ... = A[3]
5316 | endloop_1
5317
5318 in which case the distance vectors are (0) and (1). */
5319
5320 static void
5321 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
5322 {
5323 unsigned i, j;
5324
5325 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5326 {
5327 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
5328 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
5329 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
5330
5331 for (j = 0; j < ca->n; j++)
5332 if (affine_function_zero_p (ca->fns[j]))
5333 {
5334 insert_innermost_unit_dist_vector (ddr);
5335 return;
5336 }
5337
5338 for (j = 0; j < cb->n; j++)
5339 if (affine_function_zero_p (cb->fns[j]))
5340 {
5341 insert_innermost_unit_dist_vector (ddr);
5342 return;
5343 }
5344 }
5345 }
5346
5347 /* Return true when the DDR contains two data references that have the
5348 same access functions. */
5349
5350 static inline bool
5351 same_access_functions (const struct data_dependence_relation *ddr)
5352 {
5353 for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5354 if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
5355 SUB_ACCESS_FN (sub, 1)))
5356 return false;
5357
5358 return true;
5359 }
5360
/* Compute the classic per loop distance vector.  DDR is the data
   dependence relation to build a vector from.  Return false when fail
   to represent the data dependence as a distance vector.  */

static bool
build_classic_dist_vector (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  bool init_b = false;
  int index_carry = DDR_NB_LOOPS (ddr);
  lambda_vector dist_v;

  /* The relation was already finalized (chrec_known or
     chrec_dont_know): no distance vector to build.  */
  if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
    return false;

  if (same_access_functions (ddr))
    {
      /* Save the 0 vector.  */
      dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
      save_dist_v (ddr, dist_v);

      if (invariant_access_functions (ddr, loop_nest->num))
	add_distance_for_zero_overlaps (ddr);

      if (DDR_NB_LOOPS (ddr) > 1)
	add_other_self_distances (ddr);

      return true;
    }

  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
    return false;

  /* Save the distance vector if we initialized one.  */
  if (init_b)
    {
      /* Verify a basic constraint: classic distance vectors should
	 always be lexicographically positive.

	 Data references are collected in the order of execution of
	 the program, thus for the following loop

	 | for (i = 1; i < 100; i++)
	 |   for (j = 1; j < 100; j++)
	 |     {
	 |       t = T[j+1][i-1];  // A
	 |       T[j][i] = t + 2;  // B
	 |     }

	 references are collected following the direction of the wind:
	 A then B.  The data dependence tests are performed also
	 following this order, such that we're looking at the distance
	 separating the elements accessed by A from the elements later
	 accessed by B.  But in this example, the distance returned by
	 test_dep (A, B) is lexicographically negative (-1, 1), that
	 means that the access A occurs later than B with respect to
	 the outer loop, ie. we're actually looking upwind.  In this
	 case we solve test_dep (B, A) looking downwind to the
	 lexicographically positive solution, that returns the
	 distance vector (1, -1).  */
      if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  /* Re-run the subscript test with the references swapped.  */
	  if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
	    return false;
	  compute_subscript_distance (ddr);
	  if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
					    &index_carry))
	    return false;
	  save_dist_v (ddr, save_v);
	  DDR_REVERSED_P (ddr) = true;

	  /* In this case there is a dependence forward for all the
	     outer loops:

	     | for (k = 1; k < 100; k++)
	     |  for (i = 1; i < 100; i++)
	     |   for (j = 1; j < 100; j++)
	     |     {
	     |       t = T[j+1][i-1];  // A
	     |       T[j][i] = t + 2;  // B
	     |     }

	     the vectors are:
	     (0,  1, -1)
	     (1,  1, -1)
	     (1, -1,  1)
	  */
	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      add_outer_distances (ddr, save_v, index_carry);
	      add_outer_distances (ddr, dist_v, index_carry);
	    }
	}
      else
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));

	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

	      /* Also analyze the reversed pair (B, A): its distance
		 seeds the outer-loop distances below.  */
	      if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
		return false;
	      compute_subscript_distance (ddr);
	      if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
						&index_carry))
		return false;

	      save_dist_v (ddr, save_v);
	      add_outer_distances (ddr, dist_v, index_carry);
	      add_outer_distances (ddr, opposite_v, index_carry);
	    }
	  else
	    save_dist_v (ddr, save_v);
	}
    }
  else
    {
      /* There is a distance of 1 on all the outer loops: Example:
	 there is a dependence of distance 1 on loop_1 for the array A.

	 | loop_1
	 |   A[5] = ...
	 | endloop
      */
      add_outer_distances (ddr, dist_v,
			   lambda_vector_first_nz (dist_v,
						   DDR_NB_LOOPS (ddr), 0));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;

      fprintf (dump_file, "(build_classic_dist_vector\n");
      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	{
	  fprintf (dump_file, "  dist_vector = (");
	  print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
			       DDR_NB_LOOPS (ddr));
	  fprintf (dump_file, "  )\n");
	}
      fprintf (dump_file, ")\n");
    }

  return true;
}
5511
5512 /* Return the direction for a given distance.
5513 FIXME: Computing dir this way is suboptimal, since dir can catch
5514 cases that dist is unable to represent. */
5515
5516 static inline enum data_dependence_direction
5517 dir_from_dist (int dist)
5518 {
5519 if (dist > 0)
5520 return dir_positive;
5521 else if (dist < 0)
5522 return dir_negative;
5523 else
5524 return dir_equal;
5525 }
5526
5527 /* Compute the classic per loop direction vector. DDR is the data
5528 dependence relation to build a vector from. */
5529
5530 static void
5531 build_classic_dir_vector (struct data_dependence_relation *ddr)
5532 {
5533 unsigned i, j;
5534 lambda_vector dist_v;
5535
5536 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
5537 {
5538 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5539
5540 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
5541 dir_v[j] = dir_from_dist (dist_v[j]);
5542
5543 save_dir_v (ddr, dir_v);
5544 }
5545 }
5546
/* Helper function.  Returns true when there is a dependence between the
   data references.  A_INDEX is the index of the first reference (0 for
   DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.
   As a side effect, stores the conflict functions and last conflict
   count into each subscript of DDR, and finalizes DDR when independence
   or an undetermined result is found.  */

static bool
subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
			       unsigned int a_index, unsigned int b_index,
			       class loop *loop_nest)
{
  unsigned int i;
  tree last_conflicts;
  struct subscript *subscript;
  /* NULL_TREE while every subscript so far has a known, nonempty
     conflict set; chrec_dont_know or chrec_known otherwise.  */
  tree res = NULL_TREE;

  for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
    {
      conflict_function *overlaps_a, *overlaps_b;

      analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
				      SUB_ACCESS_FN (subscript, b_index),
				      &overlaps_a, &overlaps_b,
				      &last_conflicts, loop_nest);

      /* Release conflict functions left over from a previous analysis
	 of this subscript before storing the new ones.  */
      if (SUB_CONFLICTS_IN_A (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
      if (SUB_CONFLICTS_IN_B (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));

      SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
      SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
      SUB_LAST_CONFLICT (subscript) = last_conflicts;

      /* If there is any undetermined conflict function we have to
	 give a conservative answer in case we cannot prove that
	 no dependence exists when analyzing another subscript.  */
      if (CF_NOT_KNOWN_P (overlaps_a)
	  || CF_NOT_KNOWN_P (overlaps_b))
	{
	  res = chrec_dont_know;
	  continue;
	}

      /* When there is a subscript with no dependence we can stop.  */
      else if (CF_NO_DEPENDENCE_P (overlaps_a)
	       || CF_NO_DEPENDENCE_P (overlaps_b))
	{
	  res = chrec_known;
	  break;
	}
    }

  if (res == NULL_TREE)
    return true;

  if (res == chrec_known)
    dependence_stats.num_dependence_independent++;
  else
    dependence_stats.num_dependence_undetermined++;
  finalize_ddr_dependent (ddr, res);
  return false;
}
5608
5609 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
5610
5611 static void
5612 subscript_dependence_tester (struct data_dependence_relation *ddr,
5613 class loop *loop_nest)
5614 {
5615 if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
5616 dependence_stats.num_dependence_dependent++;
5617
5618 compute_subscript_distance (ddr);
5619 if (build_classic_dist_vector (ddr, loop_nest))
5620 build_classic_dir_vector (ddr);
5621 }
5622
5623 /* Returns true when all the access functions of A are affine or
5624 constant with respect to LOOP_NEST. */
5625
5626 static bool
5627 access_functions_are_affine_or_constant_p (const struct data_reference *a,
5628 const class loop *loop_nest)
5629 {
5630 vec<tree> fns = DR_ACCESS_FNS (a);
5631 for (tree t : fns)
5632 if (!evolution_function_is_invariant_p (t, loop_nest->num)
5633 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
5634 return false;
5635
5636 return true;
5637 }
5638
/* This computes the affine dependence relation between A and B with
   respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
   independence between two accesses, while CHREC_DONT_KNOW is used
   for representing the unknown relation.

   Note that it is possible to stop the computation of the dependence
   relation the first time we detect a CHREC_KNOWN element for a given
   subscript.  */

void
compute_affine_dependence (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(compute_affine_dependence\n");
      fprintf (dump_file, "  ref_a: ");
      print_generic_expr (dump_file, DR_REF (dra));
      fprintf (dump_file, ", stmt_a: ");
      print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
      fprintf (dump_file, "  ref_b: ");
      print_generic_expr (dump_file, DR_REF (drb));
      fprintf (dump_file, ", stmt_b: ");
      print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
    }

  /* Analyze only when the dependence relation is not yet known.  */
  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      dependence_stats.num_dependence_tests++;

      /* The subscript tests only apply when both references have
	 access functions that are affine or invariant in the nest.  */
      if (access_functions_are_affine_or_constant_p (dra, loop_nest)
	  && access_functions_are_affine_or_constant_p (drb, loop_nest))
	subscript_dependence_tester (ddr, loop_nest);

      /* As a last case, if the dependence cannot be determined, or if
	 the dependence is considered too difficult to determine, answer
	 "don't know".  */
      else
	{
	  dependence_stats.num_dependence_undetermined++;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Data ref a:\n");
	      dump_data_reference (dump_file, dra);
	      fprintf (dump_file, "Data ref b:\n");
	      dump_data_reference (dump_file, drb);
	      fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
	    }
	  finalize_ddr_dependent (ddr, chrec_dont_know);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	fprintf (dump_file, ") -> no dependence\n");
      else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	fprintf (dump_file, ") -> dependence analysis failed\n");
      else
	fprintf (dump_file, ")\n");
    }
}
5706
5707 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
5708 the data references in DATAREFS, in the LOOP_NEST. When
5709 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
5710 relations. Return true when successful, i.e. data references number
5711 is small enough to be handled. */
5712
5713 bool
5714 compute_all_dependences (const vec<data_reference_p> &datarefs,
5715 vec<ddr_p> *dependence_relations,
5716 const vec<loop_p> &loop_nest,
5717 bool compute_self_and_rr)
5718 {
5719 struct data_dependence_relation *ddr;
5720 struct data_reference *a, *b;
5721 unsigned int i, j;
5722
5723 if ((int) datarefs.length ()
5724 > param_loop_max_datarefs_for_datadeps)
5725 {
5726 struct data_dependence_relation *ddr;
5727
5728 /* Insert a single relation into dependence_relations:
5729 chrec_dont_know. */
5730 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
5731 dependence_relations->safe_push (ddr);
5732 return false;
5733 }
5734
5735 FOR_EACH_VEC_ELT (datarefs, i, a)
5736 for (j = i + 1; datarefs.iterate (j, &b); j++)
5737 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
5738 {
5739 ddr = initialize_data_dependence_relation (a, b, loop_nest);
5740 dependence_relations->safe_push (ddr);
5741 if (loop_nest.exists ())
5742 compute_affine_dependence (ddr, loop_nest[0]);
5743 }
5744
5745 if (compute_self_and_rr)
5746 FOR_EACH_VEC_ELT (datarefs, i, a)
5747 {
5748 ddr = initialize_data_dependence_relation (a, a, loop_nest);
5749 dependence_relations->safe_push (ddr);
5750 if (loop_nest.exists ())
5751 compute_affine_dependence (ddr, loop_nest[0]);
5752 }
5753
5754 return true;
5755 }
5756
/* Describes a location of a memory reference found in a statement;
   used by get_references_in_stmt and its callers.  */

struct data_ref_loc
{
  /* The memory reference.  */
  tree ref;

  /* True if the memory reference is read.  */
  bool is_read;

  /* True if the data reference is conditional within the containing
     statement, i.e. if it might not occur even when the statement
     is executed and runs to completion.  */
  bool is_conditional_in_stmt;
};
5772
5773
/* Stores the locations of memory references in STMT to REFERENCES.  Returns
   true if STMT clobbers memory, false otherwise.  */

static bool
get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
{
  bool clobbers_memory = false;
  data_ref_loc ref;
  tree op0, op1;
  enum gimple_code stmt_code = gimple_code (stmt);

  /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
     As we cannot model data-references to not spelled out
     accesses give up if they may occur.  */
  if (stmt_code == GIMPLE_CALL
      && !(gimple_call_flags (stmt) & ECF_CONST))
    {
      /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_SIMD_LANE:
	    {
	      class loop *loop = gimple_bb (stmt)->loop_father;
	      tree uid = gimple_call_arg (stmt, 0);
	      gcc_assert (TREE_CODE (uid) == SSA_NAME);
	      /* Only treated as non-clobbering within the loop whose
		 simduid matches the call's first argument.  */
	      if (loop == NULL
		  || loop->simduid != SSA_NAME_VAR (uid))
		clobbers_memory = true;
	      break;
	    }
	  case IFN_MASK_LOAD:
	  case IFN_MASK_STORE:
	    /* Handled explicitly as data references below.  */
	    break;
	  default:
	    clobbers_memory = true;
	    break;
	  }
      else
	clobbers_memory = true;
    }
  else if (stmt_code == GIMPLE_ASM
	   && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
	       || gimple_vuse (stmt)))
    clobbers_memory = true;

  /* No virtual use: the statement does not reference memory.  */
  if (!gimple_vuse (stmt))
    return clobbers_memory;

  if (stmt_code == GIMPLE_ASSIGN)
    {
      tree base;
      op0 = gimple_assign_lhs (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      /* Record the RHS read when it is a declaration or a reference
	 with an analyzable base (not an SSA name or invariant
	 address).  */
      if (DECL_P (op1)
	  || (REFERENCE_CLASS_P (op1)
	      && (base = get_base_address (op1))
	      && TREE_CODE (base) != SSA_NAME
	      && !is_gimple_min_invariant (base)))
	{
	  ref.ref = op1;
	  ref.is_read = true;
	  ref.is_conditional_in_stmt = false;
	  references->safe_push (ref);
	}
    }
  else if (stmt_code == GIMPLE_CALL)
    {
      unsigned i, n;
      tree ptr, type;
      unsigned int align;

      ref.is_read = false;
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_LOAD:
	    if (gimple_call_lhs (stmt) == NULL_TREE)
	      break;
	    ref.is_read = true;
	    /* FALLTHRU */
	  case IFN_MASK_STORE:
	    /* Rebuild the accessed memory reference from the call's
	       base pointer (arg 0) and alignment (arg 1).  */
	    ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
	    align = tree_to_shwi (gimple_call_arg (stmt, 1));
	    if (ref.is_read)
	      type = TREE_TYPE (gimple_call_lhs (stmt));
	    else
	      type = TREE_TYPE (gimple_call_arg (stmt, 3));
	    if (TYPE_ALIGN (type) != align)
	      type = build_aligned_type (type, align);
	    /* A masked access may not happen even when the statement
	       executes, hence is_conditional_in_stmt.  */
	    ref.is_conditional_in_stmt = true;
	    ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
				   ptr);
	    references->safe_push (ref);
	    return false;
	  default:
	    break;
	  }

      /* Record reads through the call's by-reference arguments.  */
      op0 = gimple_call_lhs (stmt);
      n = gimple_call_num_args (stmt);
      for (i = 0; i < n; i++)
	{
	  op1 = gimple_call_arg (stmt, i);

	  if (DECL_P (op1)
	      || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
	    {
	      ref.ref = op1;
	      ref.is_read = true;
	      ref.is_conditional_in_stmt = false;
	      references->safe_push (ref);
	    }
	}
    }
  else
    return clobbers_memory;

  /* Record the store through the LHS (assignment or call result).  */
  if (op0
      && (DECL_P (op0)
	  || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
    {
      ref.ref = op0;
      ref.is_read = false;
      ref.is_conditional_in_stmt = false;
      references->safe_push (ref);
    }
  return clobbers_memory;
}
5904
5905
5906 /* Returns true if the loop-nest has any data reference. */
5907
5908 bool
5909 loop_nest_has_data_refs (loop_p loop)
5910 {
5911 basic_block *bbs = get_loop_body (loop);
5912 auto_vec<data_ref_loc, 3> references;
5913
5914 for (unsigned i = 0; i < loop->num_nodes; i++)
5915 {
5916 basic_block bb = bbs[i];
5917 gimple_stmt_iterator bsi;
5918
5919 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5920 {
5921 gimple *stmt = gsi_stmt (bsi);
5922 get_references_in_stmt (stmt, &references);
5923 if (references.length ())
5924 {
5925 free (bbs);
5926 return true;
5927 }
5928 }
5929 }
5930 free (bbs);
5931 return false;
5932 }
5933
/* Stores the data references in STMT to DATAREFS.  NEST is the outermost
   loop of the loop nest in which the references should be analyzed.

   Returns opt_result::failure_at when STMT clobbers memory in a way
   that makes its references unanalyzable, and opt_result::success
   otherwise.  */

opt_result
find_data_references_in_stmt (class loop *nest, gimple *stmt,
			      vec<data_reference_p> *datarefs)
{
  auto_vec<data_ref_loc, 2> references;
  data_reference_p dr;

  /* get_references_in_stmt returns nonzero when the statement clobbers
     memory; such statements cannot be represented as data references.  */
  if (get_references_in_stmt (stmt, &references))
    return opt_result::failure_at (stmt, "statement clobbers memory: %G",
				   stmt);

  for (const data_ref_loc &ref : references)
    {
      /* With a loop nest, analyze relative to the nest's preheader edge;
	 otherwise analyze only within the loop containing STMT.  */
      dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
			    loop_containing_stmt (stmt), ref.ref,
			    stmt, ref.is_read, ref.is_conditional_in_stmt);
      gcc_assert (dr != NULL);
      datarefs->safe_push (dr);
    }

  return opt_result::success ();
}
5960
5961 /* Stores the data references in STMT to DATAREFS. If there is an
5962 unanalyzable reference, returns false, otherwise returns true.
5963 NEST is the outermost loop of the loop nest in which the references
5964 should be instantiated, LOOP is the loop in which the references
5965 should be analyzed. */
5966
5967 bool
5968 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5969 vec<data_reference_p> *datarefs)
5970 {
5971 auto_vec<data_ref_loc, 2> references;
5972 bool ret = true;
5973 data_reference_p dr;
5974
5975 if (get_references_in_stmt (stmt, &references))
5976 return false;
5977
5978 for (const data_ref_loc &ref : references)
5979 {
5980 dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
5981 ref.is_conditional_in_stmt);
5982 gcc_assert (dr != NULL);
5983 datarefs->safe_push (dr);
5984 }
5985
5986 return ret;
5987 }
5988
/* Search the data references in basic block BB of LOOP, and record the
   information into DATAREFS.  Returns chrec_dont_know when failing to
   analyze a difficult case, returns NULL_TREE otherwise.  */

tree
find_data_references_in_bb (class loop *loop, basic_block bb,
			    vec<data_reference_p> *datarefs)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple *stmt = gsi_stmt (bsi);

      if (!find_data_references_in_stmt (loop, stmt, datarefs))
	{
	  /* NOTE(review): a zero-initialized dummy data reference is
	     pushed on failure — presumably so callers can detect the
	     unanalyzable statement when walking DATAREFS; confirm
	     against callers before relying on this.  */
	  struct data_reference *res;
	  res = XCNEW (struct data_reference);
	  datarefs->safe_push (res);

	  return chrec_dont_know;
	}
    }

  return NULL_TREE;
}
6015
6016 /* Search the data references in LOOP, and record the information into
6017 DATAREFS. Returns chrec_dont_know when failing to analyze a
6018 difficult case, returns NULL_TREE otherwise.
6019
6020 TODO: This function should be made smarter so that it can handle address
6021 arithmetic as if they were array accesses, etc. */
6022
6023 tree
6024 find_data_references_in_loop (class loop *loop,
6025 vec<data_reference_p> *datarefs)
6026 {
6027 basic_block bb, *bbs;
6028 unsigned int i;
6029
6030 bbs = get_loop_body_in_dom_order (loop);
6031
6032 for (i = 0; i < loop->num_nodes; i++)
6033 {
6034 bb = bbs[i];
6035
6036 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
6037 {
6038 free (bbs);
6039 return chrec_dont_know;
6040 }
6041 }
6042 free (bbs);
6043
6044 return NULL_TREE;
6045 }
6046
6047 /* Return the alignment in bytes that DRB is guaranteed to have at all
6048 times. */
6049
6050 unsigned int
6051 dr_alignment (innermost_loop_behavior *drb)
6052 {
6053 /* Get the alignment of BASE_ADDRESS + INIT. */
6054 unsigned int alignment = drb->base_alignment;
6055 unsigned int misalignment = (drb->base_misalignment
6056 + TREE_INT_CST_LOW (drb->init));
6057 if (misalignment != 0)
6058 alignment = MIN (alignment, misalignment & -misalignment);
6059
6060 /* Cap it to the alignment of OFFSET. */
6061 if (!integer_zerop (drb->offset))
6062 alignment = MIN (alignment, drb->offset_alignment);
6063
6064 /* Cap it to the alignment of STEP. */
6065 if (!integer_zerop (drb->step))
6066 alignment = MIN (alignment, drb->step_alignment);
6067
6068 return alignment;
6069 }
6070
/* If BASE is a pointer-typed SSA name, try to find the object that it
   is based on.  Return this object X on success and store the alignment
   in bytes of BASE - &X in *ALIGNMENT_OUT.  Return NULL_TREE when no
   such object can be determined.  */

static tree
get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
{
  /* Only pointer-typed SSA names can be traced through scalar evolution.  */
  if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  gimple *def = SSA_NAME_DEF_STMT (base);
  base = analyze_scalar_evolution (loop_containing_stmt (def), base);

  /* Peel chrecs and record the minimum alignment preserved by
     all steps.  */
  unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  while (TREE_CODE (base) == POLYNOMIAL_CHREC)
    {
      unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
      alignment = MIN (alignment, step_alignment);
      base = CHREC_LEFT (base);
    }

  /* Punt if the expression is too complicated to handle.  */
  if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  /* The only useful cases are those for which a dereference folds to something
     other than an INDIRECT_REF.  */
  tree ref_type = TREE_TYPE (TREE_TYPE (base));
  tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
  if (!ref)
    return NULL_TREE;

  /* Analyze the base to which the steps we peeled were applied.  */
  poly_int64 bitsize, bitpos, bytepos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset;
  base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
			      &unsignedp, &reversep, &volatilep);
  /* Punt on bit positions that are not a whole number of bytes.  */
  if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
    return NULL_TREE;

  /* Restrict the alignment to that guaranteed by the offsets.  */
  unsigned int bytepos_alignment = known_alignment (bytepos);
  if (bytepos_alignment != 0)
    alignment = MIN (alignment, bytepos_alignment);
  if (offset)
    {
      unsigned int offset_alignment = highest_pow2_factor (offset);
      alignment = MIN (alignment, offset_alignment);
    }

  *alignment_out = alignment;
  return base;
}
6128
6129 /* Return the object whose alignment would need to be changed in order
6130 to increase the alignment of ADDR. Store the maximum achievable
6131 alignment in *MAX_ALIGNMENT. */
6132
6133 tree
6134 get_base_for_alignment (tree addr, unsigned int *max_alignment)
6135 {
6136 tree base = get_base_for_alignment_1 (addr, max_alignment);
6137 if (base)
6138 return base;
6139
6140 if (TREE_CODE (addr) == ADDR_EXPR)
6141 addr = TREE_OPERAND (addr, 0);
6142 *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6143 return addr;
6144 }
6145
6146 /* Recursive helper function. */
6147
6148 static bool
6149 find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
6150 {
6151 /* Inner loops of the nest should not contain siblings. Example:
6152 when there are two consecutive loops,
6153
6154 | loop_0
6155 | loop_1
6156 | A[{0, +, 1}_1]
6157 | endloop_1
6158 | loop_2
6159 | A[{0, +, 1}_2]
6160 | endloop_2
6161 | endloop_0
6162
6163 the dependence relation cannot be captured by the distance
6164 abstraction. */
6165 if (loop->next)
6166 return false;
6167
6168 loop_nest->safe_push (loop);
6169 if (loop->inner)
6170 return find_loop_nest_1 (loop->inner, loop_nest);
6171 return true;
6172 }
6173
6174 /* Return false when the LOOP is not well nested. Otherwise return
6175 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
6176 contain the loops from the outermost to the innermost, as they will
6177 appear in the classic distance vector. */
6178
6179 bool
6180 find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
6181 {
6182 loop_nest->safe_push (loop);
6183 if (loop->inner)
6184 return find_loop_nest_1 (loop->inner, loop_nest);
6185 return true;
6186 }
6187
6188 /* Returns true when the data dependences have been computed, false otherwise.
6189 Given a loop nest LOOP, the following vectors are returned:
6190 DATAREFS is initialized to all the array elements contained in this loop,
6191 DEPENDENCE_RELATIONS contains the relations between the data references.
6192 Compute read-read and self relations if
6193 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
6194
6195 bool
6196 compute_data_dependences_for_loop (class loop *loop,
6197 bool compute_self_and_read_read_dependences,
6198 vec<loop_p> *loop_nest,
6199 vec<data_reference_p> *datarefs,
6200 vec<ddr_p> *dependence_relations)
6201 {
6202 bool res = true;
6203
6204 memset (&dependence_stats, 0, sizeof (dependence_stats));
6205
6206 /* If the loop nest is not well formed, or one of the data references
6207 is not computable, give up without spending time to compute other
6208 dependences. */
6209 if (!loop
6210 || !find_loop_nest (loop, loop_nest)
6211 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
6212 || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
6213 compute_self_and_read_read_dependences))
6214 res = false;
6215
6216 if (dump_file && (dump_flags & TDF_STATS))
6217 {
6218 fprintf (dump_file, "Dependence tester statistics:\n");
6219
6220 fprintf (dump_file, "Number of dependence tests: %d\n",
6221 dependence_stats.num_dependence_tests);
6222 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
6223 dependence_stats.num_dependence_dependent);
6224 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
6225 dependence_stats.num_dependence_independent);
6226 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
6227 dependence_stats.num_dependence_undetermined);
6228
6229 fprintf (dump_file, "Number of subscript tests: %d\n",
6230 dependence_stats.num_subscript_tests);
6231 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
6232 dependence_stats.num_subscript_undetermined);
6233 fprintf (dump_file, "Number of same subscript function: %d\n",
6234 dependence_stats.num_same_subscript_function);
6235
6236 fprintf (dump_file, "Number of ziv tests: %d\n",
6237 dependence_stats.num_ziv);
6238 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
6239 dependence_stats.num_ziv_dependent);
6240 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
6241 dependence_stats.num_ziv_independent);
6242 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
6243 dependence_stats.num_ziv_unimplemented);
6244
6245 fprintf (dump_file, "Number of siv tests: %d\n",
6246 dependence_stats.num_siv);
6247 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
6248 dependence_stats.num_siv_dependent);
6249 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
6250 dependence_stats.num_siv_independent);
6251 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
6252 dependence_stats.num_siv_unimplemented);
6253
6254 fprintf (dump_file, "Number of miv tests: %d\n",
6255 dependence_stats.num_miv);
6256 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
6257 dependence_stats.num_miv_dependent);
6258 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
6259 dependence_stats.num_miv_independent);
6260 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
6261 dependence_stats.num_miv_unimplemented);
6262 }
6263
6264 return res;
6265 }
6266
6267 /* Free the memory used by a data dependence relation DDR. */
6268
6269 void
6270 free_dependence_relation (struct data_dependence_relation *ddr)
6271 {
6272 if (ddr == NULL)
6273 return;
6274
6275 if (DDR_SUBSCRIPTS (ddr).exists ())
6276 free_subscripts (DDR_SUBSCRIPTS (ddr));
6277 DDR_DIST_VECTS (ddr).release ();
6278 DDR_DIR_VECTS (ddr).release ();
6279
6280 free (ddr);
6281 }
6282
6283 /* Free the memory used by the data dependence relations from
6284 DEPENDENCE_RELATIONS. */
6285
6286 void
6287 free_dependence_relations (vec<ddr_p>& dependence_relations)
6288 {
6289 for (data_dependence_relation *ddr : dependence_relations)
6290 if (ddr)
6291 free_dependence_relation (ddr);
6292
6293 dependence_relations.release ();
6294 }
6295
6296 /* Free the memory used by the data references from DATAREFS. */
6297
6298 void
6299 free_data_refs (vec<data_reference_p>& datarefs)
6300 {
6301 for (data_reference *dr : datarefs)
6302 free_data_ref (dr);
6303 datarefs.release ();
6304 }
6305
/* Common routine implementing both dr_direction_indicator and
   dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
   to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
   Return the step as the indicator otherwise.  NULL_TREE is returned
   when DR has no step at all.  */

static tree
dr_step_indicator (struct data_reference *dr, int useful_min)
{
  tree step = DR_STEP (dr);
  if (!step)
    return NULL_TREE;
  STRIP_NOPS (step);
  /* Look for cases where the step is scaled by a positive constant
     integer, which will often be the access size.  If the multiplication
     doesn't change the sign (due to overflow effects) then we can
     test the unscaled value instead.  */
  if (TREE_CODE (step) == MULT_EXPR
      && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
      && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
    {
      tree factor = TREE_OPERAND (step, 1);
      step = TREE_OPERAND (step, 0);

      /* Strip widening and truncating conversions as well as nops.  */
      if (CONVERT_EXPR_P (step)
	  && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
	step = TREE_OPERAND (step, 0);
      tree type = TREE_TYPE (step);

      /* Get the range of step values that would not cause overflow.
	 Dividing the ssizetype extremes by FACTOR gives the bounds on
	 the unscaled step for which STEP * FACTOR stays representable.  */
      widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
			 / wi::to_widest (factor));
      widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
			 / wi::to_widest (factor));

      /* Get the range of values that the unconverted step actually has.  */
      wide_int step_min, step_max;
      value_range vr;
      if (TREE_CODE (step) != SSA_NAME
	  || !get_range_query (cfun)->range_of_expr (vr, step)
	  || vr.kind () != VR_RANGE)
	{
	  /* No usable range info: conservatively assume the full range
	     of the step's type.  */
	  step_min = wi::to_wide (TYPE_MIN_VALUE (type));
	  step_max = wi::to_wide (TYPE_MAX_VALUE (type));
	}
      else
	{
	  step_min = vr.lower_bound ();
	  step_max = vr.upper_bound ();
	}

      /* Check whether the unconverted step has an acceptable range.  */
      signop sgn = TYPE_SIGN (type);
      if (wi::les_p (minv, widest_int::from (step_min, sgn))
	  && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
	{
	  /* The sign of the unscaled step is known: collapse it to the
	     requested constant where possible.  */
	  if (wi::ge_p (step_min, useful_min, sgn))
	    return ssize_int (useful_min);
	  else if (wi::lt_p (step_max, 0, sgn))
	    return ssize_int (-1);
	  else
	    return fold_convert (ssizetype, step);
	}
    }
  return DR_STEP (dr);
}
6372
/* Return a value that is negative iff DR has a negative step.  This is
   dr_step_indicator with USEFUL_MIN == 0, so a step known to be
   nonnegative is collapsed to the constant 0.  */

tree
dr_direction_indicator (struct data_reference *dr)
{
  return dr_step_indicator (dr, 0);
}
6380
/* Return a value that is zero iff DR has a zero step.  This is
   dr_step_indicator with USEFUL_MIN == 1, so a step known to be
   positive is collapsed to the constant 1.  */

tree
dr_zero_step_indicator (struct data_reference *dr)
{
  return dr_step_indicator (dr, 1);
}
6388
6389 /* Return true if DR is known to have a nonnegative (but possibly zero)
6390 step. */
6391
6392 bool
6393 dr_known_forward_stride_p (struct data_reference *dr)
6394 {
6395 tree indicator = dr_direction_indicator (dr);
6396 tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
6397 fold_convert (ssizetype, indicator),
6398 ssize_int (0));
6399 return neg_step_val && integer_zerop (neg_step_val);
6400 }
6401