Home | History | Annotate | Line # | Download | only in gcc
      1  1.1  mrg /* Data references and dependences detectors.
      2  1.1  mrg    Copyright (C) 2003-2022 Free Software Foundation, Inc.
      3  1.1  mrg    Contributed by Sebastian Pop <pop (at) cri.ensmp.fr>
      4  1.1  mrg 
      5  1.1  mrg This file is part of GCC.
      6  1.1  mrg 
      7  1.1  mrg GCC is free software; you can redistribute it and/or modify it under
      8  1.1  mrg the terms of the GNU General Public License as published by the Free
      9  1.1  mrg Software Foundation; either version 3, or (at your option) any later
     10  1.1  mrg version.
     11  1.1  mrg 
     12  1.1  mrg GCC is distributed in the hope that it will be useful, but WITHOUT ANY
     13  1.1  mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or
     14  1.1  mrg FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     15  1.1  mrg for more details.
     16  1.1  mrg 
     17  1.1  mrg You should have received a copy of the GNU General Public License
     18  1.1  mrg along with GCC; see the file COPYING3.  If not see
     19  1.1  mrg <http://www.gnu.org/licenses/>.  */
     20  1.1  mrg 
     21  1.1  mrg /* This pass walks a given loop structure searching for array
     22  1.1  mrg    references.  The information about the array accesses is recorded
     23  1.1  mrg    in DATA_REFERENCE structures.
     24  1.1  mrg 
     25  1.1  mrg    The basic test for determining the dependences is:
     26  1.1  mrg    given two access functions chrec1 and chrec2 to a same array, and
     27  1.1  mrg    x and y two vectors from the iteration domain, the same element of
     28  1.1  mrg    the array is accessed twice at iterations x and y if and only if:
     29  1.1  mrg    |             chrec1 (x) == chrec2 (y).
     30  1.1  mrg 
     31  1.1  mrg    The goals of this analysis are:
     32  1.1  mrg 
     33  1.1  mrg    - to determine the independence: the relation between two
     34  1.1  mrg      independent accesses is qualified with the chrec_known (this
     35  1.1  mrg      information allows a loop parallelization),
     36  1.1  mrg 
     37  1.1  mrg    - when two data references access the same data, to qualify the
     38  1.1  mrg      dependence relation with classic dependence representations:
     39  1.1  mrg 
     40  1.1  mrg        - distance vectors
     41  1.1  mrg        - direction vectors
     42  1.1  mrg        - loop carried level dependence
     43  1.1  mrg        - polyhedron dependence
     44  1.1  mrg      or with the chains of recurrences based representation,
     45  1.1  mrg 
     46  1.1  mrg    - to define a knowledge base for storing the data dependence
     47  1.1  mrg      information,
     48  1.1  mrg 
     49  1.1  mrg    - to define an interface to access this data.
     50  1.1  mrg 
     51  1.1  mrg 
     52  1.1  mrg    Definitions:
     53  1.1  mrg 
     54  1.1  mrg    - subscript: given two array accesses a subscript is the tuple
     55  1.1  mrg    composed of the access functions for a given dimension.  Example:
     56  1.1  mrg    Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
     57  1.1  mrg    (f1, g1), (f2, g2), (f3, g3).
     58  1.1  mrg 
     59  1.1  mrg    - Diophantine equation: an equation whose coefficients and
     60  1.1  mrg    solutions are integer constants, for example the equation
     61  1.1  mrg    |   3*x + 2*y = 1
     62  1.1  mrg    has an integer solution x = 1 and y = -1.
     63  1.1  mrg 
     64  1.1  mrg    References:
     65  1.1  mrg 
     66  1.1  mrg    - "Advanced Compilation for High Performance Computing" by Randy
     67  1.1  mrg    Allen and Ken Kennedy.
     68  1.1  mrg    http://citeseer.ist.psu.edu/goff91practical.html
     69  1.1  mrg 
     70  1.1  mrg    - "Loop Transformations for Restructuring Compilers - The Foundations"
     71  1.1  mrg    by Utpal Banerjee.
     72  1.1  mrg 
     73  1.1  mrg 
     74  1.1  mrg */
     75  1.1  mrg 
     76  1.1  mrg #define INCLUDE_ALGORITHM
     77  1.1  mrg #include "config.h"
     78  1.1  mrg #include "system.h"
     79  1.1  mrg #include "coretypes.h"
     80  1.1  mrg #include "backend.h"
     81  1.1  mrg #include "rtl.h"
     82  1.1  mrg #include "tree.h"
     83  1.1  mrg #include "gimple.h"
     84  1.1  mrg #include "gimple-pretty-print.h"
     85  1.1  mrg #include "alias.h"
     86  1.1  mrg #include "fold-const.h"
     87  1.1  mrg #include "expr.h"
     88  1.1  mrg #include "gimple-iterator.h"
     89  1.1  mrg #include "tree-ssa-loop-niter.h"
     90  1.1  mrg #include "tree-ssa-loop.h"
     91  1.1  mrg #include "tree-ssa.h"
     92  1.1  mrg #include "cfgloop.h"
     93  1.1  mrg #include "tree-data-ref.h"
     94  1.1  mrg #include "tree-scalar-evolution.h"
     95  1.1  mrg #include "dumpfile.h"
     96  1.1  mrg #include "tree-affine.h"
     97  1.1  mrg #include "builtins.h"
     98  1.1  mrg #include "tree-eh.h"
     99  1.1  mrg #include "ssa.h"
    100  1.1  mrg #include "internal-fn.h"
    101  1.1  mrg #include "vr-values.h"
    102  1.1  mrg #include "range-op.h"
    103  1.1  mrg #include "tree-ssa-loop-ivopts.h"
    104  1.1  mrg 
/* Counters summarizing the work done by the dependence analyzer,
   broken down by the kind of subscript test (ZIV/SIV/MIV) and by
   outcome.  Filled in as tests run and dumped with the pass details.  */
static struct datadep_stats
{
  /* Whole-relation outcomes.  */
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  /* Per-subscript outcomes.  */
  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  /* Zero Index Variable tests.  */
  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  /* Single Index Variable tests.  */
  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  /* Multiple Index Variable tests.  */
  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;
    131  1.1  mrg 
    132  1.1  mrg static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
    133  1.1  mrg 					   unsigned int, unsigned int,
    134  1.1  mrg 					   class loop *);
    135  1.1  mrg /* Returns true iff A divides B.  */
    136  1.1  mrg 
    137  1.1  mrg static inline bool
    138  1.1  mrg tree_fold_divides_p (const_tree a, const_tree b)
    139  1.1  mrg {
    140  1.1  mrg   gcc_assert (TREE_CODE (a) == INTEGER_CST);
    141  1.1  mrg   gcc_assert (TREE_CODE (b) == INTEGER_CST);
    142  1.1  mrg   return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
    143  1.1  mrg }
    144  1.1  mrg 
    145  1.1  mrg /* Returns true iff A divides B.  */
    146  1.1  mrg 
    147  1.1  mrg static inline bool
    148  1.1  mrg int_divides_p (lambda_int a, lambda_int b)
    149  1.1  mrg {
    150  1.1  mrg   return ((b % a) == 0);
    151  1.1  mrg }
    152  1.1  mrg 
    153  1.1  mrg /* Return true if reference REF contains a union access.  */
    154  1.1  mrg 
    155  1.1  mrg static bool
    156  1.1  mrg ref_contains_union_access_p (tree ref)
    157  1.1  mrg {
    158  1.1  mrg   while (handled_component_p (ref))
    159  1.1  mrg     {
    160  1.1  mrg       ref = TREE_OPERAND (ref, 0);
    161  1.1  mrg       if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
    162  1.1  mrg 	  || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
    163  1.1  mrg 	return true;
    164  1.1  mrg     }
    165  1.1  mrg   return false;
    166  1.1  mrg }
    167  1.1  mrg 
    168  1.1  mrg 
    169  1.1  mrg 
    171  1.1  mrg /* Dump into FILE all the data references from DATAREFS.  */
    172  1.1  mrg 
    173  1.1  mrg static void
    174  1.1  mrg dump_data_references (FILE *file, vec<data_reference_p> datarefs)
    175  1.1  mrg {
    176  1.1  mrg   for (data_reference *dr : datarefs)
    177  1.1  mrg     dump_data_reference (file, dr);
    178  1.1  mrg }
    179  1.1  mrg 
/* Unified dump of all the data references in REF to STDERR, for use
   from the debugger.  */

DEBUG_FUNCTION void
debug (vec<data_reference_p> &ref)
{
  dump_data_references (stderr, ref);
}
    187  1.1  mrg 
    188  1.1  mrg DEBUG_FUNCTION void
    189  1.1  mrg debug (vec<data_reference_p> *ptr)
    190  1.1  mrg {
    191  1.1  mrg   if (ptr)
    192  1.1  mrg     debug (*ptr);
    193  1.1  mrg   else
    194  1.1  mrg     fprintf (stderr, "<nil>\n");
    195  1.1  mrg }
    196  1.1  mrg 
    197  1.1  mrg 
/* Dump into STDERR all the data references from DATAREFS.  Debugger
   entry point.  */

DEBUG_FUNCTION void
debug_data_references (vec<data_reference_p> datarefs)
{
  dump_data_references (stderr, datarefs);
}
    205  1.1  mrg 
/* Print to STDERR the data_reference DR.  Debugger entry point.  */

DEBUG_FUNCTION void
debug_data_reference (struct data_reference *dr)
{
  dump_data_reference (stderr, dr);
}
    213  1.1  mrg 
    214  1.1  mrg /* Dump function for a DATA_REFERENCE structure.  */
    215  1.1  mrg 
    216  1.1  mrg void
    217  1.1  mrg dump_data_reference (FILE *outf,
    218  1.1  mrg 		     struct data_reference *dr)
    219  1.1  mrg {
    220  1.1  mrg   unsigned int i;
    221  1.1  mrg 
    222  1.1  mrg   fprintf (outf, "#(Data Ref: \n");
    223  1.1  mrg   fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
    224  1.1  mrg   fprintf (outf, "#  stmt: ");
    225  1.1  mrg   print_gimple_stmt (outf, DR_STMT (dr), 0);
    226  1.1  mrg   fprintf (outf, "#  ref: ");
    227  1.1  mrg   print_generic_stmt (outf, DR_REF (dr));
    228  1.1  mrg   fprintf (outf, "#  base_object: ");
    229  1.1  mrg   print_generic_stmt (outf, DR_BASE_OBJECT (dr));
    230  1.1  mrg 
    231  1.1  mrg   for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
    232  1.1  mrg     {
    233  1.1  mrg       fprintf (outf, "#  Access function %d: ", i);
    234  1.1  mrg       print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
    235  1.1  mrg     }
    236  1.1  mrg   fprintf (outf, "#)\n");
    237  1.1  mrg }
    238  1.1  mrg 
/* Unified dump function for a DATA_REFERENCE structure: print REF to
   STDERR.  Debugger entry point.  */

DEBUG_FUNCTION void
debug (data_reference &ref)
{
  dump_data_reference (stderr, &ref);
}
    246  1.1  mrg 
    247  1.1  mrg DEBUG_FUNCTION void
    248  1.1  mrg debug (data_reference *ptr)
    249  1.1  mrg {
    250  1.1  mrg   if (ptr)
    251  1.1  mrg     debug (*ptr);
    252  1.1  mrg   else
    253  1.1  mrg     fprintf (stderr, "<nil>\n");
    254  1.1  mrg }
    255  1.1  mrg 
    256  1.1  mrg 
    257  1.1  mrg /* Dumps the affine function described by FN to the file OUTF.  */
    258  1.1  mrg 
    259  1.1  mrg DEBUG_FUNCTION void
    260  1.1  mrg dump_affine_function (FILE *outf, affine_fn fn)
    261  1.1  mrg {
    262  1.1  mrg   unsigned i;
    263  1.1  mrg   tree coef;
    264  1.1  mrg 
    265  1.1  mrg   print_generic_expr (outf, fn[0], TDF_SLIM);
    266  1.1  mrg   for (i = 1; fn.iterate (i, &coef); i++)
    267  1.1  mrg     {
    268  1.1  mrg       fprintf (outf, " + ");
    269  1.1  mrg       print_generic_expr (outf, coef, TDF_SLIM);
    270  1.1  mrg       fprintf (outf, " * x_%u", i);
    271  1.1  mrg     }
    272  1.1  mrg }
    273  1.1  mrg 
    274  1.1  mrg /* Dumps the conflict function CF to the file OUTF.  */
    275  1.1  mrg 
    276  1.1  mrg DEBUG_FUNCTION void
    277  1.1  mrg dump_conflict_function (FILE *outf, conflict_function *cf)
    278  1.1  mrg {
    279  1.1  mrg   unsigned i;
    280  1.1  mrg 
    281  1.1  mrg   if (cf->n == NO_DEPENDENCE)
    282  1.1  mrg     fprintf (outf, "no dependence");
    283  1.1  mrg   else if (cf->n == NOT_KNOWN)
    284  1.1  mrg     fprintf (outf, "not known");
    285  1.1  mrg   else
    286  1.1  mrg     {
    287  1.1  mrg       for (i = 0; i < cf->n; i++)
    288  1.1  mrg 	{
    289  1.1  mrg 	  if (i != 0)
    290  1.1  mrg 	    fprintf (outf, " ");
    291  1.1  mrg 	  fprintf (outf, "[");
    292  1.1  mrg 	  dump_affine_function (outf, cf->fns[i]);
    293  1.1  mrg 	  fprintf (outf, "]");
    294  1.1  mrg 	}
    295  1.1  mrg     }
    296  1.1  mrg }
    297  1.1  mrg 
    298  1.1  mrg /* Dump function for a SUBSCRIPT structure.  */
    299  1.1  mrg 
    300  1.1  mrg DEBUG_FUNCTION void
    301  1.1  mrg dump_subscript (FILE *outf, struct subscript *subscript)
    302  1.1  mrg {
    303  1.1  mrg   conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
    304  1.1  mrg 
    305  1.1  mrg   fprintf (outf, "\n (subscript \n");
    306  1.1  mrg   fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
    307  1.1  mrg   dump_conflict_function (outf, cf);
    308  1.1  mrg   if (CF_NONTRIVIAL_P (cf))
    309  1.1  mrg     {
    310  1.1  mrg       tree last_iteration = SUB_LAST_CONFLICT (subscript);
    311  1.1  mrg       fprintf (outf, "\n  last_conflict: ");
    312  1.1  mrg       print_generic_expr (outf, last_iteration);
    313  1.1  mrg     }
    314  1.1  mrg 
    315  1.1  mrg   cf = SUB_CONFLICTS_IN_B (subscript);
    316  1.1  mrg   fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
    317  1.1  mrg   dump_conflict_function (outf, cf);
    318  1.1  mrg   if (CF_NONTRIVIAL_P (cf))
    319  1.1  mrg     {
    320  1.1  mrg       tree last_iteration = SUB_LAST_CONFLICT (subscript);
    321  1.1  mrg       fprintf (outf, "\n  last_conflict: ");
    322  1.1  mrg       print_generic_expr (outf, last_iteration);
    323  1.1  mrg     }
    324  1.1  mrg 
    325  1.1  mrg   fprintf (outf, "\n  (Subscript distance: ");
    326  1.1  mrg   print_generic_expr (outf, SUB_DISTANCE (subscript));
    327  1.1  mrg   fprintf (outf, " ))\n");
    328  1.1  mrg }
    329  1.1  mrg 
    330  1.1  mrg /* Print the classic direction vector DIRV to OUTF.  */
    331  1.1  mrg 
    332  1.1  mrg DEBUG_FUNCTION void
    333  1.1  mrg print_direction_vector (FILE *outf,
    334  1.1  mrg 			lambda_vector dirv,
    335  1.1  mrg 			int length)
    336  1.1  mrg {
    337  1.1  mrg   int eq;
    338  1.1  mrg 
    339  1.1  mrg   for (eq = 0; eq < length; eq++)
    340  1.1  mrg     {
    341  1.1  mrg       enum data_dependence_direction dir = ((enum data_dependence_direction)
    342  1.1  mrg 					    dirv[eq]);
    343  1.1  mrg 
    344  1.1  mrg       switch (dir)
    345  1.1  mrg 	{
    346  1.1  mrg 	case dir_positive:
    347  1.1  mrg 	  fprintf (outf, "    +");
    348  1.1  mrg 	  break;
    349  1.1  mrg 	case dir_negative:
    350  1.1  mrg 	  fprintf (outf, "    -");
    351  1.1  mrg 	  break;
    352  1.1  mrg 	case dir_equal:
    353  1.1  mrg 	  fprintf (outf, "    =");
    354  1.1  mrg 	  break;
    355  1.1  mrg 	case dir_positive_or_equal:
    356  1.1  mrg 	  fprintf (outf, "   +=");
    357  1.1  mrg 	  break;
    358  1.1  mrg 	case dir_positive_or_negative:
    359  1.1  mrg 	  fprintf (outf, "   +-");
    360  1.1  mrg 	  break;
    361  1.1  mrg 	case dir_negative_or_equal:
    362  1.1  mrg 	  fprintf (outf, "   -=");
    363  1.1  mrg 	  break;
    364  1.1  mrg 	case dir_star:
    365  1.1  mrg 	  fprintf (outf, "    *");
    366  1.1  mrg 	  break;
    367  1.1  mrg 	default:
    368  1.1  mrg 	  fprintf (outf, "indep");
    369  1.1  mrg 	  break;
    370  1.1  mrg 	}
    371  1.1  mrg     }
    372  1.1  mrg   fprintf (outf, "\n");
    373  1.1  mrg }
    374  1.1  mrg 
    375  1.1  mrg /* Print a vector of direction vectors.  */
    376  1.1  mrg 
    377  1.1  mrg DEBUG_FUNCTION void
    378  1.1  mrg print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
    379  1.1  mrg 		   int length)
    380  1.1  mrg {
    381  1.1  mrg   for (lambda_vector v : dir_vects)
    382  1.1  mrg     print_direction_vector (outf, v, length);
    383  1.1  mrg }
    384  1.1  mrg 
    385  1.1  mrg /* Print out a vector VEC of length N to OUTFILE.  */
    386  1.1  mrg 
    387  1.1  mrg DEBUG_FUNCTION void
    388  1.1  mrg print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
    389  1.1  mrg {
    390  1.1  mrg   int i;
    391  1.1  mrg 
    392  1.1  mrg   for (i = 0; i < n; i++)
    393  1.1  mrg     fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]);
    394  1.1  mrg   fprintf (outfile, "\n");
    395  1.1  mrg }
    396  1.1  mrg 
    397  1.1  mrg /* Print a vector of distance vectors.  */
    398  1.1  mrg 
    399  1.1  mrg DEBUG_FUNCTION void
    400  1.1  mrg print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
    401  1.1  mrg 		    int length)
    402  1.1  mrg {
    403  1.1  mrg   for (lambda_vector v : dist_vects)
    404  1.1  mrg     print_lambda_vector (outf, v, length);
    405  1.1  mrg }
    406  1.1  mrg 
    407  1.1  mrg /* Dump function for a DATA_DEPENDENCE_RELATION structure.  */
    408  1.1  mrg 
    409  1.1  mrg DEBUG_FUNCTION void
    410  1.1  mrg dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
    411  1.1  mrg {
    412  1.1  mrg   struct data_reference *dra, *drb;
    413  1.1  mrg 
    414  1.1  mrg   fprintf (outf, "(Data Dep: \n");
    415  1.1  mrg 
    416  1.1  mrg   if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    417  1.1  mrg     {
    418  1.1  mrg       if (ddr)
    419  1.1  mrg 	{
    420  1.1  mrg 	  dra = DDR_A (ddr);
    421  1.1  mrg 	  drb = DDR_B (ddr);
    422  1.1  mrg 	  if (dra)
    423  1.1  mrg 	    dump_data_reference (outf, dra);
    424  1.1  mrg 	  else
    425  1.1  mrg 	    fprintf (outf, "    (nil)\n");
    426  1.1  mrg 	  if (drb)
    427  1.1  mrg 	    dump_data_reference (outf, drb);
    428  1.1  mrg 	  else
    429  1.1  mrg 	    fprintf (outf, "    (nil)\n");
    430  1.1  mrg 	}
    431  1.1  mrg       fprintf (outf, "    (don't know)\n)\n");
    432  1.1  mrg       return;
    433  1.1  mrg     }
    434  1.1  mrg 
    435  1.1  mrg   dra = DDR_A (ddr);
    436  1.1  mrg   drb = DDR_B (ddr);
    437  1.1  mrg   dump_data_reference (outf, dra);
    438  1.1  mrg   dump_data_reference (outf, drb);
    439  1.1  mrg 
    440  1.1  mrg   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    441  1.1  mrg     fprintf (outf, "    (no dependence)\n");
    442  1.1  mrg 
    443  1.1  mrg   else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    444  1.1  mrg     {
    445  1.1  mrg       unsigned int i;
    446  1.1  mrg       class loop *loopi;
    447  1.1  mrg 
    448  1.1  mrg       subscript *sub;
    449  1.1  mrg       FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
    450  1.1  mrg 	{
    451  1.1  mrg 	  fprintf (outf, "  access_fn_A: ");
    452  1.1  mrg 	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
    453  1.1  mrg 	  fprintf (outf, "  access_fn_B: ");
    454  1.1  mrg 	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
    455  1.1  mrg 	  dump_subscript (outf, sub);
    456  1.1  mrg 	}
    457  1.1  mrg 
    458  1.1  mrg       fprintf (outf, "  loop nest: (");
    459  1.1  mrg       FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
    460  1.1  mrg 	fprintf (outf, "%d ", loopi->num);
    461  1.1  mrg       fprintf (outf, ")\n");
    462  1.1  mrg 
    463  1.1  mrg       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
    464  1.1  mrg 	{
    465  1.1  mrg 	  fprintf (outf, "  distance_vector: ");
    466  1.1  mrg 	  print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
    467  1.1  mrg 			       DDR_NB_LOOPS (ddr));
    468  1.1  mrg 	}
    469  1.1  mrg 
    470  1.1  mrg       for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
    471  1.1  mrg 	{
    472  1.1  mrg 	  fprintf (outf, "  direction_vector: ");
    473  1.1  mrg 	  print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
    474  1.1  mrg 				  DDR_NB_LOOPS (ddr));
    475  1.1  mrg 	}
    476  1.1  mrg     }
    477  1.1  mrg 
    478  1.1  mrg   fprintf (outf, ")\n");
    479  1.1  mrg }
    480  1.1  mrg 
/* Debug version: dump DDR to STDERR.  Debugger entry point.  */

DEBUG_FUNCTION void
debug_data_dependence_relation (const struct data_dependence_relation *ddr)
{
  dump_data_dependence_relation (stderr, ddr);
}
    488  1.1  mrg 
    489  1.1  mrg /* Dump into FILE all the dependence relations from DDRS.  */
    490  1.1  mrg 
    491  1.1  mrg DEBUG_FUNCTION void
    492  1.1  mrg dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
    493  1.1  mrg {
    494  1.1  mrg   for (auto ddr : ddrs)
    495  1.1  mrg     dump_data_dependence_relation (file, ddr);
    496  1.1  mrg }
    497  1.1  mrg 
/* Dump to STDERR all the dependence relations in REF.  Debugger entry
   point.  */

DEBUG_FUNCTION void
debug (vec<ddr_p> &ref)
{
  dump_data_dependence_relations (stderr, ref);
}
    503  1.1  mrg 
    504  1.1  mrg DEBUG_FUNCTION void
    505  1.1  mrg debug (vec<ddr_p> *ptr)
    506  1.1  mrg {
    507  1.1  mrg   if (ptr)
    508  1.1  mrg     debug (*ptr);
    509  1.1  mrg   else
    510  1.1  mrg     fprintf (stderr, "<nil>\n");
    511  1.1  mrg }
    512  1.1  mrg 
    513  1.1  mrg 
/* Dump to STDERR all the dependence relations from DDRS.  Debugger
   entry point.  */

DEBUG_FUNCTION void
debug_data_dependence_relations (vec<ddr_p> ddrs)
{
  dump_data_dependence_relations (stderr, ddrs);
}
    521  1.1  mrg 
    522  1.1  mrg /* Dumps the distance and direction vectors in FILE.  DDRS contains
    523  1.1  mrg    the dependence relations, and VECT_SIZE is the size of the
    524  1.1  mrg    dependence vectors, or in other words the number of loops in the
    525  1.1  mrg    considered nest.  */
    526  1.1  mrg 
    527  1.1  mrg DEBUG_FUNCTION void
    528  1.1  mrg dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
    529  1.1  mrg {
    530  1.1  mrg   for (data_dependence_relation *ddr : ddrs)
    531  1.1  mrg     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
    532  1.1  mrg       {
    533  1.1  mrg 	for (lambda_vector v : DDR_DIST_VECTS (ddr))
    534  1.1  mrg 	  {
    535  1.1  mrg 	    fprintf (file, "DISTANCE_V (");
    536  1.1  mrg 	    print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
    537  1.1  mrg 	    fprintf (file, ")\n");
    538  1.1  mrg 	  }
    539  1.1  mrg 
    540  1.1  mrg 	for (lambda_vector v : DDR_DIR_VECTS (ddr))
    541  1.1  mrg 	  {
    542  1.1  mrg 	    fprintf (file, "DIRECTION_V (");
    543  1.1  mrg 	    print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
    544  1.1  mrg 	    fprintf (file, ")\n");
    545  1.1  mrg 	  }
    546  1.1  mrg       }
    547  1.1  mrg 
    548  1.1  mrg   fprintf (file, "\n\n");
    549  1.1  mrg }
    550  1.1  mrg 
    551  1.1  mrg /* Dumps the data dependence relations DDRS in FILE.  */
    552  1.1  mrg 
    553  1.1  mrg DEBUG_FUNCTION void
    554  1.1  mrg dump_ddrs (FILE *file, vec<ddr_p> ddrs)
    555  1.1  mrg {
    556  1.1  mrg   for (data_dependence_relation *ddr : ddrs)
    557  1.1  mrg     dump_data_dependence_relation (file, ddr);
    558  1.1  mrg 
    559  1.1  mrg   fprintf (file, "\n\n");
    560  1.1  mrg }
    561  1.1  mrg 
/* Dump the data dependence relations DDRS to STDERR.  Debugger entry
   point.  */

DEBUG_FUNCTION void
debug_ddrs (vec<ddr_p> ddrs)
{
  dump_ddrs (stderr, ddrs);
}
    567  1.1  mrg 
/* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
   OP0 CODE OP1, where:

   - OP0 CODE OP1 has integral type TYPE
   - the range of OP0 is given by OP0_RANGE and
   - the range of OP1 is given by OP1_RANGE.

   Independently of RESULT_RANGE, try to compute:

     DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
	     - (sizetype) (OP0 CODE OP1)

   as a constant and subtract DELTA from the ssizetype constant in *OFF.
   Return true on success, or false if DELTA is not known at compile time.

   Truncation and sign changes are known to distribute over CODE, i.e.

     (itype) (A CODE B) == (itype) A CODE (itype) B

   for any integral type ITYPE whose precision is no greater than the
   precision of A and B.  */

static bool
compute_distributive_range (tree type, value_range &op0_range,
			    tree_code code, value_range &op1_range,
			    tree *off, value_range *result_range)
{
  gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
  if (result_range)
    {
      range_operator *op = range_op_handler (code, type);
      op->fold_range (*result_range, type, op0_range, op1_range);
    }

  /* The distributive property guarantees that if TYPE is no narrower
     than SIZETYPE,

       (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1

     and so we can treat DELTA as zero.  */
  if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
    return true;

  /* If overflow is undefined, we can assume that:

       X == (ssizetype) OP0 CODE (ssizetype) OP1

     is within the range of TYPE, i.e.:

       X == (ssizetype) (TYPE) X

     Distributing the (TYPE) truncation over X gives:

       X == (ssizetype) (OP0 CODE OP1)

     Casting both sides to sizetype and distributing the sizetype cast
     over X gives:

       (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)

     and so we can treat DELTA as zero.  */
  if (TYPE_OVERFLOW_UNDEFINED (type))
    return true;

  /* Compute the range of:

       (ssizetype) OP0 CODE (ssizetype) OP1

     The distributive property guarantees that this has the same bitpattern as:

       (sizetype) OP0 CODE (sizetype) OP1

     but its range is more conducive to analysis.  */
  range_cast (op0_range, ssizetype);
  range_cast (op1_range, ssizetype);
  value_range wide_range;
  range_operator *op = range_op_handler (code, ssizetype);
  /* Fold under wrapping semantics: temporarily setting flag_wrapv stops
     fold_range from exploiting undefined signed overflow.  */
  bool saved_flag_wrapv = flag_wrapv;
  flag_wrapv = 1;
  op->fold_range (wide_range, ssizetype, op0_range, op1_range);
  flag_wrapv = saved_flag_wrapv;
  /* Give up unless the folded range is a single constant interval.  */
  if (wide_range.num_pairs () != 1 || !range_int_cst_p (&wide_range))
    return false;

  wide_int lb = wide_range.lower_bound ();
  wide_int ub = wide_range.upper_bound ();

  /* Calculate the number of times that each end of the range overflows or
     underflows TYPE.  We can only calculate DELTA if the numbers match.  */
  unsigned int precision = TYPE_PRECISION (type);
  if (!TYPE_UNSIGNED (type))
    {
      /* Bias signed bounds by -TYPE_MIN so that the overflow count can be
	 read off the bits above PRECISION, as in the unsigned case.  */
      wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
      lb -= type_min;
      ub -= type_min;
    }
  wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
  lb &= upper_bits;
  ub &= upper_bits;
  if (lb != ub)
    return false;

  /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
     negative values indicating underflow.  The low PRECISION bits of LB
     are clear, so DELTA is therefore LB (== UB).  */
  *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
  return true;
}
    676  1.1  mrg 
    677  1.1  mrg /* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
    678  1.1  mrg    given that OP has type FROM_TYPE and range RANGE.  Both TO_TYPE and
    679  1.1  mrg    FROM_TYPE are integral types.  */
    680  1.1  mrg 
    681  1.1  mrg static bool
    682  1.1  mrg nop_conversion_for_offset_p (tree to_type, tree from_type, value_range &range)
    683  1.1  mrg {
    684  1.1  mrg   gcc_assert (INTEGRAL_TYPE_P (to_type)
    685  1.1  mrg 	      && INTEGRAL_TYPE_P (from_type)
    686  1.1  mrg 	      && !TYPE_OVERFLOW_TRAPS (to_type)
    687  1.1  mrg 	      && !TYPE_OVERFLOW_TRAPS (from_type));
    688  1.1  mrg 
    689  1.1  mrg   /* Converting to something no narrower than sizetype and then to sizetype
    690  1.1  mrg      is equivalent to converting directly to sizetype.  */
    691  1.1  mrg   if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
    692  1.1  mrg     return true;
    693  1.1  mrg 
    694  1.1  mrg   /* Check whether TO_TYPE can represent all values that FROM_TYPE can.  */
    695  1.1  mrg   if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
    696  1.1  mrg       && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
    697  1.1  mrg     return true;
    698  1.1  mrg 
    699  1.1  mrg   /* For narrowing conversions, we could in principle test whether
    700  1.1  mrg      the bits in FROM_TYPE but not in TO_TYPE have a fixed value
    701  1.1  mrg      and apply a constant adjustment.
    702  1.1  mrg 
    703  1.1  mrg      For other conversions (which involve a sign change) we could
    704  1.1  mrg      check that the signs are always equal, and apply a constant
    705  1.1  mrg      adjustment if the signs are negative.
    706  1.1  mrg 
    707  1.1  mrg      However, both cases should be rare.  */
    708  1.1  mrg   return range_fits_type_p (&range, TYPE_PRECISION (to_type),
    709  1.1  mrg 			    TYPE_SIGN (to_type));
    710  1.1  mrg }
    711  1.1  mrg 
    712  1.1  mrg static void
    713  1.1  mrg split_constant_offset (tree type, tree *var, tree *off,
    714  1.1  mrg 		       value_range *result_range,
    715  1.1  mrg 		       hash_map<tree, std::pair<tree, tree> > &cache,
    716  1.1  mrg 		       unsigned *limit);
    717  1.1  mrg 
    718  1.1  mrg /* Helper function for split_constant_offset.  If TYPE is a pointer type,
    719  1.1  mrg    try to express OP0 CODE OP1 as:
    720  1.1  mrg 
    721  1.1  mrg      POINTER_PLUS <*VAR, (sizetype) *OFF>
    722  1.1  mrg 
    723  1.1  mrg    where:
    724  1.1  mrg 
    725  1.1  mrg    - *VAR has type TYPE
    726  1.1  mrg    - *OFF is a constant of type ssizetype.
    727  1.1  mrg 
    728  1.1  mrg    If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:
    729  1.1  mrg 
    730  1.1  mrg      *VAR + (sizetype) *OFF
    731  1.1  mrg 
    732  1.1  mrg    where:
    733  1.1  mrg 
    734  1.1  mrg    - *VAR has type sizetype
    735  1.1  mrg    - *OFF is a constant of type ssizetype.
    736  1.1  mrg 
    737  1.1  mrg    In both cases, OP0 CODE OP1 has type TYPE.
    738  1.1  mrg 
    739  1.1  mrg    Return true on success.  A false return value indicates that we can't
    740  1.1  mrg    do better than set *OFF to zero.
    741  1.1  mrg 
    742  1.1  mrg    When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
    743  1.1  mrg    if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.
    744  1.1  mrg 
    745  1.1  mrg    CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
    746  1.1  mrg    visited.  LIMIT counts down the number of SSA names that we are
    747  1.1  mrg    allowed to process before giving up.  */
    748  1.1  mrg 
    749  1.1  mrg static bool
    750  1.1  mrg split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
    751  1.1  mrg 			 tree *var, tree *off, value_range *result_range,
    752  1.1  mrg 			 hash_map<tree, std::pair<tree, tree> > &cache,
    753  1.1  mrg 			 unsigned *limit)
    754  1.1  mrg {
    755  1.1  mrg   tree var0, var1;
    756  1.1  mrg   tree off0, off1;
    757  1.1  mrg   value_range op0_range, op1_range;
    758  1.1  mrg 
    759  1.1  mrg   *var = NULL_TREE;
    760  1.1  mrg   *off = NULL_TREE;
    761  1.1  mrg 
    762  1.1  mrg   if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
    763  1.1  mrg     return false;
    764  1.1  mrg 
    765  1.1  mrg   if (TREE_CODE (op0) == SSA_NAME
    766  1.1  mrg       && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
    767  1.1  mrg     return false;
    768  1.1  mrg   if (op1
    769  1.1  mrg       && TREE_CODE (op1) == SSA_NAME
    770  1.1  mrg       && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op1))
    771  1.1  mrg     return false;
    772  1.1  mrg 
    773  1.1  mrg   switch (code)
    774  1.1  mrg     {
    775  1.1  mrg     case INTEGER_CST:
    776  1.1  mrg       *var = size_int (0);
    777  1.1  mrg       *off = fold_convert (ssizetype, op0);
    778  1.1  mrg       if (result_range)
    779  1.1  mrg 	result_range->set (op0, op0);
    780  1.1  mrg       return true;
    781  1.1  mrg 
    782  1.1  mrg     case POINTER_PLUS_EXPR:
    783  1.1  mrg       split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
    784  1.1  mrg       split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
    785  1.1  mrg       *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
    786  1.1  mrg       *off = size_binop (PLUS_EXPR, off0, off1);
    787  1.1  mrg       return true;
    788  1.1  mrg 
    789  1.1  mrg     case PLUS_EXPR:
    790  1.1  mrg     case MINUS_EXPR:
    791  1.1  mrg       split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
    792  1.1  mrg       split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
    793  1.1  mrg       *off = size_binop (code, off0, off1);
    794  1.1  mrg       if (!compute_distributive_range (type, op0_range, code, op1_range,
    795  1.1  mrg 				       off, result_range))
    796  1.1  mrg 	return false;
    797  1.1  mrg       *var = fold_build2 (code, sizetype, var0, var1);
    798  1.1  mrg       return true;
    799  1.1  mrg 
    800  1.1  mrg     case MULT_EXPR:
    801  1.1  mrg       if (TREE_CODE (op1) != INTEGER_CST)
    802  1.1  mrg 	return false;
    803  1.1  mrg 
    804  1.1  mrg       split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
    805  1.1  mrg       op1_range.set (op1, op1);
    806  1.1  mrg       *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
    807  1.1  mrg       if (!compute_distributive_range (type, op0_range, code, op1_range,
    808  1.1  mrg 				       off, result_range))
    809  1.1  mrg 	return false;
    810  1.1  mrg       *var = fold_build2 (MULT_EXPR, sizetype, var0,
    811  1.1  mrg 			  fold_convert (sizetype, op1));
    812  1.1  mrg       return true;
    813  1.1  mrg 
    814  1.1  mrg     case ADDR_EXPR:
    815  1.1  mrg       {
    816  1.1  mrg 	tree base, poffset;
    817  1.1  mrg 	poly_int64 pbitsize, pbitpos, pbytepos;
    818  1.1  mrg 	machine_mode pmode;
    819  1.1  mrg 	int punsignedp, preversep, pvolatilep;
    820  1.1  mrg 
    821  1.1  mrg 	op0 = TREE_OPERAND (op0, 0);
    822  1.1  mrg 	base
    823  1.1  mrg 	  = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
    824  1.1  mrg 				 &punsignedp, &preversep, &pvolatilep);
    825  1.1  mrg 
    826  1.1  mrg 	if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
    827  1.1  mrg 	  return false;
    828  1.1  mrg 	base = build_fold_addr_expr (base);
    829  1.1  mrg 	off0 = ssize_int (pbytepos);
    830  1.1  mrg 
    831  1.1  mrg 	if (poffset)
    832  1.1  mrg 	  {
    833  1.1  mrg 	    split_constant_offset (poffset, &poffset, &off1, nullptr,
    834  1.1  mrg 				   cache, limit);
    835  1.1  mrg 	    off0 = size_binop (PLUS_EXPR, off0, off1);
    836  1.1  mrg 	    base = fold_build_pointer_plus (base, poffset);
    837  1.1  mrg 	  }
    838  1.1  mrg 
    839  1.1  mrg 	var0 = fold_convert (type, base);
    840  1.1  mrg 
    841  1.1  mrg 	/* If variable length types are involved, punt, otherwise casts
    842  1.1  mrg 	   might be converted into ARRAY_REFs in gimplify_conversion.
    843  1.1  mrg 	   To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
    844  1.1  mrg 	   possibly no longer appears in current GIMPLE, might resurface.
    845  1.1  mrg 	   This perhaps could run
    846  1.1  mrg 	   if (CONVERT_EXPR_P (var0))
    847  1.1  mrg 	     {
    848  1.1  mrg 	       gimplify_conversion (&var0);
    849  1.1  mrg 	       // Attempt to fill in any within var0 found ARRAY_REF's
    850  1.1  mrg 	       // element size from corresponding op embedded ARRAY_REF,
    851  1.1  mrg 	       // if unsuccessful, just punt.
    852  1.1  mrg 	     }  */
    853  1.1  mrg 	while (POINTER_TYPE_P (type))
    854  1.1  mrg 	  type = TREE_TYPE (type);
    855  1.1  mrg 	if (int_size_in_bytes (type) < 0)
    856  1.1  mrg 	  return false;
    857  1.1  mrg 
    858  1.1  mrg 	*var = var0;
    859  1.1  mrg 	*off = off0;
    860  1.1  mrg 	return true;
    861  1.1  mrg       }
    862  1.1  mrg 
    863  1.1  mrg     case SSA_NAME:
    864  1.1  mrg       {
    865  1.1  mrg 	gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
    866  1.1  mrg 	enum tree_code subcode;
    867  1.1  mrg 
    868  1.1  mrg 	if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
    869  1.1  mrg 	  return false;
    870  1.1  mrg 
    871  1.1  mrg 	subcode = gimple_assign_rhs_code (def_stmt);
    872  1.1  mrg 
    873  1.1  mrg 	/* We are using a cache to avoid un-CSEing large amounts of code.  */
    874  1.1  mrg 	bool use_cache = false;
    875  1.1  mrg 	if (!has_single_use (op0)
    876  1.1  mrg 	    && (subcode == POINTER_PLUS_EXPR
    877  1.1  mrg 		|| subcode == PLUS_EXPR
    878  1.1  mrg 		|| subcode == MINUS_EXPR
    879  1.1  mrg 		|| subcode == MULT_EXPR
    880  1.1  mrg 		|| subcode == ADDR_EXPR
    881  1.1  mrg 		|| CONVERT_EXPR_CODE_P (subcode)))
    882  1.1  mrg 	  {
    883  1.1  mrg 	    use_cache = true;
    884  1.1  mrg 	    bool existed;
    885  1.1  mrg 	    std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
    886  1.1  mrg 	    if (existed)
    887  1.1  mrg 	      {
    888  1.1  mrg 		if (integer_zerop (e.second))
    889  1.1  mrg 		  return false;
    890  1.1  mrg 		*var = e.first;
    891  1.1  mrg 		*off = e.second;
    892  1.1  mrg 		/* The caller sets the range in this case.  */
    893  1.1  mrg 		return true;
    894  1.1  mrg 	      }
    895  1.1  mrg 	    e = std::make_pair (op0, ssize_int (0));
    896  1.1  mrg 	  }
    897  1.1  mrg 
    898  1.1  mrg 	if (*limit == 0)
    899  1.1  mrg 	  return false;
    900  1.1  mrg 	--*limit;
    901  1.1  mrg 
    902  1.1  mrg 	var0 = gimple_assign_rhs1 (def_stmt);
    903  1.1  mrg 	var1 = gimple_assign_rhs2 (def_stmt);
    904  1.1  mrg 
    905  1.1  mrg 	bool res = split_constant_offset_1 (type, var0, subcode, var1,
    906  1.1  mrg 					    var, off, nullptr, cache, limit);
    907  1.1  mrg 	if (res && use_cache)
    908  1.1  mrg 	  *cache.get (op0) = std::make_pair (*var, *off);
    909  1.1  mrg 	/* The caller sets the range in this case.  */
    910  1.1  mrg 	return res;
    911  1.1  mrg       }
    912  1.1  mrg     CASE_CONVERT:
    913  1.1  mrg       {
    914  1.1  mrg 	/* We can only handle the following conversions:
    915  1.1  mrg 
    916  1.1  mrg 	   - Conversions from one pointer type to another pointer type.
    917  1.1  mrg 
    918  1.1  mrg 	   - Conversions from one non-trapping integral type to another
    919  1.1  mrg 	     non-trapping integral type.  In this case, the recursive
    920  1.1  mrg 	     call makes sure that:
    921  1.1  mrg 
    922  1.1  mrg 	       (sizetype) OP0
    923  1.1  mrg 
    924  1.1  mrg 	     can be expressed as a sizetype operation involving VAR and OFF,
    925  1.1  mrg 	     and all we need to do is check whether:
    926  1.1  mrg 
    927  1.1  mrg 	       (sizetype) OP0 == (sizetype) (TYPE) OP0
    928  1.1  mrg 
    929  1.1  mrg 	   - Conversions from a non-trapping sizetype-size integral type to
    930  1.1  mrg 	     a like-sized pointer type.  In this case, the recursive call
    931  1.1  mrg 	     makes sure that:
    932  1.1  mrg 
    933  1.1  mrg 	       (sizetype) OP0 == *VAR + (sizetype) *OFF
    934  1.1  mrg 
    935  1.1  mrg 	     and we can convert that to:
    936  1.1  mrg 
    937  1.1  mrg 	       POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>
    938  1.1  mrg 
    939  1.1  mrg 	   - Conversions from a sizetype-sized pointer type to a like-sized
    940  1.1  mrg 	     non-trapping integral type.  In this case, the recursive call
    941  1.1  mrg 	     makes sure that:
    942  1.1  mrg 
    943  1.1  mrg 	       OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>
    944  1.1  mrg 
    945  1.1  mrg 	     where the POINTER_PLUS and *VAR have the same precision as
    946  1.1  mrg 	     TYPE (and the same precision as sizetype).  Then:
    947  1.1  mrg 
    948  1.1  mrg 	       (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF.  */
    949  1.1  mrg 	tree itype = TREE_TYPE (op0);
    950  1.1  mrg 	if ((POINTER_TYPE_P (itype)
    951  1.1  mrg 	     || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
    952  1.1  mrg 	    && (POINTER_TYPE_P (type)
    953  1.1  mrg 		|| (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
    954  1.1  mrg 	    && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
    955  1.1  mrg 		|| (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
    956  1.1  mrg 		    && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
    957  1.1  mrg 	  {
    958  1.1  mrg 	    if (POINTER_TYPE_P (type))
    959  1.1  mrg 	      {
    960  1.1  mrg 		split_constant_offset (op0, var, off, nullptr, cache, limit);
    961  1.1  mrg 		*var = fold_convert (type, *var);
    962  1.1  mrg 	      }
    963  1.1  mrg 	    else if (POINTER_TYPE_P (itype))
    964  1.1  mrg 	      {
    965  1.1  mrg 		split_constant_offset (op0, var, off, nullptr, cache, limit);
    966  1.1  mrg 		*var = fold_convert (sizetype, *var);
    967  1.1  mrg 	      }
    968  1.1  mrg 	    else
    969  1.1  mrg 	      {
    970  1.1  mrg 		split_constant_offset (op0, var, off, &op0_range,
    971  1.1  mrg 				       cache, limit);
    972  1.1  mrg 		if (!nop_conversion_for_offset_p (type, itype, op0_range))
    973  1.1  mrg 		  return false;
    974  1.1  mrg 		if (result_range)
    975  1.1  mrg 		  {
    976  1.1  mrg 		    *result_range = op0_range;
    977  1.1  mrg 		    range_cast (*result_range, type);
    978  1.1  mrg 		  }
    979  1.1  mrg 	      }
    980  1.1  mrg 	    return true;
    981  1.1  mrg 	  }
    982  1.1  mrg 	return false;
    983  1.1  mrg       }
    984  1.1  mrg 
    985  1.1  mrg     default:
    986  1.1  mrg       return false;
    987  1.1  mrg     }
    988  1.1  mrg }
    989  1.1  mrg 
    990  1.1  mrg /* If EXP has pointer type, try to express it as:
    991  1.1  mrg 
    992  1.1  mrg      POINTER_PLUS <*VAR, (sizetype) *OFF>
    993  1.1  mrg 
    994  1.1  mrg    where:
    995  1.1  mrg 
    996  1.1  mrg    - *VAR has the same type as EXP
    997  1.1  mrg    - *OFF is a constant of type ssizetype.
    998  1.1  mrg 
    999  1.1  mrg    If EXP has an integral type, try to express (sizetype) EXP as:
   1000  1.1  mrg 
   1001  1.1  mrg      *VAR + (sizetype) *OFF
   1002  1.1  mrg 
   1003  1.1  mrg    where:
   1004  1.1  mrg 
   1005  1.1  mrg    - *VAR has type sizetype
   1006  1.1  mrg    - *OFF is a constant of type ssizetype.
   1007  1.1  mrg 
   1008  1.1  mrg    If EXP_RANGE is nonnull, set it to the range of EXP.
   1009  1.1  mrg 
   1010  1.1  mrg    CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
   1011  1.1  mrg    visited.  LIMIT counts down the number of SSA names that we are
   1012  1.1  mrg    allowed to process before giving up.  */
   1013  1.1  mrg 
   1014  1.1  mrg static void
   1015  1.1  mrg split_constant_offset (tree exp, tree *var, tree *off, value_range *exp_range,
   1016  1.1  mrg 		       hash_map<tree, std::pair<tree, tree> > &cache,
   1017  1.1  mrg 		       unsigned *limit)
   1018  1.1  mrg {
   1019  1.1  mrg   tree type = TREE_TYPE (exp), op0, op1;
   1020  1.1  mrg   enum tree_code code;
   1021  1.1  mrg 
   1022  1.1  mrg   code = TREE_CODE (exp);
   1023  1.1  mrg   if (exp_range)
   1024  1.1  mrg     {
   1025  1.1  mrg       *exp_range = type;
   1026  1.1  mrg       if (code == SSA_NAME)
   1027  1.1  mrg 	{
   1028  1.1  mrg 	  value_range vr;
   1029  1.1  mrg 	  get_range_query (cfun)->range_of_expr (vr, exp);
   1030  1.1  mrg 	  if (vr.undefined_p ())
   1031  1.1  mrg 	    vr.set_varying (TREE_TYPE (exp));
   1032  1.1  mrg 	  wide_int var_min = wi::to_wide (vr.min ());
   1033  1.1  mrg 	  wide_int var_max = wi::to_wide (vr.max ());
   1034  1.1  mrg 	  value_range_kind vr_kind = vr.kind ();
   1035  1.1  mrg 	  wide_int var_nonzero = get_nonzero_bits (exp);
   1036  1.1  mrg 	  vr_kind = intersect_range_with_nonzero_bits (vr_kind,
   1037  1.1  mrg 						       &var_min, &var_max,
   1038  1.1  mrg 						       var_nonzero,
   1039  1.1  mrg 						       TYPE_SIGN (type));
   1040  1.1  mrg 	  /* This check for VR_VARYING is here because the old code
   1041  1.1  mrg 	     using get_range_info would return VR_RANGE for the entire
   1042  1.1  mrg 	     domain, instead of VR_VARYING.  The new code normalizes
   1043  1.1  mrg 	     full-domain ranges to VR_VARYING.  */
   1044  1.1  mrg 	  if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
   1045  1.1  mrg 	    *exp_range = value_range (type, var_min, var_max);
   1046  1.1  mrg 	}
   1047  1.1  mrg     }
   1048  1.1  mrg 
   1049  1.1  mrg   if (!tree_is_chrec (exp)
   1050  1.1  mrg       && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
   1051  1.1  mrg     {
   1052  1.1  mrg       extract_ops_from_tree (exp, &code, &op0, &op1);
   1053  1.1  mrg       if (split_constant_offset_1 (type, op0, code, op1, var, off,
   1054  1.1  mrg 				   exp_range, cache, limit))
   1055  1.1  mrg 	return;
   1056  1.1  mrg     }
   1057  1.1  mrg 
   1058  1.1  mrg   *var = exp;
   1059  1.1  mrg   if (INTEGRAL_TYPE_P (type))
   1060  1.1  mrg     *var = fold_convert (sizetype, *var);
   1061  1.1  mrg   *off = ssize_int (0);
   1062  1.1  mrg 
   1063  1.1  mrg   value_range r;
   1064  1.1  mrg   if (exp_range && code != SSA_NAME
   1065  1.1  mrg       && get_range_query (cfun)->range_of_expr (r, exp)
   1066  1.1  mrg       && !r.undefined_p ())
   1067  1.1  mrg     *exp_range = r;
   1068  1.1  mrg }
   1069  1.1  mrg 
   1070  1.1  mrg /* Expresses EXP as VAR + OFF, where OFF is a constant.  VAR has the same
   1071  1.1  mrg    type as EXP while OFF has type ssizetype.  */
   1072  1.1  mrg 
   1073  1.1  mrg void
   1074  1.1  mrg split_constant_offset (tree exp, tree *var, tree *off)
   1075  1.1  mrg {
   1076  1.1  mrg   unsigned limit = param_ssa_name_def_chain_limit;
   1077  1.1  mrg   static hash_map<tree, std::pair<tree, tree> > *cache;
   1078  1.1  mrg   if (!cache)
   1079  1.1  mrg     cache = new hash_map<tree, std::pair<tree, tree> > (37);
   1080  1.1  mrg   split_constant_offset (exp, var, off, nullptr, *cache, &limit);
   1081  1.1  mrg   *var = fold_convert (TREE_TYPE (exp), *var);
   1082  1.1  mrg   cache->empty ();
   1083  1.1  mrg }
   1084  1.1  mrg 
   1085  1.1  mrg /* Returns the address ADDR of an object in a canonical shape (without nop
   1086  1.1  mrg    casts, and with type of pointer to the object).  */
   1087  1.1  mrg 
   1088  1.1  mrg static tree
   1089  1.1  mrg canonicalize_base_object_address (tree addr)
   1090  1.1  mrg {
   1091  1.1  mrg   tree orig = addr;
   1092  1.1  mrg 
   1093  1.1  mrg   STRIP_NOPS (addr);
   1094  1.1  mrg 
   1095  1.1  mrg   /* The base address may be obtained by casting from integer, in that case
   1096  1.1  mrg      keep the cast.  */
   1097  1.1  mrg   if (!POINTER_TYPE_P (TREE_TYPE (addr)))
   1098  1.1  mrg     return orig;
   1099  1.1  mrg 
   1100  1.1  mrg   if (TREE_CODE (addr) != ADDR_EXPR)
   1101  1.1  mrg     return addr;
   1102  1.1  mrg 
   1103  1.1  mrg   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
   1104  1.1  mrg }
   1105  1.1  mrg 
   1106  1.1  mrg /* Analyze the behavior of memory reference REF within STMT.
   1107  1.1  mrg    There are two modes:
   1108  1.1  mrg 
   1109  1.1  mrg    - BB analysis.  In this case we simply split the address into base,
   1110  1.1  mrg      init and offset components, without reference to any containing loop.
   1111  1.1  mrg      The resulting base and offset are general expressions and they can
   1112  1.1  mrg      vary arbitrarily from one iteration of the containing loop to the next.
   1113  1.1  mrg      The step is always zero.
   1114  1.1  mrg 
   1115  1.1  mrg    - loop analysis.  In this case we analyze the reference both wrt LOOP
   1116  1.1  mrg      and on the basis that the reference occurs (is "used") in LOOP;
   1117  1.1  mrg      see the comment above analyze_scalar_evolution_in_loop for more
   1118  1.1  mrg      information about this distinction.  The base, init, offset and
   1119  1.1  mrg      step fields are all invariant in LOOP.
   1120  1.1  mrg 
   1121  1.1  mrg    Perform BB analysis if LOOP is null, or if LOOP is the function's
   1122  1.1  mrg    dummy outermost loop.  In other cases perform loop analysis.
   1123  1.1  mrg 
   1124  1.1  mrg    Return true if the analysis succeeded and store the results in DRB if so.
   1125  1.1  mrg    BB analysis can only fail for bitfield or reversed-storage accesses.  */
   1126  1.1  mrg 
   1127  1.1  mrg opt_result
   1128  1.1  mrg dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
   1129  1.1  mrg 		      class loop *loop, const gimple *stmt)
   1130  1.1  mrg {
   1131  1.1  mrg   poly_int64 pbitsize, pbitpos;
   1132  1.1  mrg   tree base, poffset;
   1133  1.1  mrg   machine_mode pmode;
   1134  1.1  mrg   int punsignedp, preversep, pvolatilep;
   1135  1.1  mrg   affine_iv base_iv, offset_iv;
   1136  1.1  mrg   tree init, dinit, step;
   1137  1.1  mrg   bool in_loop = (loop && loop->num);
   1138  1.1  mrg 
   1139  1.1  mrg   if (dump_file && (dump_flags & TDF_DETAILS))
   1140  1.1  mrg     fprintf (dump_file, "analyze_innermost: ");
   1141  1.1  mrg 
   1142  1.1  mrg   base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
   1143  1.1  mrg 			      &punsignedp, &preversep, &pvolatilep);
   1144  1.1  mrg   gcc_assert (base != NULL_TREE);
   1145  1.1  mrg 
   1146  1.1  mrg   poly_int64 pbytepos;
   1147  1.1  mrg   if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
   1148  1.1  mrg     return opt_result::failure_at (stmt,
   1149  1.1  mrg 				   "failed: bit offset alignment.\n");
   1150  1.1  mrg 
   1151  1.1  mrg   if (preversep)
   1152  1.1  mrg     return opt_result::failure_at (stmt,
   1153  1.1  mrg 				   "failed: reverse storage order.\n");
   1154  1.1  mrg 
   1155  1.1  mrg   /* Calculate the alignment and misalignment for the inner reference.  */
   1156  1.1  mrg   unsigned int HOST_WIDE_INT bit_base_misalignment;
   1157  1.1  mrg   unsigned int bit_base_alignment;
   1158  1.1  mrg   get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
   1159  1.1  mrg 
   1160  1.1  mrg   /* There are no bitfield references remaining in BASE, so the values
   1161  1.1  mrg      we got back must be whole bytes.  */
   1162  1.1  mrg   gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
   1163  1.1  mrg 	      && bit_base_misalignment % BITS_PER_UNIT == 0);
   1164  1.1  mrg   unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
   1165  1.1  mrg   poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
   1166  1.1  mrg 
   1167  1.1  mrg   if (TREE_CODE (base) == MEM_REF)
   1168  1.1  mrg     {
   1169  1.1  mrg       if (!integer_zerop (TREE_OPERAND (base, 1)))
   1170  1.1  mrg 	{
   1171  1.1  mrg 	  /* Subtract MOFF from the base and add it to POFFSET instead.
   1172  1.1  mrg 	     Adjust the misalignment to reflect the amount we subtracted.  */
   1173  1.1  mrg 	  poly_offset_int moff = mem_ref_offset (base);
   1174  1.1  mrg 	  base_misalignment -= moff.force_shwi ();
   1175  1.1  mrg 	  tree mofft = wide_int_to_tree (sizetype, moff);
   1176  1.1  mrg 	  if (!poffset)
   1177  1.1  mrg 	    poffset = mofft;
   1178  1.1  mrg 	  else
   1179  1.1  mrg 	    poffset = size_binop (PLUS_EXPR, poffset, mofft);
   1180  1.1  mrg 	}
   1181  1.1  mrg       base = TREE_OPERAND (base, 0);
   1182  1.1  mrg     }
   1183  1.1  mrg   else
   1184  1.1  mrg     base = build_fold_addr_expr (base);
   1185  1.1  mrg 
   1186  1.1  mrg   if (in_loop)
   1187  1.1  mrg     {
   1188  1.1  mrg       if (!simple_iv (loop, loop, base, &base_iv, true))
   1189  1.1  mrg 	return opt_result::failure_at
   1190  1.1  mrg 	  (stmt, "failed: evolution of base is not affine.\n");
   1191  1.1  mrg     }
   1192  1.1  mrg   else
   1193  1.1  mrg     {
   1194  1.1  mrg       base_iv.base = base;
   1195  1.1  mrg       base_iv.step = ssize_int (0);
   1196  1.1  mrg       base_iv.no_overflow = true;
   1197  1.1  mrg     }
   1198  1.1  mrg 
   1199  1.1  mrg   if (!poffset)
   1200  1.1  mrg     {
   1201  1.1  mrg       offset_iv.base = ssize_int (0);
   1202  1.1  mrg       offset_iv.step = ssize_int (0);
   1203  1.1  mrg     }
   1204  1.1  mrg   else
   1205  1.1  mrg     {
   1206  1.1  mrg       if (!in_loop)
   1207  1.1  mrg         {
   1208  1.1  mrg           offset_iv.base = poffset;
   1209  1.1  mrg           offset_iv.step = ssize_int (0);
   1210  1.1  mrg         }
   1211  1.1  mrg       else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
   1212  1.1  mrg 	return opt_result::failure_at
   1213  1.1  mrg 	  (stmt, "failed: evolution of offset is not affine.\n");
   1214  1.1  mrg     }
   1215  1.1  mrg 
   1216  1.1  mrg   init = ssize_int (pbytepos);
   1217  1.1  mrg 
   1218  1.1  mrg   /* Subtract any constant component from the base and add it to INIT instead.
   1219  1.1  mrg      Adjust the misalignment to reflect the amount we subtracted.  */
   1220  1.1  mrg   split_constant_offset (base_iv.base, &base_iv.base, &dinit);
   1221  1.1  mrg   init = size_binop (PLUS_EXPR, init, dinit);
   1222  1.1  mrg   base_misalignment -= TREE_INT_CST_LOW (dinit);
   1223  1.1  mrg 
   1224  1.1  mrg   split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
   1225  1.1  mrg   init = size_binop (PLUS_EXPR, init, dinit);
   1226  1.1  mrg 
   1227  1.1  mrg   step = size_binop (PLUS_EXPR,
   1228  1.1  mrg 		     fold_convert (ssizetype, base_iv.step),
   1229  1.1  mrg 		     fold_convert (ssizetype, offset_iv.step));
   1230  1.1  mrg 
   1231  1.1  mrg   base = canonicalize_base_object_address (base_iv.base);
   1232  1.1  mrg 
   1233  1.1  mrg   /* See if get_pointer_alignment can guarantee a higher alignment than
   1234  1.1  mrg      the one we calculated above.  */
   1235  1.1  mrg   unsigned int HOST_WIDE_INT alt_misalignment;
   1236  1.1  mrg   unsigned int alt_alignment;
   1237  1.1  mrg   get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
   1238  1.1  mrg 
   1239  1.1  mrg   /* As above, these values must be whole bytes.  */
   1240  1.1  mrg   gcc_assert (alt_alignment % BITS_PER_UNIT == 0
   1241  1.1  mrg 	      && alt_misalignment % BITS_PER_UNIT == 0);
   1242  1.1  mrg   alt_alignment /= BITS_PER_UNIT;
   1243  1.1  mrg   alt_misalignment /= BITS_PER_UNIT;
   1244  1.1  mrg 
   1245  1.1  mrg   if (base_alignment < alt_alignment)
   1246  1.1  mrg     {
   1247  1.1  mrg       base_alignment = alt_alignment;
   1248  1.1  mrg       base_misalignment = alt_misalignment;
   1249  1.1  mrg     }
   1250  1.1  mrg 
   1251  1.1  mrg   drb->base_address = base;
   1252  1.1  mrg   drb->offset = fold_convert (ssizetype, offset_iv.base);
   1253  1.1  mrg   drb->init = init;
   1254  1.1  mrg   drb->step = step;
   1255  1.1  mrg   if (known_misalignment (base_misalignment, base_alignment,
   1256  1.1  mrg 			  &drb->base_misalignment))
   1257  1.1  mrg     drb->base_alignment = base_alignment;
   1258  1.1  mrg   else
   1259  1.1  mrg     {
   1260  1.1  mrg       drb->base_alignment = known_alignment (base_misalignment);
   1261  1.1  mrg       drb->base_misalignment = 0;
   1262  1.1  mrg     }
   1263  1.1  mrg   drb->offset_alignment = highest_pow2_factor (offset_iv.base);
   1264  1.1  mrg   drb->step_alignment = highest_pow2_factor (step);
   1265  1.1  mrg 
   1266  1.1  mrg   if (dump_file && (dump_flags & TDF_DETAILS))
   1267  1.1  mrg     fprintf (dump_file, "success.\n");
   1268  1.1  mrg 
   1269  1.1  mrg   return opt_result::success ();
   1270  1.1  mrg }
   1271  1.1  mrg 
   1272  1.1  mrg /* Return true if OP is a valid component reference for a DR access
   1273  1.1  mrg    function.  This accepts a subset of what handled_component_p accepts.  */
   1274  1.1  mrg 
   1275  1.1  mrg static bool
   1276  1.1  mrg access_fn_component_p (tree op)
   1277  1.1  mrg {
   1278  1.1  mrg   switch (TREE_CODE (op))
   1279  1.1  mrg     {
   1280  1.1  mrg     case REALPART_EXPR:
   1281  1.1  mrg     case IMAGPART_EXPR:
   1282  1.1  mrg     case ARRAY_REF:
   1283  1.1  mrg       return true;
   1284  1.1  mrg 
   1285  1.1  mrg     case COMPONENT_REF:
   1286  1.1  mrg       return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
   1287  1.1  mrg 
   1288  1.1  mrg     default:
   1289  1.1  mrg       return false;
   1290  1.1  mrg     }
   1291  1.1  mrg }
   1292  1.1  mrg 
   1293  1.1  mrg /* Returns whether BASE can have a access_fn_component_p with BASE
   1294  1.1  mrg    as base.  */
   1295  1.1  mrg 
   1296  1.1  mrg static bool
   1297  1.1  mrg base_supports_access_fn_components_p (tree base)
   1298  1.1  mrg {
   1299  1.1  mrg   switch (TREE_CODE (TREE_TYPE (base)))
   1300  1.1  mrg     {
   1301  1.1  mrg     case COMPLEX_TYPE:
   1302  1.1  mrg     case ARRAY_TYPE:
   1303  1.1  mrg     case RECORD_TYPE:
   1304  1.1  mrg       return true;
   1305  1.1  mrg     default:
   1306  1.1  mrg       return false;
   1307  1.1  mrg     }
   1308  1.1  mrg }
   1309  1.1  mrg 
   1310  1.1  mrg /* Determines the base object and the list of indices of memory reference
   1311  1.1  mrg    DR, analyzed in LOOP and instantiated before NEST.  */
   1312  1.1  mrg 
   1313  1.1  mrg static void
   1314  1.1  mrg dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
   1315  1.1  mrg {
   1316  1.1  mrg   /* If analyzing a basic-block there are no indices to analyze
   1317  1.1  mrg      and thus no access functions.  */
   1318  1.1  mrg   if (!nest)
   1319  1.1  mrg     {
   1320  1.1  mrg       dri->base_object = ref;
   1321  1.1  mrg       dri->access_fns.create (0);
   1322  1.1  mrg       return;
   1323  1.1  mrg     }
   1324  1.1  mrg 
   1325  1.1  mrg   vec<tree> access_fns = vNULL;
   1326  1.1  mrg 
   1327  1.1  mrg   /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
   1328  1.1  mrg      into a two element array with a constant index.  The base is
   1329  1.1  mrg      then just the immediate underlying object.  */
   1330  1.1  mrg   if (TREE_CODE (ref) == REALPART_EXPR)
   1331  1.1  mrg     {
   1332  1.1  mrg       ref = TREE_OPERAND (ref, 0);
   1333  1.1  mrg       access_fns.safe_push (integer_zero_node);
   1334  1.1  mrg     }
   1335  1.1  mrg   else if (TREE_CODE (ref) == IMAGPART_EXPR)
   1336  1.1  mrg     {
   1337  1.1  mrg       ref = TREE_OPERAND (ref, 0);
   1338  1.1  mrg       access_fns.safe_push (integer_one_node);
   1339  1.1  mrg     }
   1340  1.1  mrg 
   1341  1.1  mrg   /* Analyze access functions of dimensions we know to be independent.
   1342  1.1  mrg      The list of component references handled here should be kept in
   1343  1.1  mrg      sync with access_fn_component_p.  */
   1344  1.1  mrg   while (handled_component_p (ref))
   1345  1.1  mrg     {
   1346  1.1  mrg       if (TREE_CODE (ref) == ARRAY_REF)
   1347  1.1  mrg 	{
   1348  1.1  mrg 	  tree op = TREE_OPERAND (ref, 1);
   1349  1.1  mrg 	  tree access_fn = analyze_scalar_evolution (loop, op);
   1350  1.1  mrg 	  access_fn = instantiate_scev (nest, loop, access_fn);
   1351  1.1  mrg 	  access_fns.safe_push (access_fn);
   1352  1.1  mrg 	}
   1353  1.1  mrg       else if (TREE_CODE (ref) == COMPONENT_REF
   1354  1.1  mrg 	       && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
   1355  1.1  mrg 	{
   1356  1.1  mrg 	  /* For COMPONENT_REFs of records (but not unions!) use the
   1357  1.1  mrg 	     FIELD_DECL offset as constant access function so we can
   1358  1.1  mrg 	     disambiguate a[i].f1 and a[i].f2.  */
   1359  1.1  mrg 	  tree off = component_ref_field_offset (ref);
   1360  1.1  mrg 	  off = size_binop (PLUS_EXPR,
   1361  1.1  mrg 			    size_binop (MULT_EXPR,
   1362  1.1  mrg 					fold_convert (bitsizetype, off),
   1363  1.1  mrg 					bitsize_int (BITS_PER_UNIT)),
   1364  1.1  mrg 			    DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
   1365  1.1  mrg 	  access_fns.safe_push (off);
   1366  1.1  mrg 	}
   1367  1.1  mrg       else
   1368  1.1  mrg 	/* If we have an unhandled component we could not translate
   1369  1.1  mrg 	   to an access function stop analyzing.  We have determined
   1370  1.1  mrg 	   our base object in this case.  */
   1371  1.1  mrg 	break;
   1372  1.1  mrg 
   1373  1.1  mrg       ref = TREE_OPERAND (ref, 0);
   1374  1.1  mrg     }
   1375  1.1  mrg 
   1376  1.1  mrg   /* If the address operand of a MEM_REF base has an evolution in the
   1377  1.1  mrg      analyzed nest, add it as an additional independent access-function.  */
   1378  1.1  mrg   if (TREE_CODE (ref) == MEM_REF)
   1379  1.1  mrg     {
   1380  1.1  mrg       tree op = TREE_OPERAND (ref, 0);
   1381  1.1  mrg       tree access_fn = analyze_scalar_evolution (loop, op);
   1382  1.1  mrg       access_fn = instantiate_scev (nest, loop, access_fn);
   1383  1.1  mrg       STRIP_NOPS (access_fn);
   1384  1.1  mrg       if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
   1385  1.1  mrg 	{
   1386  1.1  mrg 	  tree memoff = TREE_OPERAND (ref, 1);
   1387  1.1  mrg 	  tree base = initial_condition (access_fn);
   1388  1.1  mrg 	  tree orig_type = TREE_TYPE (base);
   1389  1.1  mrg 	  STRIP_USELESS_TYPE_CONVERSION (base);
   1390  1.1  mrg 	  tree off;
   1391  1.1  mrg 	  split_constant_offset (base, &base, &off);
   1392  1.1  mrg 	  STRIP_USELESS_TYPE_CONVERSION (base);
   1393  1.1  mrg 	  /* Fold the MEM_REF offset into the evolutions initial
   1394  1.1  mrg 	     value to make more bases comparable.  */
   1395  1.1  mrg 	  if (!integer_zerop (memoff))
   1396  1.1  mrg 	    {
   1397  1.1  mrg 	      off = size_binop (PLUS_EXPR, off,
   1398  1.1  mrg 				fold_convert (ssizetype, memoff));
   1399  1.1  mrg 	      memoff = build_int_cst (TREE_TYPE (memoff), 0);
   1400  1.1  mrg 	    }
   1401  1.1  mrg 	  /* Adjust the offset so it is a multiple of the access type
   1402  1.1  mrg 	     size and thus we separate bases that can possibly be used
   1403  1.1  mrg 	     to produce partial overlaps (which the access_fn machinery
   1404  1.1  mrg 	     cannot handle).  */
   1405  1.1  mrg 	  wide_int rem;
   1406  1.1  mrg 	  if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
   1407  1.1  mrg 	      && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
   1408  1.1  mrg 	      && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
   1409  1.1  mrg 	    rem = wi::mod_trunc
   1410  1.1  mrg 	      (wi::to_wide (off),
   1411  1.1  mrg 	       wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
   1412  1.1  mrg 	       SIGNED);
   1413  1.1  mrg 	  else
   1414  1.1  mrg 	    /* If we can't compute the remainder simply force the initial
   1415  1.1  mrg 	       condition to zero.  */
   1416  1.1  mrg 	    rem = wi::to_wide (off);
   1417  1.1  mrg 	  off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
   1418  1.1  mrg 	  memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
   1419  1.1  mrg 	  /* And finally replace the initial condition.  */
   1420  1.1  mrg 	  access_fn = chrec_replace_initial_condition
   1421  1.1  mrg 	      (access_fn, fold_convert (orig_type, off));
   1422  1.1  mrg 	  /* ???  This is still not a suitable base object for
   1423  1.1  mrg 	     dr_may_alias_p - the base object needs to be an
   1424  1.1  mrg 	     access that covers the object as whole.  With
   1425  1.1  mrg 	     an evolution in the pointer this cannot be
   1426  1.1  mrg 	     guaranteed.
   1427  1.1  mrg 	     As a band-aid, mark the access so we can special-case
   1428  1.1  mrg 	     it in dr_may_alias_p.  */
   1429  1.1  mrg 	  tree old = ref;
   1430  1.1  mrg 	  ref = fold_build2_loc (EXPR_LOCATION (ref),
   1431  1.1  mrg 				 MEM_REF, TREE_TYPE (ref),
   1432  1.1  mrg 				 base, memoff);
   1433  1.1  mrg 	  MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
   1434  1.1  mrg 	  MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
   1435  1.1  mrg 	  dri->unconstrained_base = true;
   1436  1.1  mrg 	  access_fns.safe_push (access_fn);
   1437  1.1  mrg 	}
   1438  1.1  mrg     }
   1439  1.1  mrg   else if (DECL_P (ref))
   1440  1.1  mrg     {
   1441  1.1  mrg       /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
   1442  1.1  mrg       ref = build2 (MEM_REF, TREE_TYPE (ref),
   1443  1.1  mrg 		    build_fold_addr_expr (ref),
   1444  1.1  mrg 		    build_int_cst (reference_alias_ptr_type (ref), 0));
   1445  1.1  mrg     }
   1446  1.1  mrg 
   1447  1.1  mrg   dri->base_object = ref;
   1448  1.1  mrg   dri->access_fns = access_fns;
   1449  1.1  mrg }
   1450  1.1  mrg 
   1451  1.1  mrg /* Extracts the alias analysis information from the memory reference DR.  */
   1452  1.1  mrg 
   1453  1.1  mrg static void
   1454  1.1  mrg dr_analyze_alias (struct data_reference *dr)
   1455  1.1  mrg {
   1456  1.1  mrg   tree ref = DR_REF (dr);
   1457  1.1  mrg   tree base = get_base_address (ref), addr;
   1458  1.1  mrg 
   1459  1.1  mrg   if (INDIRECT_REF_P (base)
   1460  1.1  mrg       || TREE_CODE (base) == MEM_REF)
   1461  1.1  mrg     {
   1462  1.1  mrg       addr = TREE_OPERAND (base, 0);
   1463  1.1  mrg       if (TREE_CODE (addr) == SSA_NAME)
   1464  1.1  mrg 	DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
   1465  1.1  mrg     }
   1466  1.1  mrg }
   1467  1.1  mrg 
   1468  1.1  mrg /* Frees data reference DR.  */
   1469  1.1  mrg 
   1470  1.1  mrg void
   1471  1.1  mrg free_data_ref (data_reference_p dr)
   1472  1.1  mrg {
   1473  1.1  mrg   DR_ACCESS_FNS (dr).release ();
   1474  1.1  mrg   if (dr->alt_indices.base_object)
   1475  1.1  mrg     dr->alt_indices.access_fns.release ();
   1476  1.1  mrg   free (dr);
   1477  1.1  mrg }
   1478  1.1  mrg 
   1479  1.1  mrg /* Analyze memory reference MEMREF, which is accessed in STMT.
   1480  1.1  mrg    The reference is a read if IS_READ is true, otherwise it is a write.
   1481  1.1  mrg    IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
   1482  1.1  mrg    within STMT, i.e. that it might not occur even if STMT is executed
   1483  1.1  mrg    and runs to completion.
   1484  1.1  mrg 
   1485  1.1  mrg    Return the data_reference description of MEMREF.  NEST is the outermost
   1486  1.1  mrg    loop in which the reference should be instantiated, LOOP is the loop
   1487  1.1  mrg    in which the data reference should be analyzed.  */
   1488  1.1  mrg 
   1489  1.1  mrg struct data_reference *
   1490  1.1  mrg create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
   1491  1.1  mrg 		 bool is_read, bool is_conditional_in_stmt)
   1492  1.1  mrg {
   1493  1.1  mrg   struct data_reference *dr;
   1494  1.1  mrg 
   1495  1.1  mrg   if (dump_file && (dump_flags & TDF_DETAILS))
   1496  1.1  mrg     {
   1497  1.1  mrg       fprintf (dump_file, "Creating dr for ");
   1498  1.1  mrg       print_generic_expr (dump_file, memref, TDF_SLIM);
   1499  1.1  mrg       fprintf (dump_file, "\n");
   1500  1.1  mrg     }
   1501  1.1  mrg 
   1502  1.1  mrg   dr = XCNEW (struct data_reference);
   1503  1.1  mrg   DR_STMT (dr) = stmt;
   1504  1.1  mrg   DR_REF (dr) = memref;
   1505  1.1  mrg   DR_IS_READ (dr) = is_read;
   1506  1.1  mrg   DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
   1507  1.1  mrg 
   1508  1.1  mrg   dr_analyze_innermost (&DR_INNERMOST (dr), memref,
   1509  1.1  mrg 			nest != NULL ? loop : NULL, stmt);
   1510  1.1  mrg   dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
   1511  1.1  mrg   dr_analyze_alias (dr);
   1512  1.1  mrg 
   1513  1.1  mrg   if (dump_file && (dump_flags & TDF_DETAILS))
   1514  1.1  mrg     {
   1515  1.1  mrg       unsigned i;
   1516  1.1  mrg       fprintf (dump_file, "\tbase_address: ");
   1517  1.1  mrg       print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
   1518  1.1  mrg       fprintf (dump_file, "\n\toffset from base address: ");
   1519  1.1  mrg       print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
   1520  1.1  mrg       fprintf (dump_file, "\n\tconstant offset from base address: ");
   1521  1.1  mrg       print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
   1522  1.1  mrg       fprintf (dump_file, "\n\tstep: ");
   1523  1.1  mrg       print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
   1524  1.1  mrg       fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
   1525  1.1  mrg       fprintf (dump_file, "\n\tbase misalignment: %d",
   1526  1.1  mrg 	       DR_BASE_MISALIGNMENT (dr));
   1527  1.1  mrg       fprintf (dump_file, "\n\toffset alignment: %d",
   1528  1.1  mrg 	       DR_OFFSET_ALIGNMENT (dr));
   1529  1.1  mrg       fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
   1530  1.1  mrg       fprintf (dump_file, "\n\tbase_object: ");
   1531  1.1  mrg       print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
   1532  1.1  mrg       fprintf (dump_file, "\n");
   1533  1.1  mrg       for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
   1534  1.1  mrg 	{
   1535  1.1  mrg 	  fprintf (dump_file, "\tAccess function %d: ", i);
   1536  1.1  mrg 	  print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
   1537  1.1  mrg 	}
   1538  1.1  mrg     }
   1539  1.1  mrg 
   1540  1.1  mrg   return dr;
   1541  1.1  mrg }
   1542  1.1  mrg 
   1543  1.1  mrg /*  A helper function computes order between two tree expressions T1 and T2.
   1544  1.1  mrg     This is used in comparator functions sorting objects based on the order
   1545  1.1  mrg     of tree expressions.  The function returns -1, 0, or 1.  */
   1546  1.1  mrg 
   1547  1.1  mrg int
   1548  1.1  mrg data_ref_compare_tree (tree t1, tree t2)
   1549  1.1  mrg {
   1550  1.1  mrg   int i, cmp;
   1551  1.1  mrg   enum tree_code code;
   1552  1.1  mrg   char tclass;
   1553  1.1  mrg 
   1554  1.1  mrg   if (t1 == t2)
   1555  1.1  mrg     return 0;
   1556  1.1  mrg   if (t1 == NULL)
   1557  1.1  mrg     return -1;
   1558  1.1  mrg   if (t2 == NULL)
   1559  1.1  mrg     return 1;
   1560  1.1  mrg 
   1561  1.1  mrg   STRIP_USELESS_TYPE_CONVERSION (t1);
   1562  1.1  mrg   STRIP_USELESS_TYPE_CONVERSION (t2);
   1563  1.1  mrg   if (t1 == t2)
   1564  1.1  mrg     return 0;
   1565  1.1  mrg 
   1566  1.1  mrg   if (TREE_CODE (t1) != TREE_CODE (t2)
   1567  1.1  mrg       && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
   1568  1.1  mrg     return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
   1569  1.1  mrg 
   1570  1.1  mrg   code = TREE_CODE (t1);
   1571  1.1  mrg   switch (code)
   1572  1.1  mrg     {
   1573  1.1  mrg     case INTEGER_CST:
   1574  1.1  mrg       return tree_int_cst_compare (t1, t2);
   1575  1.1  mrg 
   1576  1.1  mrg     case STRING_CST:
   1577  1.1  mrg       if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
   1578  1.1  mrg 	return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
   1579  1.1  mrg       return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
   1580  1.1  mrg 		     TREE_STRING_LENGTH (t1));
   1581  1.1  mrg 
   1582  1.1  mrg     case SSA_NAME:
   1583  1.1  mrg       if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
   1584  1.1  mrg 	return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
   1585  1.1  mrg       break;
   1586  1.1  mrg 
   1587  1.1  mrg     default:
   1588  1.1  mrg       if (POLY_INT_CST_P (t1))
   1589  1.1  mrg 	return compare_sizes_for_sort (wi::to_poly_widest (t1),
   1590  1.1  mrg 				       wi::to_poly_widest (t2));
   1591  1.1  mrg 
   1592  1.1  mrg       tclass = TREE_CODE_CLASS (code);
   1593  1.1  mrg 
   1594  1.1  mrg       /* For decls, compare their UIDs.  */
   1595  1.1  mrg       if (tclass == tcc_declaration)
   1596  1.1  mrg 	{
   1597  1.1  mrg 	  if (DECL_UID (t1) != DECL_UID (t2))
   1598  1.1  mrg 	    return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
   1599  1.1  mrg 	  break;
   1600  1.1  mrg 	}
   1601  1.1  mrg       /* For expressions, compare their operands recursively.  */
   1602  1.1  mrg       else if (IS_EXPR_CODE_CLASS (tclass))
   1603  1.1  mrg 	{
   1604  1.1  mrg 	  for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
   1605  1.1  mrg 	    {
   1606  1.1  mrg 	      cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
   1607  1.1  mrg 					   TREE_OPERAND (t2, i));
   1608  1.1  mrg 	      if (cmp != 0)
   1609  1.1  mrg 		return cmp;
   1610  1.1  mrg 	    }
   1611  1.1  mrg 	}
   1612  1.1  mrg       else
   1613  1.1  mrg 	gcc_unreachable ();
   1614  1.1  mrg     }
   1615  1.1  mrg 
   1616  1.1  mrg   return 0;
   1617  1.1  mrg }
   1618  1.1  mrg 
   1619  1.1  mrg /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
   1620  1.1  mrg    check.  */
   1621  1.1  mrg 
   1622  1.1  mrg opt_result
   1623  1.1  mrg runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
   1624  1.1  mrg {
   1625  1.1  mrg   if (dump_enabled_p ())
   1626  1.1  mrg     dump_printf (MSG_NOTE,
   1627  1.1  mrg 		 "consider run-time aliasing test between %T and %T\n",
   1628  1.1  mrg 		 DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
   1629  1.1  mrg 
   1630  1.1  mrg   if (!speed_p)
   1631  1.1  mrg     return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
   1632  1.1  mrg 				   "runtime alias check not supported when"
   1633  1.1  mrg 				   " optimizing for size.\n");
   1634  1.1  mrg 
   1635  1.1  mrg   /* FORNOW: We don't support versioning with outer-loop in either
   1636  1.1  mrg      vectorization or loop distribution.  */
   1637  1.1  mrg   if (loop != NULL && loop->inner != NULL)
   1638  1.1  mrg     return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
   1639  1.1  mrg 				   "runtime alias check not supported for"
   1640  1.1  mrg 				   " outer loop.\n");
   1641  1.1  mrg 
   1642  1.1  mrg   /* FORNOW: We don't support handling different address spaces.  */
   1643  1.1  mrg   if (TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_A (ddr)))))
   1644  1.1  mrg       != TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_B (ddr))))))
   1645  1.1  mrg     return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
   1646  1.1  mrg 				   "runtime alias check between different "
   1647  1.1  mrg 				   "address spaces not supported.\n");
   1648  1.1  mrg 
   1649  1.1  mrg   return opt_result::success ();
   1650  1.1  mrg }
   1651  1.1  mrg 
   1652  1.1  mrg /* Operator == between two dr_with_seg_len objects.
   1653  1.1  mrg 
   1654  1.1  mrg    This equality operator is used to make sure two data refs
   1655  1.1  mrg    are the same one so that we will consider to combine the
   1656  1.1  mrg    aliasing checks of those two pairs of data dependent data
   1657  1.1  mrg    refs.  */
   1658  1.1  mrg 
   1659  1.1  mrg static bool
   1660  1.1  mrg operator == (const dr_with_seg_len& d1,
   1661  1.1  mrg 	     const dr_with_seg_len& d2)
   1662  1.1  mrg {
   1663  1.1  mrg   return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
   1664  1.1  mrg 			   DR_BASE_ADDRESS (d2.dr), 0)
   1665  1.1  mrg 	  && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
   1666  1.1  mrg 	  && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
   1667  1.1  mrg 	  && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
   1668  1.1  mrg 	  && known_eq (d1.access_size, d2.access_size)
   1669  1.1  mrg 	  && d1.align == d2.align);
   1670  1.1  mrg }
   1671  1.1  mrg 
   1672  1.1  mrg /* Comparison function for sorting objects of dr_with_seg_len_pair_t
   1673  1.1  mrg    so that we can combine aliasing checks in one scan.  */
   1674  1.1  mrg 
   1675  1.1  mrg static int
   1676  1.1  mrg comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
   1677  1.1  mrg {
   1678  1.1  mrg   const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
   1679  1.1  mrg   const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
   1680  1.1  mrg   const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
   1681  1.1  mrg   const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
   1682  1.1  mrg 
   1683  1.1  mrg   /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
   1684  1.1  mrg      if a and c have the same basic address snd step, and b and d have the same
   1685  1.1  mrg      address and step.  Therefore, if any a&c or b&d don't have the same address
   1686  1.1  mrg      and step, we don't care the order of those two pairs after sorting.  */
   1687  1.1  mrg   int comp_res;
   1688  1.1  mrg 
   1689  1.1  mrg   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
   1690  1.1  mrg 					 DR_BASE_ADDRESS (b1.dr))) != 0)
   1691  1.1  mrg     return comp_res;
   1692  1.1  mrg   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
   1693  1.1  mrg 					 DR_BASE_ADDRESS (b2.dr))) != 0)
   1694  1.1  mrg     return comp_res;
   1695  1.1  mrg   if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
   1696  1.1  mrg 					 DR_STEP (b1.dr))) != 0)
   1697  1.1  mrg     return comp_res;
   1698  1.1  mrg   if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
   1699  1.1  mrg 					 DR_STEP (b2.dr))) != 0)
   1700  1.1  mrg     return comp_res;
   1701  1.1  mrg   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
   1702  1.1  mrg 					 DR_OFFSET (b1.dr))) != 0)
   1703  1.1  mrg     return comp_res;
   1704  1.1  mrg   if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
   1705  1.1  mrg 					 DR_INIT (b1.dr))) != 0)
   1706  1.1  mrg     return comp_res;
   1707  1.1  mrg   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
   1708  1.1  mrg 					 DR_OFFSET (b2.dr))) != 0)
   1709  1.1  mrg     return comp_res;
   1710  1.1  mrg   if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
   1711  1.1  mrg 					 DR_INIT (b2.dr))) != 0)
   1712  1.1  mrg     return comp_res;
   1713  1.1  mrg 
   1714  1.1  mrg   return 0;
   1715  1.1  mrg }
   1716  1.1  mrg 
   1717  1.1  mrg /* Dump information about ALIAS_PAIR, indenting each line by INDENT.  */
   1718  1.1  mrg 
   1719  1.1  mrg static void
   1720  1.1  mrg dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
   1721  1.1  mrg {
   1722  1.1  mrg   dump_printf (MSG_NOTE, "%sreference:      %T vs. %T\n", indent,
   1723  1.1  mrg 	       DR_REF (alias_pair->first.dr),
   1724  1.1  mrg 	       DR_REF (alias_pair->second.dr));
   1725  1.1  mrg 
   1726  1.1  mrg   dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
   1727  1.1  mrg 	       alias_pair->first.seg_len);
   1728  1.1  mrg   if (!operand_equal_p (alias_pair->first.seg_len,
   1729  1.1  mrg 			alias_pair->second.seg_len, 0))
   1730  1.1  mrg     dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);
   1731  1.1  mrg 
   1732  1.1  mrg   dump_printf (MSG_NOTE, "\n%saccess size:    ", indent);
   1733  1.1  mrg   dump_dec (MSG_NOTE, alias_pair->first.access_size);
   1734  1.1  mrg   if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
   1735  1.1  mrg     {
   1736  1.1  mrg       dump_printf (MSG_NOTE, " vs. ");
   1737  1.1  mrg       dump_dec (MSG_NOTE, alias_pair->second.access_size);
   1738  1.1  mrg     }
   1739  1.1  mrg 
   1740  1.1  mrg   dump_printf (MSG_NOTE, "\n%salignment:      %d", indent,
   1741  1.1  mrg 	       alias_pair->first.align);
   1742  1.1  mrg   if (alias_pair->first.align != alias_pair->second.align)
   1743  1.1  mrg     dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);
   1744  1.1  mrg 
   1745  1.1  mrg   dump_printf (MSG_NOTE, "\n%sflags:         ", indent);
   1746  1.1  mrg   if (alias_pair->flags & DR_ALIAS_RAW)
   1747  1.1  mrg     dump_printf (MSG_NOTE, " RAW");
   1748  1.1  mrg   if (alias_pair->flags & DR_ALIAS_WAR)
   1749  1.1  mrg     dump_printf (MSG_NOTE, " WAR");
   1750  1.1  mrg   if (alias_pair->flags & DR_ALIAS_WAW)
   1751  1.1  mrg     dump_printf (MSG_NOTE, " WAW");
   1752  1.1  mrg   if (alias_pair->flags & DR_ALIAS_ARBITRARY)
   1753  1.1  mrg     dump_printf (MSG_NOTE, " ARBITRARY");
   1754  1.1  mrg   if (alias_pair->flags & DR_ALIAS_SWAPPED)
   1755  1.1  mrg     dump_printf (MSG_NOTE, " SWAPPED");
   1756  1.1  mrg   if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
   1757  1.1  mrg     dump_printf (MSG_NOTE, " UNSWAPPED");
   1758  1.1  mrg   if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
   1759  1.1  mrg     dump_printf (MSG_NOTE, " MIXED_STEPS");
   1760  1.1  mrg   if (alias_pair->flags == 0)
   1761  1.1  mrg     dump_printf (MSG_NOTE, " <none>");
   1762  1.1  mrg   dump_printf (MSG_NOTE, "\n");
   1763  1.1  mrg }
   1764  1.1  mrg 
   1765  1.1  mrg /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
   1766  1.1  mrg    FACTOR is number of iterations that each data reference is accessed.
   1767  1.1  mrg 
   1768  1.1  mrg    Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
   1769  1.1  mrg    we create an expression:
   1770  1.1  mrg 
   1771  1.1  mrg    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
   1772  1.1  mrg    || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
   1773  1.1  mrg 
   1774  1.1  mrg    for aliasing checks.  However, in some cases we can decrease the number
   1775  1.1  mrg    of checks by combining two checks into one.  For example, suppose we have
   1776  1.1  mrg    another pair of data refs store_ptr_0 & load_ptr_1, and if the following
   1777  1.1  mrg    condition is satisfied:
   1778  1.1  mrg 
   1779  1.1  mrg    load_ptr_0 < load_ptr_1  &&
   1780  1.1  mrg    load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
   1781  1.1  mrg 
   1782  1.1  mrg    (this condition means, in each iteration of vectorized loop, the accessed
   1783  1.1  mrg    memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
   1784  1.1  mrg    load_ptr_1.)
   1785  1.1  mrg 
   1786  1.1  mrg    we then can use only the following expression to finish the alising checks
   1787  1.1  mrg    between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
   1788  1.1  mrg 
   1789  1.1  mrg    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
   1790  1.1  mrg    || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
   1791  1.1  mrg 
   1792  1.1  mrg    Note that we only consider that load_ptr_0 and load_ptr_1 have the same
   1793  1.1  mrg    basic address.  */
   1794  1.1  mrg 
   1795  1.1  mrg void
   1796  1.1  mrg prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
   1797  1.1  mrg 			       poly_uint64)
   1798  1.1  mrg {
   1799  1.1  mrg   if (alias_pairs->is_empty ())
   1800  1.1  mrg     return;
   1801  1.1  mrg 
   1802  1.1  mrg   /* Canonicalize each pair so that the base components are ordered wrt
   1803  1.1  mrg      data_ref_compare_tree.  This allows the loop below to merge more
   1804  1.1  mrg      cases.  */
   1805  1.1  mrg   unsigned int i;
   1806  1.1  mrg   dr_with_seg_len_pair_t *alias_pair;
   1807  1.1  mrg   FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
   1808  1.1  mrg     {
   1809  1.1  mrg       data_reference_p dr_a = alias_pair->first.dr;
   1810  1.1  mrg       data_reference_p dr_b = alias_pair->second.dr;
   1811  1.1  mrg       int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
   1812  1.1  mrg 					    DR_BASE_ADDRESS (dr_b));
   1813  1.1  mrg       if (comp_res == 0)
   1814  1.1  mrg 	comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
   1815  1.1  mrg       if (comp_res == 0)
   1816  1.1  mrg 	comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
   1817  1.1  mrg       if (comp_res > 0)
   1818  1.1  mrg 	{
   1819  1.1  mrg 	  std::swap (alias_pair->first, alias_pair->second);
   1820  1.1  mrg 	  alias_pair->flags |= DR_ALIAS_SWAPPED;
   1821  1.1  mrg 	}
   1822  1.1  mrg       else
   1823  1.1  mrg 	alias_pair->flags |= DR_ALIAS_UNSWAPPED;
   1824  1.1  mrg     }
   1825  1.1  mrg 
   1826  1.1  mrg   /* Sort the collected data ref pairs so that we can scan them once to
   1827  1.1  mrg      combine all possible aliasing checks.  */
   1828  1.1  mrg   alias_pairs->qsort (comp_dr_with_seg_len_pair);
   1829  1.1  mrg 
   1830  1.1  mrg   /* Scan the sorted dr pairs and check if we can combine alias checks
   1831  1.1  mrg      of two neighboring dr pairs.  */
   1832  1.1  mrg   unsigned int last = 0;
   1833  1.1  mrg   for (i = 1; i < alias_pairs->length (); ++i)
   1834  1.1  mrg     {
   1835  1.1  mrg       /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
   1836  1.1  mrg       dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
   1837  1.1  mrg       dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];
   1838  1.1  mrg 
   1839  1.1  mrg       dr_with_seg_len *dr_a1 = &alias_pair1->first;
   1840  1.1  mrg       dr_with_seg_len *dr_b1 = &alias_pair1->second;
   1841  1.1  mrg       dr_with_seg_len *dr_a2 = &alias_pair2->first;
   1842  1.1  mrg       dr_with_seg_len *dr_b2 = &alias_pair2->second;
   1843  1.1  mrg 
   1844  1.1  mrg       /* Remove duplicate data ref pairs.  */
   1845  1.1  mrg       if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
   1846  1.1  mrg 	{
   1847  1.1  mrg 	  if (dump_enabled_p ())
   1848  1.1  mrg 	    dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
   1849  1.1  mrg 			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
   1850  1.1  mrg 			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
   1851  1.1  mrg 	  alias_pair1->flags |= alias_pair2->flags;
   1852  1.1  mrg 	  continue;
   1853  1.1  mrg 	}
   1854  1.1  mrg 
   1855  1.1  mrg       /* Assume that we won't be able to merge the pairs, then correct
   1856  1.1  mrg 	 if we do.  */
   1857  1.1  mrg       last += 1;
   1858  1.1  mrg       if (last != i)
   1859  1.1  mrg 	(*alias_pairs)[last] = (*alias_pairs)[i];
   1860  1.1  mrg 
   1861  1.1  mrg       if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
   1862  1.1  mrg 	{
   1863  1.1  mrg 	  /* We consider the case that DR_B1 and DR_B2 are same memrefs,
   1864  1.1  mrg 	     and DR_A1 and DR_A2 are two consecutive memrefs.  */
   1865  1.1  mrg 	  if (*dr_a1 == *dr_a2)
   1866  1.1  mrg 	    {
   1867  1.1  mrg 	      std::swap (dr_a1, dr_b1);
   1868  1.1  mrg 	      std::swap (dr_a2, dr_b2);
   1869  1.1  mrg 	    }
   1870  1.1  mrg 
   1871  1.1  mrg 	  poly_int64 init_a1, init_a2;
   1872  1.1  mrg 	  /* Only consider cases in which the distance between the initial
   1873  1.1  mrg 	     DR_A1 and the initial DR_A2 is known at compile time.  */
   1874  1.1  mrg 	  if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
   1875  1.1  mrg 				DR_BASE_ADDRESS (dr_a2->dr), 0)
   1876  1.1  mrg 	      || !operand_equal_p (DR_OFFSET (dr_a1->dr),
   1877  1.1  mrg 				   DR_OFFSET (dr_a2->dr), 0)
   1878  1.1  mrg 	      || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
   1879  1.1  mrg 	      || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
   1880  1.1  mrg 	    continue;
   1881  1.1  mrg 
   1882  1.1  mrg 	  /* Don't combine if we can't tell which one comes first.  */
   1883  1.1  mrg 	  if (!ordered_p (init_a1, init_a2))
   1884  1.1  mrg 	    continue;
   1885  1.1  mrg 
   1886  1.1  mrg 	  /* Work out what the segment length would be if we did combine
   1887  1.1  mrg 	     DR_A1 and DR_A2:
   1888  1.1  mrg 
   1889  1.1  mrg 	     - If DR_A1 and DR_A2 have equal lengths, that length is
   1890  1.1  mrg 	       also the combined length.
   1891  1.1  mrg 
   1892  1.1  mrg 	     - If DR_A1 and DR_A2 both have negative "lengths", the combined
   1893  1.1  mrg 	       length is the lower bound on those lengths.
   1894  1.1  mrg 
   1895  1.1  mrg 	     - If DR_A1 and DR_A2 both have positive lengths, the combined
   1896  1.1  mrg 	       length is the upper bound on those lengths.
   1897  1.1  mrg 
   1898  1.1  mrg 	     Other cases are unlikely to give a useful combination.
   1899  1.1  mrg 
   1900  1.1  mrg 	     The lengths both have sizetype, so the sign is taken from
   1901  1.1  mrg 	     the step instead.  */
   1902  1.1  mrg 	  poly_uint64 new_seg_len = 0;
   1903  1.1  mrg 	  bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
   1904  1.1  mrg 						 dr_a2->seg_len, 0);
   1905  1.1  mrg 	  if (new_seg_len_p)
   1906  1.1  mrg 	    {
   1907  1.1  mrg 	      poly_uint64 seg_len_a1, seg_len_a2;
   1908  1.1  mrg 	      if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
   1909  1.1  mrg 		  || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
   1910  1.1  mrg 		continue;
   1911  1.1  mrg 
   1912  1.1  mrg 	      tree indicator_a = dr_direction_indicator (dr_a1->dr);
   1913  1.1  mrg 	      if (TREE_CODE (indicator_a) != INTEGER_CST)
   1914  1.1  mrg 		continue;
   1915  1.1  mrg 
   1916  1.1  mrg 	      tree indicator_b = dr_direction_indicator (dr_a2->dr);
   1917  1.1  mrg 	      if (TREE_CODE (indicator_b) != INTEGER_CST)
   1918  1.1  mrg 		continue;
   1919  1.1  mrg 
   1920  1.1  mrg 	      int sign_a = tree_int_cst_sgn (indicator_a);
   1921  1.1  mrg 	      int sign_b = tree_int_cst_sgn (indicator_b);
   1922  1.1  mrg 
   1923  1.1  mrg 	      if (sign_a <= 0 && sign_b <= 0)
   1924  1.1  mrg 		new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
   1925  1.1  mrg 	      else if (sign_a >= 0 && sign_b >= 0)
   1926  1.1  mrg 		new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
   1927  1.1  mrg 	      else
   1928  1.1  mrg 		continue;
   1929  1.1  mrg 	    }
   1930  1.1  mrg 	  /* At this point we're committed to merging the refs.  */
   1931  1.1  mrg 
   1932  1.1  mrg 	  /* Make sure dr_a1 starts left of dr_a2.  */
   1933  1.1  mrg 	  if (maybe_gt (init_a1, init_a2))
   1934  1.1  mrg 	    {
   1935  1.1  mrg 	      std::swap (*dr_a1, *dr_a2);
   1936  1.1  mrg 	      std::swap (init_a1, init_a2);
   1937  1.1  mrg 	    }
   1938  1.1  mrg 
   1939  1.1  mrg 	  /* The DR_Bs are equal, so only the DR_As can introduce
   1940  1.1  mrg 	     mixed steps.  */
   1941  1.1  mrg 	  if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
   1942  1.1  mrg 	    alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;
   1943  1.1  mrg 
   1944  1.1  mrg 	  if (new_seg_len_p)
   1945  1.1  mrg 	    {
   1946  1.1  mrg 	      dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
   1947  1.1  mrg 					      new_seg_len);
   1948  1.1  mrg 	      dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
   1949  1.1  mrg 	    }
   1950  1.1  mrg 
   1951  1.1  mrg 	  /* This is always positive due to the swap above.  */
   1952  1.1  mrg 	  poly_uint64 diff = init_a2 - init_a1;
   1953  1.1  mrg 
   1954  1.1  mrg 	  /* The new check will start at DR_A1.  Make sure that its access
   1955  1.1  mrg 	     size encompasses the initial DR_A2.  */
   1956  1.1  mrg 	  if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
   1957  1.1  mrg 	    {
   1958  1.1  mrg 	      dr_a1->access_size = upper_bound (dr_a1->access_size,
   1959  1.1  mrg 						diff + dr_a2->access_size);
   1960  1.1  mrg 	      unsigned int new_align = known_alignment (dr_a1->access_size);
   1961  1.1  mrg 	      dr_a1->align = MIN (dr_a1->align, new_align);
   1962  1.1  mrg 	    }
   1963  1.1  mrg 	  if (dump_enabled_p ())
   1964  1.1  mrg 	    dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
   1965  1.1  mrg 			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
   1966  1.1  mrg 			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
   1967  1.1  mrg 	  alias_pair1->flags |= alias_pair2->flags;
   1968  1.1  mrg 	  last -= 1;
   1969  1.1  mrg 	}
   1970  1.1  mrg     }
   1971  1.1  mrg   alias_pairs->truncate (last + 1);
   1972  1.1  mrg 
   1973  1.1  mrg   /* Try to restore the original dr_with_seg_len order within each
   1974  1.1  mrg      dr_with_seg_len_pair_t.  If we ended up combining swapped and
   1975  1.1  mrg      unswapped pairs into the same check, we have to invalidate any
   1976  1.1  mrg      RAW, WAR and WAW information for it.  */
   1977  1.1  mrg   if (dump_enabled_p ())
   1978  1.1  mrg     dump_printf (MSG_NOTE, "merged alias checks:\n");
   1979  1.1  mrg   FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
   1980  1.1  mrg     {
   1981  1.1  mrg       unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
   1982  1.1  mrg       unsigned int swapped = (alias_pair->flags & swap_mask);
   1983  1.1  mrg       if (swapped == DR_ALIAS_SWAPPED)
   1984  1.1  mrg 	std::swap (alias_pair->first, alias_pair->second);
   1985  1.1  mrg       else if (swapped != DR_ALIAS_UNSWAPPED)
   1986  1.1  mrg 	alias_pair->flags |= DR_ALIAS_ARBITRARY;
   1987  1.1  mrg       alias_pair->flags &= ~swap_mask;
   1988  1.1  mrg       if (dump_enabled_p ())
   1989  1.1  mrg 	dump_alias_pair (alias_pair, "  ");
   1990  1.1  mrg     }
   1991  1.1  mrg }
   1992  1.1  mrg 
/* A subroutine of create_intersect_range_checks, with a subset of the
   same arguments.  Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
   to optimize cases in which the references form a simple RAW, WAR or
   WAW dependence.  Return true on success, storing the generated check
   in *COND_EXPR; return false if this form of test cannot be used.  */

static bool
create_ifn_alias_checks (tree *cond_expr,
			 const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len& dr_a = alias_pair.first;
  const dr_with_seg_len& dr_b = alias_pair.second;

  /* Check for cases in which:

     (a) we have a known RAW, WAR or WAW dependence;
     (b) the accesses are well-ordered in both the original and new code
	 (see the comment above the DR_ALIAS_* flags for details); and
     (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
  if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
    return false;

  /* Make sure that both DRs access the same pattern of bytes,
     with a constant length and step.  */
  poly_uint64 seg_len;
  if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
      || !poly_int_tree_p (dr_a.seg_len, &seg_len)
      || maybe_ne (dr_a.access_size, dr_b.access_size)
      || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
      || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
    return false;

  unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
  tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
  tree addr_b = DR_BASE_ADDRESS (dr_b.dr);

  /* See whether the target supports what we want to do.  WAW checks are
     equivalent to WAR checks here.  */
  internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
		     ? IFN_CHECK_RAW_PTRS
		     : IFN_CHECK_WAR_PTRS);
  unsigned int align = MIN (dr_a.align, dr_b.align);
  /* First try a segment length rounded up to a whole number of steps;
     if the target cannot handle that, fall back to covering only the
     final access.  */
  poly_uint64 full_length = seg_len + bytes;
  if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
					   full_length, align))
    {
      full_length = seg_len + dr_a.access_size;
      if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
					       full_length, align))
	return false;
    }

  /* Commit to using this form of test.  Build the full start address of
     each reference: base + offset + init.  */
  addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
  addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));

  addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
  addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));

  /* The internal function takes (addr_a, addr_b, length, align) and
     returns a boolean that is true when the references are independent.  */
  *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
					     ifn, boolean_type_node,
					     4, addr_a, addr_b,
					     size_int (full_length),
					     size_int (align));

  if (dump_enabled_p ())
    {
      if (ifn == IFN_CHECK_RAW_PTRS)
	dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
      else
	dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
    }
  return true;
}
   2066  1.1  mrg 
/* Try to generate a runtime condition that is true if ALIAS_PAIR is
   free of aliases, using a condition based on index values instead
   of a condition based on addresses.  Return true on success,
   storing the condition in *COND_EXPR.

   This can only be done if the two data references in ALIAS_PAIR access
   the same array object and the index is the only difference.  For example,
   if the two data references are DR_A and DR_B:

                       DR_A                           DR_B
      data-ref         arr[i]                         arr[j]
      base_object      arr                            arr
      index            {i_0, +, 1}_loop               {j_0, +, 1}_loop

   The addresses and their index are like:

        |<- ADDR_A    ->|          |<- ADDR_B    ->|
     ------------------------------------------------------->
        |   |   |   |   |          |   |   |   |   |
     ------------------------------------------------------->
        i_0 ...         i_0+4      j_0 ...         j_0+4

   We can create an expression based on the index rather than the address:

     (unsigned) (i_0 - j_0 + 3) <= 6

   i.e. the indices are less than 4 apart.

   Note that the evolution step of the index needs to be taken into
   account in the comparison.  */

static bool
create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
				     const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len &dr_a = alias_pair.first;
  const dr_with_seg_len &dr_b = alias_pair.second;
  /* The technique requires a single common step and matching access
     structure, so give up for mixed-step pairs, zero steps, or
     references with different dimensionality.  */
  if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
      || integer_zerop (DR_STEP (dr_a.dr))
      || integer_zerop (DR_STEP (dr_b.dr))
      || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
    return false;

  /* Both segment lengths must be compile-time (poly-)constants.  */
  poly_uint64 seg_len1, seg_len2;
  if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
      || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
    return false;

  if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
    return false;

  /* The references must access the same array object...  */
  if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
    return false;

  /* ...with the same (constant) step.  */
  if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
    return false;

  gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);

  /* Normalize to a positive step; for negative steps the segment
     lengths are negative too, so negate them as well.  */
  bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
  unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
  if (neg_step)
    {
      abs_step = -abs_step;
      seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
      seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
    }

  /* Infer the number of iterations with which the memory segment is accessed
     by DR.  In other words, alias is checked if memory segment accessed by
     DR_A in some iterations intersects with memory segment accessed by DR_B
     in the same number of iterations.
     Note that the segment length is a linear function of the number of
     iterations with DR_STEP as the coefficient.  */
  poly_uint64 niter_len1, niter_len2;
  if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
      || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
    return false;

  /* Divide each access size by the byte step, rounding up.  */
  poly_uint64 niter_access1, niter_access2;
  if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
			abs_step, &niter_access1)
      || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
			   abs_step, &niter_access2))
    return false;

  /* True if only WAR/WAW dependences are possible, which permits the
     cheaper one-sided test derived below.  */
  bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;

  /* Find the single dimension in which the two access functions differ;
     all other dimensions must use identical indices.  */
  int found = -1;
  for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
    {
      tree access1 = DR_ACCESS_FN (dr_a.dr, i);
      tree access2 = DR_ACCESS_FN (dr_b.dr, i);
      /* Two indices must be the same if they are not scev, or not scev wrto
	 current loop being vectorized.  */
      if (TREE_CODE (access1) != POLYNOMIAL_CHREC
	  || TREE_CODE (access2) != POLYNOMIAL_CHREC
	  || CHREC_VARIABLE (access1) != (unsigned)loop->num
	  || CHREC_VARIABLE (access2) != (unsigned)loop->num)
	{
	  if (operand_equal_p (access1, access2, 0))
	    continue;

	  return false;
	}
      if (found >= 0)
	return false;
      found = i;
    }

  /* Ought not to happen in practice, since if all accesses are equal then the
     alias should be decidable at compile time.  */
  if (found < 0)
    return false;

  /* The two indices must have the same step.  */
  tree access1 = DR_ACCESS_FN (dr_a.dr, found);
  tree access2 = DR_ACCESS_FN (dr_b.dr, found);
  if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
    return false;

  tree idx_step = CHREC_RIGHT (access1);
  /* Index must have const step, otherwise DR_STEP won't be constant.  */
  gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
  /* Index must evaluate in the same direction as DR.  */
  gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);

  tree min1 = CHREC_LEFT (access1);
  tree min2 = CHREC_LEFT (access2);
  if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
    return false;

  /* Ideally, alias can be checked against loop's control IV, but we
     need to prove linear mapping between control IV and reference
     index.  Although that should be true, we check against (array)
     index of data reference.  Like segment length, index length is
     a linear function of the number of iterations with index_step as
     the coefficient, i.e, niter_len * idx_step.  */
  offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
					      SIGNED);
  if (neg_step)
    abs_idx_step = -abs_idx_step;
  poly_offset_int idx_len1 = abs_idx_step * niter_len1;
  poly_offset_int idx_len2 = abs_idx_step * niter_len2;
  poly_offset_int idx_access1 = abs_idx_step * niter_access1;
  poly_offset_int idx_access2 = abs_idx_step * niter_access2;

  gcc_assert (known_ge (idx_len1, 0)
	      && known_ge (idx_len2, 0)
	      && known_ge (idx_access1, 0)
	      && known_ge (idx_access2, 0));

  /* Each access has the following pattern, with lengths measured
     in units of INDEX:

	  <-- idx_len -->
	  <--- A: -ve step --->
	  +-----+-------+-----+-------+-----+
	  | n-1 | ..... |  0  | ..... | n-1 |
	  +-----+-------+-----+-------+-----+
			<--- B: +ve step --->
			<-- idx_len -->
			|
		       min

     where "n" is the number of scalar iterations covered by the segment
     and where each access spans idx_access units.

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     When checking for general overlap, we need to test whether
     the range:

       [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]

     overlaps:

       [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]

     where:

	low_offsetN = +ve step ? 0 : -idx_lenN;
       high_offsetN = +ve step ? idx_lenN : 0;

     This is equivalent to testing whether:

       min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
       && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1

     Converting this into a single test, there is an overlap if:

       0 <= min2 - min1 + bias <= limit

     where  bias = high_offset2 + idx_access2 - 1 - low_offset1
	   limit = (high_offset1 - low_offset1 + idx_access1 - 1)
		 + (high_offset2 - low_offset2 + idx_access2 - 1)
      i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1

     Combining the tests requires limit to be computable in an unsigned
     form of the index type; if it isn't, we fall back to the usual
     pointer-based checks.

     We can do better if DR_B is a write and if DR_A and DR_B are
     well-ordered in both the original and the new code (see the
     comment above the DR_ALIAS_* flags for details).  In this case
     we know that for each i in [0, n-1], the write performed by
     access i of DR_B occurs after access numbers j<=i of DR_A in
     both the original and the new code.  Any write or anti
     dependencies wrt those DR_A accesses are therefore maintained.

     We just need to make sure that each individual write in DR_B does not
     overlap any higher-indexed access in DR_A; such DR_A accesses happen
     after the DR_B access in the original code but happen before it in
     the new code.

     We know the steps for both accesses are equal, so by induction, we
     just need to test whether the first write of DR_B overlaps a later
     access of DR_A.  In other words, we need to move min1 along by
     one iteration:

       min1' = min1 + idx_step

     and use the ranges:

       [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]

     and:

       [min2, min2 + idx_access2 - 1]

     where:

	low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
       high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0.  */
  if (waw_or_war_p)
    idx_len1 -= abs_idx_step;

  poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
  if (!waw_or_war_p)
    limit += idx_len2;

  tree utype = unsigned_type_for (TREE_TYPE (min1));
  if (!wi::fits_to_tree_p (limit, utype))
    return false;

  poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
  poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
  poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
  /* Equivalent to adding IDX_STEP to MIN1.  */
  if (waw_or_war_p)
    bias -= wi::to_offset (idx_step);

  /* Build (unsigned) (min2 - min1 + bias) > limit, which is true when
     the two references are independent (see derivation above).  */
  tree subject = fold_build2 (MINUS_EXPR, utype,
			      fold_convert (utype, min2),
			      fold_convert (utype, min1));
  subject = fold_build2 (PLUS_EXPR, utype, subject,
			 wide_int_to_tree (utype, bias));
  tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
				     wide_int_to_tree (utype, limit));
  if (*cond_expr)
    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
			      *cond_expr, part_cond_expr);
  else
    *cond_expr = part_cond_expr;
  if (dump_enabled_p ())
    {
      if (waw_or_war_p)
	dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
      else
	dump_printf (MSG_NOTE, "using an index-based overlap test\n");
    }
  return true;
}
   2341  1.1  mrg 
/* A subroutine of create_intersect_range_checks, with a subset of the
   same arguments.  Try to optimize cases in which the second access
   is a write and in which some overlap is valid.  Return true on
   success, storing the generated check in *COND_EXPR.  */

static bool
create_waw_or_war_checks (tree *cond_expr,
			  const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len& dr_a = alias_pair.first;
  const dr_with_seg_len& dr_b = alias_pair.second;

  /* Check for cases in which:

     (a) DR_B is always a write;
     (b) the accesses are well-ordered in both the original and new code
	 (see the comment above the DR_ALIAS_* flags for details); and
     (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
  if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
    return false;

  /* Check for equal (but possibly variable) steps.  */
  tree step = DR_STEP (dr_a.dr);
  if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
    return false;

  /* Make sure that we can operate on sizetype without loss of precision.  */
  tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
  if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
    return false;

  /* All addresses involved are known to have a common alignment ALIGN.
     We can therefore subtract ALIGN from an exclusive endpoint to get
     an inclusive endpoint.  In the best (and common) case, ALIGN is the
     same as the access sizes of both DRs, and so subtracting ALIGN
     cancels out the addition of an access size.  */
  unsigned int align = MIN (dr_a.align, dr_b.align);
  poly_uint64 last_chunk_a = dr_a.access_size - align;
  poly_uint64 last_chunk_b = dr_b.access_size - align;

  /* Get a boolean expression that is true when the step is negative.  */
  tree indicator = dr_direction_indicator (dr_a.dr);
  tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
			       fold_convert (ssizetype, indicator),
			       ssize_int (0));

  /* Get lengths in sizetype.  */
  tree seg_len_a
    = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
  step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));

  /* Each access has the following pattern:

	  <- |seg_len| ->
	  <--- A: -ve step --->
	  +-----+-------+-----+-------+-----+
	  | n-1 | ..... |  0  | ..... | n-1 |
	  +-----+-------+-----+-------+-----+
			<--- B: +ve step --->
			<- |seg_len| ->
			|
		   base address

     where "n" is the number of scalar iterations covered by the segment.

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     We know that DR_B is a write.  We also know (from checking that
     DR_A and DR_B are well-ordered) that for each i in [0, n-1],
     the write performed by access i of DR_B occurs after access numbers
     j<=i of DR_A in both the original and the new code.  Any write or
     anti dependencies wrt those DR_A accesses are therefore maintained.

     We just need to make sure that each individual write in DR_B does not
     overlap any higher-indexed access in DR_A; such DR_A accesses happen
     after the DR_B access in the original code but happen before it in
     the new code.

     We know the steps for both accesses are equal, so by induction, we
     just need to test whether the first write of DR_B overlaps a later
     access of DR_A.  In other words, we need to move addr_a along by
     one iteration:

       addr_a' = addr_a + step

     and check whether:

       [addr_b, addr_b + last_chunk_b]

     overlaps:

       [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]

     where [low_offset_a, high_offset_a] spans accesses [1, n-1].  I.e.:

	low_offset_a = +ve step ? 0 : seg_len_a - step
       high_offset_a = +ve step ? seg_len_a - step : 0

     This is equivalent to testing whether:

       addr_a' + low_offset_a <= addr_b + last_chunk_b
       && addr_b <= addr_a' + high_offset_a + last_chunk_a

     Converting this into a single test, there is an overlap if:

       0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit

     where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b

     If DR_A is performed, limit + |step| - last_chunk_b is known to be
     less than the size of the object underlying DR_A.  We also know
     that last_chunk_b <= |step|; this is checked elsewhere if it isn't
     guaranteed at compile time.  There can therefore be no overflow if
     "limit" is calculated in an unsigned type with pointer precision.  */
  tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
					 DR_OFFSET (dr_a.dr));
  addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));

  tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
					 DR_OFFSET (dr_b.dr));
  addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));

  /* Advance ADDR_A by one iteration and adjust the length to compensate.  */
  addr_a = fold_build_pointer_plus (addr_a, step);
  tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
					   seg_len_a, step);
  /* Wrap non-constant subexpressions in SAVE_EXPRs so that they are
     evaluated only once even though they appear in several places.  */
  if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
    seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);

  tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
				   seg_len_a_minus_step, size_zero_node);
  if (!CONSTANT_CLASS_P (low_offset_a))
    low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);

  /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
     but it's usually more efficient to reuse the LOW_OFFSET_A result.  */
  tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
				    low_offset_a);

  /* The amount added to addr_b - addr_a'.  */
  tree bias = fold_build2 (MINUS_EXPR, sizetype,
			   size_int (last_chunk_b), low_offset_a);

  tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
  limit = fold_build2 (PLUS_EXPR, sizetype, limit,
		       size_int (last_chunk_a + last_chunk_b));

  /* Build the final test: the accesses are independent when
     (sizetype) (addr_b - addr_a' + bias) > limit.  */
  tree subject = fold_build2 (MINUS_EXPR, sizetype,
			      fold_convert (sizetype, addr_b),
			      fold_convert (sizetype, addr_a));
  subject = fold_build2 (PLUS_EXPR, sizetype, subject, bias);

  *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
  return true;
}
   2499  1.1  mrg 
/* If ALIGN is nonzero, set up *SEG_MIN_OUT and *SEG_MAX_OUT so that for
   every address ADDR accessed by D:

     *SEG_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEG_MAX_OUT

   In this case, every element accessed by D is aligned to at least
   ALIGN bytes.

   If ALIGN is zero then instead set *SEG_MAX_OUT so that:

     *SEG_MIN_OUT <= ADDR < *SEG_MAX_OUT.  */
   2511  1.1  mrg 
   2512  1.1  mrg static void
   2513  1.1  mrg get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
   2514  1.1  mrg 		     tree *seg_max_out, HOST_WIDE_INT align)
   2515  1.1  mrg {
   2516  1.1  mrg   /* Each access has the following pattern:
   2517  1.1  mrg 
   2518  1.1  mrg 	  <- |seg_len| ->
   2519  1.1  mrg 	  <--- A: -ve step --->
   2520  1.1  mrg 	  +-----+-------+-----+-------+-----+
   2521  1.1  mrg 	  | n-1 | ,.... |  0  | ..... | n-1 |
   2522  1.1  mrg 	  +-----+-------+-----+-------+-----+
   2523  1.1  mrg 			<--- B: +ve step --->
   2524  1.1  mrg 			<- |seg_len| ->
   2525  1.1  mrg 			|
   2526  1.1  mrg 		   base address
   2527  1.1  mrg 
   2528  1.1  mrg      where "n" is the number of scalar iterations covered by the segment.
   2529  1.1  mrg      (This should be VF for a particular pair if we know that both steps
   2530  1.1  mrg      are the same, otherwise it will be the full number of scalar loop
   2531  1.1  mrg      iterations.)
   2532  1.1  mrg 
   2533  1.1  mrg      A is the range of bytes accessed when the step is negative,
   2534  1.1  mrg      B is the range when the step is positive.
   2535  1.1  mrg 
   2536  1.1  mrg      If the access size is "access_size" bytes, the lowest addressed byte is:
   2537  1.1  mrg 
   2538  1.1  mrg 	 base + (step < 0 ? seg_len : 0)   [LB]
   2539  1.1  mrg 
   2540  1.1  mrg      and the highest addressed byte is always below:
   2541  1.1  mrg 
   2542  1.1  mrg 	 base + (step < 0 ? 0 : seg_len) + access_size   [UB]
   2543  1.1  mrg 
   2544  1.1  mrg      Thus:
   2545  1.1  mrg 
   2546  1.1  mrg 	 LB <= ADDR < UB
   2547  1.1  mrg 
   2548  1.1  mrg      If ALIGN is nonzero, all three values are aligned to at least ALIGN
   2549  1.1  mrg      bytes, so:
   2550  1.1  mrg 
   2551  1.1  mrg 	 LB <= ADDR <= UB - ALIGN
   2552  1.1  mrg 
   2553  1.1  mrg      where "- ALIGN" folds naturally with the "+ access_size" and often
   2554  1.1  mrg      cancels it out.
   2555  1.1  mrg 
   2556  1.1  mrg      We don't try to simplify LB and UB beyond this (e.g. by using
   2557  1.1  mrg      MIN and MAX based on whether seg_len rather than the stride is
   2558  1.1  mrg      negative) because it is possible for the absolute size of the
   2559  1.1  mrg      segment to overflow the range of a ssize_t.
   2560  1.1  mrg 
   2561  1.1  mrg      Keeping the pointer_plus outside of the cond_expr should allow
   2562  1.1  mrg      the cond_exprs to be shared with other alias checks.  */
   2563  1.1  mrg   tree indicator = dr_direction_indicator (d.dr);
   2564  1.1  mrg   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
   2565  1.1  mrg 			       fold_convert (ssizetype, indicator),
   2566  1.1  mrg 			       ssize_int (0));
   2567  1.1  mrg   tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
   2568  1.1  mrg 					    DR_OFFSET (d.dr));
   2569  1.1  mrg   addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
   2570  1.1  mrg   tree seg_len
   2571  1.1  mrg     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
   2572  1.1  mrg 
   2573  1.1  mrg   tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
   2574  1.1  mrg 				seg_len, size_zero_node);
   2575  1.1  mrg   tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
   2576  1.1  mrg 				size_zero_node, seg_len);
   2577  1.1  mrg   max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
   2578  1.1  mrg 			   size_int (d.access_size - align));
   2579  1.1  mrg 
   2580  1.1  mrg   *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
   2581  1.1  mrg   *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
   2582  1.1  mrg }
   2583  1.1  mrg 
   2584  1.1  mrg /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
   2585  1.1  mrg    storing the condition in *COND_EXPR.  The fallback is to generate a
   2586  1.1  mrg    a test that the two accesses do not overlap:
   2587  1.1  mrg 
   2588  1.1  mrg      end_a <= start_b || end_b <= start_a.  */
   2589  1.1  mrg 
   2590  1.1  mrg static void
   2591  1.1  mrg create_intersect_range_checks (class loop *loop, tree *cond_expr,
   2592  1.1  mrg 			       const dr_with_seg_len_pair_t &alias_pair)
   2593  1.1  mrg {
   2594  1.1  mrg   const dr_with_seg_len& dr_a = alias_pair.first;
   2595  1.1  mrg   const dr_with_seg_len& dr_b = alias_pair.second;
   2596  1.1  mrg   *cond_expr = NULL_TREE;
   2597  1.1  mrg   if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
   2598  1.1  mrg     return;
   2599  1.1  mrg 
   2600  1.1  mrg   if (create_ifn_alias_checks (cond_expr, alias_pair))
   2601  1.1  mrg     return;
   2602  1.1  mrg 
   2603  1.1  mrg   if (create_waw_or_war_checks (cond_expr, alias_pair))
   2604  1.1  mrg     return;
   2605  1.1  mrg 
   2606  1.1  mrg   unsigned HOST_WIDE_INT min_align;
   2607  1.1  mrg   tree_code cmp_code;
   2608  1.1  mrg   /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
   2609  1.1  mrg      are equivalent.  This is just an optimization heuristic.  */
   2610  1.1  mrg   if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
   2611  1.1  mrg       && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
   2612  1.1  mrg     {
   2613  1.1  mrg       /* In this case adding access_size to seg_len is likely to give
   2614  1.1  mrg 	 a simple X * step, where X is either the number of scalar
   2615  1.1  mrg 	 iterations or the vectorization factor.  We're better off
   2616  1.1  mrg 	 keeping that, rather than subtracting an alignment from it.
   2617  1.1  mrg 
   2618  1.1  mrg 	 In this case the maximum values are exclusive and so there is
   2619  1.1  mrg 	 no alias if the maximum of one segment equals the minimum
   2620  1.1  mrg 	 of another.  */
   2621  1.1  mrg       min_align = 0;
   2622  1.1  mrg       cmp_code = LE_EXPR;
   2623  1.1  mrg     }
   2624  1.1  mrg   else
   2625  1.1  mrg     {
   2626  1.1  mrg       /* Calculate the minimum alignment shared by all four pointers,
   2627  1.1  mrg 	 then arrange for this alignment to be subtracted from the
   2628  1.1  mrg 	 exclusive maximum values to get inclusive maximum values.
   2629  1.1  mrg 	 This "- min_align" is cumulative with a "+ access_size"
   2630  1.1  mrg 	 in the calculation of the maximum values.  In the best
   2631  1.1  mrg 	 (and common) case, the two cancel each other out, leaving
   2632  1.1  mrg 	 us with an inclusive bound based only on seg_len.  In the
   2633  1.1  mrg 	 worst case we're simply adding a smaller number than before.
   2634  1.1  mrg 
   2635  1.1  mrg 	 Because the maximum values are inclusive, there is an alias
   2636  1.1  mrg 	 if the maximum value of one segment is equal to the minimum
   2637  1.1  mrg 	 value of the other.  */
   2638  1.1  mrg       min_align = std::min (dr_a.align, dr_b.align);
   2639  1.1  mrg       cmp_code = LT_EXPR;
   2640  1.1  mrg     }
   2641  1.1  mrg 
   2642  1.1  mrg   tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
   2643  1.1  mrg   get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
   2644  1.1  mrg   get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
   2645  1.1  mrg 
   2646  1.1  mrg   *cond_expr
   2647  1.1  mrg     = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
   2648  1.1  mrg 	fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
   2649  1.1  mrg 	fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
   2650  1.1  mrg   if (dump_enabled_p ())
   2651  1.1  mrg     dump_printf (MSG_NOTE, "using an address-based overlap test\n");
   2652  1.1  mrg }
   2653  1.1  mrg 
   2654  1.1  mrg /* Create a conditional expression that represents the run-time checks for
   2655  1.1  mrg    overlapping of address ranges represented by a list of data references
   2656  1.1  mrg    pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
   2657  1.1  mrg    COND_EXPR is the conditional expression to be used in the if statement
   2658  1.1  mrg    that controls which version of the loop gets executed at runtime.  */
   2659  1.1  mrg 
   2660  1.1  mrg void
   2661  1.1  mrg create_runtime_alias_checks (class loop *loop,
   2662  1.1  mrg 			     const vec<dr_with_seg_len_pair_t> *alias_pairs,
   2663  1.1  mrg 			     tree * cond_expr)
   2664  1.1  mrg {
   2665  1.1  mrg   tree part_cond_expr;
   2666  1.1  mrg 
   2667  1.1  mrg   fold_defer_overflow_warnings ();
   2668  1.1  mrg   for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
   2669  1.1  mrg     {
   2670  1.1  mrg       gcc_assert (alias_pair.flags);
   2671  1.1  mrg       if (dump_enabled_p ())
   2672  1.1  mrg 	dump_printf (MSG_NOTE,
   2673  1.1  mrg 		     "create runtime check for data references %T and %T\n",
   2674  1.1  mrg 		     DR_REF (alias_pair.first.dr),
   2675  1.1  mrg 		     DR_REF (alias_pair.second.dr));
   2676  1.1  mrg 
   2677  1.1  mrg       /* Create condition expression for each pair data references.  */
   2678  1.1  mrg       create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
   2679  1.1  mrg       if (*cond_expr)
   2680  1.1  mrg 	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
   2681  1.1  mrg 				  *cond_expr, part_cond_expr);
   2682  1.1  mrg       else
   2683  1.1  mrg 	*cond_expr = part_cond_expr;
   2684  1.1  mrg     }
   2685  1.1  mrg   fold_undefer_and_ignore_overflow_warnings ();
   2686  1.1  mrg }
   2687  1.1  mrg 
   2688  1.1  mrg /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
   2689  1.1  mrg    expressions.  */
   2690  1.1  mrg static bool
   2691  1.1  mrg dr_equal_offsets_p1 (tree offset1, tree offset2)
   2692  1.1  mrg {
   2693  1.1  mrg   bool res;
   2694  1.1  mrg 
   2695  1.1  mrg   STRIP_NOPS (offset1);
   2696  1.1  mrg   STRIP_NOPS (offset2);
   2697  1.1  mrg 
   2698  1.1  mrg   if (offset1 == offset2)
   2699  1.1  mrg     return true;
   2700  1.1  mrg 
   2701  1.1  mrg   if (TREE_CODE (offset1) != TREE_CODE (offset2)
   2702  1.1  mrg       || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
   2703  1.1  mrg     return false;
   2704  1.1  mrg 
   2705  1.1  mrg   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
   2706  1.1  mrg                              TREE_OPERAND (offset2, 0));
   2707  1.1  mrg 
   2708  1.1  mrg   if (!res || !BINARY_CLASS_P (offset1))
   2709  1.1  mrg     return res;
   2710  1.1  mrg 
   2711  1.1  mrg   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
   2712  1.1  mrg                              TREE_OPERAND (offset2, 1));
   2713  1.1  mrg 
   2714  1.1  mrg   return res;
   2715  1.1  mrg }
   2716  1.1  mrg 
   2717  1.1  mrg /* Check if DRA and DRB have equal offsets.  */
   2718  1.1  mrg bool
   2719  1.1  mrg dr_equal_offsets_p (struct data_reference *dra,
   2720  1.1  mrg                     struct data_reference *drb)
   2721  1.1  mrg {
   2722  1.1  mrg   tree offset1, offset2;
   2723  1.1  mrg 
   2724  1.1  mrg   offset1 = DR_OFFSET (dra);
   2725  1.1  mrg   offset2 = DR_OFFSET (drb);
   2726  1.1  mrg 
   2727  1.1  mrg   return dr_equal_offsets_p1 (offset1, offset2);
   2728  1.1  mrg }
   2729  1.1  mrg 
   2730  1.1  mrg /* Returns true if FNA == FNB.  */
   2731  1.1  mrg 
   2732  1.1  mrg static bool
   2733  1.1  mrg affine_function_equal_p (affine_fn fna, affine_fn fnb)
   2734  1.1  mrg {
   2735  1.1  mrg   unsigned i, n = fna.length ();
   2736  1.1  mrg 
   2737  1.1  mrg   if (n != fnb.length ())
   2738  1.1  mrg     return false;
   2739  1.1  mrg 
   2740  1.1  mrg   for (i = 0; i < n; i++)
   2741  1.1  mrg     if (!operand_equal_p (fna[i], fnb[i], 0))
   2742  1.1  mrg       return false;
   2743  1.1  mrg 
   2744  1.1  mrg   return true;
   2745  1.1  mrg }
   2746  1.1  mrg 
   2747  1.1  mrg /* If all the functions in CF are the same, returns one of them,
   2748  1.1  mrg    otherwise returns NULL.  */
   2749  1.1  mrg 
   2750  1.1  mrg static affine_fn
   2751  1.1  mrg common_affine_function (conflict_function *cf)
   2752  1.1  mrg {
   2753  1.1  mrg   unsigned i;
   2754  1.1  mrg   affine_fn comm;
   2755  1.1  mrg 
   2756  1.1  mrg   if (!CF_NONTRIVIAL_P (cf))
   2757  1.1  mrg     return affine_fn ();
   2758  1.1  mrg 
   2759  1.1  mrg   comm = cf->fns[0];
   2760  1.1  mrg 
   2761  1.1  mrg   for (i = 1; i < cf->n; i++)
   2762  1.1  mrg     if (!affine_function_equal_p (comm, cf->fns[i]))
   2763  1.1  mrg       return affine_fn ();
   2764  1.1  mrg 
   2765  1.1  mrg   return comm;
   2766  1.1  mrg }
   2767  1.1  mrg 
   2768  1.1  mrg /* Returns the base of the affine function FN.  */
   2769  1.1  mrg 
   2770  1.1  mrg static tree
   2771  1.1  mrg affine_function_base (affine_fn fn)
   2772  1.1  mrg {
   2773  1.1  mrg   return fn[0];
   2774  1.1  mrg }
   2775  1.1  mrg 
   2776  1.1  mrg /* Returns true if FN is a constant.  */
   2777  1.1  mrg 
   2778  1.1  mrg static bool
   2779  1.1  mrg affine_function_constant_p (affine_fn fn)
   2780  1.1  mrg {
   2781  1.1  mrg   unsigned i;
   2782  1.1  mrg   tree coef;
   2783  1.1  mrg 
   2784  1.1  mrg   for (i = 1; fn.iterate (i, &coef); i++)
   2785  1.1  mrg     if (!integer_zerop (coef))
   2786  1.1  mrg       return false;
   2787  1.1  mrg 
   2788  1.1  mrg   return true;
   2789  1.1  mrg }
   2790  1.1  mrg 
   2791  1.1  mrg /* Returns true if FN is the zero constant function.  */
   2792  1.1  mrg 
   2793  1.1  mrg static bool
   2794  1.1  mrg affine_function_zero_p (affine_fn fn)
   2795  1.1  mrg {
   2796  1.1  mrg   return (integer_zerop (affine_function_base (fn))
   2797  1.1  mrg 	  && affine_function_constant_p (fn));
   2798  1.1  mrg }
   2799  1.1  mrg 
   2800  1.1  mrg /* Returns a signed integer type with the largest precision from TA
   2801  1.1  mrg    and TB.  */
   2802  1.1  mrg 
   2803  1.1  mrg static tree
   2804  1.1  mrg signed_type_for_types (tree ta, tree tb)
   2805  1.1  mrg {
   2806  1.1  mrg   if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
   2807  1.1  mrg     return signed_type_for (ta);
   2808  1.1  mrg   else
   2809  1.1  mrg     return signed_type_for (tb);
   2810  1.1  mrg }
   2811  1.1  mrg 
   2812  1.1  mrg /* Applies operation OP on affine functions FNA and FNB, and returns the
   2813  1.1  mrg    result.  */
   2814  1.1  mrg 
   2815  1.1  mrg static affine_fn
   2816  1.1  mrg affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
   2817  1.1  mrg {
   2818  1.1  mrg   unsigned i, n, m;
   2819  1.1  mrg   affine_fn ret;
   2820  1.1  mrg   tree coef;
   2821  1.1  mrg 
   2822  1.1  mrg   if (fnb.length () > fna.length ())
   2823  1.1  mrg     {
   2824  1.1  mrg       n = fna.length ();
   2825  1.1  mrg       m = fnb.length ();
   2826  1.1  mrg     }
   2827  1.1  mrg   else
   2828  1.1  mrg     {
   2829  1.1  mrg       n = fnb.length ();
   2830  1.1  mrg       m = fna.length ();
   2831  1.1  mrg     }
   2832  1.1  mrg 
   2833  1.1  mrg   ret.create (m);
   2834  1.1  mrg   for (i = 0; i < n; i++)
   2835  1.1  mrg     {
   2836  1.1  mrg       tree type = signed_type_for_types (TREE_TYPE (fna[i]),
   2837  1.1  mrg 					 TREE_TYPE (fnb[i]));
   2838  1.1  mrg       ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
   2839  1.1  mrg     }
   2840  1.1  mrg 
   2841  1.1  mrg   for (; fna.iterate (i, &coef); i++)
   2842  1.1  mrg     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
   2843  1.1  mrg 				 coef, integer_zero_node));
   2844  1.1  mrg   for (; fnb.iterate (i, &coef); i++)
   2845  1.1  mrg     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
   2846  1.1  mrg 				 integer_zero_node, coef));
   2847  1.1  mrg 
   2848  1.1  mrg   return ret;
   2849  1.1  mrg }
   2850  1.1  mrg 
   2851  1.1  mrg /* Returns the sum of affine functions FNA and FNB.  */
   2852  1.1  mrg 
   2853  1.1  mrg static affine_fn
   2854  1.1  mrg affine_fn_plus (affine_fn fna, affine_fn fnb)
   2855  1.1  mrg {
   2856  1.1  mrg   return affine_fn_op (PLUS_EXPR, fna, fnb);
   2857  1.1  mrg }
   2858  1.1  mrg 
   2859  1.1  mrg /* Returns the difference of affine functions FNA and FNB.  */
   2860  1.1  mrg 
   2861  1.1  mrg static affine_fn
   2862  1.1  mrg affine_fn_minus (affine_fn fna, affine_fn fnb)
   2863  1.1  mrg {
   2864  1.1  mrg   return affine_fn_op (MINUS_EXPR, fna, fnb);
   2865  1.1  mrg }
   2866  1.1  mrg 
   2867  1.1  mrg /* Frees affine function FN.  */
   2868  1.1  mrg 
   2869  1.1  mrg static void
   2870  1.1  mrg affine_fn_free (affine_fn fn)
   2871  1.1  mrg {
   2872  1.1  mrg   fn.release ();
   2873  1.1  mrg }
   2874  1.1  mrg 
   2875  1.1  mrg /* Determine for each subscript in the data dependence relation DDR
   2876  1.1  mrg    the distance.  */
   2877  1.1  mrg 
   2878  1.1  mrg static void
   2879  1.1  mrg compute_subscript_distance (struct data_dependence_relation *ddr)
   2880  1.1  mrg {
   2881  1.1  mrg   conflict_function *cf_a, *cf_b;
   2882  1.1  mrg   affine_fn fn_a, fn_b, diff;
   2883  1.1  mrg 
   2884  1.1  mrg   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
   2885  1.1  mrg     {
   2886  1.1  mrg       unsigned int i;
   2887  1.1  mrg 
   2888  1.1  mrg       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
   2889  1.1  mrg  	{
   2890  1.1  mrg  	  struct subscript *subscript;
   2891  1.1  mrg 
   2892  1.1  mrg  	  subscript = DDR_SUBSCRIPT (ddr, i);
   2893  1.1  mrg  	  cf_a = SUB_CONFLICTS_IN_A (subscript);
   2894  1.1  mrg  	  cf_b = SUB_CONFLICTS_IN_B (subscript);
   2895  1.1  mrg 
   2896  1.1  mrg 	  fn_a = common_affine_function (cf_a);
   2897  1.1  mrg 	  fn_b = common_affine_function (cf_b);
   2898  1.1  mrg 	  if (!fn_a.exists () || !fn_b.exists ())
   2899  1.1  mrg 	    {
   2900  1.1  mrg 	      SUB_DISTANCE (subscript) = chrec_dont_know;
   2901  1.1  mrg 	      return;
   2902  1.1  mrg 	    }
   2903  1.1  mrg 	  diff = affine_fn_minus (fn_a, fn_b);
   2904  1.1  mrg 
   2905  1.1  mrg  	  if (affine_function_constant_p (diff))
   2906  1.1  mrg  	    SUB_DISTANCE (subscript) = affine_function_base (diff);
   2907  1.1  mrg  	  else
   2908  1.1  mrg  	    SUB_DISTANCE (subscript) = chrec_dont_know;
   2909  1.1  mrg 
   2910  1.1  mrg 	  affine_fn_free (diff);
   2911  1.1  mrg  	}
   2912  1.1  mrg     }
   2913  1.1  mrg }
   2914  1.1  mrg 
   2915  1.1  mrg /* Returns the conflict function for "unknown".  */
   2916  1.1  mrg 
   2917  1.1  mrg static conflict_function *
   2918  1.1  mrg conflict_fn_not_known (void)
   2919  1.1  mrg {
   2920  1.1  mrg   conflict_function *fn = XCNEW (conflict_function);
   2921  1.1  mrg   fn->n = NOT_KNOWN;
   2922  1.1  mrg 
   2923  1.1  mrg   return fn;
   2924  1.1  mrg }
   2925  1.1  mrg 
   2926  1.1  mrg /* Returns the conflict function for "independent".  */
   2927  1.1  mrg 
   2928  1.1  mrg static conflict_function *
   2929  1.1  mrg conflict_fn_no_dependence (void)
   2930  1.1  mrg {
   2931  1.1  mrg   conflict_function *fn = XCNEW (conflict_function);
   2932  1.1  mrg   fn->n = NO_DEPENDENCE;
   2933  1.1  mrg 
   2934  1.1  mrg   return fn;
   2935  1.1  mrg }
   2936  1.1  mrg 
   2937  1.1  mrg /* Returns true if the address of OBJ is invariant in LOOP.  */
   2938  1.1  mrg 
   2939  1.1  mrg static bool
   2940  1.1  mrg object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
   2941  1.1  mrg {
   2942  1.1  mrg   while (handled_component_p (obj))
   2943  1.1  mrg     {
   2944  1.1  mrg       if (TREE_CODE (obj) == ARRAY_REF)
   2945  1.1  mrg 	{
   2946  1.1  mrg 	  for (int i = 1; i < 4; ++i)
   2947  1.1  mrg 	    if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
   2948  1.1  mrg 							loop->num))
   2949  1.1  mrg 	      return false;
   2950  1.1  mrg 	}
   2951  1.1  mrg       else if (TREE_CODE (obj) == COMPONENT_REF)
   2952  1.1  mrg 	{
   2953  1.1  mrg 	  if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
   2954  1.1  mrg 						      loop->num))
   2955  1.1  mrg 	    return false;
   2956  1.1  mrg 	}
   2957  1.1  mrg       obj = TREE_OPERAND (obj, 0);
   2958  1.1  mrg     }
   2959  1.1  mrg 
   2960  1.1  mrg   if (!INDIRECT_REF_P (obj)
   2961  1.1  mrg       && TREE_CODE (obj) != MEM_REF)
   2962  1.1  mrg     return true;
   2963  1.1  mrg 
   2964  1.1  mrg   return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
   2965  1.1  mrg 						  loop->num);
   2966  1.1  mrg }
   2967  1.1  mrg 
/* Returns false if we can prove that data references A and B do not alias,
   true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
   considered.  A and B are not modified; the answer is conservative
   (returning true when nothing can be proven).  */

bool
dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
		class loop *loop_nest)
{
  tree addr_a = DR_BASE_OBJECT (a);
  tree addr_b = DR_BASE_OBJECT (b);

  /* If we are not processing a loop nest but scalar code we
     do not need to care about possible cross-iteration dependences
     and thus can process the full original reference.  Do so,
     similar to how loop invariant motion applies extra offset-based
     disambiguation.  */
  if (!loop_nest)
    {
      /* Compute both reference offsets affinely, subtract one from the
	 other and test whether the offset ranges (given each access
	 size) can possibly overlap.  */
      aff_tree off1, off2;
      poly_widest_int size1, size2;
      get_inner_reference_aff (DR_REF (a), &off1, &size1);
      get_inner_reference_aff (DR_REF (b), &off2, &size2);
      aff_combination_scale (&off1, -1);
      aff_combination_add (&off2, &off1);
      if (aff_comb_cannot_overlap_p (&off2, size1, size2))
	return false;
    }

  /* Restrict-style dependence info: two MEM_REF/TARGET_MEM_REF bases in
     the same clique but with different bases are known not to alias.  */
  if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
      && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
      /* For cross-iteration dependences the cliques must be valid for the
	 whole loop, not just individual iterations.  */
      && (!loop_nest
	  || MR_DEPENDENCE_CLIQUE (addr_a) == 1
	  || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
      && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
      && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
    return false;

  /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
     do not know the size of the base-object.  So we cannot do any
     offset/overlap based analysis but have to rely on points-to
     information only.  */
  if (TREE_CODE (addr_a) == MEM_REF
      && (DR_UNCONSTRAINED_BASE (a)
	  || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      /* Compare the two base pointers via points-to information; a
	 non-MEM_REF base is turned into an address expression first.  */
      if (TREE_CODE (addr_b) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       build_fold_addr_expr (addr_b));
    }
  else if (TREE_CODE (addr_b) == MEM_REF
	   && (DR_UNCONSTRAINED_BASE (b)
	       || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
    {
      /* Mirror image of the case above with B's base unconstrained.  */
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_a) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
				       TREE_OPERAND (addr_b, 0));
    }

  /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
     that is being subsetted in the loop nest.  Dispatch to the alias
     oracle entry point matching the read/write kinds of A and B.  */
  if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
    return refs_output_dependent_p (addr_a, addr_b);
  else if (DR_IS_READ (a) && DR_IS_WRITE (b))
    return refs_anti_dependent_p (addr_a, addr_b);
  return refs_may_alias_p (addr_a, addr_b);
}
   3054  1.1  mrg 
   3055  1.1  mrg /* REF_A and REF_B both satisfy access_fn_component_p.  Return true
   3056  1.1  mrg    if it is meaningful to compare their associated access functions
   3057  1.1  mrg    when checking for dependencies.  */
   3058  1.1  mrg 
   3059  1.1  mrg static bool
   3060  1.1  mrg access_fn_components_comparable_p (tree ref_a, tree ref_b)
   3061  1.1  mrg {
   3062  1.1  mrg   /* Allow pairs of component refs from the following sets:
   3063  1.1  mrg 
   3064  1.1  mrg        { REALPART_EXPR, IMAGPART_EXPR }
   3065  1.1  mrg        { COMPONENT_REF }
   3066  1.1  mrg        { ARRAY_REF }.  */
   3067  1.1  mrg   tree_code code_a = TREE_CODE (ref_a);
   3068  1.1  mrg   tree_code code_b = TREE_CODE (ref_b);
   3069  1.1  mrg   if (code_a == IMAGPART_EXPR)
   3070  1.1  mrg     code_a = REALPART_EXPR;
   3071  1.1  mrg   if (code_b == IMAGPART_EXPR)
   3072  1.1  mrg     code_b = REALPART_EXPR;
   3073  1.1  mrg   if (code_a != code_b)
   3074  1.1  mrg     return false;
   3075  1.1  mrg 
   3076  1.1  mrg   if (TREE_CODE (ref_a) == COMPONENT_REF)
   3077  1.1  mrg     /* ??? We cannot simply use the type of operand #0 of the refs here as
   3078  1.1  mrg        the Fortran compiler smuggles type punning into COMPONENT_REFs.
   3079  1.1  mrg        Use the DECL_CONTEXT of the FIELD_DECLs instead.  */
   3080  1.1  mrg     return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
   3081  1.1  mrg 	    == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
   3082  1.1  mrg 
   3083  1.1  mrg   return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
   3084  1.1  mrg 			     TREE_TYPE (TREE_OPERAND (ref_b, 0)));
   3085  1.1  mrg }
   3086  1.1  mrg 
   3087  1.1  mrg /* Initialize a data dependence relation RES in LOOP_NEST.  USE_ALT_INDICES
   3088  1.1  mrg    is true when the main indices of A and B were not comparable so we try again
   3089  1.1  mrg    with alternate indices computed on an indirect reference.  */
   3090  1.1  mrg 
   3091  1.1  mrg struct data_dependence_relation *
   3092  1.1  mrg initialize_data_dependence_relation (struct data_dependence_relation *res,
   3093  1.1  mrg 				     vec<loop_p> loop_nest,
   3094  1.1  mrg 				     bool use_alt_indices)
   3095  1.1  mrg {
   3096  1.1  mrg   struct data_reference *a = DDR_A (res);
   3097  1.1  mrg   struct data_reference *b = DDR_B (res);
   3098  1.1  mrg   unsigned int i;
   3099  1.1  mrg 
   3100  1.1  mrg   struct indices *indices_a = &a->indices;
   3101  1.1  mrg   struct indices *indices_b = &b->indices;
   3102  1.1  mrg   if (use_alt_indices)
   3103  1.1  mrg     {
   3104  1.1  mrg       if (TREE_CODE (DR_REF (a)) != MEM_REF)
   3105  1.1  mrg 	indices_a = &a->alt_indices;
   3106  1.1  mrg       if (TREE_CODE (DR_REF (b)) != MEM_REF)
   3107  1.1  mrg 	indices_b = &b->alt_indices;
   3108  1.1  mrg     }
   3109  1.1  mrg   unsigned int num_dimensions_a = indices_a->access_fns.length ();
   3110  1.1  mrg   unsigned int num_dimensions_b = indices_b->access_fns.length ();
   3111  1.1  mrg   if (num_dimensions_a == 0 || num_dimensions_b == 0)
   3112  1.1  mrg     {
   3113  1.1  mrg       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
   3114  1.1  mrg       return res;
   3115  1.1  mrg     }
   3116  1.1  mrg 
   3117  1.1  mrg   /* For unconstrained bases, the root (highest-indexed) subscript
   3118  1.1  mrg      describes a variation in the base of the original DR_REF rather
   3119  1.1  mrg      than a component access.  We have no type that accurately describes
   3120  1.1  mrg      the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
   3121  1.1  mrg      applying this subscript) so limit the search to the last real
   3122  1.1  mrg      component access.
   3123  1.1  mrg 
   3124  1.1  mrg      E.g. for:
   3125  1.1  mrg 
   3126  1.1  mrg 	void
   3127  1.1  mrg 	f (int a[][8], int b[][8])
   3128  1.1  mrg 	{
   3129  1.1  mrg 	  for (int i = 0; i < 8; ++i)
   3130  1.1  mrg 	    a[i * 2][0] = b[i][0];
   3131  1.1  mrg 	}
   3132  1.1  mrg 
   3133  1.1  mrg      the a and b accesses have a single ARRAY_REF component reference [0]
   3134  1.1  mrg      but have two subscripts.  */
   3135  1.1  mrg   if (indices_a->unconstrained_base)
   3136  1.1  mrg     num_dimensions_a -= 1;
   3137  1.1  mrg   if (indices_b->unconstrained_base)
   3138  1.1  mrg     num_dimensions_b -= 1;
   3139  1.1  mrg 
   3140  1.1  mrg   /* These structures describe sequences of component references in
   3141  1.1  mrg      DR_REF (A) and DR_REF (B).  Each component reference is tied to a
   3142  1.1  mrg      specific access function.  */
   3143  1.1  mrg   struct {
   3144  1.1  mrg     /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
   3145  1.1  mrg        DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
   3146  1.1  mrg        indices.  In C notation, these are the indices of the rightmost
   3147  1.1  mrg        component references; e.g. for a sequence .b.c.d, the start
   3148  1.1  mrg        index is for .d.  */
   3149  1.1  mrg     unsigned int start_a;
   3150  1.1  mrg     unsigned int start_b;
   3151  1.1  mrg 
   3152  1.1  mrg     /* The sequence contains LENGTH consecutive access functions from
   3153  1.1  mrg        each DR.  */
   3154  1.1  mrg     unsigned int length;
   3155  1.1  mrg 
   3156  1.1  mrg     /* The enclosing objects for the A and B sequences respectively,
   3157  1.1  mrg        i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
   3158  1.1  mrg        and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
   3159  1.1  mrg     tree object_a;
   3160  1.1  mrg     tree object_b;
   3161  1.1  mrg   } full_seq = {}, struct_seq = {};
   3162  1.1  mrg 
   3163  1.1  mrg   /* Before each iteration of the loop:
   3164  1.1  mrg 
   3165  1.1  mrg      - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
   3166  1.1  mrg      - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
   3167  1.1  mrg   unsigned int index_a = 0;
   3168  1.1  mrg   unsigned int index_b = 0;
   3169  1.1  mrg   tree ref_a = DR_REF (a);
   3170  1.1  mrg   tree ref_b = DR_REF (b);
   3171  1.1  mrg 
   3172  1.1  mrg   /* Now walk the component references from the final DR_REFs back up to
   3173  1.1  mrg      the enclosing base objects.  Each component reference corresponds
   3174  1.1  mrg      to one access function in the DR, with access function 0 being for
   3175  1.1  mrg      the final DR_REF and the highest-indexed access function being the
   3176  1.1  mrg      one that is applied to the base of the DR.
   3177  1.1  mrg 
   3178  1.1  mrg      Look for a sequence of component references whose access functions
   3179  1.1  mrg      are comparable (see access_fn_components_comparable_p).  If more
   3180  1.1  mrg      than one such sequence exists, pick the one nearest the base
   3181  1.1  mrg      (which is the leftmost sequence in C notation).  Store this sequence
   3182  1.1  mrg      in FULL_SEQ.
   3183  1.1  mrg 
   3184  1.1  mrg      For example, if we have:
   3185  1.1  mrg 
   3186  1.1  mrg 	struct foo { struct bar s; ... } (*a)[10], (*b)[10];
   3187  1.1  mrg 
   3188  1.1  mrg 	A: a[0][i].s.c.d
   3189  1.1  mrg 	B: __real b[0][i].s.e[i].f
   3190  1.1  mrg 
   3191  1.1  mrg      (where d is the same type as the real component of f) then the access
   3192  1.1  mrg      functions would be:
   3193  1.1  mrg 
   3194  1.1  mrg 			 0   1   2   3
   3195  1.1  mrg 	A:              .d  .c  .s [i]
   3196  1.1  mrg 
   3197  1.1  mrg 		 0   1   2   3   4   5
   3198  1.1  mrg 	B:  __real  .f [i]  .e  .s [i]
   3199  1.1  mrg 
   3200  1.1  mrg      The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
   3201  1.1  mrg      and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
   3202  1.1  mrg      COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
   3203  1.1  mrg      the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
   3204  1.1  mrg      so is comparable.  The A3/B5 column contains two ARRAY_REFs that
   3205  1.1  mrg      index foo[10] arrays, so is again comparable.  The sequence is
   3206  1.1  mrg      therefore:
   3207  1.1  mrg 
   3208  1.1  mrg         A: [1, 3]  (i.e. [i].s.c)
   3209  1.1  mrg         B: [3, 5]  (i.e. [i].s.e)
   3210  1.1  mrg 
   3211  1.1  mrg      Also look for sequences of component references whose access
   3212  1.1  mrg      functions are comparable and whose enclosing objects have the same
   3213  1.1  mrg      RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
   3214  1.1  mrg      example, STRUCT_SEQ would be:
   3215  1.1  mrg 
   3216  1.1  mrg         A: [1, 2]  (i.e. s.c)
   3217  1.1  mrg         B: [3, 4]  (i.e. s.e)  */
   3218  1.1  mrg   while (index_a < num_dimensions_a && index_b < num_dimensions_b)
   3219  1.1  mrg     {
   3220  1.1  mrg       /* The alternate indices form always has a single dimension
   3221  1.1  mrg 	 with unconstrained base.  */
   3222  1.1  mrg       gcc_assert (!use_alt_indices);
   3223  1.1  mrg 
   3224  1.1  mrg       /* REF_A and REF_B must be one of the component access types
   3225  1.1  mrg 	 allowed by dr_analyze_indices.  */
   3226  1.1  mrg       gcc_checking_assert (access_fn_component_p (ref_a));
   3227  1.1  mrg       gcc_checking_assert (access_fn_component_p (ref_b));
   3228  1.1  mrg 
   3229  1.1  mrg       /* Get the immediately-enclosing objects for REF_A and REF_B,
   3230  1.1  mrg 	 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
   3231  1.1  mrg 	 and DR_ACCESS_FN (B, INDEX_B).  */
   3232  1.1  mrg       tree object_a = TREE_OPERAND (ref_a, 0);
   3233  1.1  mrg       tree object_b = TREE_OPERAND (ref_b, 0);
   3234  1.1  mrg 
   3235  1.1  mrg       tree type_a = TREE_TYPE (object_a);
   3236  1.1  mrg       tree type_b = TREE_TYPE (object_b);
   3237  1.1  mrg       if (access_fn_components_comparable_p (ref_a, ref_b))
   3238  1.1  mrg 	{
   3239  1.1  mrg 	  /* This pair of component accesses is comparable for dependence
   3240  1.1  mrg 	     analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
   3241  1.1  mrg 	     DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
   3242  1.1  mrg 	  if (full_seq.start_a + full_seq.length != index_a
   3243  1.1  mrg 	      || full_seq.start_b + full_seq.length != index_b)
   3244  1.1  mrg 	    {
   3245  1.1  mrg 	      /* The accesses don't extend the current sequence,
   3246  1.1  mrg 		 so start a new one here.  */
   3247  1.1  mrg 	      full_seq.start_a = index_a;
   3248  1.1  mrg 	      full_seq.start_b = index_b;
   3249  1.1  mrg 	      full_seq.length = 0;
   3250  1.1  mrg 	    }
   3251  1.1  mrg 
   3252  1.1  mrg 	  /* Add this pair of references to the sequence.  */
   3253  1.1  mrg 	  full_seq.length += 1;
   3254  1.1  mrg 	  full_seq.object_a = object_a;
   3255  1.1  mrg 	  full_seq.object_b = object_b;
   3256  1.1  mrg 
   3257  1.1  mrg 	  /* If the enclosing objects are structures (and thus have the
   3258  1.1  mrg 	     same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
   3259  1.1  mrg 	  if (TREE_CODE (type_a) == RECORD_TYPE)
   3260  1.1  mrg 	    struct_seq = full_seq;
   3261  1.1  mrg 
   3262  1.1  mrg 	  /* Move to the next containing reference for both A and B.  */
   3263  1.1  mrg 	  ref_a = object_a;
   3264  1.1  mrg 	  ref_b = object_b;
   3265  1.1  mrg 	  index_a += 1;
   3266  1.1  mrg 	  index_b += 1;
   3267  1.1  mrg 	  continue;
   3268  1.1  mrg 	}
   3269  1.1  mrg 
   3270  1.1  mrg       /* Try to approach equal type sizes.  */
   3271  1.1  mrg       if (!COMPLETE_TYPE_P (type_a)
   3272  1.1  mrg 	  || !COMPLETE_TYPE_P (type_b)
   3273  1.1  mrg 	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
   3274  1.1  mrg 	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
   3275  1.1  mrg 	break;
   3276  1.1  mrg 
   3277  1.1  mrg       unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
   3278  1.1  mrg       unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
   3279  1.1  mrg       if (size_a <= size_b)
   3280  1.1  mrg 	{
   3281  1.1  mrg 	  index_a += 1;
   3282  1.1  mrg 	  ref_a = object_a;
   3283  1.1  mrg 	}
   3284  1.1  mrg       if (size_b <= size_a)
   3285  1.1  mrg 	{
   3286  1.1  mrg 	  index_b += 1;
   3287  1.1  mrg 	  ref_b = object_b;
   3288  1.1  mrg 	}
   3289  1.1  mrg     }
   3290  1.1  mrg 
   3291  1.1  mrg   /* See whether FULL_SEQ ends at the base and whether the two bases
   3292  1.1  mrg      are equal.  We do not care about TBAA or alignment info so we can
   3293  1.1  mrg      use OEP_ADDRESS_OF to avoid false negatives.  */
   3294  1.1  mrg   tree base_a = indices_a->base_object;
   3295  1.1  mrg   tree base_b = indices_b->base_object;
   3296  1.1  mrg   bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
   3297  1.1  mrg 		      && full_seq.start_b + full_seq.length == num_dimensions_b
   3298  1.1  mrg 		      && (indices_a->unconstrained_base
   3299  1.1  mrg 			  == indices_b->unconstrained_base)
   3300  1.1  mrg 		      && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
   3301  1.1  mrg 		      && (types_compatible_p (TREE_TYPE (base_a),
   3302  1.1  mrg 					      TREE_TYPE (base_b))
   3303  1.1  mrg 			  || (!base_supports_access_fn_components_p (base_a)
   3304  1.1  mrg 			      && !base_supports_access_fn_components_p (base_b)
   3305  1.1  mrg 			      && operand_equal_p
   3306  1.1  mrg 				   (TYPE_SIZE (TREE_TYPE (base_a)),
   3307  1.1  mrg 				    TYPE_SIZE (TREE_TYPE (base_b)), 0)))
   3308  1.1  mrg 		      && (!loop_nest.exists ()
   3309  1.1  mrg 			  || (object_address_invariant_in_loop_p
   3310  1.1  mrg 			      (loop_nest[0], base_a))));
   3311  1.1  mrg 
   3312  1.1  mrg   /* If the bases are the same, we can include the base variation too.
   3313  1.1  mrg      E.g. the b accesses in:
   3314  1.1  mrg 
   3315  1.1  mrg        for (int i = 0; i < n; ++i)
   3316  1.1  mrg          b[i + 4][0] = b[i][0];
   3317  1.1  mrg 
   3318  1.1  mrg      have a definite dependence distance of 4, while for:
   3319  1.1  mrg 
   3320  1.1  mrg        for (int i = 0; i < n; ++i)
   3321  1.1  mrg          a[i + 4][0] = b[i][0];
   3322  1.1  mrg 
   3323  1.1  mrg      the dependence distance depends on the gap between a and b.
   3324  1.1  mrg 
   3325  1.1  mrg      If the bases are different then we can only rely on the sequence
   3326  1.1  mrg      rooted at a structure access, since arrays are allowed to overlap
   3327  1.1  mrg      arbitrarily and change shape arbitrarily.  E.g. we treat this as
   3328  1.1  mrg      valid code:
   3329  1.1  mrg 
   3330  1.1  mrg        int a[256];
   3331  1.1  mrg        ...
   3332  1.1  mrg        ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
   3333  1.1  mrg 
   3334  1.1  mrg      where two lvalues with the same int[4][3] type overlap, and where
   3335  1.1  mrg      both lvalues are distinct from the object's declared type.  */
   3336  1.1  mrg   if (same_base_p)
   3337  1.1  mrg     {
   3338  1.1  mrg       if (indices_a->unconstrained_base)
   3339  1.1  mrg 	full_seq.length += 1;
   3340  1.1  mrg     }
   3341  1.1  mrg   else
   3342  1.1  mrg     full_seq = struct_seq;
   3343  1.1  mrg 
   3344  1.1  mrg   /* Punt if we didn't find a suitable sequence.  */
   3345  1.1  mrg   if (full_seq.length == 0)
   3346  1.1  mrg     {
   3347  1.1  mrg       if (use_alt_indices
   3348  1.1  mrg 	  || (TREE_CODE (DR_REF (a)) == MEM_REF
   3349  1.1  mrg 	      && TREE_CODE (DR_REF (b)) == MEM_REF)
   3350  1.1  mrg 	  || may_be_nonaddressable_p (DR_REF (a))
   3351  1.1  mrg 	  || may_be_nonaddressable_p (DR_REF (b)))
   3352  1.1  mrg 	{
   3353  1.1  mrg 	  /* Fully exhausted possibilities.  */
   3354  1.1  mrg 	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
   3355  1.1  mrg 	  return res;
   3356  1.1  mrg 	}
   3357  1.1  mrg 
   3358  1.1  mrg       /* Try evaluating both DRs as dereferences of pointers.  */
   3359  1.1  mrg       if (!a->alt_indices.base_object
   3360  1.1  mrg 	  && TREE_CODE (DR_REF (a)) != MEM_REF)
   3361  1.1  mrg 	{
   3362  1.1  mrg 	  tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
   3363  1.1  mrg 				 build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
   3364  1.1  mrg 				 build_int_cst
   3365  1.1  mrg 				   (reference_alias_ptr_type (DR_REF (a)), 0));
   3366  1.1  mrg 	  dr_analyze_indices (&a->alt_indices, alt_ref,
   3367  1.1  mrg 			      loop_preheader_edge (loop_nest[0]),
   3368  1.1  mrg 			      loop_containing_stmt (DR_STMT (a)));
   3369  1.1  mrg 	}
   3370  1.1  mrg       if (!b->alt_indices.base_object
   3371  1.1  mrg 	  && TREE_CODE (DR_REF (b)) != MEM_REF)
   3372  1.1  mrg 	{
   3373  1.1  mrg 	  tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
   3374  1.1  mrg 				 build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
   3375  1.1  mrg 				 build_int_cst
   3376  1.1  mrg 				   (reference_alias_ptr_type (DR_REF (b)), 0));
   3377  1.1  mrg 	  dr_analyze_indices (&b->alt_indices, alt_ref,
   3378  1.1  mrg 			      loop_preheader_edge (loop_nest[0]),
   3379  1.1  mrg 			      loop_containing_stmt (DR_STMT (b)));
   3380  1.1  mrg 	}
   3381  1.1  mrg       return initialize_data_dependence_relation (res, loop_nest, true);
   3382  1.1  mrg     }
   3383  1.1  mrg 
   3384  1.1  mrg   if (!same_base_p)
   3385  1.1  mrg     {
   3386  1.1  mrg       /* Partial overlap is possible for different bases when strict aliasing
   3387  1.1  mrg 	 is not in effect.  It's also possible if either base involves a union
   3388  1.1  mrg 	 access; e.g. for:
   3389  1.1  mrg 
   3390  1.1  mrg 	   struct s1 { int a[2]; };
   3391  1.1  mrg 	   struct s2 { struct s1 b; int c; };
   3392  1.1  mrg 	   struct s3 { int d; struct s1 e; };
   3393  1.1  mrg 	   union u { struct s2 f; struct s3 g; } *p, *q;
   3394  1.1  mrg 
   3395  1.1  mrg 	 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
   3396  1.1  mrg 	 "p->g.e" (base "p->g") and might partially overlap the s1 at
   3397  1.1  mrg 	 "q->g.e" (base "q->g").  */
   3398  1.1  mrg       if (!flag_strict_aliasing
   3399  1.1  mrg 	  || ref_contains_union_access_p (full_seq.object_a)
   3400  1.1  mrg 	  || ref_contains_union_access_p (full_seq.object_b))
   3401  1.1  mrg 	{
   3402  1.1  mrg 	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
   3403  1.1  mrg 	  return res;
   3404  1.1  mrg 	}
   3405  1.1  mrg 
   3406  1.1  mrg       DDR_COULD_BE_INDEPENDENT_P (res) = true;
   3407  1.1  mrg       if (!loop_nest.exists ()
   3408  1.1  mrg 	  || (object_address_invariant_in_loop_p (loop_nest[0],
   3409  1.1  mrg 						  full_seq.object_a)
   3410  1.1  mrg 	      && object_address_invariant_in_loop_p (loop_nest[0],
   3411  1.1  mrg 						     full_seq.object_b)))
   3412  1.1  mrg 	{
   3413  1.1  mrg 	  DDR_OBJECT_A (res) = full_seq.object_a;
   3414  1.1  mrg 	  DDR_OBJECT_B (res) = full_seq.object_b;
   3415  1.1  mrg 	}
   3416  1.1  mrg     }
   3417  1.1  mrg 
   3418  1.1  mrg   DDR_AFFINE_P (res) = true;
   3419  1.1  mrg   DDR_ARE_DEPENDENT (res) = NULL_TREE;
   3420  1.1  mrg   DDR_SUBSCRIPTS (res).create (full_seq.length);
   3421  1.1  mrg   DDR_LOOP_NEST (res) = loop_nest;
   3422  1.1  mrg   DDR_SELF_REFERENCE (res) = false;
   3423  1.1  mrg 
   3424  1.1  mrg   for (i = 0; i < full_seq.length; ++i)
   3425  1.1  mrg     {
   3426  1.1  mrg       struct subscript *subscript;
   3427  1.1  mrg 
   3428  1.1  mrg       subscript = XNEW (struct subscript);
   3429  1.1  mrg       SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i];
   3430  1.1  mrg       SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i];
   3431  1.1  mrg       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
   3432  1.1  mrg       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
   3433  1.1  mrg       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
   3434  1.1  mrg       SUB_DISTANCE (subscript) = chrec_dont_know;
   3435  1.1  mrg       DDR_SUBSCRIPTS (res).safe_push (subscript);
   3436  1.1  mrg     }
   3437  1.1  mrg 
   3438  1.1  mrg   return res;
   3439  1.1  mrg }
   3440  1.1  mrg 
   3441  1.1  mrg /* Initialize a data dependence relation between data accesses A and
   3442  1.1  mrg    B.  NB_LOOPS is the number of loops surrounding the references: the
   3443  1.1  mrg    size of the classic distance/direction vectors.  */
   3444  1.1  mrg 
   3445  1.1  mrg struct data_dependence_relation *
   3446  1.1  mrg initialize_data_dependence_relation (struct data_reference *a,
   3447  1.1  mrg 				     struct data_reference *b,
   3448  1.1  mrg 				     vec<loop_p> loop_nest)
   3449  1.1  mrg {
   3450  1.1  mrg   data_dependence_relation *res = XCNEW (struct data_dependence_relation);
   3451  1.1  mrg   DDR_A (res) = a;
   3452  1.1  mrg   DDR_B (res) = b;
   3453  1.1  mrg   DDR_LOOP_NEST (res).create (0);
   3454  1.1  mrg   DDR_SUBSCRIPTS (res).create (0);
   3455  1.1  mrg   DDR_DIR_VECTS (res).create (0);
   3456  1.1  mrg   DDR_DIST_VECTS (res).create (0);
   3457  1.1  mrg 
   3458  1.1  mrg   if (a == NULL || b == NULL)
   3459  1.1  mrg     {
   3460  1.1  mrg       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
   3461  1.1  mrg       return res;
   3462  1.1  mrg     }
   3463  1.1  mrg 
   3464  1.1  mrg   /* If the data references do not alias, then they are independent.  */
   3465  1.1  mrg   if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
   3466  1.1  mrg     {
   3467  1.1  mrg       DDR_ARE_DEPENDENT (res) = chrec_known;
   3468  1.1  mrg       return res;
   3469  1.1  mrg     }
   3470  1.1  mrg 
   3471  1.1  mrg   return initialize_data_dependence_relation (res, loop_nest, false);
   3472  1.1  mrg }
   3473  1.1  mrg 
   3474  1.1  mrg 
   3475  1.1  mrg /* Frees memory used by the conflict function F.  */
   3476  1.1  mrg 
   3477  1.1  mrg static void
   3478  1.1  mrg free_conflict_function (conflict_function *f)
   3479  1.1  mrg {
   3480  1.1  mrg   unsigned i;
   3481  1.1  mrg 
   3482  1.1  mrg   if (CF_NONTRIVIAL_P (f))
   3483  1.1  mrg     {
   3484  1.1  mrg       for (i = 0; i < f->n; i++)
   3485  1.1  mrg 	affine_fn_free (f->fns[i]);
   3486  1.1  mrg     }
   3487  1.1  mrg   free (f);
   3488  1.1  mrg }
   3489  1.1  mrg 
   3490  1.1  mrg /* Frees memory used by SUBSCRIPTS.  */
   3491  1.1  mrg 
   3492  1.1  mrg static void
   3493  1.1  mrg free_subscripts (vec<subscript_p> subscripts)
   3494  1.1  mrg {
   3495  1.1  mrg   for (subscript_p s : subscripts)
   3496  1.1  mrg     {
   3497  1.1  mrg       free_conflict_function (s->conflicting_iterations_in_a);
   3498  1.1  mrg       free_conflict_function (s->conflicting_iterations_in_b);
   3499  1.1  mrg       free (s);
   3500  1.1  mrg     }
   3501  1.1  mrg   subscripts.release ();
   3502  1.1  mrg }
   3503  1.1  mrg 
   3504  1.1  mrg /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
   3505  1.1  mrg    description.  */
   3506  1.1  mrg 
   3507  1.1  mrg static inline void
   3508  1.1  mrg finalize_ddr_dependent (struct data_dependence_relation *ddr,
   3509  1.1  mrg 			tree chrec)
   3510  1.1  mrg {
   3511  1.1  mrg   DDR_ARE_DEPENDENT (ddr) = chrec;
   3512  1.1  mrg   free_subscripts (DDR_SUBSCRIPTS (ddr));
   3513  1.1  mrg   DDR_SUBSCRIPTS (ddr).create (0);
   3514  1.1  mrg }
   3515  1.1  mrg 
   3516  1.1  mrg /* The dependence relation DDR cannot be represented by a distance
   3517  1.1  mrg    vector.  */
   3518  1.1  mrg 
   3519  1.1  mrg static inline void
   3520  1.1  mrg non_affine_dependence_relation (struct data_dependence_relation *ddr)
   3521  1.1  mrg {
   3522  1.1  mrg   if (dump_file && (dump_flags & TDF_DETAILS))
   3523  1.1  mrg     fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
   3524  1.1  mrg 
   3525  1.1  mrg   DDR_AFFINE_P (ddr) = false;
   3526  1.1  mrg }
   3527  1.1  mrg 
   3528  1.1  mrg 
   3529  1.1  mrg 
   3531  1.1  mrg /* This section contains the classic Banerjee tests.  */
   3532  1.1  mrg 
   3533  1.1  mrg /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
   3534  1.1  mrg    variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
   3535  1.1  mrg 
   3536  1.1  mrg static inline bool
   3537  1.1  mrg ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
   3538  1.1  mrg {
   3539  1.1  mrg   return (evolution_function_is_constant_p (chrec_a)
   3540  1.1  mrg 	  && evolution_function_is_constant_p (chrec_b));
   3541  1.1  mrg }
   3542  1.1  mrg 
   3543  1.1  mrg /* Returns true iff CHREC_A and CHREC_B are dependent on an index
   3544  1.1  mrg    variable, i.e., if the SIV (Single Index Variable) test is true.  */
   3545  1.1  mrg 
   3546  1.1  mrg static bool
   3547  1.1  mrg siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
   3548  1.1  mrg {
   3549  1.1  mrg   if ((evolution_function_is_constant_p (chrec_a)
   3550  1.1  mrg        && evolution_function_is_univariate_p (chrec_b))
   3551  1.1  mrg       || (evolution_function_is_constant_p (chrec_b)
   3552  1.1  mrg 	  && evolution_function_is_univariate_p (chrec_a)))
   3553  1.1  mrg     return true;
   3554  1.1  mrg 
   3555  1.1  mrg   if (evolution_function_is_univariate_p (chrec_a)
   3556  1.1  mrg       && evolution_function_is_univariate_p (chrec_b))
   3557  1.1  mrg     {
   3558  1.1  mrg       switch (TREE_CODE (chrec_a))
   3559  1.1  mrg 	{
   3560  1.1  mrg 	case POLYNOMIAL_CHREC:
   3561  1.1  mrg 	  switch (TREE_CODE (chrec_b))
   3562  1.1  mrg 	    {
   3563  1.1  mrg 	    case POLYNOMIAL_CHREC:
   3564  1.1  mrg 	      if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
   3565  1.1  mrg 		return false;
   3566  1.1  mrg 	      /* FALLTHRU */
   3567  1.1  mrg 
   3568  1.1  mrg 	    default:
   3569  1.1  mrg 	      return true;
   3570  1.1  mrg 	    }
   3571  1.1  mrg 
   3572  1.1  mrg 	default:
   3573  1.1  mrg 	  return true;
   3574  1.1  mrg 	}
   3575  1.1  mrg     }
   3576  1.1  mrg 
   3577  1.1  mrg   return false;
   3578  1.1  mrg }
   3579  1.1  mrg 
   3580  1.1  mrg /* Creates a conflict function with N dimensions.  The affine functions
   3581  1.1  mrg    in each dimension follow.  */
   3582  1.1  mrg 
   3583  1.1  mrg static conflict_function *
   3584  1.1  mrg conflict_fn (unsigned n, ...)
   3585  1.1  mrg {
   3586  1.1  mrg   unsigned i;
   3587  1.1  mrg   conflict_function *ret = XCNEW (conflict_function);
   3588  1.1  mrg   va_list ap;
   3589  1.1  mrg 
   3590  1.1  mrg   gcc_assert (n > 0 && n <= MAX_DIM);
   3591  1.1  mrg   va_start (ap, n);
   3592  1.1  mrg 
   3593  1.1  mrg   ret->n = n;
   3594  1.1  mrg   for (i = 0; i < n; i++)
   3595  1.1  mrg     ret->fns[i] = va_arg (ap, affine_fn);
   3596  1.1  mrg   va_end (ap);
   3597  1.1  mrg 
   3598  1.1  mrg   return ret;
   3599  1.1  mrg }
   3600  1.1  mrg 
   3601  1.1  mrg /* Returns constant affine function with value CST.  */
   3602  1.1  mrg 
   3603  1.1  mrg static affine_fn
   3604  1.1  mrg affine_fn_cst (tree cst)
   3605  1.1  mrg {
   3606  1.1  mrg   affine_fn fn;
   3607  1.1  mrg   fn.create (1);
   3608  1.1  mrg   fn.quick_push (cst);
   3609  1.1  mrg   return fn;
   3610  1.1  mrg }
   3611  1.1  mrg 
   3612  1.1  mrg /* Returns affine function with single variable, CST + COEF * x_DIM.  */
   3613  1.1  mrg 
   3614  1.1  mrg static affine_fn
   3615  1.1  mrg affine_fn_univar (tree cst, unsigned dim, tree coef)
   3616  1.1  mrg {
   3617  1.1  mrg   affine_fn fn;
   3618  1.1  mrg   fn.create (dim + 1);
   3619  1.1  mrg   unsigned i;
   3620  1.1  mrg 
   3621  1.1  mrg   gcc_assert (dim > 0);
   3622  1.1  mrg   fn.quick_push (cst);
   3623  1.1  mrg   for (i = 1; i < dim; i++)
   3624  1.1  mrg     fn.quick_push (integer_zero_node);
   3625  1.1  mrg   fn.quick_push (coef);
   3626  1.1  mrg   return fn;
   3627  1.1  mrg }
   3628  1.1  mrg 
   3629  1.1  mrg /* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
   3630  1.1  mrg    *OVERLAPS_B are initialized to the functions that describe the
   3631  1.1  mrg    relation between the elements accessed twice by CHREC_A and
   3632  1.1  mrg    CHREC_B.  For k >= 0, the following property is verified:
   3633  1.1  mrg 
   3634  1.1  mrg    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
   3635  1.1  mrg 
   3636  1.1  mrg static void
   3637  1.1  mrg analyze_ziv_subscript (tree chrec_a,
   3638  1.1  mrg 		       tree chrec_b,
   3639  1.1  mrg 		       conflict_function **overlaps_a,
   3640  1.1  mrg 		       conflict_function **overlaps_b,
   3641  1.1  mrg 		       tree *last_conflicts)
   3642  1.1  mrg {
   3643  1.1  mrg   tree type, difference;
   3644  1.1  mrg   dependence_stats.num_ziv++;
   3645  1.1  mrg 
   3646  1.1  mrg   if (dump_file && (dump_flags & TDF_DETAILS))
   3647  1.1  mrg     fprintf (dump_file, "(analyze_ziv_subscript \n");
   3648  1.1  mrg 
   3649  1.1  mrg   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
   3650  1.1  mrg   chrec_a = chrec_convert (type, chrec_a, NULL);
   3651  1.1  mrg   chrec_b = chrec_convert (type, chrec_b, NULL);
   3652  1.1  mrg   difference = chrec_fold_minus (type, chrec_a, chrec_b);
   3653  1.1  mrg 
   3654  1.1  mrg   switch (TREE_CODE (difference))
   3655  1.1  mrg     {
   3656  1.1  mrg     case INTEGER_CST:
   3657  1.1  mrg       if (integer_zerop (difference))
   3658  1.1  mrg 	{
   3659  1.1  mrg 	  /* The difference is equal to zero: the accessed index
   3660  1.1  mrg 	     overlaps for each iteration in the loop.  */
   3661  1.1  mrg 	  *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
   3662  1.1  mrg 	  *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
   3663  1.1  mrg 	  *last_conflicts = chrec_dont_know;
   3664  1.1  mrg 	  dependence_stats.num_ziv_dependent++;
   3665  1.1  mrg 	}
   3666  1.1  mrg       else
   3667  1.1  mrg 	{
   3668  1.1  mrg 	  /* The accesses do not overlap.  */
   3669  1.1  mrg 	  *overlaps_a = conflict_fn_no_dependence ();
   3670  1.1  mrg 	  *overlaps_b = conflict_fn_no_dependence ();
   3671  1.1  mrg 	  *last_conflicts = integer_zero_node;
   3672  1.1  mrg 	  dependence_stats.num_ziv_independent++;
   3673  1.1  mrg 	}
   3674  1.1  mrg       break;
   3675  1.1  mrg 
   3676  1.1  mrg     default:
   3677  1.1  mrg       /* We're not sure whether the indexes overlap.  For the moment,
   3678  1.1  mrg 	 conservatively answer "don't know".  */
   3679  1.1  mrg       if (dump_file && (dump_flags & TDF_DETAILS))
   3680  1.1  mrg 	fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
   3681  1.1  mrg 
   3682  1.1  mrg       *overlaps_a = conflict_fn_not_known ();
   3683  1.1  mrg       *overlaps_b = conflict_fn_not_known ();
   3684  1.1  mrg       *last_conflicts = chrec_dont_know;
   3685  1.1  mrg       dependence_stats.num_ziv_unimplemented++;
   3686  1.1  mrg       break;
   3687  1.1  mrg     }
   3688  1.1  mrg 
   3689  1.1  mrg   if (dump_file && (dump_flags & TDF_DETAILS))
   3690  1.1  mrg     fprintf (dump_file, ")\n");
   3691  1.1  mrg }
   3692  1.1  mrg 
   3693  1.1  mrg /* Similar to max_stmt_executions_int, but returns the bound as a tree,
   3694  1.1  mrg    and only if it fits to the int type.  If this is not the case, or the
   3695  1.1  mrg    bound  on the number of iterations of LOOP could not be derived, returns
   3696  1.1  mrg    chrec_dont_know.  */
   3697  1.1  mrg 
   3698  1.1  mrg static tree
   3699  1.1  mrg max_stmt_executions_tree (class loop *loop)
   3700  1.1  mrg {
   3701  1.1  mrg   widest_int nit;
   3702  1.1  mrg 
   3703  1.1  mrg   if (!max_stmt_executions (loop, &nit))
   3704  1.1  mrg     return chrec_dont_know;
   3705  1.1  mrg 
   3706  1.1  mrg   if (!wi::fits_to_tree_p (nit, unsigned_type_node))
   3707  1.1  mrg     return chrec_dont_know;
   3708  1.1  mrg 
   3709  1.1  mrg   return wide_int_to_tree (unsigned_type_node, nit);
   3710  1.1  mrg }
   3711  1.1  mrg 
   3712  1.1  mrg /* Determine whether the CHREC is always positive/negative.  If the expression
   3713  1.1  mrg    cannot be statically analyzed, return false, otherwise set the answer into
   3714  1.1  mrg    VALUE.  */
   3715  1.1  mrg 
   3716  1.1  mrg static bool
   3717  1.1  mrg chrec_is_positive (tree chrec, bool *value)
   3718  1.1  mrg {
   3719  1.1  mrg   bool value0, value1, value2;
   3720  1.1  mrg   tree end_value, nb_iter;
   3721  1.1  mrg 
   3722  1.1  mrg   switch (TREE_CODE (chrec))
   3723  1.1  mrg     {
   3724  1.1  mrg     case POLYNOMIAL_CHREC:
   3725  1.1  mrg       if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
   3726  1.1  mrg 	  || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
   3727  1.1  mrg 	return false;
   3728  1.1  mrg 
   3729  1.1  mrg       /* FIXME -- overflows.  */
   3730  1.1  mrg       if (value0 == value1)
   3731  1.1  mrg 	{
   3732  1.1  mrg 	  *value = value0;
   3733  1.1  mrg 	  return true;
   3734  1.1  mrg 	}
   3735  1.1  mrg 
   3736  1.1  mrg       /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
   3737  1.1  mrg 	 and the proof consists in showing that the sign never
   3738  1.1  mrg 	 changes during the execution of the loop, from 0 to
   3739  1.1  mrg 	 loop->nb_iterations.  */
   3740  1.1  mrg       if (!evolution_function_is_affine_p (chrec))
   3741  1.1  mrg 	return false;
   3742  1.1  mrg 
   3743  1.1  mrg       nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
   3744  1.1  mrg       if (chrec_contains_undetermined (nb_iter))
   3745  1.1  mrg 	return false;
   3746  1.1  mrg 
   3747  1.1  mrg #if 0
   3748  1.1  mrg       /* TODO -- If the test is after the exit, we may decrease the number of
   3749  1.1  mrg 	 iterations by one.  */
   3750  1.1  mrg       if (after_exit)
   3751  1.1  mrg 	nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
   3752  1.1  mrg #endif
   3753  1.1  mrg 
   3754  1.1  mrg       end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
   3755  1.1  mrg 
   3756  1.1  mrg       if (!chrec_is_positive (end_value, &value2))
   3757  1.1  mrg 	return false;
   3758  1.1  mrg 
   3759  1.1  mrg       *value = value0;
   3760  1.1  mrg       return value0 == value1;
   3761  1.1  mrg 
   3762  1.1  mrg     case INTEGER_CST:
   3763  1.1  mrg       switch (tree_int_cst_sgn (chrec))
   3764  1.1  mrg 	{
   3765  1.1  mrg 	case -1:
   3766  1.1  mrg 	  *value = false;
   3767  1.1  mrg 	  break;
   3768  1.1  mrg 	case 1:
   3769  1.1  mrg 	  *value = true;
   3770  1.1  mrg 	  break;
   3771  1.1  mrg 	default:
   3772  1.1  mrg 	  return false;
   3773  1.1  mrg 	}
   3774  1.1  mrg       return true;
   3775  1.1  mrg 
   3776  1.1  mrg     default:
   3777  1.1  mrg       return false;
   3778  1.1  mrg     }
   3779  1.1  mrg }
   3780  1.1  mrg 
   3781  1.1  mrg 
   3782  1.1  mrg /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
   3783  1.1  mrg    constant, and CHREC_B is an affine function.  *OVERLAPS_A and
   3784  1.1  mrg    *OVERLAPS_B are initialized to the functions that describe the
   3785  1.1  mrg    relation between the elements accessed twice by CHREC_A and
   3786  1.1  mrg    CHREC_B.  For k >= 0, the following property is verified:
   3787  1.1  mrg 
   3788  1.1  mrg    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
   3789  1.1  mrg 
   3790  1.1  mrg static void
   3791  1.1  mrg analyze_siv_subscript_cst_affine (tree chrec_a,
   3792  1.1  mrg 				  tree chrec_b,
   3793  1.1  mrg 				  conflict_function **overlaps_a,
   3794  1.1  mrg 				  conflict_function **overlaps_b,
   3795  1.1  mrg 				  tree *last_conflicts)
   3796  1.1  mrg {
   3797  1.1  mrg   bool value0, value1, value2;
   3798  1.1  mrg   tree type, difference, tmp;
   3799  1.1  mrg 
   3800  1.1  mrg   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
   3801  1.1  mrg   chrec_a = chrec_convert (type, chrec_a, NULL);
   3802  1.1  mrg   chrec_b = chrec_convert (type, chrec_b, NULL);
   3803  1.1  mrg   difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
   3804  1.1  mrg 
   3805  1.1  mrg   /* Special case overlap in the first iteration.  */
   3806  1.1  mrg   if (integer_zerop (difference))
   3807  1.1  mrg     {
   3808  1.1  mrg       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
   3809  1.1  mrg       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
   3810  1.1  mrg       *last_conflicts = integer_one_node;
   3811  1.1  mrg       return;
   3812  1.1  mrg     }
   3813  1.1  mrg 
   3814  1.1  mrg   if (!chrec_is_positive (initial_condition (difference), &value0))
   3815  1.1  mrg     {
   3816  1.1  mrg       if (dump_file && (dump_flags & TDF_DETAILS))
   3817  1.1  mrg 	fprintf (dump_file, "siv test failed: chrec is not positive.\n");
   3818  1.1  mrg 
   3819  1.1  mrg       dependence_stats.num_siv_unimplemented++;
   3820  1.1  mrg       *overlaps_a = conflict_fn_not_known ();
   3821  1.1  mrg       *overlaps_b = conflict_fn_not_known ();
   3822  1.1  mrg       *last_conflicts = chrec_dont_know;
   3823  1.1  mrg       return;
   3824  1.1  mrg     }
   3825  1.1  mrg   else
   3826  1.1  mrg     {
   3827  1.1  mrg       if (value0 == false)
   3828  1.1  mrg 	{
   3829  1.1  mrg 	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
   3830  1.1  mrg 	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
   3831  1.1  mrg 	    {
   3832  1.1  mrg 	      if (dump_file && (dump_flags & TDF_DETAILS))
   3833  1.1  mrg 		fprintf (dump_file, "siv test failed: chrec not positive.\n");
   3834  1.1  mrg 
   3835  1.1  mrg 	      *overlaps_a = conflict_fn_not_known ();
   3836  1.1  mrg 	      *overlaps_b = conflict_fn_not_known ();
   3837  1.1  mrg 	      *last_conflicts = chrec_dont_know;
   3838  1.1  mrg 	      dependence_stats.num_siv_unimplemented++;
   3839  1.1  mrg 	      return;
   3840  1.1  mrg 	    }
   3841  1.1  mrg 	  else
   3842  1.1  mrg 	    {
   3843  1.1  mrg 	      if (value1 == true)
   3844  1.1  mrg 		{
   3845  1.1  mrg 		  /* Example:
   3846  1.1  mrg 		     chrec_a = 12
   3847  1.1  mrg 		     chrec_b = {10, +, 1}
   3848  1.1  mrg 		  */
   3849  1.1  mrg 
   3850  1.1  mrg 		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
   3851  1.1  mrg 		    {
   3852  1.1  mrg 		      HOST_WIDE_INT numiter;
   3853  1.1  mrg 		      class loop *loop = get_chrec_loop (chrec_b);
   3854  1.1  mrg 
   3855  1.1  mrg 		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
   3856  1.1  mrg 		      tmp = fold_build2 (EXACT_DIV_EXPR, type,
   3857  1.1  mrg 					 fold_build1 (ABS_EXPR, type, difference),
   3858  1.1  mrg 					 CHREC_RIGHT (chrec_b));
   3859  1.1  mrg 		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
   3860  1.1  mrg 		      *last_conflicts = integer_one_node;
   3861  1.1  mrg 
   3862  1.1  mrg 
   3863  1.1  mrg 		      /* Perform weak-zero siv test to see if overlap is
   3864  1.1  mrg 			 outside the loop bounds.  */
   3865  1.1  mrg 		      numiter = max_stmt_executions_int (loop);
   3866  1.1  mrg 
   3867  1.1  mrg 		      if (numiter >= 0
   3868  1.1  mrg 			  && compare_tree_int (tmp, numiter) > 0)
   3869  1.1  mrg 			{
   3870  1.1  mrg 			  free_conflict_function (*overlaps_a);
   3871  1.1  mrg 			  free_conflict_function (*overlaps_b);
   3872  1.1  mrg 			  *overlaps_a = conflict_fn_no_dependence ();
   3873  1.1  mrg 			  *overlaps_b = conflict_fn_no_dependence ();
   3874  1.1  mrg 			  *last_conflicts = integer_zero_node;
   3875  1.1  mrg 			  dependence_stats.num_siv_independent++;
   3876  1.1  mrg 			  return;
   3877  1.1  mrg 			}
   3878  1.1  mrg 		      dependence_stats.num_siv_dependent++;
   3879  1.1  mrg 		      return;
   3880  1.1  mrg 		    }
   3881  1.1  mrg 
   3882  1.1  mrg 		  /* When the step does not divide the difference, there are
   3883  1.1  mrg 		     no overlaps.  */
   3884  1.1  mrg 		  else
   3885  1.1  mrg 		    {
   3886  1.1  mrg 		      *overlaps_a = conflict_fn_no_dependence ();
   3887  1.1  mrg 		      *overlaps_b = conflict_fn_no_dependence ();
   3888  1.1  mrg 		      *last_conflicts = integer_zero_node;
   3889  1.1  mrg 		      dependence_stats.num_siv_independent++;
   3890  1.1  mrg 		      return;
   3891  1.1  mrg 		    }
   3892  1.1  mrg 		}
   3893  1.1  mrg 
   3894  1.1  mrg 	      else
   3895  1.1  mrg 		{
   3896  1.1  mrg 		  /* Example:
   3897  1.1  mrg 		     chrec_a = 12
   3898  1.1  mrg 		     chrec_b = {10, +, -1}
   3899  1.1  mrg 
   3900  1.1  mrg 		     In this case, chrec_a will not overlap with chrec_b.  */
   3901  1.1  mrg 		  *overlaps_a = conflict_fn_no_dependence ();
   3902  1.1  mrg 		  *overlaps_b = conflict_fn_no_dependence ();
   3903  1.1  mrg 		  *last_conflicts = integer_zero_node;
   3904  1.1  mrg 		  dependence_stats.num_siv_independent++;
   3905  1.1  mrg 		  return;
   3906  1.1  mrg 		}
   3907  1.1  mrg 	    }
   3908  1.1  mrg 	}
   3909  1.1  mrg       else
   3910  1.1  mrg 	{
   3911  1.1  mrg 	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
   3912  1.1  mrg 	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
   3913  1.1  mrg 	    {
   3914  1.1  mrg 	      if (dump_file && (dump_flags & TDF_DETAILS))
   3915  1.1  mrg 		fprintf (dump_file, "siv test failed: chrec not positive.\n");
   3916  1.1  mrg 
   3917  1.1  mrg 	      *overlaps_a = conflict_fn_not_known ();
   3918  1.1  mrg 	      *overlaps_b = conflict_fn_not_known ();
   3919  1.1  mrg 	      *last_conflicts = chrec_dont_know;
   3920  1.1  mrg 	      dependence_stats.num_siv_unimplemented++;
   3921  1.1  mrg 	      return;
   3922  1.1  mrg 	    }
   3923  1.1  mrg 	  else
   3924  1.1  mrg 	    {
   3925  1.1  mrg 	      if (value2 == false)
   3926  1.1  mrg 		{
   3927  1.1  mrg 		  /* Example:
   3928  1.1  mrg 		     chrec_a = 3
   3929  1.1  mrg 		     chrec_b = {10, +, -1}
   3930  1.1  mrg 		  */
   3931  1.1  mrg 		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
   3932  1.1  mrg 		    {
   3933  1.1  mrg 		      HOST_WIDE_INT numiter;
   3934  1.1  mrg 		      class loop *loop = get_chrec_loop (chrec_b);
   3935  1.1  mrg 
   3936  1.1  mrg 		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
   3937  1.1  mrg 		      tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
   3938  1.1  mrg 					 CHREC_RIGHT (chrec_b));
   3939  1.1  mrg 		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
   3940  1.1  mrg 		      *last_conflicts = integer_one_node;
   3941  1.1  mrg 
   3942  1.1  mrg 		      /* Perform weak-zero siv test to see if overlap is
   3943  1.1  mrg 			 outside the loop bounds.  */
   3944  1.1  mrg 		      numiter = max_stmt_executions_int (loop);
   3945  1.1  mrg 
   3946  1.1  mrg 		      if (numiter >= 0
   3947  1.1  mrg 			  && compare_tree_int (tmp, numiter) > 0)
   3948  1.1  mrg 			{
   3949  1.1  mrg 			  free_conflict_function (*overlaps_a);
   3950  1.1  mrg 			  free_conflict_function (*overlaps_b);
   3951  1.1  mrg 			  *overlaps_a = conflict_fn_no_dependence ();
   3952  1.1  mrg 			  *overlaps_b = conflict_fn_no_dependence ();
   3953  1.1  mrg 			  *last_conflicts = integer_zero_node;
   3954  1.1  mrg 			  dependence_stats.num_siv_independent++;
   3955  1.1  mrg 			  return;
   3956  1.1  mrg 			}
   3957  1.1  mrg 		      dependence_stats.num_siv_dependent++;
   3958  1.1  mrg 		      return;
   3959  1.1  mrg 		    }
   3960  1.1  mrg 
   3961  1.1  mrg 		  /* When the step does not divide the difference, there
   3962  1.1  mrg 		     are no overlaps.  */
   3963  1.1  mrg 		  else
   3964  1.1  mrg 		    {
   3965  1.1  mrg 		      *overlaps_a = conflict_fn_no_dependence ();
   3966  1.1  mrg 		      *overlaps_b = conflict_fn_no_dependence ();
   3967  1.1  mrg 		      *last_conflicts = integer_zero_node;
   3968  1.1  mrg 		      dependence_stats.num_siv_independent++;
   3969  1.1  mrg 		      return;
   3970  1.1  mrg 		    }
   3971  1.1  mrg 		}
   3972  1.1  mrg 	      else
   3973  1.1  mrg 		{
   3974  1.1  mrg 		  /* Example:
   3975  1.1  mrg 		     chrec_a = 3
   3976  1.1  mrg 		     chrec_b = {4, +, 1}
   3977  1.1  mrg 
   3978  1.1  mrg 		     In this case, chrec_a will not overlap with chrec_b.  */
   3979  1.1  mrg 		  *overlaps_a = conflict_fn_no_dependence ();
   3980  1.1  mrg 		  *overlaps_b = conflict_fn_no_dependence ();
   3981  1.1  mrg 		  *last_conflicts = integer_zero_node;
   3982  1.1  mrg 		  dependence_stats.num_siv_independent++;
   3983  1.1  mrg 		  return;
   3984  1.1  mrg 		}
   3985  1.1  mrg 	    }
   3986  1.1  mrg 	}
   3987  1.1  mrg     }
   3988  1.1  mrg }
   3989  1.1  mrg 
   3990  1.1  mrg /* Helper recursive function for initializing the matrix A.  Returns
   3991  1.1  mrg    the initial value of CHREC.  */
   3992  1.1  mrg 
   3993  1.1  mrg static tree
   3994  1.1  mrg initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
   3995  1.1  mrg {
   3996  1.1  mrg   gcc_assert (chrec);
   3997  1.1  mrg 
   3998  1.1  mrg   switch (TREE_CODE (chrec))
   3999  1.1  mrg     {
   4000  1.1  mrg     case POLYNOMIAL_CHREC:
   4001  1.1  mrg       HOST_WIDE_INT chrec_right;
   4002  1.1  mrg       if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
   4003  1.1  mrg 	return chrec_dont_know;
   4004  1.1  mrg       chrec_right = int_cst_value (CHREC_RIGHT (chrec));
   4005  1.1  mrg       /* We want to be able to negate without overflow.  */
   4006  1.1  mrg       if (chrec_right == HOST_WIDE_INT_MIN)
   4007  1.1  mrg 	return chrec_dont_know;
   4008  1.1  mrg       A[index][0] = mult * chrec_right;
   4009  1.1  mrg       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
   4010  1.1  mrg 
   4011  1.1  mrg     case PLUS_EXPR:
   4012  1.1  mrg     case MULT_EXPR:
   4013  1.1  mrg     case MINUS_EXPR:
   4014  1.1  mrg       {
   4015  1.1  mrg 	tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
   4016  1.1  mrg 	tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
   4017  1.1  mrg 
   4018  1.1  mrg 	return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
   4019  1.1  mrg       }
   4020  1.1  mrg 
   4021  1.1  mrg     CASE_CONVERT:
   4022  1.1  mrg       {
   4023  1.1  mrg 	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
   4024  1.1  mrg 	return chrec_convert (chrec_type (chrec), op, NULL);
   4025  1.1  mrg       }
   4026  1.1  mrg 
   4027  1.1  mrg     case BIT_NOT_EXPR:
   4028  1.1  mrg       {
   4029  1.1  mrg 	/* Handle ~X as -1 - X.  */
   4030  1.1  mrg 	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
   4031  1.1  mrg 	return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
   4032  1.1  mrg 			      build_int_cst (TREE_TYPE (chrec), -1), op);
   4033  1.1  mrg       }
   4034  1.1  mrg 
   4035  1.1  mrg     case INTEGER_CST:
   4036  1.1  mrg       return cst_and_fits_in_hwi (chrec) ? chrec : chrec_dont_know;
   4037  1.1  mrg 
   4038  1.1  mrg     default:
   4039  1.1  mrg       gcc_unreachable ();
   4040  1.1  mrg       return NULL_TREE;
   4041  1.1  mrg     }
   4042  1.1  mrg }
   4043  1.1  mrg 
   4044  1.1  mrg #define FLOOR_DIV(x,y) ((x) / (y))
   4045  1.1  mrg 
   4046  1.1  mrg /* Solves the special case of the Diophantine equation:
   4047  1.1  mrg    | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
   4048  1.1  mrg 
   4049  1.1  mrg    Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
   4050  1.1  mrg    number of iterations that loops X and Y run.  The overlaps will be
   4051  1.1  mrg    constructed as evolutions in dimension DIM.  */
   4052  1.1  mrg 
   4053  1.1  mrg static void
   4054  1.1  mrg compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
   4055  1.1  mrg 					 HOST_WIDE_INT step_a,
   4056  1.1  mrg 					 HOST_WIDE_INT step_b,
   4057  1.1  mrg 					 affine_fn *overlaps_a,
   4058  1.1  mrg 					 affine_fn *overlaps_b,
   4059  1.1  mrg 					 tree *last_conflicts, int dim)
   4060  1.1  mrg {
   4061  1.1  mrg   if (((step_a > 0 && step_b > 0)
   4062  1.1  mrg        || (step_a < 0 && step_b < 0)))
   4063  1.1  mrg     {
   4064  1.1  mrg       HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
   4065  1.1  mrg       HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
   4066  1.1  mrg 
   4067  1.1  mrg       gcd_steps_a_b = gcd (step_a, step_b);
   4068  1.1  mrg       step_overlaps_a = step_b / gcd_steps_a_b;
   4069  1.1  mrg       step_overlaps_b = step_a / gcd_steps_a_b;
   4070  1.1  mrg 
   4071  1.1  mrg       if (niter > 0)
   4072  1.1  mrg 	{
   4073  1.1  mrg 	  tau2 = FLOOR_DIV (niter, step_overlaps_a);
   4074  1.1  mrg 	  tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
   4075  1.1  mrg 	  last_conflict = tau2;
   4076  1.1  mrg 	  *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
   4077  1.1  mrg 	}
   4078  1.1  mrg       else
   4079  1.1  mrg 	*last_conflicts = chrec_dont_know;
   4080  1.1  mrg 
   4081  1.1  mrg       *overlaps_a = affine_fn_univar (integer_zero_node, dim,
   4082  1.1  mrg 				      build_int_cst (NULL_TREE,
   4083  1.1  mrg 						     step_overlaps_a));
   4084  1.1  mrg       *overlaps_b = affine_fn_univar (integer_zero_node, dim,
   4085  1.1  mrg 				      build_int_cst (NULL_TREE,
   4086  1.1  mrg 						     step_overlaps_b));
   4087  1.1  mrg     }
   4088  1.1  mrg 
   4089  1.1  mrg   else
   4090  1.1  mrg     {
   4091  1.1  mrg       *overlaps_a = affine_fn_cst (integer_zero_node);
   4092  1.1  mrg       *overlaps_b = affine_fn_cst (integer_zero_node);
   4093  1.1  mrg       *last_conflicts = integer_zero_node;
   4094  1.1  mrg     }
   4095  1.1  mrg }
   4096  1.1  mrg 
   4097  1.1  mrg /* Solves the special case of a Diophantine equation where CHREC_A is
   4098  1.1  mrg    an affine bivariate function, and CHREC_B is an affine univariate
   4099  1.1  mrg    function.  For example,
   4100  1.1  mrg 
   4101  1.1  mrg    | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
   4102  1.1  mrg 
   4103  1.1  mrg    has the following overlapping functions:
   4104  1.1  mrg 
   4105  1.1  mrg    | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
   4106  1.1  mrg    | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
   4107  1.1  mrg    | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
   4108  1.1  mrg 
   4109  1.1  mrg    FORNOW: This is a specialized implementation for a case occurring in
   4110  1.1  mrg    a common benchmark.  Implement the general algorithm.  */
   4111  1.1  mrg 
   4112  1.1  mrg static void
   4113  1.1  mrg compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
   4114  1.1  mrg 				      conflict_function **overlaps_a,
   4115  1.1  mrg 				      conflict_function **overlaps_b,
   4116  1.1  mrg 				      tree *last_conflicts)
   4117  1.1  mrg {
   4118  1.1  mrg   bool xz_p, yz_p, xyz_p;
   4119  1.1  mrg   HOST_WIDE_INT step_x, step_y, step_z;
   4120  1.1  mrg   HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
   4121  1.1  mrg   affine_fn overlaps_a_xz, overlaps_b_xz;
   4122  1.1  mrg   affine_fn overlaps_a_yz, overlaps_b_yz;
   4123  1.1  mrg   affine_fn overlaps_a_xyz, overlaps_b_xyz;
   4124  1.1  mrg   affine_fn ova1, ova2, ovb;
   4125  1.1  mrg   tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
   4126  1.1  mrg 
   4127  1.1  mrg   step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
   4128  1.1  mrg   step_y = int_cst_value (CHREC_RIGHT (chrec_a));
   4129  1.1  mrg   step_z = int_cst_value (CHREC_RIGHT (chrec_b));
   4130  1.1  mrg 
   4131  1.1  mrg   niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
   4132  1.1  mrg   niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
   4133  1.1  mrg   niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
   4134  1.1  mrg 
   4135  1.1  mrg   if (niter_x < 0 || niter_y < 0 || niter_z < 0)
   4136  1.1  mrg     {
   4137  1.1  mrg       if (dump_file && (dump_flags & TDF_DETAILS))
   4138  1.1  mrg 	fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
   4139  1.1  mrg 
   4140  1.1  mrg       *overlaps_a = conflict_fn_not_known ();
   4141  1.1  mrg       *overlaps_b = conflict_fn_not_known ();
   4142  1.1  mrg       *last_conflicts = chrec_dont_know;
   4143  1.1  mrg       return;
   4144  1.1  mrg     }
   4145  1.1  mrg 
   4146  1.1  mrg   niter = MIN (niter_x, niter_z);
   4147  1.1  mrg   compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
   4148  1.1  mrg 					   &overlaps_a_xz,
   4149  1.1  mrg 					   &overlaps_b_xz,
   4150  1.1  mrg 					   &last_conflicts_xz, 1);
   4151  1.1  mrg   niter = MIN (niter_y, niter_z);
   4152  1.1  mrg   compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
   4153  1.1  mrg 					   &overlaps_a_yz,
   4154  1.1  mrg 					   &overlaps_b_yz,
   4155  1.1  mrg 					   &last_conflicts_yz, 2);
   4156  1.1  mrg   niter = MIN (niter_x, niter_z);
   4157  1.1  mrg   niter = MIN (niter_y, niter);
   4158  1.1  mrg   compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
   4159  1.1  mrg 					   &overlaps_a_xyz,
   4160  1.1  mrg 					   &overlaps_b_xyz,
   4161  1.1  mrg 					   &last_conflicts_xyz, 3);
   4162  1.1  mrg 
   4163  1.1  mrg   xz_p = !integer_zerop (last_conflicts_xz);
   4164  1.1  mrg   yz_p = !integer_zerop (last_conflicts_yz);
   4165  1.1  mrg   xyz_p = !integer_zerop (last_conflicts_xyz);
   4166  1.1  mrg 
   4167  1.1  mrg   if (xz_p || yz_p || xyz_p)
   4168  1.1  mrg     {
   4169  1.1  mrg       ova1 = affine_fn_cst (integer_zero_node);
   4170  1.1  mrg       ova2 = affine_fn_cst (integer_zero_node);
   4171  1.1  mrg       ovb = affine_fn_cst (integer_zero_node);
   4172  1.1  mrg       if (xz_p)
   4173  1.1  mrg 	{
   4174  1.1  mrg 	  affine_fn t0 = ova1;
   4175  1.1  mrg 	  affine_fn t2 = ovb;
   4176  1.1  mrg 
   4177  1.1  mrg 	  ova1 = affine_fn_plus (ova1, overlaps_a_xz);
   4178  1.1  mrg 	  ovb = affine_fn_plus (ovb, overlaps_b_xz);
   4179  1.1  mrg 	  affine_fn_free (t0);
   4180  1.1  mrg 	  affine_fn_free (t2);
   4181  1.1  mrg 	  *last_conflicts = last_conflicts_xz;
   4182  1.1  mrg 	}
   4183  1.1  mrg       if (yz_p)
   4184  1.1  mrg 	{
   4185  1.1  mrg 	  affine_fn t0 = ova2;
   4186  1.1  mrg 	  affine_fn t2 = ovb;
   4187  1.1  mrg 
   4188  1.1  mrg 	  ova2 = affine_fn_plus (ova2, overlaps_a_yz);
   4189  1.1  mrg 	  ovb = affine_fn_plus (ovb, overlaps_b_yz);
   4190  1.1  mrg 	  affine_fn_free (t0);
   4191  1.1  mrg 	  affine_fn_free (t2);
   4192  1.1  mrg 	  *last_conflicts = last_conflicts_yz;
   4193  1.1  mrg 	}
   4194  1.1  mrg       if (xyz_p)
   4195  1.1  mrg 	{
   4196  1.1  mrg 	  affine_fn t0 = ova1;
   4197  1.1  mrg 	  affine_fn t2 = ova2;
   4198  1.1  mrg 	  affine_fn t4 = ovb;
   4199  1.1  mrg 
   4200  1.1  mrg 	  ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
   4201  1.1  mrg 	  ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
   4202  1.1  mrg 	  ovb = affine_fn_plus (ovb, overlaps_b_xyz);
   4203  1.1  mrg 	  affine_fn_free (t0);
   4204  1.1  mrg 	  affine_fn_free (t2);
   4205  1.1  mrg 	  affine_fn_free (t4);
   4206  1.1  mrg 	  *last_conflicts = last_conflicts_xyz;
   4207  1.1  mrg 	}
   4208  1.1  mrg       *overlaps_a = conflict_fn (2, ova1, ova2);
   4209  1.1  mrg       *overlaps_b = conflict_fn (1, ovb);
   4210  1.1  mrg     }
   4211  1.1  mrg   else
   4212  1.1  mrg     {
   4213  1.1  mrg       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
   4214  1.1  mrg       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
   4215  1.1  mrg       *last_conflicts = integer_zero_node;
   4216  1.1  mrg     }
   4217  1.1  mrg 
   4218  1.1  mrg   affine_fn_free (overlaps_a_xz);
   4219  1.1  mrg   affine_fn_free (overlaps_b_xz);
   4220  1.1  mrg   affine_fn_free (overlaps_a_yz);
   4221  1.1  mrg   affine_fn_free (overlaps_b_yz);
   4222  1.1  mrg   affine_fn_free (overlaps_a_xyz);
   4223  1.1  mrg   affine_fn_free (overlaps_b_xyz);
   4224  1.1  mrg }
   4225  1.1  mrg 
   4226  1.1  mrg /* Copy the elements of vector VEC1 with length SIZE to VEC2.  */
   4227  1.1  mrg 
   4228  1.1  mrg static void
   4229  1.1  mrg lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
   4230  1.1  mrg 		    int size)
   4231  1.1  mrg {
   4232  1.1  mrg   memcpy (vec2, vec1, size * sizeof (*vec1));
   4233  1.1  mrg }
   4234  1.1  mrg 
   4235  1.1  mrg /* Copy the elements of M x N matrix MAT1 to MAT2.  */
   4236  1.1  mrg 
   4237  1.1  mrg static void
   4238  1.1  mrg lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
   4239  1.1  mrg 		    int m, int n)
   4240  1.1  mrg {
   4241  1.1  mrg   int i;
   4242  1.1  mrg 
   4243  1.1  mrg   for (i = 0; i < m; i++)
   4244  1.1  mrg     lambda_vector_copy (mat1[i], mat2[i], n);
   4245  1.1  mrg }
   4246  1.1  mrg 
   4247  1.1  mrg /* Store the N x N identity matrix in MAT.  */
   4248  1.1  mrg 
   4249  1.1  mrg static void
   4250  1.1  mrg lambda_matrix_id (lambda_matrix mat, int size)
   4251  1.1  mrg {
   4252  1.1  mrg   int i, j;
   4253  1.1  mrg 
   4254  1.1  mrg   for (i = 0; i < size; i++)
   4255  1.1  mrg     for (j = 0; j < size; j++)
   4256  1.1  mrg       mat[i][j] = (i == j) ? 1 : 0;
   4257  1.1  mrg }
   4258  1.1  mrg 
   4259  1.1  mrg /* Return the index of the first nonzero element of vector VEC1 between
   4260  1.1  mrg    START and N.  We must have START <= N.
   4261  1.1  mrg    Returns N if VEC1 is the zero vector.  */
   4262  1.1  mrg 
   4263  1.1  mrg static int
   4264  1.1  mrg lambda_vector_first_nz (lambda_vector vec1, int n, int start)
   4265  1.1  mrg {
   4266  1.1  mrg   int j = start;
   4267  1.1  mrg   while (j < n && vec1[j] == 0)
   4268  1.1  mrg     j++;
   4269  1.1  mrg   return j;
   4270  1.1  mrg }
   4271  1.1  mrg 
   4272  1.1  mrg /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
   4273  1.1  mrg    R2 = R2 + CONST1 * R1.  */
   4274  1.1  mrg 
   4275  1.1  mrg static bool
   4276  1.1  mrg lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
   4277  1.1  mrg 		       lambda_int const1)
   4278  1.1  mrg {
   4279  1.1  mrg   int i;
   4280  1.1  mrg 
   4281  1.1  mrg   if (const1 == 0)
   4282  1.1  mrg     return true;
   4283  1.1  mrg 
   4284  1.1  mrg   for (i = 0; i < n; i++)
   4285  1.1  mrg     {
   4286  1.1  mrg       bool ovf;
   4287  1.1  mrg       lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
   4288  1.1  mrg       if (ovf)
   4289  1.1  mrg 	return false;
   4290  1.1  mrg       lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
   4291  1.1  mrg       if (ovf || tem2 == HOST_WIDE_INT_MIN)
   4292  1.1  mrg 	return false;
   4293  1.1  mrg       mat[r2][i] = tem2;
   4294  1.1  mrg     }
   4295  1.1  mrg 
   4296  1.1  mrg   return true;
   4297  1.1  mrg }
   4298  1.1  mrg 
   4299  1.1  mrg /* Multiply vector VEC1 of length SIZE by a constant CONST1,
   4300  1.1  mrg    and store the result in VEC2.  */
   4301  1.1  mrg 
   4302  1.1  mrg static void
   4303  1.1  mrg lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
   4304  1.1  mrg 			  int size, lambda_int const1)
   4305  1.1  mrg {
   4306  1.1  mrg   int i;
   4307  1.1  mrg 
   4308  1.1  mrg   if (const1 == 0)
   4309  1.1  mrg     lambda_vector_clear (vec2, size);
   4310  1.1  mrg   else
   4311  1.1  mrg     for (i = 0; i < size; i++)
   4312  1.1  mrg       vec2[i] = const1 * vec1[i];
   4313  1.1  mrg }
   4314  1.1  mrg 
   4315  1.1  mrg /* Negate vector VEC1 with length SIZE and store it in VEC2.  */
   4316  1.1  mrg 
   4317  1.1  mrg static void
   4318  1.1  mrg lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
   4319  1.1  mrg 		      int size)
   4320  1.1  mrg {
   4321  1.1  mrg   lambda_vector_mult_const (vec1, vec2, size, -1);
   4322  1.1  mrg }
   4323  1.1  mrg 
   4324  1.1  mrg /* Negate row R1 of matrix MAT which has N columns.  */
   4325  1.1  mrg 
   4326  1.1  mrg static void
   4327  1.1  mrg lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
   4328  1.1  mrg {
   4329  1.1  mrg   lambda_vector_negate (mat[r1], mat[r1], n);
   4330  1.1  mrg }
   4331  1.1  mrg 
   4332  1.1  mrg /* Return true if two vectors are equal.  */
   4333  1.1  mrg 
   4334  1.1  mrg static bool
   4335  1.1  mrg lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
   4336  1.1  mrg {
   4337  1.1  mrg   int i;
   4338  1.1  mrg   for (i = 0; i < size; i++)
   4339  1.1  mrg     if (vec1[i] != vec2[i])
   4340  1.1  mrg       return false;
   4341  1.1  mrg   return true;
   4342  1.1  mrg }
   4343  1.1  mrg 
   4344  1.1  mrg /* Given an M x N integer matrix A, this function determines an M x
   4345  1.1  mrg    M unimodular matrix U, and an M x N echelon matrix S such that
   4346  1.1  mrg    "U.A = S".  This decomposition is also known as "right Hermite".
   4347  1.1  mrg 
   4348  1.1  mrg    Ref: Algorithm 2.1 page 33 in "Loop Transformations for
   4349  1.1  mrg    Restructuring Compilers" Utpal Banerjee.  */
   4350  1.1  mrg 
   4351  1.1  mrg static bool
   4352  1.1  mrg lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
   4353  1.1  mrg 			     lambda_matrix S, lambda_matrix U)
   4354  1.1  mrg {
   4355  1.1  mrg   int i, j, i0 = 0;
   4356  1.1  mrg 
   4357  1.1  mrg   lambda_matrix_copy (A, S, m, n);
   4358  1.1  mrg   lambda_matrix_id (U, m);
   4359  1.1  mrg 
   4360  1.1  mrg   for (j = 0; j < n; j++)
   4361  1.1  mrg     {
   4362  1.1  mrg       if (lambda_vector_first_nz (S[j], m, i0) < m)
   4363  1.1  mrg 	{
   4364  1.1  mrg 	  ++i0;
   4365  1.1  mrg 	  for (i = m - 1; i >= i0; i--)
   4366  1.1  mrg 	    {
   4367  1.1  mrg 	      while (S[i][j] != 0)
   4368  1.1  mrg 		{
   4369  1.1  mrg 		  lambda_int factor, a, b;
   4370  1.1  mrg 
   4371  1.1  mrg 		  a = S[i-1][j];
   4372  1.1  mrg 		  b = S[i][j];
   4373  1.1  mrg 		  gcc_assert (a != HOST_WIDE_INT_MIN);
   4374  1.1  mrg 		  factor = a / b;
   4375  1.1  mrg 
   4376  1.1  mrg 		  if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
   4377  1.1  mrg 		    return false;
   4378  1.1  mrg 		  std::swap (S[i], S[i-1]);
   4379  1.1  mrg 
   4380  1.1  mrg 		  if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
   4381  1.1  mrg 		    return false;
   4382  1.1  mrg 		  std::swap (U[i], U[i-1]);
   4383  1.1  mrg 		}
   4384  1.1  mrg 	    }
   4385  1.1  mrg 	}
   4386  1.1  mrg     }
   4387  1.1  mrg 
   4388  1.1  mrg   return true;
   4389  1.1  mrg }
   4390  1.1  mrg 
   4391  1.1  mrg /* Determines the overlapping elements due to accesses CHREC_A and
   4392  1.1  mrg    CHREC_B, that are affine functions.  This function cannot handle
   4393  1.1  mrg    symbolic evolution functions, ie. when initial conditions are
   4394  1.1  mrg    parameters, because it uses lambda matrices of integers.  */
   4395  1.1  mrg 
   4396  1.1  mrg static void
   4397  1.1  mrg analyze_subscript_affine_affine (tree chrec_a,
   4398  1.1  mrg 				 tree chrec_b,
   4399  1.1  mrg 				 conflict_function **overlaps_a,
   4400  1.1  mrg 				 conflict_function **overlaps_b,
   4401  1.1  mrg 				 tree *last_conflicts)
   4402  1.1  mrg {
   4403  1.1  mrg   unsigned nb_vars_a, nb_vars_b, dim;
   4404  1.1  mrg   lambda_int gamma, gcd_alpha_beta;
   4405  1.1  mrg   lambda_matrix A, U, S;
   4406  1.1  mrg   struct obstack scratch_obstack;
   4407  1.1  mrg 
   4408  1.1  mrg   if (eq_evolutions_p (chrec_a, chrec_b))
   4409  1.1  mrg     {
   4410  1.1  mrg       /* The accessed index overlaps for each iteration in the
   4411  1.1  mrg 	 loop.  */
   4412  1.1  mrg       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
   4413  1.1  mrg       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
   4414  1.1  mrg       *last_conflicts = chrec_dont_know;
   4415  1.1  mrg       return;
   4416  1.1  mrg     }
   4417  1.1  mrg   if (dump_file && (dump_flags & TDF_DETAILS))
   4418  1.1  mrg     fprintf (dump_file, "(analyze_subscript_affine_affine \n");
   4419  1.1  mrg 
   4420  1.1  mrg   /* For determining the initial intersection, we have to solve a
   4421  1.1  mrg      Diophantine equation.  This is the most time consuming part.
   4422  1.1  mrg 
   4423  1.1  mrg      For answering to the question: "Is there a dependence?" we have
   4424  1.1  mrg      to prove that there exists a solution to the Diophantine
   4425  1.1  mrg      equation, and that the solution is in the iteration domain,
   4426  1.1  mrg      i.e. the solution is positive or zero, and that the solution
   4427  1.1  mrg      happens before the upper bound loop.nb_iterations.  Otherwise
   4428  1.1  mrg      there is no dependence.  This function outputs a description of
   4429  1.1  mrg      the iterations that hold the intersections.  */
   4430  1.1  mrg 
   4431  1.1  mrg   nb_vars_a = nb_vars_in_chrec (chrec_a);
   4432  1.1  mrg   nb_vars_b = nb_vars_in_chrec (chrec_b);
   4433  1.1  mrg 
   4434  1.1  mrg   gcc_obstack_init (&scratch_obstack);
   4435  1.1  mrg 
   4436  1.1  mrg   dim = nb_vars_a + nb_vars_b;
   4437  1.1  mrg   U = lambda_matrix_new (dim, dim, &scratch_obstack);
   4438  1.1  mrg   A = lambda_matrix_new (dim, 1, &scratch_obstack);
   4439  1.1  mrg   S = lambda_matrix_new (dim, 1, &scratch_obstack);
   4440  1.1  mrg 
   4441  1.1  mrg   tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
   4442  1.1  mrg   tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
   4443  1.1  mrg   if (init_a == chrec_dont_know
   4444  1.1  mrg       || init_b == chrec_dont_know)
   4445  1.1  mrg     {
   4446  1.1  mrg       if (dump_file && (dump_flags & TDF_DETAILS))
   4447  1.1  mrg 	fprintf (dump_file, "affine-affine test failed: "
   4448  1.1  mrg 		 "representation issue.\n");
   4449  1.1  mrg       *overlaps_a = conflict_fn_not_known ();
   4450  1.1  mrg       *overlaps_b = conflict_fn_not_known ();
   4451  1.1  mrg       *last_conflicts = chrec_dont_know;
   4452  1.1  mrg       goto end_analyze_subs_aa;
   4453  1.1  mrg     }
   4454  1.1  mrg   gamma = int_cst_value (init_b) - int_cst_value (init_a);
   4455  1.1  mrg 
   4456  1.1  mrg   /* Don't do all the hard work of solving the Diophantine equation
   4457  1.1  mrg      when we already know the solution: for example,
   4458  1.1  mrg      | {3, +, 1}_1
   4459  1.1  mrg      | {3, +, 4}_2
   4460  1.1  mrg      | gamma = 3 - 3 = 0.
   4461  1.1  mrg      Then the first overlap occurs during the first iterations:
   4462  1.1  mrg      | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
   4463  1.1  mrg   */
   4464  1.1  mrg   if (gamma == 0)
   4465  1.1  mrg     {
   4466  1.1  mrg       if (nb_vars_a == 1 && nb_vars_b == 1)
   4467  1.1  mrg 	{
   4468  1.1  mrg 	  HOST_WIDE_INT step_a, step_b;
   4469  1.1  mrg 	  HOST_WIDE_INT niter, niter_a, niter_b;
   4470  1.1  mrg 	  affine_fn ova, ovb;
   4471  1.1  mrg 
   4472  1.1  mrg 	  niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
   4473  1.1  mrg 	  niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
   4474  1.1  mrg 	  niter = MIN (niter_a, niter_b);
   4475  1.1  mrg 	  step_a = int_cst_value (CHREC_RIGHT (chrec_a));
   4476  1.1  mrg 	  step_b = int_cst_value (CHREC_RIGHT (chrec_b));
   4477  1.1  mrg 
   4478  1.1  mrg 	  compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
   4479  1.1  mrg 						   &ova, &ovb,
   4480  1.1  mrg 						   last_conflicts, 1);
   4481  1.1  mrg 	  *overlaps_a = conflict_fn (1, ova);
   4482  1.1  mrg 	  *overlaps_b = conflict_fn (1, ovb);
   4483  1.1  mrg 	}
   4484  1.1  mrg 
   4485  1.1  mrg       else if (nb_vars_a == 2 && nb_vars_b == 1)
   4486  1.1  mrg 	compute_overlap_steps_for_affine_1_2
   4487  1.1  mrg 	  (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
   4488  1.1  mrg 
   4489  1.1  mrg       else if (nb_vars_a == 1 && nb_vars_b == 2)
   4490  1.1  mrg 	compute_overlap_steps_for_affine_1_2
   4491  1.1  mrg 	  (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
   4492  1.1  mrg 
   4493  1.1  mrg       else
   4494  1.1  mrg 	{
   4495  1.1  mrg 	  if (dump_file && (dump_flags & TDF_DETAILS))
   4496  1.1  mrg 	    fprintf (dump_file, "affine-affine test failed: too many variables.\n");
   4497  1.1  mrg 	  *overlaps_a = conflict_fn_not_known ();
   4498  1.1  mrg 	  *overlaps_b = conflict_fn_not_known ();
   4499  1.1  mrg 	  *last_conflicts = chrec_dont_know;
   4500  1.1  mrg 	}
   4501  1.1  mrg       goto end_analyze_subs_aa;
   4502  1.1  mrg     }
   4503  1.1  mrg 
   4504  1.1  mrg   /* U.A = S */
   4505  1.1  mrg   if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
   4506  1.1  mrg     {
   4507  1.1  mrg       *overlaps_a = conflict_fn_not_known ();
   4508  1.1  mrg       *overlaps_b = conflict_fn_not_known ();
   4509  1.1  mrg       *last_conflicts = chrec_dont_know;
   4510  1.1  mrg       goto end_analyze_subs_aa;
   4511  1.1  mrg     }
   4512  1.1  mrg 
   4513  1.1  mrg   if (S[0][0] < 0)
   4514  1.1  mrg     {
   4515  1.1  mrg       S[0][0] *= -1;
   4516  1.1  mrg       lambda_matrix_row_negate (U, dim, 0);
   4517  1.1  mrg     }
   4518  1.1  mrg   gcd_alpha_beta = S[0][0];
   4519  1.1  mrg 
   4520  1.1  mrg   /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
   4521  1.1  mrg      but that is a quite strange case.  Instead of ICEing, answer
   4522  1.1  mrg      don't know.  */
   4523  1.1  mrg   if (gcd_alpha_beta == 0)
   4524  1.1  mrg     {
   4525  1.1  mrg       *overlaps_a = conflict_fn_not_known ();
   4526  1.1  mrg       *overlaps_b = conflict_fn_not_known ();
   4527  1.1  mrg       *last_conflicts = chrec_dont_know;
   4528  1.1  mrg       goto end_analyze_subs_aa;
   4529  1.1  mrg     }
   4530  1.1  mrg 
   4531  1.1  mrg   /* The classic "gcd-test".  */
   4532  1.1  mrg   if (!int_divides_p (gcd_alpha_beta, gamma))
   4533  1.1  mrg     {
   4534  1.1  mrg       /* The "gcd-test" has determined that there is no integer
   4535  1.1  mrg 	 solution, i.e. there is no dependence.  */
   4536  1.1  mrg       *overlaps_a = conflict_fn_no_dependence ();
   4537  1.1  mrg       *overlaps_b = conflict_fn_no_dependence ();
   4538  1.1  mrg       *last_conflicts = integer_zero_node;
   4539  1.1  mrg     }
   4540  1.1  mrg 
   4541  1.1  mrg   /* Both access functions are univariate.  This includes SIV and MIV cases.  */
   4542  1.1  mrg   else if (nb_vars_a == 1 && nb_vars_b == 1)
   4543  1.1  mrg     {
   4544  1.1  mrg       /* Both functions should have the same evolution sign.  */
   4545  1.1  mrg       if (((A[0][0] > 0 && -A[1][0] > 0)
   4546  1.1  mrg 	   || (A[0][0] < 0 && -A[1][0] < 0)))
   4547  1.1  mrg 	{
   4548  1.1  mrg 	  /* The solutions are given by:
   4549  1.1  mrg 	     |
   4550  1.1  mrg 	     | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
   4551  1.1  mrg 	     |                           [u21 u22]    [y0]
   4552  1.1  mrg 
   4553  1.1  mrg 	     For a given integer t.  Using the following variables,
   4554  1.1  mrg 
   4555  1.1  mrg 	     | i0 = u11 * gamma / gcd_alpha_beta
   4556  1.1  mrg 	     | j0 = u12 * gamma / gcd_alpha_beta
   4557  1.1  mrg 	     | i1 = u21
   4558  1.1  mrg 	     | j1 = u22
   4559  1.1  mrg 
   4560  1.1  mrg 	     the solutions are:
   4561  1.1  mrg 
   4562  1.1  mrg 	     | x0 = i0 + i1 * t,
   4563  1.1  mrg 	     | y0 = j0 + j1 * t.  */
   4564  1.1  mrg       	  HOST_WIDE_INT i0, j0, i1, j1;
   4565  1.1  mrg 
   4566  1.1  mrg 	  i0 = U[0][0] * gamma / gcd_alpha_beta;
   4567  1.1  mrg 	  j0 = U[0][1] * gamma / gcd_alpha_beta;
   4568  1.1  mrg 	  i1 = U[1][0];
   4569  1.1  mrg 	  j1 = U[1][1];
   4570  1.1  mrg 
   4571  1.1  mrg 	  if ((i1 == 0 && i0 < 0)
   4572  1.1  mrg 	      || (j1 == 0 && j0 < 0))
   4573  1.1  mrg 	    {
   4574  1.1  mrg 	      /* There is no solution.
   4575  1.1  mrg 		 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
   4576  1.1  mrg 		 falls in here, but for the moment we don't look at the
   4577  1.1  mrg 		 upper bound of the iteration domain.  */
   4578  1.1  mrg 	      *overlaps_a = conflict_fn_no_dependence ();
   4579  1.1  mrg 	      *overlaps_b = conflict_fn_no_dependence ();
   4580  1.1  mrg 	      *last_conflicts = integer_zero_node;
   4581  1.1  mrg 	      goto end_analyze_subs_aa;
   4582  1.1  mrg 	    }
   4583  1.1  mrg 
   4584  1.1  mrg 	  if (i1 > 0 && j1 > 0)
   4585  1.1  mrg 	    {
   4586  1.1  mrg 	      HOST_WIDE_INT niter_a
   4587  1.1  mrg 		= max_stmt_executions_int (get_chrec_loop (chrec_a));
   4588  1.1  mrg 	      HOST_WIDE_INT niter_b
   4589  1.1  mrg 		= max_stmt_executions_int (get_chrec_loop (chrec_b));
   4590  1.1  mrg 	      HOST_WIDE_INT niter = MIN (niter_a, niter_b);
   4591  1.1  mrg 
   4592  1.1  mrg 	      /* (X0, Y0) is a solution of the Diophantine equation:
   4593  1.1  mrg 		 "chrec_a (X0) = chrec_b (Y0)".  */
   4594  1.1  mrg 	      HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
   4595  1.1  mrg 					CEIL (-j0, j1));
   4596  1.1  mrg 	      HOST_WIDE_INT x0 = i1 * tau1 + i0;
   4597  1.1  mrg 	      HOST_WIDE_INT y0 = j1 * tau1 + j0;
   4598  1.1  mrg 
   4599  1.1  mrg 	      /* (X1, Y1) is the smallest positive solution of the eq
   4600  1.1  mrg 		 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
   4601  1.1  mrg 		 first conflict occurs.  */
   4602  1.1  mrg 	      HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
   4603  1.1  mrg 	      HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
   4604  1.1  mrg 	      HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
   4605  1.1  mrg 
   4606  1.1  mrg 	      if (niter > 0)
   4607  1.1  mrg 		{
   4608  1.1  mrg 		  /* If the overlap occurs outside of the bounds of the
   4609  1.1  mrg 		     loop, there is no dependence.  */
   4610  1.1  mrg 		  if (x1 >= niter_a || y1 >= niter_b)
   4611  1.1  mrg 		    {
   4612  1.1  mrg 		      *overlaps_a = conflict_fn_no_dependence ();
   4613  1.1  mrg 		      *overlaps_b = conflict_fn_no_dependence ();
   4614  1.1  mrg 		      *last_conflicts = integer_zero_node;
   4615  1.1  mrg 		      goto end_analyze_subs_aa;
   4616  1.1  mrg 		    }
   4617  1.1  mrg 
   4618  1.1  mrg 		  /* max stmt executions can get quite large, avoid
   4619  1.1  mrg 		     overflows by using wide ints here.  */
   4620  1.1  mrg 		  widest_int tau2
   4621  1.1  mrg 		    = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
   4622  1.1  mrg 				wi::sdiv_floor (wi::sub (niter_b, j0), j1));
   4623  1.1  mrg 		  widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
   4624  1.1  mrg 		  if (wi::min_precision (last_conflict, SIGNED)
   4625  1.1  mrg 		      <= TYPE_PRECISION (integer_type_node))
   4626  1.1  mrg 		    *last_conflicts
   4627  1.1  mrg 		       = build_int_cst (integer_type_node,
   4628  1.1  mrg 					last_conflict.to_shwi ());
   4629  1.1  mrg 		  else
   4630  1.1  mrg 		    *last_conflicts = chrec_dont_know;
   4631  1.1  mrg 		}
   4632  1.1  mrg 	      else
   4633  1.1  mrg 		*last_conflicts = chrec_dont_know;
   4634  1.1  mrg 
   4635  1.1  mrg 	      *overlaps_a
   4636  1.1  mrg 		= conflict_fn (1,
   4637  1.1  mrg 			       affine_fn_univar (build_int_cst (NULL_TREE, x1),
   4638  1.1  mrg 						 1,
   4639  1.1  mrg 						 build_int_cst (NULL_TREE, i1)));
   4640  1.1  mrg 	      *overlaps_b
   4641  1.1  mrg 		= conflict_fn (1,
   4642  1.1  mrg 			       affine_fn_univar (build_int_cst (NULL_TREE, y1),
   4643  1.1  mrg 						 1,
   4644  1.1  mrg 						 build_int_cst (NULL_TREE, j1)));
   4645  1.1  mrg 	    }
   4646  1.1  mrg 	  else
   4647  1.1  mrg 	    {
   4648  1.1  mrg 	      /* FIXME: For the moment, the upper bound of the
   4649  1.1  mrg 		 iteration domain for i and j is not checked.  */
   4650  1.1  mrg 	      if (dump_file && (dump_flags & TDF_DETAILS))
   4651  1.1  mrg 		fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
   4652  1.1  mrg 	      *overlaps_a = conflict_fn_not_known ();
   4653  1.1  mrg 	      *overlaps_b = conflict_fn_not_known ();
   4654  1.1  mrg 	      *last_conflicts = chrec_dont_know;
   4655  1.1  mrg 	    }
   4656  1.1  mrg 	}
   4657  1.1  mrg       else
   4658  1.1  mrg 	{
   4659  1.1  mrg 	  if (dump_file && (dump_flags & TDF_DETAILS))
   4660  1.1  mrg 	    fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
   4661  1.1  mrg 	  *overlaps_a = conflict_fn_not_known ();
   4662  1.1  mrg 	  *overlaps_b = conflict_fn_not_known ();
   4663  1.1  mrg 	  *last_conflicts = chrec_dont_know;
   4664  1.1  mrg 	}
   4665  1.1  mrg     }
   4666  1.1  mrg   else
   4667  1.1  mrg     {
   4668  1.1  mrg       if (dump_file && (dump_flags & TDF_DETAILS))
   4669  1.1  mrg 	fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
   4670  1.1  mrg       *overlaps_a = conflict_fn_not_known ();
   4671  1.1  mrg       *overlaps_b = conflict_fn_not_known ();
   4672  1.1  mrg       *last_conflicts = chrec_dont_know;
   4673  1.1  mrg     }
   4674  1.1  mrg 
   4675  1.1  mrg end_analyze_subs_aa:
   4676  1.1  mrg   obstack_free (&scratch_obstack, NULL);
   4677  1.1  mrg   if (dump_file && (dump_flags & TDF_DETAILS))
   4678  1.1  mrg     {
   4679  1.1  mrg       fprintf (dump_file, "  (overlaps_a = ");
   4680  1.1  mrg       dump_conflict_function (dump_file, *overlaps_a);
   4681  1.1  mrg       fprintf (dump_file, ")\n  (overlaps_b = ");
   4682  1.1  mrg       dump_conflict_function (dump_file, *overlaps_b);
   4683  1.1  mrg       fprintf (dump_file, "))\n");
   4684  1.1  mrg     }
   4685  1.1  mrg }
   4686  1.1  mrg 
   4687  1.1  mrg /* Returns true when analyze_subscript_affine_affine can be used for
   4688  1.1  mrg    determining the dependence relation between chrec_a and chrec_b,
   4689  1.1  mrg    that contain symbols.  This function modifies chrec_a and chrec_b
   4690  1.1  mrg    such that the analysis result is the same, and such that they don't
   4691  1.1  mrg    contain symbols, and then can safely be passed to the analyzer.
   4692  1.1  mrg 
   4693  1.1  mrg    Example: The analysis of the following tuples of evolutions produce
   4694  1.1  mrg    the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
   4695  1.1  mrg    vs. {0, +, 1}_1
   4696  1.1  mrg 
   4697  1.1  mrg    {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
   4698  1.1  mrg    {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
   4699  1.1  mrg */
   4700  1.1  mrg 
   4701  1.1  mrg static bool
   4702  1.1  mrg can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
   4703  1.1  mrg {
   4704  1.1  mrg   tree diff, type, left_a, left_b, right_b;
   4705  1.1  mrg 
   4706  1.1  mrg   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
   4707  1.1  mrg       || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
   4708  1.1  mrg     /* FIXME: For the moment not handled.  Might be refined later.  */
   4709  1.1  mrg     return false;
   4710  1.1  mrg 
   4711  1.1  mrg   type = chrec_type (*chrec_a);
   4712  1.1  mrg   left_a = CHREC_LEFT (*chrec_a);
   4713  1.1  mrg   left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
   4714  1.1  mrg   diff = chrec_fold_minus (type, left_a, left_b);
   4715  1.1  mrg 
   4716  1.1  mrg   if (!evolution_function_is_constant_p (diff))
   4717  1.1  mrg     return false;
   4718  1.1  mrg 
   4719  1.1  mrg   if (dump_file && (dump_flags & TDF_DETAILS))
   4720  1.1  mrg     fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
   4721  1.1  mrg 
   4722  1.1  mrg   *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
   4723  1.1  mrg 				     diff, CHREC_RIGHT (*chrec_a));
   4724  1.1  mrg   right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
   4725  1.1  mrg   *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
   4726  1.1  mrg 				     build_int_cst (type, 0),
   4727  1.1  mrg 				     right_b);
   4728  1.1  mrg   return true;
   4729  1.1  mrg }
   4730  1.1  mrg 
   4731  1.1  mrg /* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
   4732  1.1  mrg    *OVERLAPS_B are initialized to the functions that describe the
   4733  1.1  mrg    relation between the elements accessed twice by CHREC_A and
   4734  1.1  mrg    CHREC_B.  For k >= 0, the following property is verified:
   4735  1.1  mrg 
   4736  1.1  mrg    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
   4737  1.1  mrg 
   4738  1.1  mrg static void
   4739  1.1  mrg analyze_siv_subscript (tree chrec_a,
   4740  1.1  mrg 		       tree chrec_b,
   4741  1.1  mrg 		       conflict_function **overlaps_a,
   4742  1.1  mrg 		       conflict_function **overlaps_b,
   4743  1.1  mrg 		       tree *last_conflicts,
   4744  1.1  mrg 		       int loop_nest_num)
   4745  1.1  mrg {
   4746  1.1  mrg   dependence_stats.num_siv++;
   4747  1.1  mrg 
   4748  1.1  mrg   if (dump_file && (dump_flags & TDF_DETAILS))
   4749  1.1  mrg     fprintf (dump_file, "(analyze_siv_subscript \n");
   4750  1.1  mrg 
   4751  1.1  mrg   if (evolution_function_is_constant_p (chrec_a)
   4752  1.1  mrg       && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
   4753  1.1  mrg     analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
   4754  1.1  mrg 				      overlaps_a, overlaps_b, last_conflicts);
   4755  1.1  mrg 
   4756  1.1  mrg   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
   4757  1.1  mrg 	   && evolution_function_is_constant_p (chrec_b))
   4758  1.1  mrg     analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
   4759  1.1  mrg 				      overlaps_b, overlaps_a, last_conflicts);
   4760  1.1  mrg 
   4761  1.1  mrg   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
   4762  1.1  mrg 	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
   4763  1.1  mrg     {
   4764  1.1  mrg       if (!chrec_contains_symbols (chrec_a)
   4765  1.1  mrg 	  && !chrec_contains_symbols (chrec_b))
   4766  1.1  mrg 	{
   4767  1.1  mrg 	  analyze_subscript_affine_affine (chrec_a, chrec_b,
   4768  1.1  mrg 					   overlaps_a, overlaps_b,
   4769  1.1  mrg 					   last_conflicts);
   4770  1.1  mrg 
   4771  1.1  mrg 	  if (CF_NOT_KNOWN_P (*overlaps_a)
   4772  1.1  mrg 	      || CF_NOT_KNOWN_P (*overlaps_b))
   4773  1.1  mrg 	    dependence_stats.num_siv_unimplemented++;
   4774  1.1  mrg 	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
   4775  1.1  mrg 		   || CF_NO_DEPENDENCE_P (*overlaps_b))
   4776  1.1  mrg 	    dependence_stats.num_siv_independent++;
   4777  1.1  mrg 	  else
   4778  1.1  mrg 	    dependence_stats.num_siv_dependent++;
   4779  1.1  mrg 	}
   4780  1.1  mrg       else if (can_use_analyze_subscript_affine_affine (&chrec_a,
   4781  1.1  mrg 							&chrec_b))
   4782  1.1  mrg 	{
   4783  1.1  mrg 	  analyze_subscript_affine_affine (chrec_a, chrec_b,
   4784  1.1  mrg 					   overlaps_a, overlaps_b,
   4785  1.1  mrg 					   last_conflicts);
   4786  1.1  mrg 
   4787  1.1  mrg 	  if (CF_NOT_KNOWN_P (*overlaps_a)
   4788  1.1  mrg 	      || CF_NOT_KNOWN_P (*overlaps_b))
   4789  1.1  mrg 	    dependence_stats.num_siv_unimplemented++;
   4790  1.1  mrg 	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
   4791  1.1  mrg 		   || CF_NO_DEPENDENCE_P (*overlaps_b))
   4792  1.1  mrg 	    dependence_stats.num_siv_independent++;
   4793  1.1  mrg 	  else
   4794  1.1  mrg 	    dependence_stats.num_siv_dependent++;
   4795  1.1  mrg 	}
   4796  1.1  mrg       else
   4797  1.1  mrg 	goto siv_subscript_dontknow;
   4798  1.1  mrg     }
   4799  1.1  mrg 
   4800  1.1  mrg   else
   4801  1.1  mrg     {
   4802  1.1  mrg     siv_subscript_dontknow:;
   4803  1.1  mrg       if (dump_file && (dump_flags & TDF_DETAILS))
   4804  1.1  mrg 	fprintf (dump_file, "  siv test failed: unimplemented");
   4805  1.1  mrg       *overlaps_a = conflict_fn_not_known ();
   4806  1.1  mrg       *overlaps_b = conflict_fn_not_known ();
   4807  1.1  mrg       *last_conflicts = chrec_dont_know;
   4808  1.1  mrg       dependence_stats.num_siv_unimplemented++;
   4809  1.1  mrg     }
   4810  1.1  mrg 
   4811  1.1  mrg   if (dump_file && (dump_flags & TDF_DETAILS))
   4812  1.1  mrg     fprintf (dump_file, ")\n");
   4813  1.1  mrg }
   4814  1.1  mrg 
   4815  1.1  mrg /* Returns false if we can prove that the greatest common divisor of the steps
   4816  1.1  mrg    of CHREC does not divide CST, false otherwise.  */
   4817  1.1  mrg 
   4818  1.1  mrg static bool
   4819  1.1  mrg gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
   4820  1.1  mrg {
   4821  1.1  mrg   HOST_WIDE_INT cd = 0, val;
   4822  1.1  mrg   tree step;
   4823  1.1  mrg 
   4824  1.1  mrg   if (!tree_fits_shwi_p (cst))
   4825  1.1  mrg     return true;
   4826  1.1  mrg   val = tree_to_shwi (cst);
   4827  1.1  mrg 
   4828  1.1  mrg   while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
   4829  1.1  mrg     {
   4830  1.1  mrg       step = CHREC_RIGHT (chrec);
   4831  1.1  mrg       if (!tree_fits_shwi_p (step))
   4832  1.1  mrg 	return true;
   4833  1.1  mrg       cd = gcd (cd, tree_to_shwi (step));
   4834  1.1  mrg       chrec = CHREC_LEFT (chrec);
   4835  1.1  mrg     }
   4836  1.1  mrg 
   4837  1.1  mrg   return val % cd == 0;
   4838  1.1  mrg }
   4839  1.1  mrg 
   4840  1.1  mrg /* Analyze a MIV (Multiple Index Variable) subscript with respect to
   4841  1.1  mrg    LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
   4842  1.1  mrg    functions that describe the relation between the elements accessed
   4843  1.1  mrg    twice by CHREC_A and CHREC_B.  For k >= 0, the following property
   4844  1.1  mrg    is verified:
   4845  1.1  mrg 
   4846  1.1  mrg    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
   4847  1.1  mrg 
   4848  1.1  mrg static void
   4849  1.1  mrg analyze_miv_subscript (tree chrec_a,
   4850  1.1  mrg 		       tree chrec_b,
   4851  1.1  mrg 		       conflict_function **overlaps_a,
   4852  1.1  mrg 		       conflict_function **overlaps_b,
   4853  1.1  mrg 		       tree *last_conflicts,
   4854  1.1  mrg 		       class loop *loop_nest)
   4855  1.1  mrg {
   4856  1.1  mrg   tree type, difference;
   4857  1.1  mrg 
   4858  1.1  mrg   dependence_stats.num_miv++;
   4859  1.1  mrg   if (dump_file && (dump_flags & TDF_DETAILS))
   4860  1.1  mrg     fprintf (dump_file, "(analyze_miv_subscript \n");
   4861  1.1  mrg 
   4862  1.1  mrg   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
   4863  1.1  mrg   chrec_a = chrec_convert (type, chrec_a, NULL);
   4864  1.1  mrg   chrec_b = chrec_convert (type, chrec_b, NULL);
   4865  1.1  mrg   difference = chrec_fold_minus (type, chrec_a, chrec_b);
   4866  1.1  mrg 
   4867  1.1  mrg   if (eq_evolutions_p (chrec_a, chrec_b))
   4868  1.1  mrg     {
   4869  1.1  mrg       /* Access functions are the same: all the elements are accessed
   4870  1.1  mrg 	 in the same order.  */
   4871  1.1  mrg       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
   4872  1.1  mrg       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
   4873  1.1  mrg       *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
   4874  1.1  mrg       dependence_stats.num_miv_dependent++;
   4875  1.1  mrg     }
   4876  1.1  mrg 
   4877  1.1  mrg   else if (evolution_function_is_constant_p (difference)
   4878  1.1  mrg 	   && evolution_function_is_affine_multivariate_p (chrec_a,
   4879  1.1  mrg 							   loop_nest->num)
   4880  1.1  mrg 	   && !gcd_of_steps_may_divide_p (chrec_a, difference))
   4881  1.1  mrg     {
   4882  1.1  mrg       /* testsuite/.../ssa-chrec-33.c
   4883  1.1  mrg 	 {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
   4884  1.1  mrg 
   4885  1.1  mrg 	 The difference is 1, and all the evolution steps are multiples
   4886  1.1  mrg 	 of 2, consequently there are no overlapping elements.  */
   4887  1.1  mrg       *overlaps_a = conflict_fn_no_dependence ();
   4888  1.1  mrg       *overlaps_b = conflict_fn_no_dependence ();
   4889  1.1  mrg       *last_conflicts = integer_zero_node;
   4890  1.1  mrg       dependence_stats.num_miv_independent++;
   4891  1.1  mrg     }
   4892  1.1  mrg 
   4893  1.1  mrg   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
   4894  1.1  mrg 	   && !chrec_contains_symbols (chrec_a, loop_nest)
   4895  1.1  mrg 	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
   4896  1.1  mrg 	   && !chrec_contains_symbols (chrec_b, loop_nest))
   4897  1.1  mrg     {
   4898  1.1  mrg       /* testsuite/.../ssa-chrec-35.c
   4899  1.1  mrg 	 {0, +, 1}_2  vs.  {0, +, 1}_3
   4900  1.1  mrg 	 the overlapping elements are respectively located at iterations:
   4901  1.1  mrg 	 {0, +, 1}_x and {0, +, 1}_x,
   4902  1.1  mrg 	 in other words, we have the equality:
   4903  1.1  mrg 	 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
   4904  1.1  mrg 
   4905  1.1  mrg 	 Other examples:
   4906  1.1  mrg 	 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
   4907  1.1  mrg 	 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
   4908  1.1  mrg 
   4909  1.1  mrg 	 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
   4910  1.1  mrg 	 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
   4911  1.1  mrg       */
   4912  1.1  mrg       analyze_subscript_affine_affine (chrec_a, chrec_b,
   4913  1.1  mrg 				       overlaps_a, overlaps_b, last_conflicts);
   4914  1.1  mrg 
   4915  1.1  mrg       if (CF_NOT_KNOWN_P (*overlaps_a)
   4916  1.1  mrg  	  || CF_NOT_KNOWN_P (*overlaps_b))
   4917  1.1  mrg 	dependence_stats.num_miv_unimplemented++;
   4918  1.1  mrg       else if (CF_NO_DEPENDENCE_P (*overlaps_a)
   4919  1.1  mrg 	       || CF_NO_DEPENDENCE_P (*overlaps_b))
   4920  1.1  mrg 	dependence_stats.num_miv_independent++;
   4921  1.1  mrg       else
   4922  1.1  mrg 	dependence_stats.num_miv_dependent++;
   4923  1.1  mrg     }
   4924  1.1  mrg 
   4925  1.1  mrg   else
   4926  1.1  mrg     {
   4927  1.1  mrg       /* When the analysis is too difficult, answer "don't know".  */
   4928  1.1  mrg       if (dump_file && (dump_flags & TDF_DETAILS))
   4929  1.1  mrg 	fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
   4930  1.1  mrg 
   4931  1.1  mrg       *overlaps_a = conflict_fn_not_known ();
   4932  1.1  mrg       *overlaps_b = conflict_fn_not_known ();
   4933  1.1  mrg       *last_conflicts = chrec_dont_know;
   4934  1.1  mrg       dependence_stats.num_miv_unimplemented++;
   4935  1.1  mrg     }
   4936  1.1  mrg 
   4937  1.1  mrg   if (dump_file && (dump_flags & TDF_DETAILS))
   4938  1.1  mrg     fprintf (dump_file, ")\n");
   4939  1.1  mrg }
   4940  1.1  mrg 
/* Determines the iterations for which CHREC_A is equal to CHREC_B
   with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
   4943  1.1  mrg    OVERLAP_ITERATIONS_B are initialized with two functions that
   4944  1.1  mrg    describe the iterations that contain conflicting elements.
   4945  1.1  mrg 
   4946  1.1  mrg    Remark: For an integer k >= 0, the following equality is true:
   4947  1.1  mrg 
   4948  1.1  mrg    CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
   4949  1.1  mrg */
   4950  1.1  mrg 
   4951  1.1  mrg static void
   4952  1.1  mrg analyze_overlapping_iterations (tree chrec_a,
   4953  1.1  mrg 				tree chrec_b,
   4954  1.1  mrg 				conflict_function **overlap_iterations_a,
   4955  1.1  mrg 				conflict_function **overlap_iterations_b,
   4956  1.1  mrg 				tree *last_conflicts, class loop *loop_nest)
   4957  1.1  mrg {
   4958  1.1  mrg   unsigned int lnn = loop_nest->num;
   4959  1.1  mrg 
   4960  1.1  mrg   dependence_stats.num_subscript_tests++;
   4961  1.1  mrg 
   4962  1.1  mrg   if (dump_file && (dump_flags & TDF_DETAILS))
   4963  1.1  mrg     {
   4964  1.1  mrg       fprintf (dump_file, "(analyze_overlapping_iterations \n");
   4965  1.1  mrg       fprintf (dump_file, "  (chrec_a = ");
   4966  1.1  mrg       print_generic_expr (dump_file, chrec_a);
   4967  1.1  mrg       fprintf (dump_file, ")\n  (chrec_b = ");
   4968  1.1  mrg       print_generic_expr (dump_file, chrec_b);
   4969  1.1  mrg       fprintf (dump_file, ")\n");
   4970  1.1  mrg     }
   4971  1.1  mrg 
   4972  1.1  mrg   if (chrec_a == NULL_TREE
   4973  1.1  mrg       || chrec_b == NULL_TREE
   4974  1.1  mrg       || chrec_contains_undetermined (chrec_a)
   4975  1.1  mrg       || chrec_contains_undetermined (chrec_b))
   4976  1.1  mrg     {
   4977  1.1  mrg       dependence_stats.num_subscript_undetermined++;
   4978  1.1  mrg 
   4979  1.1  mrg       *overlap_iterations_a = conflict_fn_not_known ();
   4980  1.1  mrg       *overlap_iterations_b = conflict_fn_not_known ();
   4981  1.1  mrg     }
   4982  1.1  mrg 
   4983  1.1  mrg   /* If they are the same chrec, and are affine, they overlap
   4984  1.1  mrg      on every iteration.  */
   4985  1.1  mrg   else if (eq_evolutions_p (chrec_a, chrec_b)
   4986  1.1  mrg 	   && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
   4987  1.1  mrg 	       || operand_equal_p (chrec_a, chrec_b, 0)))
   4988  1.1  mrg     {
   4989  1.1  mrg       dependence_stats.num_same_subscript_function++;
   4990  1.1  mrg       *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
   4991  1.1  mrg       *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
   4992  1.1  mrg       *last_conflicts = chrec_dont_know;
   4993  1.1  mrg     }
   4994  1.1  mrg 
   4995  1.1  mrg   /* If they aren't the same, and aren't affine, we can't do anything
   4996  1.1  mrg      yet.  */
   4997  1.1  mrg   else if ((chrec_contains_symbols (chrec_a)
   4998  1.1  mrg 	    || chrec_contains_symbols (chrec_b))
   4999  1.1  mrg 	   && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
   5000  1.1  mrg 	       || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
   5001  1.1  mrg     {
   5002  1.1  mrg       dependence_stats.num_subscript_undetermined++;
   5003  1.1  mrg       *overlap_iterations_a = conflict_fn_not_known ();
   5004  1.1  mrg       *overlap_iterations_b = conflict_fn_not_known ();
   5005  1.1  mrg     }
   5006  1.1  mrg 
   5007  1.1  mrg   else if (ziv_subscript_p (chrec_a, chrec_b))
   5008  1.1  mrg     analyze_ziv_subscript (chrec_a, chrec_b,
   5009  1.1  mrg 			   overlap_iterations_a, overlap_iterations_b,
   5010  1.1  mrg 			   last_conflicts);
   5011  1.1  mrg 
   5012  1.1  mrg   else if (siv_subscript_p (chrec_a, chrec_b))
   5013  1.1  mrg     analyze_siv_subscript (chrec_a, chrec_b,
   5014  1.1  mrg 			   overlap_iterations_a, overlap_iterations_b,
   5015  1.1  mrg 			   last_conflicts, lnn);
   5016  1.1  mrg 
   5017  1.1  mrg   else
   5018  1.1  mrg     analyze_miv_subscript (chrec_a, chrec_b,
   5019  1.1  mrg 			   overlap_iterations_a, overlap_iterations_b,
   5020  1.1  mrg 			   last_conflicts, loop_nest);
   5021  1.1  mrg 
   5022  1.1  mrg   if (dump_file && (dump_flags & TDF_DETAILS))
   5023  1.1  mrg     {
   5024  1.1  mrg       fprintf (dump_file, "  (overlap_iterations_a = ");
   5025  1.1  mrg       dump_conflict_function (dump_file, *overlap_iterations_a);
   5026  1.1  mrg       fprintf (dump_file, ")\n  (overlap_iterations_b = ");
   5027  1.1  mrg       dump_conflict_function (dump_file, *overlap_iterations_b);
   5028  1.1  mrg       fprintf (dump_file, "))\n");
   5029  1.1  mrg     }
   5030  1.1  mrg }
   5031  1.1  mrg 
   5032  1.1  mrg /* Helper function for uniquely inserting distance vectors.  */
   5033  1.1  mrg 
   5034  1.1  mrg static void
   5035  1.1  mrg save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
   5036  1.1  mrg {
   5037  1.1  mrg   for (lambda_vector v : DDR_DIST_VECTS (ddr))
   5038  1.1  mrg     if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
   5039  1.1  mrg       return;
   5040  1.1  mrg 
   5041  1.1  mrg   DDR_DIST_VECTS (ddr).safe_push (dist_v);
   5042  1.1  mrg }
   5043  1.1  mrg 
   5044  1.1  mrg /* Helper function for uniquely inserting direction vectors.  */
   5045  1.1  mrg 
   5046  1.1  mrg static void
   5047  1.1  mrg save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
   5048  1.1  mrg {
   5049  1.1  mrg   for (lambda_vector v : DDR_DIR_VECTS (ddr))
   5050  1.1  mrg     if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
   5051  1.1  mrg       return;
   5052  1.1  mrg 
   5053  1.1  mrg   DDR_DIR_VECTS (ddr).safe_push (dir_v);
   5054  1.1  mrg }
   5055  1.1  mrg 
   5056  1.1  mrg /* Add a distance of 1 on all the loops outer than INDEX.  If we
   5057  1.1  mrg    haven't yet determined a distance for this outer loop, push a new
   5058  1.1  mrg    distance vector composed of the previous distance, and a distance
   5059  1.1  mrg    of 1 for this outer loop.  Example:
   5060  1.1  mrg 
   5061  1.1  mrg    | loop_1
   5062  1.1  mrg    |   loop_2
   5063  1.1  mrg    |     A[10]
   5064  1.1  mrg    |   endloop_2
   5065  1.1  mrg    | endloop_1
   5066  1.1  mrg 
   5067  1.1  mrg    Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
   5068  1.1  mrg    save (0, 1), then we have to save (1, 0).  */
   5069  1.1  mrg 
   5070  1.1  mrg static void
   5071  1.1  mrg add_outer_distances (struct data_dependence_relation *ddr,
   5072  1.1  mrg 		     lambda_vector dist_v, int index)
   5073  1.1  mrg {
   5074  1.1  mrg   /* For each outer loop where init_v is not set, the accesses are
   5075  1.1  mrg      in dependence of distance 1 in the loop.  */
   5076  1.1  mrg   while (--index >= 0)
   5077  1.1  mrg     {
   5078  1.1  mrg       lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
   5079  1.1  mrg       lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
   5080  1.1  mrg       save_v[index] = 1;
   5081  1.1  mrg       save_dist_v (ddr, save_v);
   5082  1.1  mrg     }
   5083  1.1  mrg }
   5084  1.1  mrg 
/* Return false when we fail to represent the data dependence as a
   distance vector.  A_INDEX is the index of the first reference
   (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
   second reference.  INIT_B is set to true when a component has been
   added to the distance vector DIST_V.  INDEX_CARRY is then set to
   the index in DIST_V that carries the dependence.  */

static bool
build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
			     unsigned int a_index, unsigned int b_index,
			     lambda_vector dist_v, bool *init_b,
			     int *index_carry)
{
  unsigned i;
  /* init_v[k] is set to 1 once a distance has been recorded for the
     k-th loop of the nest; it drives the subscript coupling test
     below.  */
  lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
    {
      tree access_fn_a, access_fn_b;
      struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);

      /* No distance could be computed for this subscript: the
	 relation is not affine.  */
      if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	{
	  non_affine_dependence_relation (ddr);
	  return false;
	}

      access_fn_a = SUB_ACCESS_FN (subscript, a_index);
      access_fn_b = SUB_ACCESS_FN (subscript, b_index);

      if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
	  && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
	{
	  HOST_WIDE_INT dist;
	  int index;
	  int var_a = CHREC_VARIABLE (access_fn_a);
	  int var_b = CHREC_VARIABLE (access_fn_b);

	  /* Both sides must evolve in the same loop.
	     NOTE(review): SUB_DISTANCE was already tested for
	     chrec_contains_undetermined at the top of this loop body,
	     so the second operand of this condition looks redundant
	     (but harmless) — confirm before removing.  */
	  if (var_a != var_b
	      || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	    {
	      non_affine_dependence_relation (ddr);
	      return false;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in loop nest nested in the loop, we
	     would have more number of access functions than number of
	     loops.  Skip access functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
	    continue;

	  dist = int_cst_value (SUB_DISTANCE (subscript));
	  index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
	  /* Track the outermost loop index that carries the
	     dependence.  */
	  *index_carry = MIN (index, *index_carry);

	  /* This is the subscript coupling test.  If we have already
	     recorded a distance for this loop (a distance coming from
	     another subscript), it should be the same.  For example,
	     in the following code, there is no dependence:

	     | loop i = 0, N, 1
	     |   T[i+1][i] = ...
	     |   ... = T[i][i]
	     | endloop
	  */
	  if (init_v[index] != 0 && dist_v[index] != dist)
	    {
	      finalize_ddr_dependent (ddr, chrec_known);
	      return false;
	    }

	  dist_v[index] = dist;
	  init_v[index] = 1;
	  *init_b = true;
	}
      else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
	{
	  /* This can be for example an affine vs. constant dependence
	     (T[i] vs. T[3]) that is not an affine dependence and is
	     not representable as a distance vector.  */
	  non_affine_dependence_relation (ddr);
	  return false;
	}
    }

  return true;
}
   5176  1.1  mrg 
   5177  1.1  mrg /* Return true when the DDR contains only invariant access functions wrto. loop
   5178  1.1  mrg    number LNUM.  */
   5179  1.1  mrg 
   5180  1.1  mrg static bool
   5181  1.1  mrg invariant_access_functions (const struct data_dependence_relation *ddr,
   5182  1.1  mrg 			    int lnum)
   5183  1.1  mrg {
   5184  1.1  mrg   for (subscript *sub : DDR_SUBSCRIPTS (ddr))
   5185  1.1  mrg     if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
   5186  1.1  mrg 	|| !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
   5187  1.1  mrg       return false;
   5188  1.1  mrg 
   5189  1.1  mrg   return true;
   5190  1.1  mrg }
   5191  1.1  mrg 
   5192  1.1  mrg /* Helper function for the case where DDR_A and DDR_B are the same
   5193  1.1  mrg    multivariate access function with a constant step.  For an example
   5194  1.1  mrg    see pr34635-1.c.  */
   5195  1.1  mrg 
   5196  1.1  mrg static void
   5197  1.1  mrg add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
   5198  1.1  mrg {
   5199  1.1  mrg   int x_1, x_2;
   5200  1.1  mrg   tree c_1 = CHREC_LEFT (c_2);
   5201  1.1  mrg   tree c_0 = CHREC_LEFT (c_1);
   5202  1.1  mrg   lambda_vector dist_v;
   5203  1.1  mrg   HOST_WIDE_INT v1, v2, cd;
   5204  1.1  mrg 
   5205  1.1  mrg   /* Polynomials with more than 2 variables are not handled yet.  When
   5206  1.1  mrg      the evolution steps are parameters, it is not possible to
   5207  1.1  mrg      represent the dependence using classical distance vectors.  */
   5208  1.1  mrg   if (TREE_CODE (c_0) != INTEGER_CST
   5209  1.1  mrg       || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
   5210  1.1  mrg       || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
   5211  1.1  mrg     {
   5212  1.1  mrg       DDR_AFFINE_P (ddr) = false;
   5213  1.1  mrg       return;
   5214  1.1  mrg     }
   5215  1.1  mrg 
   5216  1.1  mrg   x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
   5217  1.1  mrg   x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
   5218  1.1  mrg 
   5219  1.1  mrg   /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
   5220  1.1  mrg   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
   5221  1.1  mrg   v1 = int_cst_value (CHREC_RIGHT (c_1));
   5222  1.1  mrg   v2 = int_cst_value (CHREC_RIGHT (c_2));
   5223  1.1  mrg   cd = gcd (v1, v2);
   5224  1.1  mrg   v1 /= cd;
   5225  1.1  mrg   v2 /= cd;
   5226  1.1  mrg 
   5227  1.1  mrg   if (v2 < 0)
   5228  1.1  mrg     {
   5229  1.1  mrg       v2 = -v2;
   5230  1.1  mrg       v1 = -v1;
   5231  1.1  mrg     }
   5232  1.1  mrg 
   5233  1.1  mrg   dist_v[x_1] = v2;
   5234  1.1  mrg   dist_v[x_2] = -v1;
   5235  1.1  mrg   save_dist_v (ddr, dist_v);
   5236  1.1  mrg 
   5237  1.1  mrg   add_outer_distances (ddr, dist_v, x_1);
   5238  1.1  mrg }
   5239  1.1  mrg 
   5240  1.1  mrg /* Helper function for the case where DDR_A and DDR_B are the same
   5241  1.1  mrg    access functions.  */
   5242  1.1  mrg 
   5243  1.1  mrg static void
   5244  1.1  mrg add_other_self_distances (struct data_dependence_relation *ddr)
   5245  1.1  mrg {
   5246  1.1  mrg   lambda_vector dist_v;
   5247  1.1  mrg   unsigned i;
   5248  1.1  mrg   int index_carry = DDR_NB_LOOPS (ddr);
   5249  1.1  mrg   subscript *sub;
   5250  1.1  mrg   class loop *loop = DDR_LOOP_NEST (ddr)[0];
   5251  1.1  mrg 
   5252  1.1  mrg   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
   5253  1.1  mrg     {
   5254  1.1  mrg       tree access_fun = SUB_ACCESS_FN (sub, 0);
   5255  1.1  mrg 
   5256  1.1  mrg       if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
   5257  1.1  mrg 	{
   5258  1.1  mrg 	  if (!evolution_function_is_univariate_p (access_fun, loop->num))
   5259  1.1  mrg 	    {
   5260  1.1  mrg 	      if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
   5261  1.1  mrg 		{
   5262  1.1  mrg 		  DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
   5263  1.1  mrg 		  return;
   5264  1.1  mrg 		}
   5265  1.1  mrg 
   5266  1.1  mrg 	      access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
   5267  1.1  mrg 
   5268  1.1  mrg 	      if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
   5269  1.1  mrg 		add_multivariate_self_dist (ddr, access_fun);
   5270  1.1  mrg 	      else
   5271  1.1  mrg 		/* The evolution step is not constant: it varies in
   5272  1.1  mrg 		   the outer loop, so this cannot be represented by a
   5273  1.1  mrg 		   distance vector.  For example in pr34635.c the
   5274  1.1  mrg 		   evolution is {0, +, {0, +, 4}_1}_2.  */
   5275  1.1  mrg 		DDR_AFFINE_P (ddr) = false;
   5276  1.1  mrg 
   5277  1.1  mrg 	      return;
   5278  1.1  mrg 	    }
   5279  1.1  mrg 
   5280  1.1  mrg 	  /* When data references are collected in a loop while data
   5281  1.1  mrg 	     dependences are analyzed in loop nest nested in the loop, we
   5282  1.1  mrg 	     would have more number of access functions than number of
   5283  1.1  mrg 	     loops.  Skip access functions of loops not in the loop nest.
   5284  1.1  mrg 
   5285  1.1  mrg 	     See PR89725 for more information.  */
   5286  1.1  mrg 	  if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
   5287  1.1  mrg 				  loop))
   5288  1.1  mrg 	    continue;
   5289  1.1  mrg 
   5290  1.1  mrg 	  index_carry = MIN (index_carry,
   5291  1.1  mrg 			     index_in_loop_nest (CHREC_VARIABLE (access_fun),
   5292  1.1  mrg 						 DDR_LOOP_NEST (ddr)));
   5293  1.1  mrg 	}
   5294  1.1  mrg     }
   5295  1.1  mrg 
   5296  1.1  mrg   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
   5297  1.1  mrg   add_outer_distances (ddr, dist_v, index_carry);
   5298  1.1  mrg }
   5299  1.1  mrg 
   5300  1.1  mrg static void
   5301  1.1  mrg insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
   5302  1.1  mrg {
   5303  1.1  mrg   lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
   5304  1.1  mrg 
   5305  1.1  mrg   dist_v[0] = 1;
   5306  1.1  mrg   save_dist_v (ddr, dist_v);
   5307  1.1  mrg }
   5308  1.1  mrg 
   5309  1.1  mrg /* Adds a unit distance vector to DDR when there is a 0 overlap.  This
   5310  1.1  mrg    is the case for example when access functions are the same and
   5311  1.1  mrg    equal to a constant, as in:
   5312  1.1  mrg 
   5313  1.1  mrg    | loop_1
   5314  1.1  mrg    |   A[3] = ...
   5315  1.1  mrg    |   ... = A[3]
   5316  1.1  mrg    | endloop_1
   5317  1.1  mrg 
   5318  1.1  mrg    in which case the distance vectors are (0) and (1).  */
   5319  1.1  mrg 
   5320  1.1  mrg static void
   5321  1.1  mrg add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
   5322  1.1  mrg {
   5323  1.1  mrg   unsigned i, j;
   5324  1.1  mrg 
   5325  1.1  mrg   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
   5326  1.1  mrg     {
   5327  1.1  mrg       subscript_p sub = DDR_SUBSCRIPT (ddr, i);
   5328  1.1  mrg       conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
   5329  1.1  mrg       conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
   5330  1.1  mrg 
   5331  1.1  mrg       for (j = 0; j < ca->n; j++)
   5332  1.1  mrg 	if (affine_function_zero_p (ca->fns[j]))
   5333  1.1  mrg 	  {
   5334  1.1  mrg 	    insert_innermost_unit_dist_vector (ddr);
   5335  1.1  mrg 	    return;
   5336  1.1  mrg 	  }
   5337  1.1  mrg 
   5338  1.1  mrg       for (j = 0; j < cb->n; j++)
   5339  1.1  mrg 	if (affine_function_zero_p (cb->fns[j]))
   5340  1.1  mrg 	  {
   5341  1.1  mrg 	    insert_innermost_unit_dist_vector (ddr);
   5342  1.1  mrg 	    return;
   5343  1.1  mrg 	  }
   5344  1.1  mrg     }
   5345  1.1  mrg }
   5346  1.1  mrg 
   5347  1.1  mrg /* Return true when the DDR contains two data references that have the
   5348  1.1  mrg    same access functions.  */
   5349  1.1  mrg 
   5350  1.1  mrg static inline bool
   5351  1.1  mrg same_access_functions (const struct data_dependence_relation *ddr)
   5352  1.1  mrg {
   5353  1.1  mrg   for (subscript *sub : DDR_SUBSCRIPTS (ddr))
   5354  1.1  mrg     if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
   5355  1.1  mrg 			  SUB_ACCESS_FN (sub, 1)))
   5356  1.1  mrg       return false;
   5357  1.1  mrg 
   5358  1.1  mrg   return true;
   5359  1.1  mrg }
   5360  1.1  mrg 
   5361  1.1  mrg /* Compute the classic per loop distance vector.  DDR is the data
   5362  1.1  mrg    dependence relation to build a vector from.  Return false when fail
   5363  1.1  mrg    to represent the data dependence as a distance vector.  */
   5364  1.1  mrg 
   5365  1.1  mrg static bool
   5366  1.1  mrg build_classic_dist_vector (struct data_dependence_relation *ddr,
   5367  1.1  mrg 			   class loop *loop_nest)
   5368  1.1  mrg {
   5369  1.1  mrg   bool init_b = false;
   5370  1.1  mrg   int index_carry = DDR_NB_LOOPS (ddr);
   5371  1.1  mrg   lambda_vector dist_v;
   5372  1.1  mrg 
   5373  1.1  mrg   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
   5374  1.1  mrg     return false;
   5375  1.1  mrg 
   5376  1.1  mrg   if (same_access_functions (ddr))
   5377  1.1  mrg     {
   5378  1.1  mrg       /* Save the 0 vector.  */
   5379  1.1  mrg       dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
   5380  1.1  mrg       save_dist_v (ddr, dist_v);
   5381  1.1  mrg 
   5382  1.1  mrg       if (invariant_access_functions (ddr, loop_nest->num))
   5383  1.1  mrg 	add_distance_for_zero_overlaps (ddr);
   5384  1.1  mrg 
   5385  1.1  mrg       if (DDR_NB_LOOPS (ddr) > 1)
   5386  1.1  mrg 	add_other_self_distances (ddr);
   5387  1.1  mrg 
   5388  1.1  mrg       return true;
   5389  1.1  mrg     }
   5390  1.1  mrg 
   5391  1.1  mrg   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
   5392  1.1  mrg   if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
   5393  1.1  mrg     return false;
   5394  1.1  mrg 
   5395  1.1  mrg   /* Save the distance vector if we initialized one.  */
   5396  1.1  mrg   if (init_b)
   5397  1.1  mrg     {
   5398  1.1  mrg       /* Verify a basic constraint: classic distance vectors should
   5399  1.1  mrg 	 always be lexicographically positive.
   5400  1.1  mrg 
   5401  1.1  mrg 	 Data references are collected in the order of execution of
   5402  1.1  mrg 	 the program, thus for the following loop
   5403  1.1  mrg 
   5404  1.1  mrg 	 | for (i = 1; i < 100; i++)
   5405  1.1  mrg 	 |   for (j = 1; j < 100; j++)
   5406  1.1  mrg 	 |     {
   5407  1.1  mrg 	 |       t = T[j+1][i-1];  // A
   5408  1.1  mrg 	 |       T[j][i] = t + 2;  // B
   5409  1.1  mrg 	 |     }
   5410  1.1  mrg 
   5411  1.1  mrg 	 references are collected following the direction of the wind:
   5412  1.1  mrg 	 A then B.  The data dependence tests are performed also
   5413  1.1  mrg 	 following this order, such that we're looking at the distance
   5414  1.1  mrg 	 separating the elements accessed by A from the elements later
   5415  1.1  mrg 	 accessed by B.  But in this example, the distance returned by
   5416  1.1  mrg 	 test_dep (A, B) is lexicographically negative (-1, 1), that
   5417  1.1  mrg 	 means that the access A occurs later than B with respect to
   5418  1.1  mrg 	 the outer loop, ie. we're actually looking upwind.  In this
   5419  1.1  mrg 	 case we solve test_dep (B, A) looking downwind to the
   5420  1.1  mrg 	 lexicographically positive solution, that returns the
   5421  1.1  mrg 	 distance vector (1, -1).  */
   5422  1.1  mrg       if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
   5423  1.1  mrg 	{
   5424  1.1  mrg 	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
   5425  1.1  mrg 	  if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
   5426  1.1  mrg 	    return false;
   5427  1.1  mrg 	  compute_subscript_distance (ddr);
   5428  1.1  mrg 	  if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
   5429  1.1  mrg 					    &index_carry))
   5430  1.1  mrg 	    return false;
   5431  1.1  mrg 	  save_dist_v (ddr, save_v);
   5432  1.1  mrg 	  DDR_REVERSED_P (ddr) = true;
   5433  1.1  mrg 
   5434  1.1  mrg 	  /* In this case there is a dependence forward for all the
   5435  1.1  mrg 	     outer loops:
   5436  1.1  mrg 
   5437  1.1  mrg 	     | for (k = 1; k < 100; k++)
   5438  1.1  mrg 	     |  for (i = 1; i < 100; i++)
   5439  1.1  mrg 	     |   for (j = 1; j < 100; j++)
   5440  1.1  mrg 	     |     {
   5441  1.1  mrg 	     |       t = T[j+1][i-1];  // A
   5442  1.1  mrg 	     |       T[j][i] = t + 2;  // B
   5443  1.1  mrg 	     |     }
   5444  1.1  mrg 
   5445  1.1  mrg 	     the vectors are:
   5446  1.1  mrg 	     (0,  1, -1)
   5447  1.1  mrg 	     (1,  1, -1)
   5448  1.1  mrg 	     (1, -1,  1)
   5449  1.1  mrg 	  */
   5450  1.1  mrg 	  if (DDR_NB_LOOPS (ddr) > 1)
   5451  1.1  mrg 	    {
   5452  1.1  mrg  	      add_outer_distances (ddr, save_v, index_carry);
   5453  1.1  mrg 	      add_outer_distances (ddr, dist_v, index_carry);
   5454  1.1  mrg 	    }
   5455  1.1  mrg 	}
   5456  1.1  mrg       else
   5457  1.1  mrg 	{
   5458  1.1  mrg 	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
   5459  1.1  mrg 	  lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
   5460  1.1  mrg 
   5461  1.1  mrg 	  if (DDR_NB_LOOPS (ddr) > 1)
   5462  1.1  mrg 	    {
   5463  1.1  mrg 	      lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
   5464  1.1  mrg 
   5465  1.1  mrg 	      if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
   5466  1.1  mrg 		return false;
   5467  1.1  mrg 	      compute_subscript_distance (ddr);
   5468  1.1  mrg 	      if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
   5469  1.1  mrg 						&index_carry))
   5470  1.1  mrg 		return false;
   5471  1.1  mrg 
   5472  1.1  mrg 	      save_dist_v (ddr, save_v);
   5473  1.1  mrg 	      add_outer_distances (ddr, dist_v, index_carry);
   5474  1.1  mrg 	      add_outer_distances (ddr, opposite_v, index_carry);
   5475  1.1  mrg 	    }
   5476  1.1  mrg 	  else
   5477  1.1  mrg 	    save_dist_v (ddr, save_v);
   5478  1.1  mrg 	}
   5479  1.1  mrg     }
   5480  1.1  mrg   else
   5481  1.1  mrg     {
   5482  1.1  mrg       /* There is a distance of 1 on all the outer loops: Example:
   5483  1.1  mrg 	 there is a dependence of distance 1 on loop_1 for the array A.
   5484  1.1  mrg 
   5485  1.1  mrg 	 | loop_1
   5486  1.1  mrg 	 |   A[5] = ...
   5487  1.1  mrg 	 | endloop
   5488  1.1  mrg       */
   5489  1.1  mrg       add_outer_distances (ddr, dist_v,
   5490  1.1  mrg 			   lambda_vector_first_nz (dist_v,
   5491  1.1  mrg 						   DDR_NB_LOOPS (ddr), 0));
   5492  1.1  mrg     }
   5493  1.1  mrg 
   5494  1.1  mrg   if (dump_file && (dump_flags & TDF_DETAILS))
   5495  1.1  mrg     {
   5496  1.1  mrg       unsigned i;
   5497  1.1  mrg 
   5498  1.1  mrg       fprintf (dump_file, "(build_classic_dist_vector\n");
   5499  1.1  mrg       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
   5500  1.1  mrg 	{
   5501  1.1  mrg 	  fprintf (dump_file, "  dist_vector = (");
   5502  1.1  mrg 	  print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
   5503  1.1  mrg 			       DDR_NB_LOOPS (ddr));
   5504  1.1  mrg 	  fprintf (dump_file, "  )\n");
   5505  1.1  mrg 	}
   5506  1.1  mrg       fprintf (dump_file, ")\n");
   5507  1.1  mrg     }
   5508  1.1  mrg 
   5509  1.1  mrg   return true;
   5510  1.1  mrg }
   5511  1.1  mrg 
   5512  1.1  mrg /* Return the direction for a given distance.
   5513  1.1  mrg    FIXME: Computing dir this way is suboptimal, since dir can catch
   5514  1.1  mrg    cases that dist is unable to represent.  */
   5515  1.1  mrg 
   5516  1.1  mrg static inline enum data_dependence_direction
   5517  1.1  mrg dir_from_dist (int dist)
   5518  1.1  mrg {
   5519  1.1  mrg   if (dist > 0)
   5520  1.1  mrg     return dir_positive;
   5521  1.1  mrg   else if (dist < 0)
   5522  1.1  mrg     return dir_negative;
   5523  1.1  mrg   else
   5524  1.1  mrg     return dir_equal;
   5525  1.1  mrg }
   5526  1.1  mrg 
   5527  1.1  mrg /* Compute the classic per loop direction vector.  DDR is the data
   5528  1.1  mrg    dependence relation to build a vector from.  */
   5529  1.1  mrg 
   5530  1.1  mrg static void
   5531  1.1  mrg build_classic_dir_vector (struct data_dependence_relation *ddr)
   5532  1.1  mrg {
   5533  1.1  mrg   unsigned i, j;
   5534  1.1  mrg   lambda_vector dist_v;
   5535  1.1  mrg 
   5536  1.1  mrg   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
   5537  1.1  mrg     {
   5538  1.1  mrg       lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
   5539  1.1  mrg 
   5540  1.1  mrg       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
   5541  1.1  mrg 	dir_v[j] = dir_from_dist (dist_v[j]);
   5542  1.1  mrg 
   5543  1.1  mrg       save_dir_v (ddr, dir_v);
   5544  1.1  mrg     }
   5545  1.1  mrg }
   5546  1.1  mrg 
   5547  1.1  mrg /* Helper function.  Returns true when there is a dependence between the
   5548  1.1  mrg    data references.  A_INDEX is the index of the first reference (0 for
   5549  1.1  mrg    DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */
   5550  1.1  mrg 
   5551  1.1  mrg static bool
   5552  1.1  mrg subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
   5553  1.1  mrg 			       unsigned int a_index, unsigned int b_index,
   5554  1.1  mrg 			       class loop *loop_nest)
   5555  1.1  mrg {
   5556  1.1  mrg   unsigned int i;
   5557  1.1  mrg   tree last_conflicts;
   5558  1.1  mrg   struct subscript *subscript;
   5559  1.1  mrg   tree res = NULL_TREE;
   5560  1.1  mrg 
   5561  1.1  mrg   for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
   5562  1.1  mrg     {
   5563  1.1  mrg       conflict_function *overlaps_a, *overlaps_b;
   5564  1.1  mrg 
   5565  1.1  mrg       analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
   5566  1.1  mrg 				      SUB_ACCESS_FN (subscript, b_index),
   5567  1.1  mrg 				      &overlaps_a, &overlaps_b,
   5568  1.1  mrg 				      &last_conflicts, loop_nest);
   5569  1.1  mrg 
   5570  1.1  mrg       if (SUB_CONFLICTS_IN_A (subscript))
   5571  1.1  mrg 	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
   5572  1.1  mrg       if (SUB_CONFLICTS_IN_B (subscript))
   5573  1.1  mrg 	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
   5574  1.1  mrg 
   5575  1.1  mrg       SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
   5576  1.1  mrg       SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
   5577  1.1  mrg       SUB_LAST_CONFLICT (subscript) = last_conflicts;
   5578  1.1  mrg 
   5579  1.1  mrg       /* If there is any undetermined conflict function we have to
   5580  1.1  mrg          give a conservative answer in case we cannot prove that
   5581  1.1  mrg 	 no dependence exists when analyzing another subscript.  */
   5582  1.1  mrg       if (CF_NOT_KNOWN_P (overlaps_a)
   5583  1.1  mrg  	  || CF_NOT_KNOWN_P (overlaps_b))
   5584  1.1  mrg  	{
   5585  1.1  mrg 	  res = chrec_dont_know;
   5586  1.1  mrg 	  continue;
   5587  1.1  mrg  	}
   5588  1.1  mrg 
   5589  1.1  mrg       /* When there is a subscript with no dependence we can stop.  */
   5590  1.1  mrg       else if (CF_NO_DEPENDENCE_P (overlaps_a)
   5591  1.1  mrg  	       || CF_NO_DEPENDENCE_P (overlaps_b))
   5592  1.1  mrg  	{
   5593  1.1  mrg 	  res = chrec_known;
   5594  1.1  mrg 	  break;
   5595  1.1  mrg  	}
   5596  1.1  mrg     }
   5597  1.1  mrg 
   5598  1.1  mrg   if (res == NULL_TREE)
   5599  1.1  mrg     return true;
   5600  1.1  mrg 
   5601  1.1  mrg   if (res == chrec_known)
   5602  1.1  mrg     dependence_stats.num_dependence_independent++;
   5603  1.1  mrg   else
   5604  1.1  mrg     dependence_stats.num_dependence_undetermined++;
   5605  1.1  mrg   finalize_ddr_dependent (ddr, res);
   5606  1.1  mrg   return false;
   5607  1.1  mrg }
   5608  1.1  mrg 
   5609  1.1  mrg /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */
   5610  1.1  mrg 
   5611  1.1  mrg static void
   5612  1.1  mrg subscript_dependence_tester (struct data_dependence_relation *ddr,
   5613  1.1  mrg 			     class loop *loop_nest)
   5614  1.1  mrg {
   5615  1.1  mrg   if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
   5616  1.1  mrg     dependence_stats.num_dependence_dependent++;
   5617  1.1  mrg 
   5618  1.1  mrg   compute_subscript_distance (ddr);
   5619  1.1  mrg   if (build_classic_dist_vector (ddr, loop_nest))
   5620  1.1  mrg     build_classic_dir_vector (ddr);
   5621  1.1  mrg }
   5622  1.1  mrg 
   5623  1.1  mrg /* Returns true when all the access functions of A are affine or
   5624  1.1  mrg    constant with respect to LOOP_NEST.  */
   5625  1.1  mrg 
   5626  1.1  mrg static bool
   5627  1.1  mrg access_functions_are_affine_or_constant_p (const struct data_reference *a,
   5628  1.1  mrg 					   const class loop *loop_nest)
   5629  1.1  mrg {
   5630  1.1  mrg   vec<tree> fns = DR_ACCESS_FNS (a);
   5631  1.1  mrg   for (tree t : fns)
   5632  1.1  mrg     if (!evolution_function_is_invariant_p (t, loop_nest->num)
   5633  1.1  mrg 	&& !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
   5634  1.1  mrg       return false;
   5635  1.1  mrg 
   5636  1.1  mrg   return true;
   5637  1.1  mrg }
   5638  1.1  mrg 
   5639  1.1  mrg /* This computes the affine dependence relation between A and B with
   5640  1.1  mrg    respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
   5641  1.1  mrg    independence between two accesses, while CHREC_DONT_KNOW is used
   5642  1.1  mrg    for representing the unknown relation.
   5643  1.1  mrg 
   5644  1.1  mrg    Note that it is possible to stop the computation of the dependence
   5645  1.1  mrg    relation the first time we detect a CHREC_KNOWN element for a given
   5646  1.1  mrg    subscript.  */
   5647  1.1  mrg 
   5648  1.1  mrg void
   5649  1.1  mrg compute_affine_dependence (struct data_dependence_relation *ddr,
   5650  1.1  mrg 			   class loop *loop_nest)
   5651  1.1  mrg {
   5652  1.1  mrg   struct data_reference *dra = DDR_A (ddr);
   5653  1.1  mrg   struct data_reference *drb = DDR_B (ddr);
   5654  1.1  mrg 
   5655  1.1  mrg   if (dump_file && (dump_flags & TDF_DETAILS))
   5656  1.1  mrg     {
   5657  1.1  mrg       fprintf (dump_file, "(compute_affine_dependence\n");
   5658  1.1  mrg       fprintf (dump_file, "  ref_a: ");
   5659  1.1  mrg       print_generic_expr (dump_file, DR_REF (dra));
   5660  1.1  mrg       fprintf (dump_file, ", stmt_a: ");
   5661  1.1  mrg       print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
   5662  1.1  mrg       fprintf (dump_file, "  ref_b: ");
   5663  1.1  mrg       print_generic_expr (dump_file, DR_REF (drb));
   5664  1.1  mrg       fprintf (dump_file, ", stmt_b: ");
   5665  1.1  mrg       print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
   5666  1.1  mrg     }
   5667  1.1  mrg 
   5668  1.1  mrg   /* Analyze only when the dependence relation is not yet known.  */
   5669  1.1  mrg   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
   5670  1.1  mrg     {
   5671  1.1  mrg       dependence_stats.num_dependence_tests++;
   5672  1.1  mrg 
   5673  1.1  mrg       if (access_functions_are_affine_or_constant_p (dra, loop_nest)
   5674  1.1  mrg 	  && access_functions_are_affine_or_constant_p (drb, loop_nest))
   5675  1.1  mrg 	subscript_dependence_tester (ddr, loop_nest);
   5676  1.1  mrg 
   5677  1.1  mrg       /* As a last case, if the dependence cannot be determined, or if
   5678  1.1  mrg 	 the dependence is considered too difficult to determine, answer
   5679  1.1  mrg 	 "don't know".  */
   5680  1.1  mrg       else
   5681  1.1  mrg 	{
   5682  1.1  mrg 	  dependence_stats.num_dependence_undetermined++;
   5683  1.1  mrg 
   5684  1.1  mrg 	  if (dump_file && (dump_flags & TDF_DETAILS))
   5685  1.1  mrg 	    {
   5686  1.1  mrg 	      fprintf (dump_file, "Data ref a:\n");
   5687  1.1  mrg 	      dump_data_reference (dump_file, dra);
   5688  1.1  mrg 	      fprintf (dump_file, "Data ref b:\n");
   5689  1.1  mrg 	      dump_data_reference (dump_file, drb);
   5690  1.1  mrg 	      fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
   5691  1.1  mrg 	    }
   5692  1.1  mrg 	  finalize_ddr_dependent (ddr, chrec_dont_know);
   5693  1.1  mrg 	}
   5694  1.1  mrg     }
   5695  1.1  mrg 
   5696  1.1  mrg   if (dump_file && (dump_flags & TDF_DETAILS))
   5697  1.1  mrg     {
   5698  1.1  mrg       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
   5699  1.1  mrg 	fprintf (dump_file, ") -> no dependence\n");
   5700  1.1  mrg       else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
   5701  1.1  mrg 	fprintf (dump_file, ") -> dependence analysis failed\n");
   5702  1.1  mrg       else
   5703  1.1  mrg 	fprintf (dump_file, ")\n");
   5704  1.1  mrg     }
   5705  1.1  mrg }
   5706  1.1  mrg 
   5707  1.1  mrg /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
   5708  1.1  mrg    the data references in DATAREFS, in the LOOP_NEST.  When
   5709  1.1  mrg    COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
   5710  1.1  mrg    relations.  Return true when successful, i.e. data references number
   5711  1.1  mrg    is small enough to be handled.  */
   5712  1.1  mrg 
   5713  1.1  mrg bool
   5714  1.1  mrg compute_all_dependences (const vec<data_reference_p> &datarefs,
   5715  1.1  mrg 			 vec<ddr_p> *dependence_relations,
   5716  1.1  mrg 			 const vec<loop_p> &loop_nest,
   5717  1.1  mrg 			 bool compute_self_and_rr)
   5718  1.1  mrg {
   5719  1.1  mrg   struct data_dependence_relation *ddr;
   5720  1.1  mrg   struct data_reference *a, *b;
   5721  1.1  mrg   unsigned int i, j;
   5722  1.1  mrg 
   5723  1.1  mrg   if ((int) datarefs.length ()
   5724  1.1  mrg       > param_loop_max_datarefs_for_datadeps)
   5725  1.1  mrg     {
   5726  1.1  mrg       struct data_dependence_relation *ddr;
   5727  1.1  mrg 
   5728  1.1  mrg       /* Insert a single relation into dependence_relations:
   5729  1.1  mrg 	 chrec_dont_know.  */
   5730  1.1  mrg       ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
   5731  1.1  mrg       dependence_relations->safe_push (ddr);
   5732  1.1  mrg       return false;
   5733  1.1  mrg     }
   5734  1.1  mrg 
   5735  1.1  mrg   FOR_EACH_VEC_ELT (datarefs, i, a)
   5736  1.1  mrg     for (j = i + 1; datarefs.iterate (j, &b); j++)
   5737  1.1  mrg       if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
   5738  1.1  mrg 	{
   5739  1.1  mrg 	  ddr = initialize_data_dependence_relation (a, b, loop_nest);
   5740  1.1  mrg 	  dependence_relations->safe_push (ddr);
   5741  1.1  mrg           if (loop_nest.exists ())
   5742  1.1  mrg    	    compute_affine_dependence (ddr, loop_nest[0]);
   5743  1.1  mrg 	}
   5744  1.1  mrg 
   5745  1.1  mrg   if (compute_self_and_rr)
   5746  1.1  mrg     FOR_EACH_VEC_ELT (datarefs, i, a)
   5747  1.1  mrg       {
   5748  1.1  mrg 	ddr = initialize_data_dependence_relation (a, a, loop_nest);
   5749  1.1  mrg 	dependence_relations->safe_push (ddr);
   5750  1.1  mrg         if (loop_nest.exists ())
   5751  1.1  mrg    	  compute_affine_dependence (ddr, loop_nest[0]);
   5752  1.1  mrg       }
   5753  1.1  mrg 
   5754  1.1  mrg   return true;
   5755  1.1  mrg }
   5756  1.1  mrg 
   5757  1.1  mrg /* Describes a location of a memory reference.  */
   5758  1.1  mrg 
   5759  1.1  mrg struct data_ref_loc
   5760  1.1  mrg {
   5761  1.1  mrg   /* The memory reference.  */
   5762  1.1  mrg   tree ref;
   5763  1.1  mrg 
   5764  1.1  mrg   /* True if the memory reference is read.  */
   5765  1.1  mrg   bool is_read;
   5766  1.1  mrg 
   5767  1.1  mrg   /* True if the data reference is conditional within the containing
   5768  1.1  mrg      statement, i.e. if it might not occur even when the statement
   5769  1.1  mrg      is executed and runs to completion.  */
   5770  1.1  mrg   bool is_conditional_in_stmt;
   5771  1.1  mrg };
   5772  1.1  mrg 
   5773  1.1  mrg 
   5774  1.1  mrg /* Stores the locations of memory references in STMT to REFERENCES.  Returns
   5775  1.1  mrg    true if STMT clobbers memory, false otherwise.  */
   5776  1.1  mrg 
   5777  1.1  mrg static bool
   5778  1.1  mrg get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
   5779  1.1  mrg {
   5780  1.1  mrg   bool clobbers_memory = false;
   5781  1.1  mrg   data_ref_loc ref;
   5782  1.1  mrg   tree op0, op1;
   5783  1.1  mrg   enum gimple_code stmt_code = gimple_code (stmt);
   5784  1.1  mrg 
   5785  1.1  mrg   /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
   5786  1.1  mrg      As we cannot model data-references to not spelled out
   5787  1.1  mrg      accesses give up if they may occur.  */
   5788  1.1  mrg   if (stmt_code == GIMPLE_CALL
   5789  1.1  mrg       && !(gimple_call_flags (stmt) & ECF_CONST))
   5790  1.1  mrg     {
   5791  1.1  mrg       /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
   5792  1.1  mrg       if (gimple_call_internal_p (stmt))
   5793  1.1  mrg 	switch (gimple_call_internal_fn (stmt))
   5794  1.1  mrg 	  {
   5795  1.1  mrg 	  case IFN_GOMP_SIMD_LANE:
   5796  1.1  mrg 	    {
   5797  1.1  mrg 	      class loop *loop = gimple_bb (stmt)->loop_father;
   5798  1.1  mrg 	      tree uid = gimple_call_arg (stmt, 0);
   5799  1.1  mrg 	      gcc_assert (TREE_CODE (uid) == SSA_NAME);
   5800  1.1  mrg 	      if (loop == NULL
   5801  1.1  mrg 		  || loop->simduid != SSA_NAME_VAR (uid))
   5802  1.1  mrg 		clobbers_memory = true;
   5803  1.1  mrg 	      break;
   5804  1.1  mrg 	    }
   5805  1.1  mrg 	  case IFN_MASK_LOAD:
   5806  1.1  mrg 	  case IFN_MASK_STORE:
   5807  1.1  mrg 	    break;
   5808  1.1  mrg 	  default:
   5809  1.1  mrg 	    clobbers_memory = true;
   5810  1.1  mrg 	    break;
   5811  1.1  mrg 	  }
   5812  1.1  mrg       else
   5813  1.1  mrg 	clobbers_memory = true;
   5814  1.1  mrg     }
   5815  1.1  mrg   else if (stmt_code == GIMPLE_ASM
   5816  1.1  mrg 	   && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
   5817  1.1  mrg 	       || gimple_vuse (stmt)))
   5818  1.1  mrg     clobbers_memory = true;
   5819  1.1  mrg 
   5820  1.1  mrg   if (!gimple_vuse (stmt))
   5821  1.1  mrg     return clobbers_memory;
   5822  1.1  mrg 
   5823  1.1  mrg   if (stmt_code == GIMPLE_ASSIGN)
   5824  1.1  mrg     {
   5825  1.1  mrg       tree base;
   5826  1.1  mrg       op0 = gimple_assign_lhs (stmt);
   5827  1.1  mrg       op1 = gimple_assign_rhs1 (stmt);
   5828  1.1  mrg 
   5829  1.1  mrg       if (DECL_P (op1)
   5830  1.1  mrg 	  || (REFERENCE_CLASS_P (op1)
   5831  1.1  mrg 	      && (base = get_base_address (op1))
   5832  1.1  mrg 	      && TREE_CODE (base) != SSA_NAME
   5833  1.1  mrg 	      && !is_gimple_min_invariant (base)))
   5834  1.1  mrg 	{
   5835  1.1  mrg 	  ref.ref = op1;
   5836  1.1  mrg 	  ref.is_read = true;
   5837  1.1  mrg 	  ref.is_conditional_in_stmt = false;
   5838  1.1  mrg 	  references->safe_push (ref);
   5839  1.1  mrg 	}
   5840  1.1  mrg     }
   5841  1.1  mrg   else if (stmt_code == GIMPLE_CALL)
   5842  1.1  mrg     {
   5843  1.1  mrg       unsigned i, n;
   5844  1.1  mrg       tree ptr, type;
   5845  1.1  mrg       unsigned int align;
   5846  1.1  mrg 
   5847  1.1  mrg       ref.is_read = false;
   5848  1.1  mrg       if (gimple_call_internal_p (stmt))
   5849  1.1  mrg 	switch (gimple_call_internal_fn (stmt))
   5850  1.1  mrg 	  {
   5851  1.1  mrg 	  case IFN_MASK_LOAD:
   5852  1.1  mrg 	    if (gimple_call_lhs (stmt) == NULL_TREE)
   5853  1.1  mrg 	      break;
   5854  1.1  mrg 	    ref.is_read = true;
   5855  1.1  mrg 	    /* FALLTHRU */
   5856  1.1  mrg 	  case IFN_MASK_STORE:
   5857  1.1  mrg 	    ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
   5858  1.1  mrg 	    align = tree_to_shwi (gimple_call_arg (stmt, 1));
   5859  1.1  mrg 	    if (ref.is_read)
   5860  1.1  mrg 	      type = TREE_TYPE (gimple_call_lhs (stmt));
   5861  1.1  mrg 	    else
   5862  1.1  mrg 	      type = TREE_TYPE (gimple_call_arg (stmt, 3));
   5863  1.1  mrg 	    if (TYPE_ALIGN (type) != align)
   5864  1.1  mrg 	      type = build_aligned_type (type, align);
   5865  1.1  mrg 	    ref.is_conditional_in_stmt = true;
   5866  1.1  mrg 	    ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
   5867  1.1  mrg 				   ptr);
   5868  1.1  mrg 	    references->safe_push (ref);
   5869  1.1  mrg 	    return false;
   5870  1.1  mrg 	  default:
   5871  1.1  mrg 	    break;
   5872  1.1  mrg 	  }
   5873  1.1  mrg 
   5874  1.1  mrg       op0 = gimple_call_lhs (stmt);
   5875  1.1  mrg       n = gimple_call_num_args (stmt);
   5876  1.1  mrg       for (i = 0; i < n; i++)
   5877  1.1  mrg 	{
   5878  1.1  mrg 	  op1 = gimple_call_arg (stmt, i);
   5879  1.1  mrg 
   5880  1.1  mrg 	  if (DECL_P (op1)
   5881  1.1  mrg 	      || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
   5882  1.1  mrg 	    {
   5883  1.1  mrg 	      ref.ref = op1;
   5884  1.1  mrg 	      ref.is_read = true;
   5885  1.1  mrg 	      ref.is_conditional_in_stmt = false;
   5886  1.1  mrg 	      references->safe_push (ref);
   5887  1.1  mrg 	    }
   5888  1.1  mrg 	}
   5889  1.1  mrg     }
   5890  1.1  mrg   else
   5891  1.1  mrg     return clobbers_memory;
   5892  1.1  mrg 
   5893  1.1  mrg   if (op0
   5894  1.1  mrg       && (DECL_P (op0)
   5895  1.1  mrg 	  || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
   5896  1.1  mrg     {
   5897  1.1  mrg       ref.ref = op0;
   5898  1.1  mrg       ref.is_read = false;
   5899  1.1  mrg       ref.is_conditional_in_stmt = false;
   5900  1.1  mrg       references->safe_push (ref);
   5901  1.1  mrg     }
   5902  1.1  mrg   return clobbers_memory;
   5903  1.1  mrg }
   5904  1.1  mrg 
   5905  1.1  mrg 
   5906  1.1  mrg /* Returns true if the loop-nest has any data reference.  */
   5907  1.1  mrg 
   5908  1.1  mrg bool
   5909  1.1  mrg loop_nest_has_data_refs (loop_p loop)
   5910  1.1  mrg {
   5911  1.1  mrg   basic_block *bbs = get_loop_body (loop);
   5912  1.1  mrg   auto_vec<data_ref_loc, 3> references;
   5913  1.1  mrg 
   5914  1.1  mrg   for (unsigned i = 0; i < loop->num_nodes; i++)
   5915  1.1  mrg     {
   5916  1.1  mrg       basic_block bb = bbs[i];
   5917  1.1  mrg       gimple_stmt_iterator bsi;
   5918  1.1  mrg 
   5919  1.1  mrg       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
   5920  1.1  mrg 	{
   5921  1.1  mrg 	  gimple *stmt = gsi_stmt (bsi);
   5922  1.1  mrg 	  get_references_in_stmt (stmt, &references);
   5923  1.1  mrg 	  if (references.length ())
   5924  1.1  mrg 	    {
   5925  1.1  mrg 	      free (bbs);
   5926  1.1  mrg 	      return true;
   5927  1.1  mrg 	    }
   5928  1.1  mrg 	}
   5929  1.1  mrg     }
   5930  1.1  mrg   free (bbs);
   5931  1.1  mrg   return false;
   5932  1.1  mrg }
   5933  1.1  mrg 
   5934  1.1  mrg /* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
   5935  1.1  mrg    reference, returns false, otherwise returns true.  NEST is the outermost
   5936  1.1  mrg    loop of the loop nest in which the references should be analyzed.  */
   5937  1.1  mrg 
   5938  1.1  mrg opt_result
   5939  1.1  mrg find_data_references_in_stmt (class loop *nest, gimple *stmt,
   5940  1.1  mrg 			      vec<data_reference_p> *datarefs)
   5941  1.1  mrg {
   5942  1.1  mrg   auto_vec<data_ref_loc, 2> references;
   5943  1.1  mrg   data_reference_p dr;
   5944  1.1  mrg 
   5945  1.1  mrg   if (get_references_in_stmt (stmt, &references))
   5946  1.1  mrg     return opt_result::failure_at (stmt, "statement clobbers memory: %G",
   5947  1.1  mrg 				   stmt);
   5948  1.1  mrg 
   5949  1.1  mrg   for (const data_ref_loc &ref : references)
   5950  1.1  mrg     {
   5951  1.1  mrg       dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
   5952  1.1  mrg 			    loop_containing_stmt (stmt), ref.ref,
   5953  1.1  mrg 			    stmt, ref.is_read, ref.is_conditional_in_stmt);
   5954  1.1  mrg       gcc_assert (dr != NULL);
   5955  1.1  mrg       datarefs->safe_push (dr);
   5956  1.1  mrg     }
   5957  1.1  mrg 
   5958  1.1  mrg   return opt_result::success ();
   5959  1.1  mrg }
   5960  1.1  mrg 
   5961  1.1  mrg /* Stores the data references in STMT to DATAREFS.  If there is an
   5962  1.1  mrg    unanalyzable reference, returns false, otherwise returns true.
   5963  1.1  mrg    NEST is the outermost loop of the loop nest in which the references
   5964  1.1  mrg    should be instantiated, LOOP is the loop in which the references
   5965  1.1  mrg    should be analyzed.  */
   5966  1.1  mrg 
   5967  1.1  mrg bool
   5968  1.1  mrg graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
   5969  1.1  mrg 				       vec<data_reference_p> *datarefs)
   5970  1.1  mrg {
   5971  1.1  mrg   auto_vec<data_ref_loc, 2> references;
   5972  1.1  mrg   bool ret = true;
   5973  1.1  mrg   data_reference_p dr;
   5974  1.1  mrg 
   5975  1.1  mrg   if (get_references_in_stmt (stmt, &references))
   5976  1.1  mrg     return false;
   5977  1.1  mrg 
   5978  1.1  mrg   for (const data_ref_loc &ref : references)
   5979  1.1  mrg     {
   5980  1.1  mrg       dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
   5981  1.1  mrg 			    ref.is_conditional_in_stmt);
   5982  1.1  mrg       gcc_assert (dr != NULL);
   5983  1.1  mrg       datarefs->safe_push (dr);
   5984  1.1  mrg     }
   5985  1.1  mrg 
   5986  1.1  mrg   return ret;
   5987  1.1  mrg }
   5988  1.1  mrg 
   5989  1.1  mrg /* Search the data references in LOOP, and record the information into
   5990  1.1  mrg    DATAREFS.  Returns chrec_dont_know when failing to analyze a
   5991  1.1  mrg    difficult case, returns NULL_TREE otherwise.  */
   5992  1.1  mrg 
   5993  1.1  mrg tree
   5994  1.1  mrg find_data_references_in_bb (class loop *loop, basic_block bb,
   5995  1.1  mrg                             vec<data_reference_p> *datarefs)
   5996  1.1  mrg {
   5997  1.1  mrg   gimple_stmt_iterator bsi;
   5998  1.1  mrg 
   5999  1.1  mrg   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
   6000  1.1  mrg     {
   6001  1.1  mrg       gimple *stmt = gsi_stmt (bsi);
   6002  1.1  mrg 
   6003  1.1  mrg       if (!find_data_references_in_stmt (loop, stmt, datarefs))
   6004  1.1  mrg         {
   6005  1.1  mrg           struct data_reference *res;
   6006  1.1  mrg           res = XCNEW (struct data_reference);
   6007  1.1  mrg           datarefs->safe_push (res);
   6008  1.1  mrg 
   6009  1.1  mrg           return chrec_dont_know;
   6010  1.1  mrg         }
   6011  1.1  mrg     }
   6012  1.1  mrg 
   6013  1.1  mrg   return NULL_TREE;
   6014  1.1  mrg }
   6015  1.1  mrg 
   6016  1.1  mrg /* Search the data references in LOOP, and record the information into
   6017  1.1  mrg    DATAREFS.  Returns chrec_dont_know when failing to analyze a
   6018  1.1  mrg    difficult case, returns NULL_TREE otherwise.
   6019  1.1  mrg 
   6020  1.1  mrg    TODO: This function should be made smarter so that it can handle address
   6021  1.1  mrg    arithmetic as if they were array accesses, etc.  */
   6022  1.1  mrg 
   6023  1.1  mrg tree
   6024  1.1  mrg find_data_references_in_loop (class loop *loop,
   6025  1.1  mrg 			      vec<data_reference_p> *datarefs)
   6026  1.1  mrg {
   6027  1.1  mrg   basic_block bb, *bbs;
   6028  1.1  mrg   unsigned int i;
   6029  1.1  mrg 
   6030  1.1  mrg   bbs = get_loop_body_in_dom_order (loop);
   6031  1.1  mrg 
   6032  1.1  mrg   for (i = 0; i < loop->num_nodes; i++)
   6033  1.1  mrg     {
   6034  1.1  mrg       bb = bbs[i];
   6035  1.1  mrg 
   6036  1.1  mrg       if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
   6037  1.1  mrg         {
   6038  1.1  mrg           free (bbs);
   6039  1.1  mrg           return chrec_dont_know;
   6040  1.1  mrg         }
   6041  1.1  mrg     }
   6042  1.1  mrg   free (bbs);
   6043  1.1  mrg 
   6044  1.1  mrg   return NULL_TREE;
   6045  1.1  mrg }
   6046  1.1  mrg 
   6047  1.1  mrg /* Return the alignment in bytes that DRB is guaranteed to have at all
   6048  1.1  mrg    times.  */
   6049  1.1  mrg 
   6050  1.1  mrg unsigned int
   6051  1.1  mrg dr_alignment (innermost_loop_behavior *drb)
   6052  1.1  mrg {
   6053  1.1  mrg   /* Get the alignment of BASE_ADDRESS + INIT.  */
   6054  1.1  mrg   unsigned int alignment = drb->base_alignment;
   6055  1.1  mrg   unsigned int misalignment = (drb->base_misalignment
   6056  1.1  mrg 			       + TREE_INT_CST_LOW (drb->init));
   6057  1.1  mrg   if (misalignment != 0)
   6058  1.1  mrg     alignment = MIN (alignment, misalignment & -misalignment);
   6059  1.1  mrg 
   6060  1.1  mrg   /* Cap it to the alignment of OFFSET.  */
   6061  1.1  mrg   if (!integer_zerop (drb->offset))
   6062  1.1  mrg     alignment = MIN (alignment, drb->offset_alignment);
   6063  1.1  mrg 
   6064  1.1  mrg   /* Cap it to the alignment of STEP.  */
   6065  1.1  mrg   if (!integer_zerop (drb->step))
   6066  1.1  mrg     alignment = MIN (alignment, drb->step_alignment);
   6067  1.1  mrg 
   6068  1.1  mrg   return alignment;
   6069  1.1  mrg }
   6070  1.1  mrg 
/* If BASE is a pointer-typed SSA name, try to find the object that it
   is based on.  Return this object X on success and store the alignment
   in bytes of BASE - &X in *ALIGNMENT_OUT.  */

static tree
get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
{
  /* Only pointer-typed SSA names can be traced through scalar evolution
     below; anything else is not handled here.  */
  if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  gimple *def = SSA_NAME_DEF_STMT (base);
  base = analyze_scalar_evolution (loop_containing_stmt (def), base);

  /* Peel chrecs and record the minimum alignment preserved by
     all steps.  Start from the largest alignment an object can have.  */
  unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  while (TREE_CODE (base) == POLYNOMIAL_CHREC)
    {
      unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
      alignment = MIN (alignment, step_alignment);
      base = CHREC_LEFT (base);
    }

  /* Punt if the expression is too complicated to handle.  */
  if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  /* The only useful cases are those for which a dereference folds to something
     other than an INDIRECT_REF.  */
  tree ref_type = TREE_TYPE (TREE_TYPE (base));
  tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
  if (!ref)
    return NULL_TREE;

  /* Analyze the base to which the steps we peeled were applied.  */
  poly_int64 bitsize, bitpos, bytepos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset;
  base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
			      &unsignedp, &reversep, &volatilep);
  /* Fail if the bit position is not a whole number of bytes.  */
  if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
    return NULL_TREE;

  /* Restrict the alignment to that guaranteed by the offsets.  */
  unsigned int bytepos_alignment = known_alignment (bytepos);
  if (bytepos_alignment != 0)
    alignment = MIN (alignment, bytepos_alignment);
  if (offset)
    {
      unsigned int offset_alignment = highest_pow2_factor (offset);
      alignment = MIN (alignment, offset_alignment);
    }

  *alignment_out = alignment;
  return base;
}
   6128  1.1  mrg 
   6129  1.1  mrg /* Return the object whose alignment would need to be changed in order
   6130  1.1  mrg    to increase the alignment of ADDR.  Store the maximum achievable
   6131  1.1  mrg    alignment in *MAX_ALIGNMENT.  */
   6132  1.1  mrg 
   6133  1.1  mrg tree
   6134  1.1  mrg get_base_for_alignment (tree addr, unsigned int *max_alignment)
   6135  1.1  mrg {
   6136  1.1  mrg   tree base = get_base_for_alignment_1 (addr, max_alignment);
   6137  1.1  mrg   if (base)
   6138  1.1  mrg     return base;
   6139  1.1  mrg 
   6140  1.1  mrg   if (TREE_CODE (addr) == ADDR_EXPR)
   6141  1.1  mrg     addr = TREE_OPERAND (addr, 0);
   6142  1.1  mrg   *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
   6143  1.1  mrg   return addr;
   6144  1.1  mrg }
   6145  1.1  mrg 
   6146  1.1  mrg /* Recursive helper function.  */
   6147  1.1  mrg 
   6148  1.1  mrg static bool
   6149  1.1  mrg find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
   6150  1.1  mrg {
   6151  1.1  mrg   /* Inner loops of the nest should not contain siblings.  Example:
   6152  1.1  mrg      when there are two consecutive loops,
   6153  1.1  mrg 
   6154  1.1  mrg      | loop_0
   6155  1.1  mrg      |   loop_1
   6156  1.1  mrg      |     A[{0, +, 1}_1]
   6157  1.1  mrg      |   endloop_1
   6158  1.1  mrg      |   loop_2
   6159  1.1  mrg      |     A[{0, +, 1}_2]
   6160  1.1  mrg      |   endloop_2
   6161  1.1  mrg      | endloop_0
   6162  1.1  mrg 
   6163  1.1  mrg      the dependence relation cannot be captured by the distance
   6164  1.1  mrg      abstraction.  */
   6165  1.1  mrg   if (loop->next)
   6166  1.1  mrg     return false;
   6167  1.1  mrg 
   6168  1.1  mrg   loop_nest->safe_push (loop);
   6169  1.1  mrg   if (loop->inner)
   6170  1.1  mrg     return find_loop_nest_1 (loop->inner, loop_nest);
   6171  1.1  mrg   return true;
   6172  1.1  mrg }
   6173  1.1  mrg 
   6174  1.1  mrg /* Return false when the LOOP is not well nested.  Otherwise return
   6175  1.1  mrg    true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
   6176  1.1  mrg    contain the loops from the outermost to the innermost, as they will
   6177  1.1  mrg    appear in the classic distance vector.  */
   6178  1.1  mrg 
   6179  1.1  mrg bool
   6180  1.1  mrg find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
   6181  1.1  mrg {
   6182  1.1  mrg   loop_nest->safe_push (loop);
   6183  1.1  mrg   if (loop->inner)
   6184  1.1  mrg     return find_loop_nest_1 (loop->inner, loop_nest);
   6185  1.1  mrg   return true;
   6186  1.1  mrg }
   6187  1.1  mrg 
/* Returns true when the data dependences have been computed, false otherwise.
   Given a loop nest LOOP, the following vectors are returned:
   DATAREFS is initialized to all the array elements contained in this loop,
   DEPENDENCE_RELATIONS contains the relations between the data references.
   Compute read-read and self relations if
   COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */

bool
compute_data_dependences_for_loop (class loop *loop,
				   bool compute_self_and_read_read_dependences,
				   vec<loop_p> *loop_nest,
				   vec<data_reference_p> *datarefs,
				   vec<ddr_p> *dependence_relations)
{
  bool res = true;

  /* Reset the global counters so the dump below reflects only this run.  */
  memset (&dependence_stats, 0, sizeof (dependence_stats));

  /* If the loop nest is not well formed, or one of the data references
     is not computable, give up without spending time to compute other
     dependences.  */
  if (!loop
      || !find_loop_nest (loop, loop_nest)
      || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
      || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
				   compute_self_and_read_read_dependences))
    res = false;

  /* Dump the accumulated dependence-tester statistics when requested.  */
  if (dump_file && (dump_flags & TDF_STATS))
    {
      fprintf (dump_file, "Dependence tester statistics:\n");

      fprintf (dump_file, "Number of dependence tests: %d\n",
	       dependence_stats.num_dependence_tests);
      fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
	       dependence_stats.num_dependence_dependent);
      fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
	       dependence_stats.num_dependence_independent);
      fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
	       dependence_stats.num_dependence_undetermined);

      fprintf (dump_file, "Number of subscript tests: %d\n",
	       dependence_stats.num_subscript_tests);
      fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
	       dependence_stats.num_subscript_undetermined);
      fprintf (dump_file, "Number of same subscript function: %d\n",
	       dependence_stats.num_same_subscript_function);

      fprintf (dump_file, "Number of ziv tests: %d\n",
	       dependence_stats.num_ziv);
      fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
	       dependence_stats.num_ziv_dependent);
      fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
	       dependence_stats.num_ziv_independent);
      fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
	       dependence_stats.num_ziv_unimplemented);

      fprintf (dump_file, "Number of siv tests: %d\n",
	       dependence_stats.num_siv);
      fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
	       dependence_stats.num_siv_dependent);
      fprintf (dump_file, "Number of siv tests returning independent: %d\n",
	       dependence_stats.num_siv_independent);
      fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
	       dependence_stats.num_siv_unimplemented);

      fprintf (dump_file, "Number of miv tests: %d\n",
	       dependence_stats.num_miv);
      fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
	       dependence_stats.num_miv_dependent);
      fprintf (dump_file, "Number of miv tests returning independent: %d\n",
	       dependence_stats.num_miv_independent);
      fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
	       dependence_stats.num_miv_unimplemented);
    }

  return res;
}
   6266  1.1  mrg 
   6267  1.1  mrg /* Free the memory used by a data dependence relation DDR.  */
   6268  1.1  mrg 
   6269  1.1  mrg void
   6270  1.1  mrg free_dependence_relation (struct data_dependence_relation *ddr)
   6271  1.1  mrg {
   6272  1.1  mrg   if (ddr == NULL)
   6273  1.1  mrg     return;
   6274  1.1  mrg 
   6275  1.1  mrg   if (DDR_SUBSCRIPTS (ddr).exists ())
   6276  1.1  mrg     free_subscripts (DDR_SUBSCRIPTS (ddr));
   6277  1.1  mrg   DDR_DIST_VECTS (ddr).release ();
   6278  1.1  mrg   DDR_DIR_VECTS (ddr).release ();
   6279  1.1  mrg 
   6280  1.1  mrg   free (ddr);
   6281  1.1  mrg }
   6282  1.1  mrg 
   6283  1.1  mrg /* Free the memory used by the data dependence relations from
   6284  1.1  mrg    DEPENDENCE_RELATIONS.  */
   6285  1.1  mrg 
   6286  1.1  mrg void
   6287  1.1  mrg free_dependence_relations (vec<ddr_p>& dependence_relations)
   6288  1.1  mrg {
   6289  1.1  mrg   for (data_dependence_relation *ddr : dependence_relations)
   6290  1.1  mrg     if (ddr)
   6291  1.1  mrg       free_dependence_relation (ddr);
   6292  1.1  mrg 
   6293  1.1  mrg   dependence_relations.release ();
   6294  1.1  mrg }
   6295  1.1  mrg 
   6296  1.1  mrg /* Free the memory used by the data references from DATAREFS.  */
   6297  1.1  mrg 
   6298  1.1  mrg void
   6299  1.1  mrg free_data_refs (vec<data_reference_p>& datarefs)
   6300  1.1  mrg {
   6301  1.1  mrg   for (data_reference *dr : datarefs)
   6302  1.1  mrg     free_data_ref (dr);
   6303  1.1  mrg   datarefs.release ();
   6304  1.1  mrg }
   6305  1.1  mrg 
/* Common routine implementing both dr_direction_indicator and
   dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
   to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
   Return the step as the indicator otherwise.  */

static tree
dr_step_indicator (struct data_reference *dr, int useful_min)
{
  tree step = DR_STEP (dr);
  if (!step)
    return NULL_TREE;
  STRIP_NOPS (step);
  /* Look for cases where the step is scaled by a positive constant
     integer, which will often be the access size.  If the multiplication
     doesn't change the sign (due to overflow effects) then we can
     test the unscaled value instead.  */
  if (TREE_CODE (step) == MULT_EXPR
      && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
      && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
    {
      tree factor = TREE_OPERAND (step, 1);
      step = TREE_OPERAND (step, 0);

      /* Strip widening and truncating conversions as well as nops.  */
      if (CONVERT_EXPR_P (step)
	  && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
	step = TREE_OPERAND (step, 0);
      tree type = TREE_TYPE (step);

      /* Get the range of step values that would not cause overflow.  */
      widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
			 / wi::to_widest (factor));
      widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
			 / wi::to_widest (factor));

      /* Get the range of values that the unconverted step actually has.
	 Fall back to the full range of its type when no useful range
	 information is available.  */
      wide_int step_min, step_max;
      value_range vr;
      if (TREE_CODE (step) != SSA_NAME
	  || !get_range_query (cfun)->range_of_expr (vr, step)
	  || vr.kind () != VR_RANGE)
	{
	  step_min = wi::to_wide (TYPE_MIN_VALUE (type));
	  step_max = wi::to_wide (TYPE_MAX_VALUE (type));
	}
      else
	{
	  step_min = vr.lower_bound ();
	  step_max = vr.upper_bound ();
	}

      /* Check whether the unconverted step has an acceptable range.  */
      signop sgn = TYPE_SIGN (type);
      if (wi::les_p (minv, widest_int::from (step_min, sgn))
	  && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
	{
	  /* The sign of the scaled step matches the sign of the
	     unscaled step, so classify using the unscaled range.  */
	  if (wi::ge_p (step_min, useful_min, sgn))
	    return ssize_int (useful_min);
	  else if (wi::lt_p (step_max, 0, sgn))
	    return ssize_int (-1);
	  else
	    return fold_convert (ssizetype, step);
	}
    }
  /* Nothing better was proven: return the original (unstripped) step.  */
  return DR_STEP (dr);
}
   6372  1.1  mrg 
/* Return a value that is negative iff DR has a negative step.
   A minimum useful value of 0 asks dr_step_indicator only about
   the sign of the step.  */

tree
dr_direction_indicator (struct data_reference *dr)
{
  return dr_step_indicator (dr, 0);
}
   6380  1.1  mrg 
/* Return a value that is zero iff DR has a zero step.
   A minimum useful value of 1 asks dr_step_indicator to distinguish
   zero steps from (provably) positive ones.  */

tree
dr_zero_step_indicator (struct data_reference *dr)
{
  return dr_step_indicator (dr, 1);
}
   6388  1.1  mrg 
   6389  1.1  mrg /* Return true if DR is known to have a nonnegative (but possibly zero)
   6390  1.1  mrg    step.  */
   6391  1.1  mrg 
   6392  1.1  mrg bool
   6393  1.1  mrg dr_known_forward_stride_p (struct data_reference *dr)
   6394  1.1  mrg {
   6395  1.1  mrg   tree indicator = dr_direction_indicator (dr);
   6396  1.1  mrg   tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
   6397  1.1  mrg 				   fold_convert (ssizetype, indicator),
   6398  1.1  mrg 				   ssize_int (0));
   6399             return neg_step_val && integer_zerop (neg_step_val);
   6400           }
   6401