/* Data references and dependences detectors.
   Copyright (C) 2003-2022 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop (at) cri.ensmp.fr>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass walks a given loop structure searching for array
   references.  The information about the array accesses is recorded
   in DATA_REFERENCE structures.

   The basic test for determining the dependences is:
   given two access functions chrec1 and chrec2 to a same array, and
   x and y two vectors from the iteration domain, the same element of
   the array is accessed twice at iterations x and y if and only if:
   | chrec1 (x) == chrec2 (y).

   The goals of this analysis are:

   - to determine the independence: the relation between two
     independent accesses is qualified with the chrec_known (this
     information allows a loop parallelization),

   - when two data references access the same data, to qualify the
     dependence relation with classic dependence representations:

       - distance vectors
       - direction vectors
       - loop carried level dependence
       - polyhedron dependence
     or with the chains of recurrences based representation,

   - to define a knowledge base for storing the data dependence
     information,

   - to define an interface to access this data.


   Definitions:

   - subscript: given two array accesses a subscript is the tuple
   composed of the access functions for a given dimension.  Example:
   Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
   (f1, g1), (f2, g2), (f3, g3).

   - Diophantine equation: an equation whose coefficients and
   solutions are integer constants, for example the equation
   | 3*x + 2*y = 1
   has an integer solution x = 1 and y = -1.

   References:

   - "Advanced Compilation for High Performance Computing" by Randy
   Allen and Ken Kennedy.
   http://citeseer.ist.psu.edu/goff91practical.html

   - "Loop Transformations for Restructuring Compilers - The Foundations"
   by Utpal Banerjee.
72 1.1 mrg 73 1.1 mrg 74 1.1 mrg */ 75 1.1 mrg 76 1.1 mrg #define INCLUDE_ALGORITHM 77 1.1 mrg #include "config.h" 78 1.1 mrg #include "system.h" 79 1.1 mrg #include "coretypes.h" 80 1.1 mrg #include "backend.h" 81 1.1 mrg #include "rtl.h" 82 1.1 mrg #include "tree.h" 83 1.1 mrg #include "gimple.h" 84 1.1 mrg #include "gimple-pretty-print.h" 85 1.1 mrg #include "alias.h" 86 1.1 mrg #include "fold-const.h" 87 1.1 mrg #include "expr.h" 88 1.1 mrg #include "gimple-iterator.h" 89 1.1 mrg #include "tree-ssa-loop-niter.h" 90 1.1 mrg #include "tree-ssa-loop.h" 91 1.1 mrg #include "tree-ssa.h" 92 1.1 mrg #include "cfgloop.h" 93 1.1 mrg #include "tree-data-ref.h" 94 1.1 mrg #include "tree-scalar-evolution.h" 95 1.1 mrg #include "dumpfile.h" 96 1.1 mrg #include "tree-affine.h" 97 1.1 mrg #include "builtins.h" 98 1.1 mrg #include "tree-eh.h" 99 1.1 mrg #include "ssa.h" 100 1.1 mrg #include "internal-fn.h" 101 1.1 mrg #include "vr-values.h" 102 1.1 mrg #include "range-op.h" 103 1.1 mrg #include "tree-ssa-loop-ivopts.h" 104 1.1 mrg 105 1.1 mrg static struct datadep_stats 106 1.1 mrg { 107 1.1 mrg int num_dependence_tests; 108 1.1 mrg int num_dependence_dependent; 109 1.1 mrg int num_dependence_independent; 110 1.1 mrg int num_dependence_undetermined; 111 1.1 mrg 112 1.1 mrg int num_subscript_tests; 113 1.1 mrg int num_subscript_undetermined; 114 1.1 mrg int num_same_subscript_function; 115 1.1 mrg 116 1.1 mrg int num_ziv; 117 1.1 mrg int num_ziv_independent; 118 1.1 mrg int num_ziv_dependent; 119 1.1 mrg int num_ziv_unimplemented; 120 1.1 mrg 121 1.1 mrg int num_siv; 122 1.1 mrg int num_siv_independent; 123 1.1 mrg int num_siv_dependent; 124 1.1 mrg int num_siv_unimplemented; 125 1.1 mrg 126 1.1 mrg int num_miv; 127 1.1 mrg int num_miv_independent; 128 1.1 mrg int num_miv_dependent; 129 1.1 mrg int num_miv_unimplemented; 130 1.1 mrg } dependence_stats; 131 1.1 mrg 132 1.1 mrg static bool subscript_dependence_tester_1 (struct data_dependence_relation *, 133 1.1 mrg unsigned int, 
unsigned int, 134 1.1 mrg class loop *); 135 1.1 mrg /* Returns true iff A divides B. */ 136 1.1 mrg 137 1.1 mrg static inline bool 138 1.1 mrg tree_fold_divides_p (const_tree a, const_tree b) 139 1.1 mrg { 140 1.1 mrg gcc_assert (TREE_CODE (a) == INTEGER_CST); 141 1.1 mrg gcc_assert (TREE_CODE (b) == INTEGER_CST); 142 1.1 mrg return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a)); 143 1.1 mrg } 144 1.1 mrg 145 1.1 mrg /* Returns true iff A divides B. */ 146 1.1 mrg 147 1.1 mrg static inline bool 148 1.1 mrg int_divides_p (lambda_int a, lambda_int b) 149 1.1 mrg { 150 1.1 mrg return ((b % a) == 0); 151 1.1 mrg } 152 1.1 mrg 153 1.1 mrg /* Return true if reference REF contains a union access. */ 154 1.1 mrg 155 1.1 mrg static bool 156 1.1 mrg ref_contains_union_access_p (tree ref) 157 1.1 mrg { 158 1.1 mrg while (handled_component_p (ref)) 159 1.1 mrg { 160 1.1 mrg ref = TREE_OPERAND (ref, 0); 161 1.1 mrg if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE 162 1.1 mrg || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE) 163 1.1 mrg return true; 164 1.1 mrg } 165 1.1 mrg return false; 166 1.1 mrg } 167 1.1 mrg 168 1.1 mrg 169 1.1 mrg 171 1.1 mrg /* Dump into FILE all the data references from DATAREFS. */ 172 1.1 mrg 173 1.1 mrg static void 174 1.1 mrg dump_data_references (FILE *file, vec<data_reference_p> datarefs) 175 1.1 mrg { 176 1.1 mrg for (data_reference *dr : datarefs) 177 1.1 mrg dump_data_reference (file, dr); 178 1.1 mrg } 179 1.1 mrg 180 1.1 mrg /* Unified dump into FILE all the data references from DATAREFS. 
*/ 181 1.1 mrg 182 1.1 mrg DEBUG_FUNCTION void 183 1.1 mrg debug (vec<data_reference_p> &ref) 184 1.1 mrg { 185 1.1 mrg dump_data_references (stderr, ref); 186 1.1 mrg } 187 1.1 mrg 188 1.1 mrg DEBUG_FUNCTION void 189 1.1 mrg debug (vec<data_reference_p> *ptr) 190 1.1 mrg { 191 1.1 mrg if (ptr) 192 1.1 mrg debug (*ptr); 193 1.1 mrg else 194 1.1 mrg fprintf (stderr, "<nil>\n"); 195 1.1 mrg } 196 1.1 mrg 197 1.1 mrg 198 1.1 mrg /* Dump into STDERR all the data references from DATAREFS. */ 199 1.1 mrg 200 1.1 mrg DEBUG_FUNCTION void 201 1.1 mrg debug_data_references (vec<data_reference_p> datarefs) 202 1.1 mrg { 203 1.1 mrg dump_data_references (stderr, datarefs); 204 1.1 mrg } 205 1.1 mrg 206 1.1 mrg /* Print to STDERR the data_reference DR. */ 207 1.1 mrg 208 1.1 mrg DEBUG_FUNCTION void 209 1.1 mrg debug_data_reference (struct data_reference *dr) 210 1.1 mrg { 211 1.1 mrg dump_data_reference (stderr, dr); 212 1.1 mrg } 213 1.1 mrg 214 1.1 mrg /* Dump function for a DATA_REFERENCE structure. */ 215 1.1 mrg 216 1.1 mrg void 217 1.1 mrg dump_data_reference (FILE *outf, 218 1.1 mrg struct data_reference *dr) 219 1.1 mrg { 220 1.1 mrg unsigned int i; 221 1.1 mrg 222 1.1 mrg fprintf (outf, "#(Data Ref: \n"); 223 1.1 mrg fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index); 224 1.1 mrg fprintf (outf, "# stmt: "); 225 1.1 mrg print_gimple_stmt (outf, DR_STMT (dr), 0); 226 1.1 mrg fprintf (outf, "# ref: "); 227 1.1 mrg print_generic_stmt (outf, DR_REF (dr)); 228 1.1 mrg fprintf (outf, "# base_object: "); 229 1.1 mrg print_generic_stmt (outf, DR_BASE_OBJECT (dr)); 230 1.1 mrg 231 1.1 mrg for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++) 232 1.1 mrg { 233 1.1 mrg fprintf (outf, "# Access function %d: ", i); 234 1.1 mrg print_generic_stmt (outf, DR_ACCESS_FN (dr, i)); 235 1.1 mrg } 236 1.1 mrg fprintf (outf, "#)\n"); 237 1.1 mrg } 238 1.1 mrg 239 1.1 mrg /* Unified dump function for a DATA_REFERENCE structure. 
*/ 240 1.1 mrg 241 1.1 mrg DEBUG_FUNCTION void 242 1.1 mrg debug (data_reference &ref) 243 1.1 mrg { 244 1.1 mrg dump_data_reference (stderr, &ref); 245 1.1 mrg } 246 1.1 mrg 247 1.1 mrg DEBUG_FUNCTION void 248 1.1 mrg debug (data_reference *ptr) 249 1.1 mrg { 250 1.1 mrg if (ptr) 251 1.1 mrg debug (*ptr); 252 1.1 mrg else 253 1.1 mrg fprintf (stderr, "<nil>\n"); 254 1.1 mrg } 255 1.1 mrg 256 1.1 mrg 257 1.1 mrg /* Dumps the affine function described by FN to the file OUTF. */ 258 1.1 mrg 259 1.1 mrg DEBUG_FUNCTION void 260 1.1 mrg dump_affine_function (FILE *outf, affine_fn fn) 261 1.1 mrg { 262 1.1 mrg unsigned i; 263 1.1 mrg tree coef; 264 1.1 mrg 265 1.1 mrg print_generic_expr (outf, fn[0], TDF_SLIM); 266 1.1 mrg for (i = 1; fn.iterate (i, &coef); i++) 267 1.1 mrg { 268 1.1 mrg fprintf (outf, " + "); 269 1.1 mrg print_generic_expr (outf, coef, TDF_SLIM); 270 1.1 mrg fprintf (outf, " * x_%u", i); 271 1.1 mrg } 272 1.1 mrg } 273 1.1 mrg 274 1.1 mrg /* Dumps the conflict function CF to the file OUTF. */ 275 1.1 mrg 276 1.1 mrg DEBUG_FUNCTION void 277 1.1 mrg dump_conflict_function (FILE *outf, conflict_function *cf) 278 1.1 mrg { 279 1.1 mrg unsigned i; 280 1.1 mrg 281 1.1 mrg if (cf->n == NO_DEPENDENCE) 282 1.1 mrg fprintf (outf, "no dependence"); 283 1.1 mrg else if (cf->n == NOT_KNOWN) 284 1.1 mrg fprintf (outf, "not known"); 285 1.1 mrg else 286 1.1 mrg { 287 1.1 mrg for (i = 0; i < cf->n; i++) 288 1.1 mrg { 289 1.1 mrg if (i != 0) 290 1.1 mrg fprintf (outf, " "); 291 1.1 mrg fprintf (outf, "["); 292 1.1 mrg dump_affine_function (outf, cf->fns[i]); 293 1.1 mrg fprintf (outf, "]"); 294 1.1 mrg } 295 1.1 mrg } 296 1.1 mrg } 297 1.1 mrg 298 1.1 mrg /* Dump function for a SUBSCRIPT structure. 
*/

DEBUG_FUNCTION void
dump_subscript (FILE *outf, struct subscript *subscript)
{
  conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);

  fprintf (outf, "\n (subscript \n");
  fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n  last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  cf = SUB_CONFLICTS_IN_B (subscript);
  fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n  last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  fprintf (outf, "\n (Subscript distance: ");
  print_generic_expr (outf, SUB_DISTANCE (subscript));
  fprintf (outf, " ))\n");
}

/* Print the classic direction vector DIRV to OUTF.
*/ 331 1.1 mrg 332 1.1 mrg DEBUG_FUNCTION void 333 1.1 mrg print_direction_vector (FILE *outf, 334 1.1 mrg lambda_vector dirv, 335 1.1 mrg int length) 336 1.1 mrg { 337 1.1 mrg int eq; 338 1.1 mrg 339 1.1 mrg for (eq = 0; eq < length; eq++) 340 1.1 mrg { 341 1.1 mrg enum data_dependence_direction dir = ((enum data_dependence_direction) 342 1.1 mrg dirv[eq]); 343 1.1 mrg 344 1.1 mrg switch (dir) 345 1.1 mrg { 346 1.1 mrg case dir_positive: 347 1.1 mrg fprintf (outf, " +"); 348 1.1 mrg break; 349 1.1 mrg case dir_negative: 350 1.1 mrg fprintf (outf, " -"); 351 1.1 mrg break; 352 1.1 mrg case dir_equal: 353 1.1 mrg fprintf (outf, " ="); 354 1.1 mrg break; 355 1.1 mrg case dir_positive_or_equal: 356 1.1 mrg fprintf (outf, " +="); 357 1.1 mrg break; 358 1.1 mrg case dir_positive_or_negative: 359 1.1 mrg fprintf (outf, " +-"); 360 1.1 mrg break; 361 1.1 mrg case dir_negative_or_equal: 362 1.1 mrg fprintf (outf, " -="); 363 1.1 mrg break; 364 1.1 mrg case dir_star: 365 1.1 mrg fprintf (outf, " *"); 366 1.1 mrg break; 367 1.1 mrg default: 368 1.1 mrg fprintf (outf, "indep"); 369 1.1 mrg break; 370 1.1 mrg } 371 1.1 mrg } 372 1.1 mrg fprintf (outf, "\n"); 373 1.1 mrg } 374 1.1 mrg 375 1.1 mrg /* Print a vector of direction vectors. */ 376 1.1 mrg 377 1.1 mrg DEBUG_FUNCTION void 378 1.1 mrg print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects, 379 1.1 mrg int length) 380 1.1 mrg { 381 1.1 mrg for (lambda_vector v : dir_vects) 382 1.1 mrg print_direction_vector (outf, v, length); 383 1.1 mrg } 384 1.1 mrg 385 1.1 mrg /* Print out a vector VEC of length N to OUTFILE. */ 386 1.1 mrg 387 1.1 mrg DEBUG_FUNCTION void 388 1.1 mrg print_lambda_vector (FILE * outfile, lambda_vector vector, int n) 389 1.1 mrg { 390 1.1 mrg int i; 391 1.1 mrg 392 1.1 mrg for (i = 0; i < n; i++) 393 1.1 mrg fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]); 394 1.1 mrg fprintf (outfile, "\n"); 395 1.1 mrg } 396 1.1 mrg 397 1.1 mrg /* Print a vector of distance vectors. 
*/ 398 1.1 mrg 399 1.1 mrg DEBUG_FUNCTION void 400 1.1 mrg print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects, 401 1.1 mrg int length) 402 1.1 mrg { 403 1.1 mrg for (lambda_vector v : dist_vects) 404 1.1 mrg print_lambda_vector (outf, v, length); 405 1.1 mrg } 406 1.1 mrg 407 1.1 mrg /* Dump function for a DATA_DEPENDENCE_RELATION structure. */ 408 1.1 mrg 409 1.1 mrg DEBUG_FUNCTION void 410 1.1 mrg dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr) 411 1.1 mrg { 412 1.1 mrg struct data_reference *dra, *drb; 413 1.1 mrg 414 1.1 mrg fprintf (outf, "(Data Dep: \n"); 415 1.1 mrg 416 1.1 mrg if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) 417 1.1 mrg { 418 1.1 mrg if (ddr) 419 1.1 mrg { 420 1.1 mrg dra = DDR_A (ddr); 421 1.1 mrg drb = DDR_B (ddr); 422 1.1 mrg if (dra) 423 1.1 mrg dump_data_reference (outf, dra); 424 1.1 mrg else 425 1.1 mrg fprintf (outf, " (nil)\n"); 426 1.1 mrg if (drb) 427 1.1 mrg dump_data_reference (outf, drb); 428 1.1 mrg else 429 1.1 mrg fprintf (outf, " (nil)\n"); 430 1.1 mrg } 431 1.1 mrg fprintf (outf, " (don't know)\n)\n"); 432 1.1 mrg return; 433 1.1 mrg } 434 1.1 mrg 435 1.1 mrg dra = DDR_A (ddr); 436 1.1 mrg drb = DDR_B (ddr); 437 1.1 mrg dump_data_reference (outf, dra); 438 1.1 mrg dump_data_reference (outf, drb); 439 1.1 mrg 440 1.1 mrg if (DDR_ARE_DEPENDENT (ddr) == chrec_known) 441 1.1 mrg fprintf (outf, " (no dependence)\n"); 442 1.1 mrg 443 1.1 mrg else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) 444 1.1 mrg { 445 1.1 mrg unsigned int i; 446 1.1 mrg class loop *loopi; 447 1.1 mrg 448 1.1 mrg subscript *sub; 449 1.1 mrg FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub) 450 1.1 mrg { 451 1.1 mrg fprintf (outf, " access_fn_A: "); 452 1.1 mrg print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0)); 453 1.1 mrg fprintf (outf, " access_fn_B: "); 454 1.1 mrg print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1)); 455 1.1 mrg dump_subscript (outf, sub); 456 1.1 mrg } 457 1.1 mrg 458 1.1 mrg fprintf (outf, " 
loop nest: ("); 459 1.1 mrg FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi) 460 1.1 mrg fprintf (outf, "%d ", loopi->num); 461 1.1 mrg fprintf (outf, ")\n"); 462 1.1 mrg 463 1.1 mrg for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++) 464 1.1 mrg { 465 1.1 mrg fprintf (outf, " distance_vector: "); 466 1.1 mrg print_lambda_vector (outf, DDR_DIST_VECT (ddr, i), 467 1.1 mrg DDR_NB_LOOPS (ddr)); 468 1.1 mrg } 469 1.1 mrg 470 1.1 mrg for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++) 471 1.1 mrg { 472 1.1 mrg fprintf (outf, " direction_vector: "); 473 1.1 mrg print_direction_vector (outf, DDR_DIR_VECT (ddr, i), 474 1.1 mrg DDR_NB_LOOPS (ddr)); 475 1.1 mrg } 476 1.1 mrg } 477 1.1 mrg 478 1.1 mrg fprintf (outf, ")\n"); 479 1.1 mrg } 480 1.1 mrg 481 1.1 mrg /* Debug version. */ 482 1.1 mrg 483 1.1 mrg DEBUG_FUNCTION void 484 1.1 mrg debug_data_dependence_relation (const struct data_dependence_relation *ddr) 485 1.1 mrg { 486 1.1 mrg dump_data_dependence_relation (stderr, ddr); 487 1.1 mrg } 488 1.1 mrg 489 1.1 mrg /* Dump into FILE all the dependence relations from DDRS. */ 490 1.1 mrg 491 1.1 mrg DEBUG_FUNCTION void 492 1.1 mrg dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs) 493 1.1 mrg { 494 1.1 mrg for (auto ddr : ddrs) 495 1.1 mrg dump_data_dependence_relation (file, ddr); 496 1.1 mrg } 497 1.1 mrg 498 1.1 mrg DEBUG_FUNCTION void 499 1.1 mrg debug (vec<ddr_p> &ref) 500 1.1 mrg { 501 1.1 mrg dump_data_dependence_relations (stderr, ref); 502 1.1 mrg } 503 1.1 mrg 504 1.1 mrg DEBUG_FUNCTION void 505 1.1 mrg debug (vec<ddr_p> *ptr) 506 1.1 mrg { 507 1.1 mrg if (ptr) 508 1.1 mrg debug (*ptr); 509 1.1 mrg else 510 1.1 mrg fprintf (stderr, "<nil>\n"); 511 1.1 mrg } 512 1.1 mrg 513 1.1 mrg 514 1.1 mrg /* Dump to STDERR all the dependence relations from DDRS. 
*/ 515 1.1 mrg 516 1.1 mrg DEBUG_FUNCTION void 517 1.1 mrg debug_data_dependence_relations (vec<ddr_p> ddrs) 518 1.1 mrg { 519 1.1 mrg dump_data_dependence_relations (stderr, ddrs); 520 1.1 mrg } 521 1.1 mrg 522 1.1 mrg /* Dumps the distance and direction vectors in FILE. DDRS contains 523 1.1 mrg the dependence relations, and VECT_SIZE is the size of the 524 1.1 mrg dependence vectors, or in other words the number of loops in the 525 1.1 mrg considered nest. */ 526 1.1 mrg 527 1.1 mrg DEBUG_FUNCTION void 528 1.1 mrg dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs) 529 1.1 mrg { 530 1.1 mrg for (data_dependence_relation *ddr : ddrs) 531 1.1 mrg if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr)) 532 1.1 mrg { 533 1.1 mrg for (lambda_vector v : DDR_DIST_VECTS (ddr)) 534 1.1 mrg { 535 1.1 mrg fprintf (file, "DISTANCE_V ("); 536 1.1 mrg print_lambda_vector (file, v, DDR_NB_LOOPS (ddr)); 537 1.1 mrg fprintf (file, ")\n"); 538 1.1 mrg } 539 1.1 mrg 540 1.1 mrg for (lambda_vector v : DDR_DIR_VECTS (ddr)) 541 1.1 mrg { 542 1.1 mrg fprintf (file, "DIRECTION_V ("); 543 1.1 mrg print_direction_vector (file, v, DDR_NB_LOOPS (ddr)); 544 1.1 mrg fprintf (file, ")\n"); 545 1.1 mrg } 546 1.1 mrg } 547 1.1 mrg 548 1.1 mrg fprintf (file, "\n\n"); 549 1.1 mrg } 550 1.1 mrg 551 1.1 mrg /* Dumps the data dependence relations DDRS in FILE. 
*/ 552 1.1 mrg 553 1.1 mrg DEBUG_FUNCTION void 554 1.1 mrg dump_ddrs (FILE *file, vec<ddr_p> ddrs) 555 1.1 mrg { 556 1.1 mrg for (data_dependence_relation *ddr : ddrs) 557 1.1 mrg dump_data_dependence_relation (file, ddr); 558 1.1 mrg 559 1.1 mrg fprintf (file, "\n\n"); 560 1.1 mrg } 561 1.1 mrg 562 1.1 mrg DEBUG_FUNCTION void 563 1.1 mrg debug_ddrs (vec<ddr_p> ddrs) 564 1.1 mrg { 565 1.1 mrg dump_ddrs (stderr, ddrs); 566 1.1 mrg } 567 1.1 mrg 568 1.1 mrg /* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of 569 1.1 mrg OP0 CODE OP1, where: 570 1.1 mrg 571 1.1 mrg - OP0 CODE OP1 has integral type TYPE 572 1.1 mrg - the range of OP0 is given by OP0_RANGE and 573 1.1 mrg - the range of OP1 is given by OP1_RANGE. 574 1.1 mrg 575 1.1 mrg Independently of RESULT_RANGE, try to compute: 576 1.1 mrg 577 1.1 mrg DELTA = ((sizetype) OP0 CODE (sizetype) OP1) 578 1.1 mrg - (sizetype) (OP0 CODE OP1) 579 1.1 mrg 580 1.1 mrg as a constant and subtract DELTA from the ssizetype constant in *OFF. 581 1.1 mrg Return true on success, or false if DELTA is not known at compile time. 582 1.1 mrg 583 1.1 mrg Truncation and sign changes are known to distribute over CODE, i.e. 584 1.1 mrg 585 1.1 mrg (itype) (A CODE B) == (itype) A CODE (itype) B 586 1.1 mrg 587 1.1 mrg for any integral type ITYPE whose precision is no greater than the 588 1.1 mrg precision of A and B. 
*/

static bool
compute_distributive_range (tree type, value_range &op0_range,
			    tree_code code, value_range &op1_range,
			    tree *off, value_range *result_range)
{
  gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
  if (result_range)
    {
      range_operator *op = range_op_handler (code, type);
      op->fold_range (*result_range, type, op0_range, op1_range);
    }

  /* The distributive property guarantees that if TYPE is no narrower
     than SIZETYPE,

       (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1

     and so we can treat DELTA as zero.  */
  if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
    return true;

  /* If overflow is undefined, we can assume that:

       X == (ssizetype) OP0 CODE (ssizetype) OP1

     is within the range of TYPE, i.e.:

       X == (ssizetype) (TYPE) X

     Distributing the (TYPE) truncation over X gives:

       X == (ssizetype) (OP0 CODE OP1)

     Casting both sides to sizetype and distributing the sizetype cast
     over X gives:

       (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)

     and so we can treat DELTA as zero.  */
  if (TYPE_OVERFLOW_UNDEFINED (type))
    return true;

  /* Compute the range of:

       (ssizetype) OP0 CODE (ssizetype) OP1

     The distributive property guarantees that this has the same bitpattern as:

       (sizetype) OP0 CODE (sizetype) OP1

     but its range is more conducive to analysis.  */
  range_cast (op0_range, ssizetype);
  range_cast (op1_range, ssizetype);
  value_range wide_range;
  range_operator *op = range_op_handler (code, ssizetype);
  /* Fold the wide operation under wrapping semantics so that any
     overflow is represented modulo the ssizetype precision.  */
  bool saved_flag_wrapv = flag_wrapv;
  flag_wrapv = 1;
  op->fold_range (wide_range, ssizetype, op0_range, op1_range);
  flag_wrapv = saved_flag_wrapv;
  if (wide_range.num_pairs () != 1 || !range_int_cst_p (&wide_range))
    return false;

  wide_int lb = wide_range.lower_bound ();
  wide_int ub = wide_range.upper_bound ();

  /* Calculate the number of times that each end of the range overflows or
     underflows TYPE.  We can only calculate DELTA if the numbers match.  */
  unsigned int precision = TYPE_PRECISION (type);
  if (!TYPE_UNSIGNED (type))
    {
      wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
      lb -= type_min;
      ub -= type_min;
    }
  wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
  lb &= upper_bits;
  ub &= upper_bits;
  if (lb != ub)
    return false;

  /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
     negative values indicating underflow.  The low PRECISION bits of LB
     are clear, so DELTA is therefore LB (== UB).  */
  *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
  return true;
}

/* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
   given that OP has type FROM_TYPE and range RANGE.  Both TO_TYPE and
   FROM_TYPE are integral types.
*/ 680 1.1 mrg 681 1.1 mrg static bool 682 1.1 mrg nop_conversion_for_offset_p (tree to_type, tree from_type, value_range &range) 683 1.1 mrg { 684 1.1 mrg gcc_assert (INTEGRAL_TYPE_P (to_type) 685 1.1 mrg && INTEGRAL_TYPE_P (from_type) 686 1.1 mrg && !TYPE_OVERFLOW_TRAPS (to_type) 687 1.1 mrg && !TYPE_OVERFLOW_TRAPS (from_type)); 688 1.1 mrg 689 1.1 mrg /* Converting to something no narrower than sizetype and then to sizetype 690 1.1 mrg is equivalent to converting directly to sizetype. */ 691 1.1 mrg if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype)) 692 1.1 mrg return true; 693 1.1 mrg 694 1.1 mrg /* Check whether TO_TYPE can represent all values that FROM_TYPE can. */ 695 1.1 mrg if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type) 696 1.1 mrg && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type))) 697 1.1 mrg return true; 698 1.1 mrg 699 1.1 mrg /* For narrowing conversions, we could in principle test whether 700 1.1 mrg the bits in FROM_TYPE but not in TO_TYPE have a fixed value 701 1.1 mrg and apply a constant adjustment. 702 1.1 mrg 703 1.1 mrg For other conversions (which involve a sign change) we could 704 1.1 mrg check that the signs are always equal, and apply a constant 705 1.1 mrg adjustment if the signs are negative. 706 1.1 mrg 707 1.1 mrg However, both cases should be rare. */ 708 1.1 mrg return range_fits_type_p (&range, TYPE_PRECISION (to_type), 709 1.1 mrg TYPE_SIGN (to_type)); 710 1.1 mrg } 711 1.1 mrg 712 1.1 mrg static void 713 1.1 mrg split_constant_offset (tree type, tree *var, tree *off, 714 1.1 mrg value_range *result_range, 715 1.1 mrg hash_map<tree, std::pair<tree, tree> > &cache, 716 1.1 mrg unsigned *limit); 717 1.1 mrg 718 1.1 mrg /* Helper function for split_constant_offset. 
If TYPE is a pointer type, 719 1.1 mrg try to express OP0 CODE OP1 as: 720 1.1 mrg 721 1.1 mrg POINTER_PLUS <*VAR, (sizetype) *OFF> 722 1.1 mrg 723 1.1 mrg where: 724 1.1 mrg 725 1.1 mrg - *VAR has type TYPE 726 1.1 mrg - *OFF is a constant of type ssizetype. 727 1.1 mrg 728 1.1 mrg If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as: 729 1.1 mrg 730 1.1 mrg *VAR + (sizetype) *OFF 731 1.1 mrg 732 1.1 mrg where: 733 1.1 mrg 734 1.1 mrg - *VAR has type sizetype 735 1.1 mrg - *OFF is a constant of type ssizetype. 736 1.1 mrg 737 1.1 mrg In both cases, OP0 CODE OP1 has type TYPE. 738 1.1 mrg 739 1.1 mrg Return true on success. A false return value indicates that we can't 740 1.1 mrg do better than set *OFF to zero. 741 1.1 mrg 742 1.1 mrg When returning true, set RESULT_RANGE to the range of OP0 CODE OP1, 743 1.1 mrg if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING. 744 1.1 mrg 745 1.1 mrg CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously 746 1.1 mrg visited. LIMIT counts down the number of SSA names that we are 747 1.1 mrg allowed to process before giving up. 
*/

static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
			 tree *var, tree *off, value_range *result_range,
			 hash_map<tree, std::pair<tree, tree> > &cache,
			 unsigned *limit)
{
  tree var0, var1;
  tree off0, off1;
  value_range op0_range, op1_range;

  *var = NULL_TREE;
  *off = NULL_TREE;

  if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
    return false;

  /* Refuse to split expressions that involve SSA names occurring in
     abnormal PHIs; new uses of them cannot be created.  */
  if (TREE_CODE (op0) == SSA_NAME
      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
    return false;
  if (op1
      && TREE_CODE (op1) == SSA_NAME
      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op1))
    return false;

  switch (code)
    {
    case INTEGER_CST:
      /* A constant is all offset; the variable part is zero.  */
      *var = size_int (0);
      *off = fold_convert (ssizetype, op0);
      if (result_range)
	result_range->set (op0, op0);
      return true;

    case POINTER_PLUS_EXPR:
      /* Pointer arithmetic cannot overflow, so no range tracking is
	 needed; simply combine the two splits.  */
      split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
      split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
      *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
      *off = size_binop (PLUS_EXPR, off0, off1);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
      *off = size_binop (code, off0, off1);
      if (!compute_distributive_range (type, op0_range, code, op1_range,
				       off, result_range))
	return false;
      *var = fold_build2 (code, sizetype, var0, var1);
      return true;

    case MULT_EXPR:
      if (TREE_CODE (op1) != INTEGER_CST)
	return false;

      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      op1_range.set (op1, op1);
      *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
      if (!compute_distributive_range (type, op0_range, code, op1_range,
				       off, result_range))
	return false;
      *var = fold_build2 (MULT_EXPR, sizetype, var0,
			  fold_convert (sizetype, op1));
      return true;

    case ADDR_EXPR:
      {
	tree base, poffset;
	poly_int64 pbitsize, pbitpos, pbytepos;
	machine_mode pmode;
	int punsignedp, preversep, pvolatilep;

	op0 = TREE_OPERAND (op0, 0);
	base
	  = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
				 &punsignedp, &preversep, &pvolatilep);

	/* Punt on references that are not byte-aligned.  */
	if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
	  return false;
	base = build_fold_addr_expr (base);
	off0 = ssize_int (pbytepos);

	if (poffset)
	  {
	    split_constant_offset (poffset, &poffset, &off1, nullptr,
				   cache, limit);
	    off0 = size_binop (PLUS_EXPR, off0, off1);
	    base = fold_build_pointer_plus (base, poffset);
	  }

	var0 = fold_convert (type, base);

	/* If variable length types are involved, punt, otherwise casts
	   might be converted into ARRAY_REFs in gimplify_conversion.
	   To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
	   possibly no longer appears in current GIMPLE, might resurface.
	   This perhaps could run
	   if (CONVERT_EXPR_P (var0))
	     {
	       gimplify_conversion (&var0);
	       // Attempt to fill in any within var0 found ARRAY_REF's
	       // element size from corresponding op embedded ARRAY_REF,
	       // if unsuccessful, just punt.
	     } */
	while (POINTER_TYPE_P (type))
	  type = TREE_TYPE (type);
	if (int_size_in_bytes (type) < 0)
	  return false;

	*var = var0;
	*off = off0;
	return true;
      }

    case SSA_NAME:
      {
	gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
	enum tree_code subcode;

	if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
	  return false;

	subcode = gimple_assign_rhs_code (def_stmt);

	/* We are using a cache to avoid un-CSEing large amounts of code.  */
	bool use_cache = false;
	if (!has_single_use (op0)
	    && (subcode == POINTER_PLUS_EXPR
		|| subcode == PLUS_EXPR
		|| subcode == MINUS_EXPR
		|| subcode == MULT_EXPR
		|| subcode == ADDR_EXPR
		|| CONVERT_EXPR_CODE_P (subcode)))
	  {
	    use_cache = true;
	    bool existed;
	    std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
	    if (existed)
	      {
		/* A zero cached offset marks an SSA name that is being
		   visited (cycle guard) or known not to split.  */
		if (integer_zerop (e.second))
		  return false;
		*var = e.first;
		*off = e.second;
		/* The caller sets the range in this case.  */
		return true;
	      }
	    e = std::make_pair (op0, ssize_int (0));
	  }

	if (*limit == 0)
	  return false;
	--*limit;

	var0 = gimple_assign_rhs1 (def_stmt);
	var1 = gimple_assign_rhs2 (def_stmt);

	bool res = split_constant_offset_1 (type, var0, subcode, var1,
					    var, off, nullptr, cache, limit);
	if (res && use_cache)
	  *cache.get (op0) = std::make_pair (*var, *off);
	/* The caller sets the range in this case.  */
	return res;
      }
    CASE_CONVERT:
      {
	/* We can only handle the following conversions:

	   - Conversions from one pointer type to another pointer type.

	   - Conversions from one non-trapping integral type to another
	     non-trapping integral type.  In this case, the recursive
	     call makes sure that:

	       (sizetype) OP0

	     can be expressed as a sizetype operation involving VAR and OFF,
	     and all we need to do is check whether:

	       (sizetype) OP0 == (sizetype) (TYPE) OP0

	   - Conversions from a non-trapping sizetype-size integral type to
	     a like-sized pointer type.  In this case, the recursive call
	     makes sure that:

	       (sizetype) OP0 == *VAR + (sizetype) *OFF

	     and we can convert that to:

	       POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>

	   - Conversions from a sizetype-sized pointer type to a like-sized
	     non-trapping integral type.  In this case, the recursive call
	     makes sure that:

	       OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>

	     where the POINTER_PLUS and *VAR have the same precision as
	     TYPE (and the same precision as sizetype).  Then:

	       (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF.  */
	tree itype = TREE_TYPE (op0);
	if ((POINTER_TYPE_P (itype)
	     || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
	    && (POINTER_TYPE_P (type)
		|| (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
	    && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
		|| (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
		    && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
	  {
	    if (POINTER_TYPE_P (type))
	      {
		split_constant_offset (op0, var, off, nullptr, cache, limit);
		*var = fold_convert (type, *var);
	      }
	    else if (POINTER_TYPE_P (itype))
	      {
		split_constant_offset (op0, var, off, nullptr, cache, limit);
		*var = fold_convert (sizetype, *var);
	      }
	    else
	      {
		split_constant_offset (op0, var, off, &op0_range,
				       cache, limit);
		if (!nop_conversion_for_offset_p (type, itype, op0_range))
		  return false;
		if (result_range)
		  {
		    *result_range = op0_range;
		    range_cast (*result_range, type);
		  }
	      }
	    return true;
	  }
	return false;
      }

    default:
      return false;
    }
}

/* If EXP has pointer type, try to express it as:

     POINTER_PLUS <*VAR, (sizetype) *OFF>

   where:

   - *VAR has the same type as EXP
- *OFF is a constant of type ssizetype. 998 1.1 mrg 999 1.1 mrg If EXP has an integral type, try to express (sizetype) EXP as: 1000 1.1 mrg 1001 1.1 mrg *VAR + (sizetype) *OFF 1002 1.1 mrg 1003 1.1 mrg where: 1004 1.1 mrg 1005 1.1 mrg - *VAR has type sizetype 1006 1.1 mrg - *OFF is a constant of type ssizetype. 1007 1.1 mrg 1008 1.1 mrg If EXP_RANGE is nonnull, set it to the range of EXP. 1009 1.1 mrg 1010 1.1 mrg CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously 1011 1.1 mrg visited. LIMIT counts down the number of SSA names that we are 1012 1.1 mrg allowed to process before giving up. */ 1013 1.1 mrg 1014 1.1 mrg static void 1015 1.1 mrg split_constant_offset (tree exp, tree *var, tree *off, value_range *exp_range, 1016 1.1 mrg hash_map<tree, std::pair<tree, tree> > &cache, 1017 1.1 mrg unsigned *limit) 1018 1.1 mrg { 1019 1.1 mrg tree type = TREE_TYPE (exp), op0, op1; 1020 1.1 mrg enum tree_code code; 1021 1.1 mrg 1022 1.1 mrg code = TREE_CODE (exp); 1023 1.1 mrg if (exp_range) 1024 1.1 mrg { 1025 1.1 mrg *exp_range = type; 1026 1.1 mrg if (code == SSA_NAME) 1027 1.1 mrg { 1028 1.1 mrg value_range vr; 1029 1.1 mrg get_range_query (cfun)->range_of_expr (vr, exp); 1030 1.1 mrg if (vr.undefined_p ()) 1031 1.1 mrg vr.set_varying (TREE_TYPE (exp)); 1032 1.1 mrg wide_int var_min = wi::to_wide (vr.min ()); 1033 1.1 mrg wide_int var_max = wi::to_wide (vr.max ()); 1034 1.1 mrg value_range_kind vr_kind = vr.kind (); 1035 1.1 mrg wide_int var_nonzero = get_nonzero_bits (exp); 1036 1.1 mrg vr_kind = intersect_range_with_nonzero_bits (vr_kind, 1037 1.1 mrg &var_min, &var_max, 1038 1.1 mrg var_nonzero, 1039 1.1 mrg TYPE_SIGN (type)); 1040 1.1 mrg /* This check for VR_VARYING is here because the old code 1041 1.1 mrg using get_range_info would return VR_RANGE for the entire 1042 1.1 mrg domain, instead of VR_VARYING. The new code normalizes 1043 1.1 mrg full-domain ranges to VR_VARYING. 
*/ 1044 1.1 mrg if (vr_kind == VR_RANGE || vr_kind == VR_VARYING) 1045 1.1 mrg *exp_range = value_range (type, var_min, var_max); 1046 1.1 mrg } 1047 1.1 mrg } 1048 1.1 mrg 1049 1.1 mrg if (!tree_is_chrec (exp) 1050 1.1 mrg && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS) 1051 1.1 mrg { 1052 1.1 mrg extract_ops_from_tree (exp, &code, &op0, &op1); 1053 1.1 mrg if (split_constant_offset_1 (type, op0, code, op1, var, off, 1054 1.1 mrg exp_range, cache, limit)) 1055 1.1 mrg return; 1056 1.1 mrg } 1057 1.1 mrg 1058 1.1 mrg *var = exp; 1059 1.1 mrg if (INTEGRAL_TYPE_P (type)) 1060 1.1 mrg *var = fold_convert (sizetype, *var); 1061 1.1 mrg *off = ssize_int (0); 1062 1.1 mrg 1063 1.1 mrg value_range r; 1064 1.1 mrg if (exp_range && code != SSA_NAME 1065 1.1 mrg && get_range_query (cfun)->range_of_expr (r, exp) 1066 1.1 mrg && !r.undefined_p ()) 1067 1.1 mrg *exp_range = r; 1068 1.1 mrg } 1069 1.1 mrg 1070 1.1 mrg /* Expresses EXP as VAR + OFF, where OFF is a constant. VAR has the same 1071 1.1 mrg type as EXP while OFF has type ssizetype. */ 1072 1.1 mrg 1073 1.1 mrg void 1074 1.1 mrg split_constant_offset (tree exp, tree *var, tree *off) 1075 1.1 mrg { 1076 1.1 mrg unsigned limit = param_ssa_name_def_chain_limit; 1077 1.1 mrg static hash_map<tree, std::pair<tree, tree> > *cache; 1078 1.1 mrg if (!cache) 1079 1.1 mrg cache = new hash_map<tree, std::pair<tree, tree> > (37); 1080 1.1 mrg split_constant_offset (exp, var, off, nullptr, *cache, &limit); 1081 1.1 mrg *var = fold_convert (TREE_TYPE (exp), *var); 1082 1.1 mrg cache->empty (); 1083 1.1 mrg } 1084 1.1 mrg 1085 1.1 mrg /* Returns the address ADDR of an object in a canonical shape (without nop 1086 1.1 mrg casts, and with type of pointer to the object). 
*/ 1087 1.1 mrg 1088 1.1 mrg static tree 1089 1.1 mrg canonicalize_base_object_address (tree addr) 1090 1.1 mrg { 1091 1.1 mrg tree orig = addr; 1092 1.1 mrg 1093 1.1 mrg STRIP_NOPS (addr); 1094 1.1 mrg 1095 1.1 mrg /* The base address may be obtained by casting from integer, in that case 1096 1.1 mrg keep the cast. */ 1097 1.1 mrg if (!POINTER_TYPE_P (TREE_TYPE (addr))) 1098 1.1 mrg return orig; 1099 1.1 mrg 1100 1.1 mrg if (TREE_CODE (addr) != ADDR_EXPR) 1101 1.1 mrg return addr; 1102 1.1 mrg 1103 1.1 mrg return build_fold_addr_expr (TREE_OPERAND (addr, 0)); 1104 1.1 mrg } 1105 1.1 mrg 1106 1.1 mrg /* Analyze the behavior of memory reference REF within STMT. 1107 1.1 mrg There are two modes: 1108 1.1 mrg 1109 1.1 mrg - BB analysis. In this case we simply split the address into base, 1110 1.1 mrg init and offset components, without reference to any containing loop. 1111 1.1 mrg The resulting base and offset are general expressions and they can 1112 1.1 mrg vary arbitrarily from one iteration of the containing loop to the next. 1113 1.1 mrg The step is always zero. 1114 1.1 mrg 1115 1.1 mrg - loop analysis. In this case we analyze the reference both wrt LOOP 1116 1.1 mrg and on the basis that the reference occurs (is "used") in LOOP; 1117 1.1 mrg see the comment above analyze_scalar_evolution_in_loop for more 1118 1.1 mrg information about this distinction. The base, init, offset and 1119 1.1 mrg step fields are all invariant in LOOP. 1120 1.1 mrg 1121 1.1 mrg Perform BB analysis if LOOP is null, or if LOOP is the function's 1122 1.1 mrg dummy outermost loop. In other cases perform loop analysis. 1123 1.1 mrg 1124 1.1 mrg Return true if the analysis succeeded and store the results in DRB if so. 1125 1.1 mrg BB analysis can only fail for bitfield or reversed-storage accesses. 
*/

opt_result
dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
		      class loop *loop, const gimple *stmt)
{
  poly_int64 pbitsize, pbitpos;
  tree base, poffset;
  machine_mode pmode;
  int punsignedp, preversep, pvolatilep;
  affine_iv base_iv, offset_iv;
  tree init, dinit, step;
  bool in_loop = (loop && loop->num);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "analyze_innermost: ");

  base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
			      &punsignedp, &preversep, &pvolatilep);
  gcc_assert (base != NULL_TREE);

  poly_int64 pbytepos;
  if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
    return opt_result::failure_at (stmt,
				   "failed: bit offset alignment.\n");

  if (preversep)
    return opt_result::failure_at (stmt,
				   "failed: reverse storage order.\n");

  /* Calculate the alignment and misalignment for the inner reference.  */
  unsigned HOST_WIDE_INT bit_base_misalignment;
  unsigned int bit_base_alignment;
  get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);

  /* There are no bitfield references remaining in BASE, so the values
     we got back must be whole bytes.  */
  gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
	      && bit_base_misalignment % BITS_PER_UNIT == 0);
  unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
  poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;

  if (TREE_CODE (base) == MEM_REF)
    {
      if (!integer_zerop (TREE_OPERAND (base, 1)))
	{
	  /* Subtract MOFF from the base and add it to POFFSET instead.
	     Adjust the misalignment to reflect the amount we subtracted.  */
	  poly_offset_int moff = mem_ref_offset (base);
	  base_misalignment -= moff.force_shwi ();
	  tree mofft = wide_int_to_tree (sizetype, moff);
	  if (!poffset)
	    poffset = mofft;
	  else
	    poffset = size_binop (PLUS_EXPR, poffset, mofft);
	}
      base = TREE_OPERAND (base, 0);
    }
  else
    base = build_fold_addr_expr (base);

  if (in_loop)
    {
      if (!simple_iv (loop, loop, base, &base_iv, true))
	return opt_result::failure_at
	  (stmt, "failed: evolution of base is not affine.\n");
    }
  else
    {
      base_iv.base = base;
      base_iv.step = ssize_int (0);
      base_iv.no_overflow = true;
    }

  if (!poffset)
    {
      offset_iv.base = ssize_int (0);
      offset_iv.step = ssize_int (0);
    }
  else
    {
      if (!in_loop)
	{
	  offset_iv.base = poffset;
	  offset_iv.step = ssize_int (0);
	}
      else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
	return opt_result::failure_at
	  (stmt, "failed: evolution of offset is not affine.\n");
    }

  init = ssize_int (pbytepos);

  /* Subtract any constant component from the base and add it to INIT instead.
     Adjust the misalignment to reflect the amount we subtracted.  */
  split_constant_offset (base_iv.base, &base_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);
  base_misalignment -= TREE_INT_CST_LOW (dinit);

  split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);

  step = size_binop (PLUS_EXPR,
		     fold_convert (ssizetype, base_iv.step),
		     fold_convert (ssizetype, offset_iv.step));

  base = canonicalize_base_object_address (base_iv.base);

  /* See if get_pointer_alignment can guarantee a higher alignment than
     the one we calculated above.  */
  unsigned HOST_WIDE_INT alt_misalignment;
  unsigned int alt_alignment;
  get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);

  /* As above, these values must be whole bytes.  */
  gcc_assert (alt_alignment % BITS_PER_UNIT == 0
	      && alt_misalignment % BITS_PER_UNIT == 0);
  alt_alignment /= BITS_PER_UNIT;
  alt_misalignment /= BITS_PER_UNIT;

  if (base_alignment < alt_alignment)
    {
      base_alignment = alt_alignment;
      base_misalignment = alt_misalignment;
    }

  drb->base_address = base;
  drb->offset = fold_convert (ssizetype, offset_iv.base);
  drb->init = init;
  drb->step = step;
  if (known_misalignment (base_misalignment, base_alignment,
			  &drb->base_misalignment))
    drb->base_alignment = base_alignment;
  else
    {
      drb->base_alignment = known_alignment (base_misalignment);
      drb->base_misalignment = 0;
    }
  drb->offset_alignment = highest_pow2_factor (offset_iv.base);
  drb->step_alignment = highest_pow2_factor (step);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "success.\n");

  return opt_result::success ();
}

/* Return true if OP is a valid component reference for a DR access
   function.  This accepts a subset of what handled_component_p accepts.
*/ 1274 1.1 mrg 1275 1.1 mrg static bool 1276 1.1 mrg access_fn_component_p (tree op) 1277 1.1 mrg { 1278 1.1 mrg switch (TREE_CODE (op)) 1279 1.1 mrg { 1280 1.1 mrg case REALPART_EXPR: 1281 1.1 mrg case IMAGPART_EXPR: 1282 1.1 mrg case ARRAY_REF: 1283 1.1 mrg return true; 1284 1.1 mrg 1285 1.1 mrg case COMPONENT_REF: 1286 1.1 mrg return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE; 1287 1.1 mrg 1288 1.1 mrg default: 1289 1.1 mrg return false; 1290 1.1 mrg } 1291 1.1 mrg } 1292 1.1 mrg 1293 1.1 mrg /* Returns whether BASE can have a access_fn_component_p with BASE 1294 1.1 mrg as base. */ 1295 1.1 mrg 1296 1.1 mrg static bool 1297 1.1 mrg base_supports_access_fn_components_p (tree base) 1298 1.1 mrg { 1299 1.1 mrg switch (TREE_CODE (TREE_TYPE (base))) 1300 1.1 mrg { 1301 1.1 mrg case COMPLEX_TYPE: 1302 1.1 mrg case ARRAY_TYPE: 1303 1.1 mrg case RECORD_TYPE: 1304 1.1 mrg return true; 1305 1.1 mrg default: 1306 1.1 mrg return false; 1307 1.1 mrg } 1308 1.1 mrg } 1309 1.1 mrg 1310 1.1 mrg /* Determines the base object and the list of indices of memory reference 1311 1.1 mrg DR, analyzed in LOOP and instantiated before NEST. */ 1312 1.1 mrg 1313 1.1 mrg static void 1314 1.1 mrg dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop) 1315 1.1 mrg { 1316 1.1 mrg /* If analyzing a basic-block there are no indices to analyze 1317 1.1 mrg and thus no access functions. */ 1318 1.1 mrg if (!nest) 1319 1.1 mrg { 1320 1.1 mrg dri->base_object = ref; 1321 1.1 mrg dri->access_fns.create (0); 1322 1.1 mrg return; 1323 1.1 mrg } 1324 1.1 mrg 1325 1.1 mrg vec<tree> access_fns = vNULL; 1326 1.1 mrg 1327 1.1 mrg /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses 1328 1.1 mrg into a two element array with a constant index. The base is 1329 1.1 mrg then just the immediate underlying object. 
*/ 1330 1.1 mrg if (TREE_CODE (ref) == REALPART_EXPR) 1331 1.1 mrg { 1332 1.1 mrg ref = TREE_OPERAND (ref, 0); 1333 1.1 mrg access_fns.safe_push (integer_zero_node); 1334 1.1 mrg } 1335 1.1 mrg else if (TREE_CODE (ref) == IMAGPART_EXPR) 1336 1.1 mrg { 1337 1.1 mrg ref = TREE_OPERAND (ref, 0); 1338 1.1 mrg access_fns.safe_push (integer_one_node); 1339 1.1 mrg } 1340 1.1 mrg 1341 1.1 mrg /* Analyze access functions of dimensions we know to be independent. 1342 1.1 mrg The list of component references handled here should be kept in 1343 1.1 mrg sync with access_fn_component_p. */ 1344 1.1 mrg while (handled_component_p (ref)) 1345 1.1 mrg { 1346 1.1 mrg if (TREE_CODE (ref) == ARRAY_REF) 1347 1.1 mrg { 1348 1.1 mrg tree op = TREE_OPERAND (ref, 1); 1349 1.1 mrg tree access_fn = analyze_scalar_evolution (loop, op); 1350 1.1 mrg access_fn = instantiate_scev (nest, loop, access_fn); 1351 1.1 mrg access_fns.safe_push (access_fn); 1352 1.1 mrg } 1353 1.1 mrg else if (TREE_CODE (ref) == COMPONENT_REF 1354 1.1 mrg && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE) 1355 1.1 mrg { 1356 1.1 mrg /* For COMPONENT_REFs of records (but not unions!) use the 1357 1.1 mrg FIELD_DECL offset as constant access function so we can 1358 1.1 mrg disambiguate a[i].f1 and a[i].f2. */ 1359 1.1 mrg tree off = component_ref_field_offset (ref); 1360 1.1 mrg off = size_binop (PLUS_EXPR, 1361 1.1 mrg size_binop (MULT_EXPR, 1362 1.1 mrg fold_convert (bitsizetype, off), 1363 1.1 mrg bitsize_int (BITS_PER_UNIT)), 1364 1.1 mrg DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1))); 1365 1.1 mrg access_fns.safe_push (off); 1366 1.1 mrg } 1367 1.1 mrg else 1368 1.1 mrg /* If we have an unhandled component we could not translate 1369 1.1 mrg to an access function stop analyzing. We have determined 1370 1.1 mrg our base object in this case. 
*/ 1371 1.1 mrg break; 1372 1.1 mrg 1373 1.1 mrg ref = TREE_OPERAND (ref, 0); 1374 1.1 mrg } 1375 1.1 mrg 1376 1.1 mrg /* If the address operand of a MEM_REF base has an evolution in the 1377 1.1 mrg analyzed nest, add it as an additional independent access-function. */ 1378 1.1 mrg if (TREE_CODE (ref) == MEM_REF) 1379 1.1 mrg { 1380 1.1 mrg tree op = TREE_OPERAND (ref, 0); 1381 1.1 mrg tree access_fn = analyze_scalar_evolution (loop, op); 1382 1.1 mrg access_fn = instantiate_scev (nest, loop, access_fn); 1383 1.1 mrg STRIP_NOPS (access_fn); 1384 1.1 mrg if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC) 1385 1.1 mrg { 1386 1.1 mrg tree memoff = TREE_OPERAND (ref, 1); 1387 1.1 mrg tree base = initial_condition (access_fn); 1388 1.1 mrg tree orig_type = TREE_TYPE (base); 1389 1.1 mrg STRIP_USELESS_TYPE_CONVERSION (base); 1390 1.1 mrg tree off; 1391 1.1 mrg split_constant_offset (base, &base, &off); 1392 1.1 mrg STRIP_USELESS_TYPE_CONVERSION (base); 1393 1.1 mrg /* Fold the MEM_REF offset into the evolutions initial 1394 1.1 mrg value to make more bases comparable. */ 1395 1.1 mrg if (!integer_zerop (memoff)) 1396 1.1 mrg { 1397 1.1 mrg off = size_binop (PLUS_EXPR, off, 1398 1.1 mrg fold_convert (ssizetype, memoff)); 1399 1.1 mrg memoff = build_int_cst (TREE_TYPE (memoff), 0); 1400 1.1 mrg } 1401 1.1 mrg /* Adjust the offset so it is a multiple of the access type 1402 1.1 mrg size and thus we separate bases that can possibly be used 1403 1.1 mrg to produce partial overlaps (which the access_fn machinery 1404 1.1 mrg cannot handle). 
*/ 1405 1.1 mrg wide_int rem; 1406 1.1 mrg if (TYPE_SIZE_UNIT (TREE_TYPE (ref)) 1407 1.1 mrg && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST 1408 1.1 mrg && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref)))) 1409 1.1 mrg rem = wi::mod_trunc 1410 1.1 mrg (wi::to_wide (off), 1411 1.1 mrg wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))), 1412 1.1 mrg SIGNED); 1413 1.1 mrg else 1414 1.1 mrg /* If we can't compute the remainder simply force the initial 1415 1.1 mrg condition to zero. */ 1416 1.1 mrg rem = wi::to_wide (off); 1417 1.1 mrg off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem); 1418 1.1 mrg memoff = wide_int_to_tree (TREE_TYPE (memoff), rem); 1419 1.1 mrg /* And finally replace the initial condition. */ 1420 1.1 mrg access_fn = chrec_replace_initial_condition 1421 1.1 mrg (access_fn, fold_convert (orig_type, off)); 1422 1.1 mrg /* ??? This is still not a suitable base object for 1423 1.1 mrg dr_may_alias_p - the base object needs to be an 1424 1.1 mrg access that covers the object as whole. With 1425 1.1 mrg an evolution in the pointer this cannot be 1426 1.1 mrg guaranteed. 1427 1.1 mrg As a band-aid, mark the access so we can special-case 1428 1.1 mrg it in dr_may_alias_p. */ 1429 1.1 mrg tree old = ref; 1430 1.1 mrg ref = fold_build2_loc (EXPR_LOCATION (ref), 1431 1.1 mrg MEM_REF, TREE_TYPE (ref), 1432 1.1 mrg base, memoff); 1433 1.1 mrg MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old); 1434 1.1 mrg MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old); 1435 1.1 mrg dri->unconstrained_base = true; 1436 1.1 mrg access_fns.safe_push (access_fn); 1437 1.1 mrg } 1438 1.1 mrg } 1439 1.1 mrg else if (DECL_P (ref)) 1440 1.1 mrg { 1441 1.1 mrg /* Canonicalize DR_BASE_OBJECT to MEM_REF form. 
*/ 1442 1.1 mrg ref = build2 (MEM_REF, TREE_TYPE (ref), 1443 1.1 mrg build_fold_addr_expr (ref), 1444 1.1 mrg build_int_cst (reference_alias_ptr_type (ref), 0)); 1445 1.1 mrg } 1446 1.1 mrg 1447 1.1 mrg dri->base_object = ref; 1448 1.1 mrg dri->access_fns = access_fns; 1449 1.1 mrg } 1450 1.1 mrg 1451 1.1 mrg /* Extracts the alias analysis information from the memory reference DR. */ 1452 1.1 mrg 1453 1.1 mrg static void 1454 1.1 mrg dr_analyze_alias (struct data_reference *dr) 1455 1.1 mrg { 1456 1.1 mrg tree ref = DR_REF (dr); 1457 1.1 mrg tree base = get_base_address (ref), addr; 1458 1.1 mrg 1459 1.1 mrg if (INDIRECT_REF_P (base) 1460 1.1 mrg || TREE_CODE (base) == MEM_REF) 1461 1.1 mrg { 1462 1.1 mrg addr = TREE_OPERAND (base, 0); 1463 1.1 mrg if (TREE_CODE (addr) == SSA_NAME) 1464 1.1 mrg DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr); 1465 1.1 mrg } 1466 1.1 mrg } 1467 1.1 mrg 1468 1.1 mrg /* Frees data reference DR. */ 1469 1.1 mrg 1470 1.1 mrg void 1471 1.1 mrg free_data_ref (data_reference_p dr) 1472 1.1 mrg { 1473 1.1 mrg DR_ACCESS_FNS (dr).release (); 1474 1.1 mrg if (dr->alt_indices.base_object) 1475 1.1 mrg dr->alt_indices.access_fns.release (); 1476 1.1 mrg free (dr); 1477 1.1 mrg } 1478 1.1 mrg 1479 1.1 mrg /* Analyze memory reference MEMREF, which is accessed in STMT. 1480 1.1 mrg The reference is a read if IS_READ is true, otherwise it is a write. 1481 1.1 mrg IS_CONDITIONAL_IN_STMT indicates that the reference is conditional 1482 1.1 mrg within STMT, i.e. that it might not occur even if STMT is executed 1483 1.1 mrg and runs to completion. 1484 1.1 mrg 1485 1.1 mrg Return the data_reference description of MEMREF. NEST is the outermost 1486 1.1 mrg loop in which the reference should be instantiated, LOOP is the loop 1487 1.1 mrg in which the data reference should be analyzed. 
*/ 1488 1.1 mrg 1489 1.1 mrg struct data_reference * 1490 1.1 mrg create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt, 1491 1.1 mrg bool is_read, bool is_conditional_in_stmt) 1492 1.1 mrg { 1493 1.1 mrg struct data_reference *dr; 1494 1.1 mrg 1495 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 1496 1.1 mrg { 1497 1.1 mrg fprintf (dump_file, "Creating dr for "); 1498 1.1 mrg print_generic_expr (dump_file, memref, TDF_SLIM); 1499 1.1 mrg fprintf (dump_file, "\n"); 1500 1.1 mrg } 1501 1.1 mrg 1502 1.1 mrg dr = XCNEW (struct data_reference); 1503 1.1 mrg DR_STMT (dr) = stmt; 1504 1.1 mrg DR_REF (dr) = memref; 1505 1.1 mrg DR_IS_READ (dr) = is_read; 1506 1.1 mrg DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt; 1507 1.1 mrg 1508 1.1 mrg dr_analyze_innermost (&DR_INNERMOST (dr), memref, 1509 1.1 mrg nest != NULL ? loop : NULL, stmt); 1510 1.1 mrg dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop); 1511 1.1 mrg dr_analyze_alias (dr); 1512 1.1 mrg 1513 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 1514 1.1 mrg { 1515 1.1 mrg unsigned i; 1516 1.1 mrg fprintf (dump_file, "\tbase_address: "); 1517 1.1 mrg print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM); 1518 1.1 mrg fprintf (dump_file, "\n\toffset from base address: "); 1519 1.1 mrg print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM); 1520 1.1 mrg fprintf (dump_file, "\n\tconstant offset from base address: "); 1521 1.1 mrg print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM); 1522 1.1 mrg fprintf (dump_file, "\n\tstep: "); 1523 1.1 mrg print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM); 1524 1.1 mrg fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr)); 1525 1.1 mrg fprintf (dump_file, "\n\tbase misalignment: %d", 1526 1.1 mrg DR_BASE_MISALIGNMENT (dr)); 1527 1.1 mrg fprintf (dump_file, "\n\toffset alignment: %d", 1528 1.1 mrg DR_OFFSET_ALIGNMENT (dr)); 1529 1.1 mrg fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr)); 1530 
1.1 mrg fprintf (dump_file, "\n\tbase_object: "); 1531 1.1 mrg print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM); 1532 1.1 mrg fprintf (dump_file, "\n"); 1533 1.1 mrg for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++) 1534 1.1 mrg { 1535 1.1 mrg fprintf (dump_file, "\tAccess function %d: ", i); 1536 1.1 mrg print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM); 1537 1.1 mrg } 1538 1.1 mrg } 1539 1.1 mrg 1540 1.1 mrg return dr; 1541 1.1 mrg } 1542 1.1 mrg 1543 1.1 mrg /* A helper function computes order between two tree expressions T1 and T2. 1544 1.1 mrg This is used in comparator functions sorting objects based on the order 1545 1.1 mrg of tree expressions. The function returns -1, 0, or 1. */ 1546 1.1 mrg 1547 1.1 mrg int 1548 1.1 mrg data_ref_compare_tree (tree t1, tree t2) 1549 1.1 mrg { 1550 1.1 mrg int i, cmp; 1551 1.1 mrg enum tree_code code; 1552 1.1 mrg char tclass; 1553 1.1 mrg 1554 1.1 mrg if (t1 == t2) 1555 1.1 mrg return 0; 1556 1.1 mrg if (t1 == NULL) 1557 1.1 mrg return -1; 1558 1.1 mrg if (t2 == NULL) 1559 1.1 mrg return 1; 1560 1.1 mrg 1561 1.1 mrg STRIP_USELESS_TYPE_CONVERSION (t1); 1562 1.1 mrg STRIP_USELESS_TYPE_CONVERSION (t2); 1563 1.1 mrg if (t1 == t2) 1564 1.1 mrg return 0; 1565 1.1 mrg 1566 1.1 mrg if (TREE_CODE (t1) != TREE_CODE (t2) 1567 1.1 mrg && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2))) 1568 1.1 mrg return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1; 1569 1.1 mrg 1570 1.1 mrg code = TREE_CODE (t1); 1571 1.1 mrg switch (code) 1572 1.1 mrg { 1573 1.1 mrg case INTEGER_CST: 1574 1.1 mrg return tree_int_cst_compare (t1, t2); 1575 1.1 mrg 1576 1.1 mrg case STRING_CST: 1577 1.1 mrg if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2)) 1578 1.1 mrg return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? 
-1 : 1; 1579 1.1 mrg return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2), 1580 1.1 mrg TREE_STRING_LENGTH (t1)); 1581 1.1 mrg 1582 1.1 mrg case SSA_NAME: 1583 1.1 mrg if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2)) 1584 1.1 mrg return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1; 1585 1.1 mrg break; 1586 1.1 mrg 1587 1.1 mrg default: 1588 1.1 mrg if (POLY_INT_CST_P (t1)) 1589 1.1 mrg return compare_sizes_for_sort (wi::to_poly_widest (t1), 1590 1.1 mrg wi::to_poly_widest (t2)); 1591 1.1 mrg 1592 1.1 mrg tclass = TREE_CODE_CLASS (code); 1593 1.1 mrg 1594 1.1 mrg /* For decls, compare their UIDs. */ 1595 1.1 mrg if (tclass == tcc_declaration) 1596 1.1 mrg { 1597 1.1 mrg if (DECL_UID (t1) != DECL_UID (t2)) 1598 1.1 mrg return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1; 1599 1.1 mrg break; 1600 1.1 mrg } 1601 1.1 mrg /* For expressions, compare their operands recursively. */ 1602 1.1 mrg else if (IS_EXPR_CODE_CLASS (tclass)) 1603 1.1 mrg { 1604 1.1 mrg for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i) 1605 1.1 mrg { 1606 1.1 mrg cmp = data_ref_compare_tree (TREE_OPERAND (t1, i), 1607 1.1 mrg TREE_OPERAND (t2, i)); 1608 1.1 mrg if (cmp != 0) 1609 1.1 mrg return cmp; 1610 1.1 mrg } 1611 1.1 mrg } 1612 1.1 mrg else 1613 1.1 mrg gcc_unreachable (); 1614 1.1 mrg } 1615 1.1 mrg 1616 1.1 mrg return 0; 1617 1.1 mrg } 1618 1.1 mrg 1619 1.1 mrg /* Return TRUE it's possible to resolve data dependence DDR by runtime alias 1620 1.1 mrg check. 
*/ 1621 1.1 mrg 1622 1.1 mrg opt_result 1623 1.1 mrg runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p) 1624 1.1 mrg { 1625 1.1 mrg if (dump_enabled_p ()) 1626 1.1 mrg dump_printf (MSG_NOTE, 1627 1.1 mrg "consider run-time aliasing test between %T and %T\n", 1628 1.1 mrg DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr))); 1629 1.1 mrg 1630 1.1 mrg if (!speed_p) 1631 1.1 mrg return opt_result::failure_at (DR_STMT (DDR_A (ddr)), 1632 1.1 mrg "runtime alias check not supported when" 1633 1.1 mrg " optimizing for size.\n"); 1634 1.1 mrg 1635 1.1 mrg /* FORNOW: We don't support versioning with outer-loop in either 1636 1.1 mrg vectorization or loop distribution. */ 1637 1.1 mrg if (loop != NULL && loop->inner != NULL) 1638 1.1 mrg return opt_result::failure_at (DR_STMT (DDR_A (ddr)), 1639 1.1 mrg "runtime alias check not supported for" 1640 1.1 mrg " outer loop.\n"); 1641 1.1 mrg 1642 1.1 mrg /* FORNOW: We don't support handling different address spaces. */ 1643 1.1 mrg if (TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_A (ddr))))) 1644 1.1 mrg != TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_B (ddr)))))) 1645 1.1 mrg return opt_result::failure_at (DR_STMT (DDR_A (ddr)), 1646 1.1 mrg "runtime alias check between different " 1647 1.1 mrg "address spaces not supported.\n"); 1648 1.1 mrg 1649 1.1 mrg return opt_result::success (); 1650 1.1 mrg } 1651 1.1 mrg 1652 1.1 mrg /* Operator == between two dr_with_seg_len objects. 1653 1.1 mrg 1654 1.1 mrg This equality operator is used to make sure two data refs 1655 1.1 mrg are the same one so that we will consider to combine the 1656 1.1 mrg aliasing checks of those two pairs of data dependent data 1657 1.1 mrg refs. 
*/ 1658 1.1 mrg 1659 1.1 mrg static bool 1660 1.1 mrg operator == (const dr_with_seg_len& d1, 1661 1.1 mrg const dr_with_seg_len& d2) 1662 1.1 mrg { 1663 1.1 mrg return (operand_equal_p (DR_BASE_ADDRESS (d1.dr), 1664 1.1 mrg DR_BASE_ADDRESS (d2.dr), 0) 1665 1.1 mrg && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0 1666 1.1 mrg && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0 1667 1.1 mrg && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0 1668 1.1 mrg && known_eq (d1.access_size, d2.access_size) 1669 1.1 mrg && d1.align == d2.align); 1670 1.1 mrg } 1671 1.1 mrg 1672 1.1 mrg /* Comparison function for sorting objects of dr_with_seg_len_pair_t 1673 1.1 mrg so that we can combine aliasing checks in one scan. */ 1674 1.1 mrg 1675 1.1 mrg static int 1676 1.1 mrg comp_dr_with_seg_len_pair (const void *pa_, const void *pb_) 1677 1.1 mrg { 1678 1.1 mrg const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_; 1679 1.1 mrg const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_; 1680 1.1 mrg const dr_with_seg_len &a1 = pa->first, &a2 = pa->second; 1681 1.1 mrg const dr_with_seg_len &b1 = pb->first, &b2 = pb->second; 1682 1.1 mrg 1683 1.1 mrg /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks 1684 1.1 mrg if a and c have the same basic address snd step, and b and d have the same 1685 1.1 mrg address and step. Therefore, if any a&c or b&d don't have the same address 1686 1.1 mrg and step, we don't care the order of those two pairs after sorting. 
*/ 1687 1.1 mrg int comp_res; 1688 1.1 mrg 1689 1.1 mrg if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr), 1690 1.1 mrg DR_BASE_ADDRESS (b1.dr))) != 0) 1691 1.1 mrg return comp_res; 1692 1.1 mrg if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr), 1693 1.1 mrg DR_BASE_ADDRESS (b2.dr))) != 0) 1694 1.1 mrg return comp_res; 1695 1.1 mrg if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr), 1696 1.1 mrg DR_STEP (b1.dr))) != 0) 1697 1.1 mrg return comp_res; 1698 1.1 mrg if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr), 1699 1.1 mrg DR_STEP (b2.dr))) != 0) 1700 1.1 mrg return comp_res; 1701 1.1 mrg if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr), 1702 1.1 mrg DR_OFFSET (b1.dr))) != 0) 1703 1.1 mrg return comp_res; 1704 1.1 mrg if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr), 1705 1.1 mrg DR_INIT (b1.dr))) != 0) 1706 1.1 mrg return comp_res; 1707 1.1 mrg if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr), 1708 1.1 mrg DR_OFFSET (b2.dr))) != 0) 1709 1.1 mrg return comp_res; 1710 1.1 mrg if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr), 1711 1.1 mrg DR_INIT (b2.dr))) != 0) 1712 1.1 mrg return comp_res; 1713 1.1 mrg 1714 1.1 mrg return 0; 1715 1.1 mrg } 1716 1.1 mrg 1717 1.1 mrg /* Dump information about ALIAS_PAIR, indenting each line by INDENT. */ 1718 1.1 mrg 1719 1.1 mrg static void 1720 1.1 mrg dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent) 1721 1.1 mrg { 1722 1.1 mrg dump_printf (MSG_NOTE, "%sreference: %T vs. %T\n", indent, 1723 1.1 mrg DR_REF (alias_pair->first.dr), 1724 1.1 mrg DR_REF (alias_pair->second.dr)); 1725 1.1 mrg 1726 1.1 mrg dump_printf (MSG_NOTE, "%ssegment length: %T", indent, 1727 1.1 mrg alias_pair->first.seg_len); 1728 1.1 mrg if (!operand_equal_p (alias_pair->first.seg_len, 1729 1.1 mrg alias_pair->second.seg_len, 0)) 1730 1.1 mrg dump_printf (MSG_NOTE, " vs. 
%T", alias_pair->second.seg_len); 1731 1.1 mrg 1732 1.1 mrg dump_printf (MSG_NOTE, "\n%saccess size: ", indent); 1733 1.1 mrg dump_dec (MSG_NOTE, alias_pair->first.access_size); 1734 1.1 mrg if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size)) 1735 1.1 mrg { 1736 1.1 mrg dump_printf (MSG_NOTE, " vs. "); 1737 1.1 mrg dump_dec (MSG_NOTE, alias_pair->second.access_size); 1738 1.1 mrg } 1739 1.1 mrg 1740 1.1 mrg dump_printf (MSG_NOTE, "\n%salignment: %d", indent, 1741 1.1 mrg alias_pair->first.align); 1742 1.1 mrg if (alias_pair->first.align != alias_pair->second.align) 1743 1.1 mrg dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align); 1744 1.1 mrg 1745 1.1 mrg dump_printf (MSG_NOTE, "\n%sflags: ", indent); 1746 1.1 mrg if (alias_pair->flags & DR_ALIAS_RAW) 1747 1.1 mrg dump_printf (MSG_NOTE, " RAW"); 1748 1.1 mrg if (alias_pair->flags & DR_ALIAS_WAR) 1749 1.1 mrg dump_printf (MSG_NOTE, " WAR"); 1750 1.1 mrg if (alias_pair->flags & DR_ALIAS_WAW) 1751 1.1 mrg dump_printf (MSG_NOTE, " WAW"); 1752 1.1 mrg if (alias_pair->flags & DR_ALIAS_ARBITRARY) 1753 1.1 mrg dump_printf (MSG_NOTE, " ARBITRARY"); 1754 1.1 mrg if (alias_pair->flags & DR_ALIAS_SWAPPED) 1755 1.1 mrg dump_printf (MSG_NOTE, " SWAPPED"); 1756 1.1 mrg if (alias_pair->flags & DR_ALIAS_UNSWAPPED) 1757 1.1 mrg dump_printf (MSG_NOTE, " UNSWAPPED"); 1758 1.1 mrg if (alias_pair->flags & DR_ALIAS_MIXED_STEPS) 1759 1.1 mrg dump_printf (MSG_NOTE, " MIXED_STEPS"); 1760 1.1 mrg if (alias_pair->flags == 0) 1761 1.1 mrg dump_printf (MSG_NOTE, " <none>"); 1762 1.1 mrg dump_printf (MSG_NOTE, "\n"); 1763 1.1 mrg } 1764 1.1 mrg 1765 1.1 mrg /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones. 1766 1.1 mrg FACTOR is number of iterations that each data reference is accessed. 
1767 1.1 mrg 1768 1.1 mrg Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0, 1769 1.1 mrg we create an expression: 1770 1.1 mrg 1771 1.1 mrg ((store_ptr_0 + store_segment_length_0) <= load_ptr_0) 1772 1.1 mrg || (load_ptr_0 + load_segment_length_0) <= store_ptr_0)) 1773 1.1 mrg 1774 1.1 mrg for aliasing checks. However, in some cases we can decrease the number 1775 1.1 mrg of checks by combining two checks into one. For example, suppose we have 1776 1.1 mrg another pair of data refs store_ptr_0 & load_ptr_1, and if the following 1777 1.1 mrg condition is satisfied: 1778 1.1 mrg 1779 1.1 mrg load_ptr_0 < load_ptr_1 && 1780 1.1 mrg load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0 1781 1.1 mrg 1782 1.1 mrg (this condition means, in each iteration of vectorized loop, the accessed 1783 1.1 mrg memory of store_ptr_0 cannot be between the memory of load_ptr_0 and 1784 1.1 mrg load_ptr_1.) 1785 1.1 mrg 1786 1.1 mrg we then can use only the following expression to finish the alising checks 1787 1.1 mrg between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1: 1788 1.1 mrg 1789 1.1 mrg ((store_ptr_0 + store_segment_length_0) <= load_ptr_0) 1790 1.1 mrg || (load_ptr_1 + load_segment_length_1 <= store_ptr_0)) 1791 1.1 mrg 1792 1.1 mrg Note that we only consider that load_ptr_0 and load_ptr_1 have the same 1793 1.1 mrg basic address. */ 1794 1.1 mrg 1795 1.1 mrg void 1796 1.1 mrg prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs, 1797 1.1 mrg poly_uint64) 1798 1.1 mrg { 1799 1.1 mrg if (alias_pairs->is_empty ()) 1800 1.1 mrg return; 1801 1.1 mrg 1802 1.1 mrg /* Canonicalize each pair so that the base components are ordered wrt 1803 1.1 mrg data_ref_compare_tree. This allows the loop below to merge more 1804 1.1 mrg cases. 
*/ 1805 1.1 mrg unsigned int i; 1806 1.1 mrg dr_with_seg_len_pair_t *alias_pair; 1807 1.1 mrg FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair) 1808 1.1 mrg { 1809 1.1 mrg data_reference_p dr_a = alias_pair->first.dr; 1810 1.1 mrg data_reference_p dr_b = alias_pair->second.dr; 1811 1.1 mrg int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a), 1812 1.1 mrg DR_BASE_ADDRESS (dr_b)); 1813 1.1 mrg if (comp_res == 0) 1814 1.1 mrg comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b)); 1815 1.1 mrg if (comp_res == 0) 1816 1.1 mrg comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b)); 1817 1.1 mrg if (comp_res > 0) 1818 1.1 mrg { 1819 1.1 mrg std::swap (alias_pair->first, alias_pair->second); 1820 1.1 mrg alias_pair->flags |= DR_ALIAS_SWAPPED; 1821 1.1 mrg } 1822 1.1 mrg else 1823 1.1 mrg alias_pair->flags |= DR_ALIAS_UNSWAPPED; 1824 1.1 mrg } 1825 1.1 mrg 1826 1.1 mrg /* Sort the collected data ref pairs so that we can scan them once to 1827 1.1 mrg combine all possible aliasing checks. */ 1828 1.1 mrg alias_pairs->qsort (comp_dr_with_seg_len_pair); 1829 1.1 mrg 1830 1.1 mrg /* Scan the sorted dr pairs and check if we can combine alias checks 1831 1.1 mrg of two neighboring dr pairs. */ 1832 1.1 mrg unsigned int last = 0; 1833 1.1 mrg for (i = 1; i < alias_pairs->length (); ++i) 1834 1.1 mrg { 1835 1.1 mrg /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */ 1836 1.1 mrg dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last]; 1837 1.1 mrg dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i]; 1838 1.1 mrg 1839 1.1 mrg dr_with_seg_len *dr_a1 = &alias_pair1->first; 1840 1.1 mrg dr_with_seg_len *dr_b1 = &alias_pair1->second; 1841 1.1 mrg dr_with_seg_len *dr_a2 = &alias_pair2->first; 1842 1.1 mrg dr_with_seg_len *dr_b2 = &alias_pair2->second; 1843 1.1 mrg 1844 1.1 mrg /* Remove duplicate data ref pairs. 
*/ 1845 1.1 mrg if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2) 1846 1.1 mrg { 1847 1.1 mrg if (dump_enabled_p ()) 1848 1.1 mrg dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n", 1849 1.1 mrg DR_REF (dr_a1->dr), DR_REF (dr_b1->dr), 1850 1.1 mrg DR_REF (dr_a2->dr), DR_REF (dr_b2->dr)); 1851 1.1 mrg alias_pair1->flags |= alias_pair2->flags; 1852 1.1 mrg continue; 1853 1.1 mrg } 1854 1.1 mrg 1855 1.1 mrg /* Assume that we won't be able to merge the pairs, then correct 1856 1.1 mrg if we do. */ 1857 1.1 mrg last += 1; 1858 1.1 mrg if (last != i) 1859 1.1 mrg (*alias_pairs)[last] = (*alias_pairs)[i]; 1860 1.1 mrg 1861 1.1 mrg if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2) 1862 1.1 mrg { 1863 1.1 mrg /* We consider the case that DR_B1 and DR_B2 are same memrefs, 1864 1.1 mrg and DR_A1 and DR_A2 are two consecutive memrefs. */ 1865 1.1 mrg if (*dr_a1 == *dr_a2) 1866 1.1 mrg { 1867 1.1 mrg std::swap (dr_a1, dr_b1); 1868 1.1 mrg std::swap (dr_a2, dr_b2); 1869 1.1 mrg } 1870 1.1 mrg 1871 1.1 mrg poly_int64 init_a1, init_a2; 1872 1.1 mrg /* Only consider cases in which the distance between the initial 1873 1.1 mrg DR_A1 and the initial DR_A2 is known at compile time. */ 1874 1.1 mrg if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr), 1875 1.1 mrg DR_BASE_ADDRESS (dr_a2->dr), 0) 1876 1.1 mrg || !operand_equal_p (DR_OFFSET (dr_a1->dr), 1877 1.1 mrg DR_OFFSET (dr_a2->dr), 0) 1878 1.1 mrg || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1) 1879 1.1 mrg || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2)) 1880 1.1 mrg continue; 1881 1.1 mrg 1882 1.1 mrg /* Don't combine if we can't tell which one comes first. */ 1883 1.1 mrg if (!ordered_p (init_a1, init_a2)) 1884 1.1 mrg continue; 1885 1.1 mrg 1886 1.1 mrg /* Work out what the segment length would be if we did combine 1887 1.1 mrg DR_A1 and DR_A2: 1888 1.1 mrg 1889 1.1 mrg - If DR_A1 and DR_A2 have equal lengths, that length is 1890 1.1 mrg also the combined length. 
1891 1.1 mrg 1892 1.1 mrg - If DR_A1 and DR_A2 both have negative "lengths", the combined 1893 1.1 mrg length is the lower bound on those lengths. 1894 1.1 mrg 1895 1.1 mrg - If DR_A1 and DR_A2 both have positive lengths, the combined 1896 1.1 mrg length is the upper bound on those lengths. 1897 1.1 mrg 1898 1.1 mrg Other cases are unlikely to give a useful combination. 1899 1.1 mrg 1900 1.1 mrg The lengths both have sizetype, so the sign is taken from 1901 1.1 mrg the step instead. */ 1902 1.1 mrg poly_uint64 new_seg_len = 0; 1903 1.1 mrg bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len, 1904 1.1 mrg dr_a2->seg_len, 0); 1905 1.1 mrg if (new_seg_len_p) 1906 1.1 mrg { 1907 1.1 mrg poly_uint64 seg_len_a1, seg_len_a2; 1908 1.1 mrg if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1) 1909 1.1 mrg || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2)) 1910 1.1 mrg continue; 1911 1.1 mrg 1912 1.1 mrg tree indicator_a = dr_direction_indicator (dr_a1->dr); 1913 1.1 mrg if (TREE_CODE (indicator_a) != INTEGER_CST) 1914 1.1 mrg continue; 1915 1.1 mrg 1916 1.1 mrg tree indicator_b = dr_direction_indicator (dr_a2->dr); 1917 1.1 mrg if (TREE_CODE (indicator_b) != INTEGER_CST) 1918 1.1 mrg continue; 1919 1.1 mrg 1920 1.1 mrg int sign_a = tree_int_cst_sgn (indicator_a); 1921 1.1 mrg int sign_b = tree_int_cst_sgn (indicator_b); 1922 1.1 mrg 1923 1.1 mrg if (sign_a <= 0 && sign_b <= 0) 1924 1.1 mrg new_seg_len = lower_bound (seg_len_a1, seg_len_a2); 1925 1.1 mrg else if (sign_a >= 0 && sign_b >= 0) 1926 1.1 mrg new_seg_len = upper_bound (seg_len_a1, seg_len_a2); 1927 1.1 mrg else 1928 1.1 mrg continue; 1929 1.1 mrg } 1930 1.1 mrg /* At this point we're committed to merging the refs. */ 1931 1.1 mrg 1932 1.1 mrg /* Make sure dr_a1 starts left of dr_a2. 
*/ 1933 1.1 mrg if (maybe_gt (init_a1, init_a2)) 1934 1.1 mrg { 1935 1.1 mrg std::swap (*dr_a1, *dr_a2); 1936 1.1 mrg std::swap (init_a1, init_a2); 1937 1.1 mrg } 1938 1.1 mrg 1939 1.1 mrg /* The DR_Bs are equal, so only the DR_As can introduce 1940 1.1 mrg mixed steps. */ 1941 1.1 mrg if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0)) 1942 1.1 mrg alias_pair1->flags |= DR_ALIAS_MIXED_STEPS; 1943 1.1 mrg 1944 1.1 mrg if (new_seg_len_p) 1945 1.1 mrg { 1946 1.1 mrg dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len), 1947 1.1 mrg new_seg_len); 1948 1.1 mrg dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len)); 1949 1.1 mrg } 1950 1.1 mrg 1951 1.1 mrg /* This is always positive due to the swap above. */ 1952 1.1 mrg poly_uint64 diff = init_a2 - init_a1; 1953 1.1 mrg 1954 1.1 mrg /* The new check will start at DR_A1. Make sure that its access 1955 1.1 mrg size encompasses the initial DR_A2. */ 1956 1.1 mrg if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size)) 1957 1.1 mrg { 1958 1.1 mrg dr_a1->access_size = upper_bound (dr_a1->access_size, 1959 1.1 mrg diff + dr_a2->access_size); 1960 1.1 mrg unsigned int new_align = known_alignment (dr_a1->access_size); 1961 1.1 mrg dr_a1->align = MIN (dr_a1->align, new_align); 1962 1.1 mrg } 1963 1.1 mrg if (dump_enabled_p ()) 1964 1.1 mrg dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n", 1965 1.1 mrg DR_REF (dr_a1->dr), DR_REF (dr_b1->dr), 1966 1.1 mrg DR_REF (dr_a2->dr), DR_REF (dr_b2->dr)); 1967 1.1 mrg alias_pair1->flags |= alias_pair2->flags; 1968 1.1 mrg last -= 1; 1969 1.1 mrg } 1970 1.1 mrg } 1971 1.1 mrg alias_pairs->truncate (last + 1); 1972 1.1 mrg 1973 1.1 mrg /* Try to restore the original dr_with_seg_len order within each 1974 1.1 mrg dr_with_seg_len_pair_t. If we ended up combining swapped and 1975 1.1 mrg unswapped pairs into the same check, we have to invalidate any 1976 1.1 mrg RAW, WAR and WAW information for it. 
*/ 1977 1.1 mrg if (dump_enabled_p ()) 1978 1.1 mrg dump_printf (MSG_NOTE, "merged alias checks:\n"); 1979 1.1 mrg FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair) 1980 1.1 mrg { 1981 1.1 mrg unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED); 1982 1.1 mrg unsigned int swapped = (alias_pair->flags & swap_mask); 1983 1.1 mrg if (swapped == DR_ALIAS_SWAPPED) 1984 1.1 mrg std::swap (alias_pair->first, alias_pair->second); 1985 1.1 mrg else if (swapped != DR_ALIAS_UNSWAPPED) 1986 1.1 mrg alias_pair->flags |= DR_ALIAS_ARBITRARY; 1987 1.1 mrg alias_pair->flags &= ~swap_mask; 1988 1.1 mrg if (dump_enabled_p ()) 1989 1.1 mrg dump_alias_pair (alias_pair, " "); 1990 1.1 mrg } 1991 1.1 mrg } 1992 1.1 mrg 1993 1.1 mrg /* A subroutine of create_intersect_range_checks, with a subset of the 1994 1.1 mrg same arguments. Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS 1995 1.1 mrg to optimize cases in which the references form a simple RAW, WAR or 1996 1.1 mrg WAR dependence. */ 1997 1.1 mrg 1998 1.1 mrg static bool 1999 1.1 mrg create_ifn_alias_checks (tree *cond_expr, 2000 1.1 mrg const dr_with_seg_len_pair_t &alias_pair) 2001 1.1 mrg { 2002 1.1 mrg const dr_with_seg_len& dr_a = alias_pair.first; 2003 1.1 mrg const dr_with_seg_len& dr_b = alias_pair.second; 2004 1.1 mrg 2005 1.1 mrg /* Check for cases in which: 2006 1.1 mrg 2007 1.1 mrg (a) we have a known RAW, WAR or WAR dependence 2008 1.1 mrg (b) the accesses are well-ordered in both the original and new code 2009 1.1 mrg (see the comment above the DR_ALIAS_* flags for details); and 2010 1.1 mrg (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */ 2011 1.1 mrg if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW)) 2012 1.1 mrg return false; 2013 1.1 mrg 2014 1.1 mrg /* Make sure that both DRs access the same pattern of bytes, 2015 1.1 mrg with a constant length and step. 
*/ 2016 1.1 mrg poly_uint64 seg_len; 2017 1.1 mrg if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0) 2018 1.1 mrg || !poly_int_tree_p (dr_a.seg_len, &seg_len) 2019 1.1 mrg || maybe_ne (dr_a.access_size, dr_b.access_size) 2020 1.1 mrg || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0) 2021 1.1 mrg || !tree_fits_uhwi_p (DR_STEP (dr_a.dr))) 2022 1.1 mrg return false; 2023 1.1 mrg 2024 1.1 mrg unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr)); 2025 1.1 mrg tree addr_a = DR_BASE_ADDRESS (dr_a.dr); 2026 1.1 mrg tree addr_b = DR_BASE_ADDRESS (dr_b.dr); 2027 1.1 mrg 2028 1.1 mrg /* See whether the target suports what we want to do. WAW checks are 2029 1.1 mrg equivalent to WAR checks here. */ 2030 1.1 mrg internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW 2031 1.1 mrg ? IFN_CHECK_RAW_PTRS 2032 1.1 mrg : IFN_CHECK_WAR_PTRS); 2033 1.1 mrg unsigned int align = MIN (dr_a.align, dr_b.align); 2034 1.1 mrg poly_uint64 full_length = seg_len + bytes; 2035 1.1 mrg if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a), 2036 1.1 mrg full_length, align)) 2037 1.1 mrg { 2038 1.1 mrg full_length = seg_len + dr_a.access_size; 2039 1.1 mrg if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a), 2040 1.1 mrg full_length, align)) 2041 1.1 mrg return false; 2042 1.1 mrg } 2043 1.1 mrg 2044 1.1 mrg /* Commit to using this form of test. 
*/ 2045 1.1 mrg addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr)); 2046 1.1 mrg addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr)); 2047 1.1 mrg 2048 1.1 mrg addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr)); 2049 1.1 mrg addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr)); 2050 1.1 mrg 2051 1.1 mrg *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION, 2052 1.1 mrg ifn, boolean_type_node, 2053 1.1 mrg 4, addr_a, addr_b, 2054 1.1 mrg size_int (full_length), 2055 1.1 mrg size_int (align)); 2056 1.1 mrg 2057 1.1 mrg if (dump_enabled_p ()) 2058 1.1 mrg { 2059 1.1 mrg if (ifn == IFN_CHECK_RAW_PTRS) 2060 1.1 mrg dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n"); 2061 1.1 mrg else 2062 1.1 mrg dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n"); 2063 1.1 mrg } 2064 1.1 mrg return true; 2065 1.1 mrg } 2066 1.1 mrg 2067 1.1 mrg /* Try to generate a runtime condition that is true if ALIAS_PAIR is 2068 1.1 mrg free of aliases, using a condition based on index values instead 2069 1.1 mrg of a condition based on addresses. Return true on success, 2070 1.1 mrg storing the condition in *COND_EXPR. 2071 1.1 mrg 2072 1.1 mrg This can only be done if the two data references in ALIAS_PAIR access 2073 1.1 mrg the same array object and the index is the only difference. For example, 2074 1.1 mrg if the two data references are DR_A and DR_B: 2075 1.1 mrg 2076 1.1 mrg DR_A DR_B 2077 1.1 mrg data-ref arr[i] arr[j] 2078 1.1 mrg base_object arr arr 2079 1.1 mrg index {i_0, +, 1}_loop {j_0, +, 1}_loop 2080 1.1 mrg 2081 1.1 mrg The addresses and their index are like: 2082 1.1 mrg 2083 1.1 mrg |<- ADDR_A ->| |<- ADDR_B ->| 2084 1.1 mrg -------------------------------------------------------> 2085 1.1 mrg | | | | | | | | | | 2086 1.1 mrg -------------------------------------------------------> 2087 1.1 mrg i_0 ... i_0+4 j_0 ... 
j_0+4 2088 1.1 mrg 2089 1.1 mrg We can create expression based on index rather than address: 2090 1.1 mrg 2091 1.1 mrg (unsigned) (i_0 - j_0 + 3) <= 6 2092 1.1 mrg 2093 1.1 mrg i.e. the indices are less than 4 apart. 2094 1.1 mrg 2095 1.1 mrg Note evolution step of index needs to be considered in comparison. */ 2096 1.1 mrg 2097 1.1 mrg static bool 2098 1.1 mrg create_intersect_range_checks_index (class loop *loop, tree *cond_expr, 2099 1.1 mrg const dr_with_seg_len_pair_t &alias_pair) 2100 1.1 mrg { 2101 1.1 mrg const dr_with_seg_len &dr_a = alias_pair.first; 2102 1.1 mrg const dr_with_seg_len &dr_b = alias_pair.second; 2103 1.1 mrg if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS) 2104 1.1 mrg || integer_zerop (DR_STEP (dr_a.dr)) 2105 1.1 mrg || integer_zerop (DR_STEP (dr_b.dr)) 2106 1.1 mrg || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr)) 2107 1.1 mrg return false; 2108 1.1 mrg 2109 1.1 mrg poly_uint64 seg_len1, seg_len2; 2110 1.1 mrg if (!poly_int_tree_p (dr_a.seg_len, &seg_len1) 2111 1.1 mrg || !poly_int_tree_p (dr_b.seg_len, &seg_len2)) 2112 1.1 mrg return false; 2113 1.1 mrg 2114 1.1 mrg if (!tree_fits_shwi_p (DR_STEP (dr_a.dr))) 2115 1.1 mrg return false; 2116 1.1 mrg 2117 1.1 mrg if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0)) 2118 1.1 mrg return false; 2119 1.1 mrg 2120 1.1 mrg if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)) 2121 1.1 mrg return false; 2122 1.1 mrg 2123 1.1 mrg gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST); 2124 1.1 mrg 2125 1.1 mrg bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0; 2126 1.1 mrg unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr)); 2127 1.1 mrg if (neg_step) 2128 1.1 mrg { 2129 1.1 mrg abs_step = -abs_step; 2130 1.1 mrg seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi (); 2131 1.1 mrg seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi (); 2132 1.1 mrg } 2133 1.1 mrg 2134 1.1 mrg /* Infer the number of 
iterations with which the memory segment is accessed 2135 1.1 mrg by DR. In other words, alias is checked if memory segment accessed by 2136 1.1 mrg DR_A in some iterations intersect with memory segment accessed by DR_B 2137 1.1 mrg in the same amount iterations. 2138 1.1 mrg Note segnment length is a linear function of number of iterations with 2139 1.1 mrg DR_STEP as the coefficient. */ 2140 1.1 mrg poly_uint64 niter_len1, niter_len2; 2141 1.1 mrg if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1) 2142 1.1 mrg || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2)) 2143 1.1 mrg return false; 2144 1.1 mrg 2145 1.1 mrg /* Divide each access size by the byte step, rounding up. */ 2146 1.1 mrg poly_uint64 niter_access1, niter_access2; 2147 1.1 mrg if (!can_div_trunc_p (dr_a.access_size + abs_step - 1, 2148 1.1 mrg abs_step, &niter_access1) 2149 1.1 mrg || !can_div_trunc_p (dr_b.access_size + abs_step - 1, 2150 1.1 mrg abs_step, &niter_access2)) 2151 1.1 mrg return false; 2152 1.1 mrg 2153 1.1 mrg bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0; 2154 1.1 mrg 2155 1.1 mrg int found = -1; 2156 1.1 mrg for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++) 2157 1.1 mrg { 2158 1.1 mrg tree access1 = DR_ACCESS_FN (dr_a.dr, i); 2159 1.1 mrg tree access2 = DR_ACCESS_FN (dr_b.dr, i); 2160 1.1 mrg /* Two indices must be the same if they are not scev, or not scev wrto 2161 1.1 mrg current loop being vecorized. 
*/ 2162 1.1 mrg if (TREE_CODE (access1) != POLYNOMIAL_CHREC 2163 1.1 mrg || TREE_CODE (access2) != POLYNOMIAL_CHREC 2164 1.1 mrg || CHREC_VARIABLE (access1) != (unsigned)loop->num 2165 1.1 mrg || CHREC_VARIABLE (access2) != (unsigned)loop->num) 2166 1.1 mrg { 2167 1.1 mrg if (operand_equal_p (access1, access2, 0)) 2168 1.1 mrg continue; 2169 1.1 mrg 2170 1.1 mrg return false; 2171 1.1 mrg } 2172 1.1 mrg if (found >= 0) 2173 1.1 mrg return false; 2174 1.1 mrg found = i; 2175 1.1 mrg } 2176 1.1 mrg 2177 1.1 mrg /* Ought not to happen in practice, since if all accesses are equal then the 2178 1.1 mrg alias should be decidable at compile time. */ 2179 1.1 mrg if (found < 0) 2180 1.1 mrg return false; 2181 1.1 mrg 2182 1.1 mrg /* The two indices must have the same step. */ 2183 1.1 mrg tree access1 = DR_ACCESS_FN (dr_a.dr, found); 2184 1.1 mrg tree access2 = DR_ACCESS_FN (dr_b.dr, found); 2185 1.1 mrg if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0)) 2186 1.1 mrg return false; 2187 1.1 mrg 2188 1.1 mrg tree idx_step = CHREC_RIGHT (access1); 2189 1.1 mrg /* Index must have const step, otherwise DR_STEP won't be constant. */ 2190 1.1 mrg gcc_assert (TREE_CODE (idx_step) == INTEGER_CST); 2191 1.1 mrg /* Index must evaluate in the same direction as DR. */ 2192 1.1 mrg gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1); 2193 1.1 mrg 2194 1.1 mrg tree min1 = CHREC_LEFT (access1); 2195 1.1 mrg tree min2 = CHREC_LEFT (access2); 2196 1.1 mrg if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2))) 2197 1.1 mrg return false; 2198 1.1 mrg 2199 1.1 mrg /* Ideally, alias can be checked against loop's control IV, but we 2200 1.1 mrg need to prove linear mapping between control IV and reference 2201 1.1 mrg index. Although that should be true, we check against (array) 2202 1.1 mrg index of data reference. 
Like segment length, index length is 2203 1.1 mrg linear function of the number of iterations with index_step as 2204 1.1 mrg the coefficient, i.e, niter_len * idx_step. */ 2205 1.1 mrg offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step), 2206 1.1 mrg SIGNED); 2207 1.1 mrg if (neg_step) 2208 1.1 mrg abs_idx_step = -abs_idx_step; 2209 1.1 mrg poly_offset_int idx_len1 = abs_idx_step * niter_len1; 2210 1.1 mrg poly_offset_int idx_len2 = abs_idx_step * niter_len2; 2211 1.1 mrg poly_offset_int idx_access1 = abs_idx_step * niter_access1; 2212 1.1 mrg poly_offset_int idx_access2 = abs_idx_step * niter_access2; 2213 1.1 mrg 2214 1.1 mrg gcc_assert (known_ge (idx_len1, 0) 2215 1.1 mrg && known_ge (idx_len2, 0) 2216 1.1 mrg && known_ge (idx_access1, 0) 2217 1.1 mrg && known_ge (idx_access2, 0)); 2218 1.1 mrg 2219 1.1 mrg /* Each access has the following pattern, with lengths measured 2220 1.1 mrg in units of INDEX: 2221 1.1 mrg 2222 1.1 mrg <-- idx_len --> 2223 1.1 mrg <--- A: -ve step ---> 2224 1.1 mrg +-----+-------+-----+-------+-----+ 2225 1.1 mrg | n-1 | ..... | 0 | ..... | n-1 | 2226 1.1 mrg +-----+-------+-----+-------+-----+ 2227 1.1 mrg <--- B: +ve step ---> 2228 1.1 mrg <-- idx_len --> 2229 1.1 mrg | 2230 1.1 mrg min 2231 1.1 mrg 2232 1.1 mrg where "n" is the number of scalar iterations covered by the segment 2233 1.1 mrg and where each access spans idx_access units. 2234 1.1 mrg 2235 1.1 mrg A is the range of bytes accessed when the step is negative, 2236 1.1 mrg B is the range when the step is positive. 2237 1.1 mrg 2238 1.1 mrg When checking for general overlap, we need to test whether 2239 1.1 mrg the range: 2240 1.1 mrg 2241 1.1 mrg [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1] 2242 1.1 mrg 2243 1.1 mrg overlaps: 2244 1.1 mrg 2245 1.1 mrg [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1] 2246 1.1 mrg 2247 1.1 mrg where: 2248 1.1 mrg 2249 1.1 mrg low_offsetN = +ve step ? 
0 : -idx_lenN; 2250 1.1 mrg high_offsetN = +ve step ? idx_lenN : 0; 2251 1.1 mrg 2252 1.1 mrg This is equivalent to testing whether: 2253 1.1 mrg 2254 1.1 mrg min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1 2255 1.1 mrg && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1 2256 1.1 mrg 2257 1.1 mrg Converting this into a single test, there is an overlap if: 2258 1.1 mrg 2259 1.1 mrg 0 <= min2 - min1 + bias <= limit 2260 1.1 mrg 2261 1.1 mrg where bias = high_offset2 + idx_access2 - 1 - low_offset1 2262 1.1 mrg limit = (high_offset1 - low_offset1 + idx_access1 - 1) 2263 1.1 mrg + (high_offset2 - low_offset2 + idx_access2 - 1) 2264 1.1 mrg i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1 2265 1.1 mrg 2266 1.1 mrg Combining the tests requires limit to be computable in an unsigned 2267 1.1 mrg form of the index type; if it isn't, we fall back to the usual 2268 1.1 mrg pointer-based checks. 2269 1.1 mrg 2270 1.1 mrg We can do better if DR_B is a write and if DR_A and DR_B are 2271 1.1 mrg well-ordered in both the original and the new code (see the 2272 1.1 mrg comment above the DR_ALIAS_* flags for details). In this case 2273 1.1 mrg we know that for each i in [0, n-1], the write performed by 2274 1.1 mrg access i of DR_B occurs after access numbers j<=i of DR_A in 2275 1.1 mrg both the original and the new code. Any write or anti 2276 1.1 mrg dependencies wrt those DR_A accesses are therefore maintained. 2277 1.1 mrg 2278 1.1 mrg We just need to make sure that each individual write in DR_B does not 2279 1.1 mrg overlap any higher-indexed access in DR_A; such DR_A accesses happen 2280 1.1 mrg after the DR_B access in the original code but happen before it in 2281 1.1 mrg the new code. 2282 1.1 mrg 2283 1.1 mrg We know the steps for both accesses are equal, so by induction, we 2284 1.1 mrg just need to test whether the first write of DR_B overlaps a later 2285 1.1 mrg access of DR_A. 
In other words, we need to move min1 along by 2286 1.1 mrg one iteration: 2287 1.1 mrg 2288 1.1 mrg min1' = min1 + idx_step 2289 1.1 mrg 2290 1.1 mrg and use the ranges: 2291 1.1 mrg 2292 1.1 mrg [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1] 2293 1.1 mrg 2294 1.1 mrg and: 2295 1.1 mrg 2296 1.1 mrg [min2, min2 + idx_access2 - 1] 2297 1.1 mrg 2298 1.1 mrg where: 2299 1.1 mrg 2300 1.1 mrg low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|) 2301 1.1 mrg high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0. */ 2302 1.1 mrg if (waw_or_war_p) 2303 1.1 mrg idx_len1 -= abs_idx_step; 2304 1.1 mrg 2305 1.1 mrg poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1; 2306 1.1 mrg if (!waw_or_war_p) 2307 1.1 mrg limit += idx_len2; 2308 1.1 mrg 2309 1.1 mrg tree utype = unsigned_type_for (TREE_TYPE (min1)); 2310 1.1 mrg if (!wi::fits_to_tree_p (limit, utype)) 2311 1.1 mrg return false; 2312 1.1 mrg 2313 1.1 mrg poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0; 2314 1.1 mrg poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2; 2315 1.1 mrg poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1; 2316 1.1 mrg /* Equivalent to adding IDX_STEP to MIN1. 
*/ 2317 1.1 mrg if (waw_or_war_p) 2318 1.1 mrg bias -= wi::to_offset (idx_step); 2319 1.1 mrg 2320 1.1 mrg tree subject = fold_build2 (MINUS_EXPR, utype, 2321 1.1 mrg fold_convert (utype, min2), 2322 1.1 mrg fold_convert (utype, min1)); 2323 1.1 mrg subject = fold_build2 (PLUS_EXPR, utype, subject, 2324 1.1 mrg wide_int_to_tree (utype, bias)); 2325 1.1 mrg tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, 2326 1.1 mrg wide_int_to_tree (utype, limit)); 2327 1.1 mrg if (*cond_expr) 2328 1.1 mrg *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, 2329 1.1 mrg *cond_expr, part_cond_expr); 2330 1.1 mrg else 2331 1.1 mrg *cond_expr = part_cond_expr; 2332 1.1 mrg if (dump_enabled_p ()) 2333 1.1 mrg { 2334 1.1 mrg if (waw_or_war_p) 2335 1.1 mrg dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n"); 2336 1.1 mrg else 2337 1.1 mrg dump_printf (MSG_NOTE, "using an index-based overlap test\n"); 2338 1.1 mrg } 2339 1.1 mrg return true; 2340 1.1 mrg } 2341 1.1 mrg 2342 1.1 mrg /* A subroutine of create_intersect_range_checks, with a subset of the 2343 1.1 mrg same arguments. Try to optimize cases in which the second access 2344 1.1 mrg is a write and in which some overlap is valid. */ 2345 1.1 mrg 2346 1.1 mrg static bool 2347 1.1 mrg create_waw_or_war_checks (tree *cond_expr, 2348 1.1 mrg const dr_with_seg_len_pair_t &alias_pair) 2349 1.1 mrg { 2350 1.1 mrg const dr_with_seg_len& dr_a = alias_pair.first; 2351 1.1 mrg const dr_with_seg_len& dr_b = alias_pair.second; 2352 1.1 mrg 2353 1.1 mrg /* Check for cases in which: 2354 1.1 mrg 2355 1.1 mrg (a) DR_B is always a write; 2356 1.1 mrg (b) the accesses are well-ordered in both the original and new code 2357 1.1 mrg (see the comment above the DR_ALIAS_* flags for details); and 2358 1.1 mrg (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. 
*/ 2359 1.1 mrg if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) 2360 1.1 mrg return false; 2361 1.1 mrg 2362 1.1 mrg /* Check for equal (but possibly variable) steps. */ 2363 1.1 mrg tree step = DR_STEP (dr_a.dr); 2364 1.1 mrg if (!operand_equal_p (step, DR_STEP (dr_b.dr))) 2365 1.1 mrg return false; 2366 1.1 mrg 2367 1.1 mrg /* Make sure that we can operate on sizetype without loss of precision. */ 2368 1.1 mrg tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr)); 2369 1.1 mrg if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype)) 2370 1.1 mrg return false; 2371 1.1 mrg 2372 1.1 mrg /* All addresses involved are known to have a common alignment ALIGN. 2373 1.1 mrg We can therefore subtract ALIGN from an exclusive endpoint to get 2374 1.1 mrg an inclusive endpoint. In the best (and common) case, ALIGN is the 2375 1.1 mrg same as the access sizes of both DRs, and so subtracting ALIGN 2376 1.1 mrg cancels out the addition of an access size. */ 2377 1.1 mrg unsigned int align = MIN (dr_a.align, dr_b.align); 2378 1.1 mrg poly_uint64 last_chunk_a = dr_a.access_size - align; 2379 1.1 mrg poly_uint64 last_chunk_b = dr_b.access_size - align; 2380 1.1 mrg 2381 1.1 mrg /* Get a boolean expression that is true when the step is negative. */ 2382 1.1 mrg tree indicator = dr_direction_indicator (dr_a.dr); 2383 1.1 mrg tree neg_step = fold_build2 (LT_EXPR, boolean_type_node, 2384 1.1 mrg fold_convert (ssizetype, indicator), 2385 1.1 mrg ssize_int (0)); 2386 1.1 mrg 2387 1.1 mrg /* Get lengths in sizetype. */ 2388 1.1 mrg tree seg_len_a 2389 1.1 mrg = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len)); 2390 1.1 mrg step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step)); 2391 1.1 mrg 2392 1.1 mrg /* Each access has the following pattern: 2393 1.1 mrg 2394 1.1 mrg <- |seg_len| -> 2395 1.1 mrg <--- A: -ve step ---> 2396 1.1 mrg +-----+-------+-----+-------+-----+ 2397 1.1 mrg | n-1 | ..... | 0 | ..... 
| n-1 | 2398 1.1 mrg +-----+-------+-----+-------+-----+ 2399 1.1 mrg <--- B: +ve step ---> 2400 1.1 mrg <- |seg_len| -> 2401 1.1 mrg | 2402 1.1 mrg base address 2403 1.1 mrg 2404 1.1 mrg where "n" is the number of scalar iterations covered by the segment. 2405 1.1 mrg 2406 1.1 mrg A is the range of bytes accessed when the step is negative, 2407 1.1 mrg B is the range when the step is positive. 2408 1.1 mrg 2409 1.1 mrg We know that DR_B is a write. We also know (from checking that 2410 1.1 mrg DR_A and DR_B are well-ordered) that for each i in [0, n-1], 2411 1.1 mrg the write performed by access i of DR_B occurs after access numbers 2412 1.1 mrg j<=i of DR_A in both the original and the new code. Any write or 2413 1.1 mrg anti dependencies wrt those DR_A accesses are therefore maintained. 2414 1.1 mrg 2415 1.1 mrg We just need to make sure that each individual write in DR_B does not 2416 1.1 mrg overlap any higher-indexed access in DR_A; such DR_A accesses happen 2417 1.1 mrg after the DR_B access in the original code but happen before it in 2418 1.1 mrg the new code. 2419 1.1 mrg 2420 1.1 mrg We know the steps for both accesses are equal, so by induction, we 2421 1.1 mrg just need to test whether the first write of DR_B overlaps a later 2422 1.1 mrg access of DR_A. In other words, we need to move addr_a along by 2423 1.1 mrg one iteration: 2424 1.1 mrg 2425 1.1 mrg addr_a' = addr_a + step 2426 1.1 mrg 2427 1.1 mrg and check whether: 2428 1.1 mrg 2429 1.1 mrg [addr_b, addr_b + last_chunk_b] 2430 1.1 mrg 2431 1.1 mrg overlaps: 2432 1.1 mrg 2433 1.1 mrg [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a] 2434 1.1 mrg 2435 1.1 mrg where [low_offset_a, high_offset_a] spans accesses [1, n-1]. I.e.: 2436 1.1 mrg 2437 1.1 mrg low_offset_a = +ve step ? 0 : seg_len_a - step 2438 1.1 mrg high_offset_a = +ve step ? 
seg_len_a - step : 0 2439 1.1 mrg 2440 1.1 mrg This is equivalent to testing whether: 2441 1.1 mrg 2442 1.1 mrg addr_a' + low_offset_a <= addr_b + last_chunk_b 2443 1.1 mrg && addr_b <= addr_a' + high_offset_a + last_chunk_a 2444 1.1 mrg 2445 1.1 mrg Converting this into a single test, there is an overlap if: 2446 1.1 mrg 2447 1.1 mrg 0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit 2448 1.1 mrg 2449 1.1 mrg where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b 2450 1.1 mrg 2451 1.1 mrg If DR_A is performed, limit + |step| - last_chunk_b is known to be 2452 1.1 mrg less than the size of the object underlying DR_A. We also know 2453 1.1 mrg that last_chunk_b <= |step|; this is checked elsewhere if it isn't 2454 1.1 mrg guaranteed at compile time. There can therefore be no overflow if 2455 1.1 mrg "limit" is calculated in an unsigned type with pointer precision. */ 2456 1.1 mrg tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr), 2457 1.1 mrg DR_OFFSET (dr_a.dr)); 2458 1.1 mrg addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr)); 2459 1.1 mrg 2460 1.1 mrg tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr), 2461 1.1 mrg DR_OFFSET (dr_b.dr)); 2462 1.1 mrg addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr)); 2463 1.1 mrg 2464 1.1 mrg /* Advance ADDR_A by one iteration and adjust the length to compensate. 
*/ 2465 1.1 mrg addr_a = fold_build_pointer_plus (addr_a, step); 2466 1.1 mrg tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype, 2467 1.1 mrg seg_len_a, step); 2468 1.1 mrg if (!CONSTANT_CLASS_P (seg_len_a_minus_step)) 2469 1.1 mrg seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step); 2470 1.1 mrg 2471 1.1 mrg tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step, 2472 1.1 mrg seg_len_a_minus_step, size_zero_node); 2473 1.1 mrg if (!CONSTANT_CLASS_P (low_offset_a)) 2474 1.1 mrg low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a); 2475 1.1 mrg 2476 1.1 mrg /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>, 2477 1.1 mrg but it's usually more efficient to reuse the LOW_OFFSET_A result. */ 2478 1.1 mrg tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step, 2479 1.1 mrg low_offset_a); 2480 1.1 mrg 2481 1.1 mrg /* The amount added to addr_b - addr_a'. */ 2482 1.1 mrg tree bias = fold_build2 (MINUS_EXPR, sizetype, 2483 1.1 mrg size_int (last_chunk_b), low_offset_a); 2484 1.1 mrg 2485 1.1 mrg tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a); 2486 1.1 mrg limit = fold_build2 (PLUS_EXPR, sizetype, limit, 2487 1.1 mrg size_int (last_chunk_a + last_chunk_b)); 2488 1.1 mrg 2489 1.1 mrg tree subject = fold_build2 (MINUS_EXPR, sizetype, 2490 1.1 mrg fold_convert (sizetype, addr_b), 2491 1.1 mrg fold_convert (sizetype, addr_a)); 2492 1.1 mrg subject = fold_build2 (PLUS_EXPR, sizetype, subject, bias); 2493 1.1 mrg 2494 1.1 mrg *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit); 2495 1.1 mrg if (dump_enabled_p ()) 2496 1.1 mrg dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n"); 2497 1.1 mrg return true; 2498 1.1 mrg } 2499 1.1 mrg 2500 1.1 mrg /* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for 2501 1.1 mrg every address ADDR accessed by D: 2502 1.1 mrg 2503 1.1 mrg *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= 
*SEQ_MAX_OUT 2504 1.1 mrg 2505 1.1 mrg In this case, every element accessed by D is aligned to at least 2506 1.1 mrg ALIGN bytes. 2507 1.1 mrg 2508 1.1 mrg If ALIGN is zero then instead set *SEG_MAX_OUT so that: 2509 1.1 mrg 2510 1.1 mrg *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT. */ 2511 1.1 mrg 2512 1.1 mrg static void 2513 1.1 mrg get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out, 2514 1.1 mrg tree *seg_max_out, HOST_WIDE_INT align) 2515 1.1 mrg { 2516 1.1 mrg /* Each access has the following pattern: 2517 1.1 mrg 2518 1.1 mrg <- |seg_len| -> 2519 1.1 mrg <--- A: -ve step ---> 2520 1.1 mrg +-----+-------+-----+-------+-----+ 2521 1.1 mrg | n-1 | ,.... | 0 | ..... | n-1 | 2522 1.1 mrg +-----+-------+-----+-------+-----+ 2523 1.1 mrg <--- B: +ve step ---> 2524 1.1 mrg <- |seg_len| -> 2525 1.1 mrg | 2526 1.1 mrg base address 2527 1.1 mrg 2528 1.1 mrg where "n" is the number of scalar iterations covered by the segment. 2529 1.1 mrg (This should be VF for a particular pair if we know that both steps 2530 1.1 mrg are the same, otherwise it will be the full number of scalar loop 2531 1.1 mrg iterations.) 2532 1.1 mrg 2533 1.1 mrg A is the range of bytes accessed when the step is negative, 2534 1.1 mrg B is the range when the step is positive. 2535 1.1 mrg 2536 1.1 mrg If the access size is "access_size" bytes, the lowest addressed byte is: 2537 1.1 mrg 2538 1.1 mrg base + (step < 0 ? seg_len : 0) [LB] 2539 1.1 mrg 2540 1.1 mrg and the highest addressed byte is always below: 2541 1.1 mrg 2542 1.1 mrg base + (step < 0 ? 0 : seg_len) + access_size [UB] 2543 1.1 mrg 2544 1.1 mrg Thus: 2545 1.1 mrg 2546 1.1 mrg LB <= ADDR < UB 2547 1.1 mrg 2548 1.1 mrg If ALIGN is nonzero, all three values are aligned to at least ALIGN 2549 1.1 mrg bytes, so: 2550 1.1 mrg 2551 1.1 mrg LB <= ADDR <= UB - ALIGN 2552 1.1 mrg 2553 1.1 mrg where "- ALIGN" folds naturally with the "+ access_size" and often 2554 1.1 mrg cancels it out. 
2555 1.1 mrg 2556 1.1 mrg We don't try to simplify LB and UB beyond this (e.g. by using 2557 1.1 mrg MIN and MAX based on whether seg_len rather than the stride is 2558 1.1 mrg negative) because it is possible for the absolute size of the 2559 1.1 mrg segment to overflow the range of a ssize_t. 2560 1.1 mrg 2561 1.1 mrg Keeping the pointer_plus outside of the cond_expr should allow 2562 1.1 mrg the cond_exprs to be shared with other alias checks. */ 2563 1.1 mrg tree indicator = dr_direction_indicator (d.dr); 2564 1.1 mrg tree neg_step = fold_build2 (LT_EXPR, boolean_type_node, 2565 1.1 mrg fold_convert (ssizetype, indicator), 2566 1.1 mrg ssize_int (0)); 2567 1.1 mrg tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr), 2568 1.1 mrg DR_OFFSET (d.dr)); 2569 1.1 mrg addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr)); 2570 1.1 mrg tree seg_len 2571 1.1 mrg = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len)); 2572 1.1 mrg 2573 1.1 mrg tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step, 2574 1.1 mrg seg_len, size_zero_node); 2575 1.1 mrg tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step, 2576 1.1 mrg size_zero_node, seg_len); 2577 1.1 mrg max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach, 2578 1.1 mrg size_int (d.access_size - align)); 2579 1.1 mrg 2580 1.1 mrg *seg_min_out = fold_build_pointer_plus (addr_base, min_reach); 2581 1.1 mrg *seg_max_out = fold_build_pointer_plus (addr_base, max_reach); 2582 1.1 mrg } 2583 1.1 mrg 2584 1.1 mrg /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases, 2585 1.1 mrg storing the condition in *COND_EXPR. The fallback is to generate a 2586 1.1 mrg a test that the two accesses do not overlap: 2587 1.1 mrg 2588 1.1 mrg end_a <= start_b || end_b <= start_a. 
*/ 2589 1.1 mrg 2590 1.1 mrg static void 2591 1.1 mrg create_intersect_range_checks (class loop *loop, tree *cond_expr, 2592 1.1 mrg const dr_with_seg_len_pair_t &alias_pair) 2593 1.1 mrg { 2594 1.1 mrg const dr_with_seg_len& dr_a = alias_pair.first; 2595 1.1 mrg const dr_with_seg_len& dr_b = alias_pair.second; 2596 1.1 mrg *cond_expr = NULL_TREE; 2597 1.1 mrg if (create_intersect_range_checks_index (loop, cond_expr, alias_pair)) 2598 1.1 mrg return; 2599 1.1 mrg 2600 1.1 mrg if (create_ifn_alias_checks (cond_expr, alias_pair)) 2601 1.1 mrg return; 2602 1.1 mrg 2603 1.1 mrg if (create_waw_or_war_checks (cond_expr, alias_pair)) 2604 1.1 mrg return; 2605 1.1 mrg 2606 1.1 mrg unsigned HOST_WIDE_INT min_align; 2607 1.1 mrg tree_code cmp_code; 2608 1.1 mrg /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions 2609 1.1 mrg are equivalent. This is just an optimization heuristic. */ 2610 1.1 mrg if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST 2611 1.1 mrg && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST) 2612 1.1 mrg { 2613 1.1 mrg /* In this case adding access_size to seg_len is likely to give 2614 1.1 mrg a simple X * step, where X is either the number of scalar 2615 1.1 mrg iterations or the vectorization factor. We're better off 2616 1.1 mrg keeping that, rather than subtracting an alignment from it. 2617 1.1 mrg 2618 1.1 mrg In this case the maximum values are exclusive and so there is 2619 1.1 mrg no alias if the maximum of one segment equals the minimum 2620 1.1 mrg of another. */ 2621 1.1 mrg min_align = 0; 2622 1.1 mrg cmp_code = LE_EXPR; 2623 1.1 mrg } 2624 1.1 mrg else 2625 1.1 mrg { 2626 1.1 mrg /* Calculate the minimum alignment shared by all four pointers, 2627 1.1 mrg then arrange for this alignment to be subtracted from the 2628 1.1 mrg exclusive maximum values to get inclusive maximum values. 2629 1.1 mrg This "- min_align" is cumulative with a "+ access_size" 2630 1.1 mrg in the calculation of the maximum values. 
In the best 2631 1.1 mrg (and common) case, the two cancel each other out, leaving 2632 1.1 mrg us with an inclusive bound based only on seg_len. In the 2633 1.1 mrg worst case we're simply adding a smaller number than before. 2634 1.1 mrg 2635 1.1 mrg Because the maximum values are inclusive, there is an alias 2636 1.1 mrg if the maximum value of one segment is equal to the minimum 2637 1.1 mrg value of the other. */ 2638 1.1 mrg min_align = std::min (dr_a.align, dr_b.align); 2639 1.1 mrg cmp_code = LT_EXPR; 2640 1.1 mrg } 2641 1.1 mrg 2642 1.1 mrg tree seg_a_min, seg_a_max, seg_b_min, seg_b_max; 2643 1.1 mrg get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align); 2644 1.1 mrg get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align); 2645 1.1 mrg 2646 1.1 mrg *cond_expr 2647 1.1 mrg = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, 2648 1.1 mrg fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min), 2649 1.1 mrg fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min)); 2650 1.1 mrg if (dump_enabled_p ()) 2651 1.1 mrg dump_printf (MSG_NOTE, "using an address-based overlap test\n"); 2652 1.1 mrg } 2653 1.1 mrg 2654 1.1 mrg /* Create a conditional expression that represents the run-time checks for 2655 1.1 mrg overlapping of address ranges represented by a list of data references 2656 1.1 mrg pairs passed in ALIAS_PAIRS. Data references are in LOOP. The returned 2657 1.1 mrg COND_EXPR is the conditional expression to be used in the if statement 2658 1.1 mrg that controls which version of the loop gets executed at runtime. 
*/ 2659 1.1 mrg 2660 1.1 mrg void 2661 1.1 mrg create_runtime_alias_checks (class loop *loop, 2662 1.1 mrg const vec<dr_with_seg_len_pair_t> *alias_pairs, 2663 1.1 mrg tree * cond_expr) 2664 1.1 mrg { 2665 1.1 mrg tree part_cond_expr; 2666 1.1 mrg 2667 1.1 mrg fold_defer_overflow_warnings (); 2668 1.1 mrg for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs) 2669 1.1 mrg { 2670 1.1 mrg gcc_assert (alias_pair.flags); 2671 1.1 mrg if (dump_enabled_p ()) 2672 1.1 mrg dump_printf (MSG_NOTE, 2673 1.1 mrg "create runtime check for data references %T and %T\n", 2674 1.1 mrg DR_REF (alias_pair.first.dr), 2675 1.1 mrg DR_REF (alias_pair.second.dr)); 2676 1.1 mrg 2677 1.1 mrg /* Create condition expression for each pair data references. */ 2678 1.1 mrg create_intersect_range_checks (loop, &part_cond_expr, alias_pair); 2679 1.1 mrg if (*cond_expr) 2680 1.1 mrg *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, 2681 1.1 mrg *cond_expr, part_cond_expr); 2682 1.1 mrg else 2683 1.1 mrg *cond_expr = part_cond_expr; 2684 1.1 mrg } 2685 1.1 mrg fold_undefer_and_ignore_overflow_warnings (); 2686 1.1 mrg } 2687 1.1 mrg 2688 1.1 mrg /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical 2689 1.1 mrg expressions. 
*/ 2690 1.1 mrg static bool 2691 1.1 mrg dr_equal_offsets_p1 (tree offset1, tree offset2) 2692 1.1 mrg { 2693 1.1 mrg bool res; 2694 1.1 mrg 2695 1.1 mrg STRIP_NOPS (offset1); 2696 1.1 mrg STRIP_NOPS (offset2); 2697 1.1 mrg 2698 1.1 mrg if (offset1 == offset2) 2699 1.1 mrg return true; 2700 1.1 mrg 2701 1.1 mrg if (TREE_CODE (offset1) != TREE_CODE (offset2) 2702 1.1 mrg || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1))) 2703 1.1 mrg return false; 2704 1.1 mrg 2705 1.1 mrg res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0), 2706 1.1 mrg TREE_OPERAND (offset2, 0)); 2707 1.1 mrg 2708 1.1 mrg if (!res || !BINARY_CLASS_P (offset1)) 2709 1.1 mrg return res; 2710 1.1 mrg 2711 1.1 mrg res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1), 2712 1.1 mrg TREE_OPERAND (offset2, 1)); 2713 1.1 mrg 2714 1.1 mrg return res; 2715 1.1 mrg } 2716 1.1 mrg 2717 1.1 mrg /* Check if DRA and DRB have equal offsets. */ 2718 1.1 mrg bool 2719 1.1 mrg dr_equal_offsets_p (struct data_reference *dra, 2720 1.1 mrg struct data_reference *drb) 2721 1.1 mrg { 2722 1.1 mrg tree offset1, offset2; 2723 1.1 mrg 2724 1.1 mrg offset1 = DR_OFFSET (dra); 2725 1.1 mrg offset2 = DR_OFFSET (drb); 2726 1.1 mrg 2727 1.1 mrg return dr_equal_offsets_p1 (offset1, offset2); 2728 1.1 mrg } 2729 1.1 mrg 2730 1.1 mrg /* Returns true if FNA == FNB. */ 2731 1.1 mrg 2732 1.1 mrg static bool 2733 1.1 mrg affine_function_equal_p (affine_fn fna, affine_fn fnb) 2734 1.1 mrg { 2735 1.1 mrg unsigned i, n = fna.length (); 2736 1.1 mrg 2737 1.1 mrg if (n != fnb.length ()) 2738 1.1 mrg return false; 2739 1.1 mrg 2740 1.1 mrg for (i = 0; i < n; i++) 2741 1.1 mrg if (!operand_equal_p (fna[i], fnb[i], 0)) 2742 1.1 mrg return false; 2743 1.1 mrg 2744 1.1 mrg return true; 2745 1.1 mrg } 2746 1.1 mrg 2747 1.1 mrg /* If all the functions in CF are the same, returns one of them, 2748 1.1 mrg otherwise returns NULL. 
*/ 2749 1.1 mrg 2750 1.1 mrg static affine_fn 2751 1.1 mrg common_affine_function (conflict_function *cf) 2752 1.1 mrg { 2753 1.1 mrg unsigned i; 2754 1.1 mrg affine_fn comm; 2755 1.1 mrg 2756 1.1 mrg if (!CF_NONTRIVIAL_P (cf)) 2757 1.1 mrg return affine_fn (); 2758 1.1 mrg 2759 1.1 mrg comm = cf->fns[0]; 2760 1.1 mrg 2761 1.1 mrg for (i = 1; i < cf->n; i++) 2762 1.1 mrg if (!affine_function_equal_p (comm, cf->fns[i])) 2763 1.1 mrg return affine_fn (); 2764 1.1 mrg 2765 1.1 mrg return comm; 2766 1.1 mrg } 2767 1.1 mrg 2768 1.1 mrg /* Returns the base of the affine function FN. */ 2769 1.1 mrg 2770 1.1 mrg static tree 2771 1.1 mrg affine_function_base (affine_fn fn) 2772 1.1 mrg { 2773 1.1 mrg return fn[0]; 2774 1.1 mrg } 2775 1.1 mrg 2776 1.1 mrg /* Returns true if FN is a constant. */ 2777 1.1 mrg 2778 1.1 mrg static bool 2779 1.1 mrg affine_function_constant_p (affine_fn fn) 2780 1.1 mrg { 2781 1.1 mrg unsigned i; 2782 1.1 mrg tree coef; 2783 1.1 mrg 2784 1.1 mrg for (i = 1; fn.iterate (i, &coef); i++) 2785 1.1 mrg if (!integer_zerop (coef)) 2786 1.1 mrg return false; 2787 1.1 mrg 2788 1.1 mrg return true; 2789 1.1 mrg } 2790 1.1 mrg 2791 1.1 mrg /* Returns true if FN is the zero constant function. */ 2792 1.1 mrg 2793 1.1 mrg static bool 2794 1.1 mrg affine_function_zero_p (affine_fn fn) 2795 1.1 mrg { 2796 1.1 mrg return (integer_zerop (affine_function_base (fn)) 2797 1.1 mrg && affine_function_constant_p (fn)); 2798 1.1 mrg } 2799 1.1 mrg 2800 1.1 mrg /* Returns a signed integer type with the largest precision from TA 2801 1.1 mrg and TB. */ 2802 1.1 mrg 2803 1.1 mrg static tree 2804 1.1 mrg signed_type_for_types (tree ta, tree tb) 2805 1.1 mrg { 2806 1.1 mrg if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb)) 2807 1.1 mrg return signed_type_for (ta); 2808 1.1 mrg else 2809 1.1 mrg return signed_type_for (tb); 2810 1.1 mrg } 2811 1.1 mrg 2812 1.1 mrg /* Applies operation OP on affine functions FNA and FNB, and returns the 2813 1.1 mrg result. 
*/ 2814 1.1 mrg 2815 1.1 mrg static affine_fn 2816 1.1 mrg affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb) 2817 1.1 mrg { 2818 1.1 mrg unsigned i, n, m; 2819 1.1 mrg affine_fn ret; 2820 1.1 mrg tree coef; 2821 1.1 mrg 2822 1.1 mrg if (fnb.length () > fna.length ()) 2823 1.1 mrg { 2824 1.1 mrg n = fna.length (); 2825 1.1 mrg m = fnb.length (); 2826 1.1 mrg } 2827 1.1 mrg else 2828 1.1 mrg { 2829 1.1 mrg n = fnb.length (); 2830 1.1 mrg m = fna.length (); 2831 1.1 mrg } 2832 1.1 mrg 2833 1.1 mrg ret.create (m); 2834 1.1 mrg for (i = 0; i < n; i++) 2835 1.1 mrg { 2836 1.1 mrg tree type = signed_type_for_types (TREE_TYPE (fna[i]), 2837 1.1 mrg TREE_TYPE (fnb[i])); 2838 1.1 mrg ret.quick_push (fold_build2 (op, type, fna[i], fnb[i])); 2839 1.1 mrg } 2840 1.1 mrg 2841 1.1 mrg for (; fna.iterate (i, &coef); i++) 2842 1.1 mrg ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)), 2843 1.1 mrg coef, integer_zero_node)); 2844 1.1 mrg for (; fnb.iterate (i, &coef); i++) 2845 1.1 mrg ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)), 2846 1.1 mrg integer_zero_node, coef)); 2847 1.1 mrg 2848 1.1 mrg return ret; 2849 1.1 mrg } 2850 1.1 mrg 2851 1.1 mrg /* Returns the sum of affine functions FNA and FNB. */ 2852 1.1 mrg 2853 1.1 mrg static affine_fn 2854 1.1 mrg affine_fn_plus (affine_fn fna, affine_fn fnb) 2855 1.1 mrg { 2856 1.1 mrg return affine_fn_op (PLUS_EXPR, fna, fnb); 2857 1.1 mrg } 2858 1.1 mrg 2859 1.1 mrg /* Returns the difference of affine functions FNA and FNB. */ 2860 1.1 mrg 2861 1.1 mrg static affine_fn 2862 1.1 mrg affine_fn_minus (affine_fn fna, affine_fn fnb) 2863 1.1 mrg { 2864 1.1 mrg return affine_fn_op (MINUS_EXPR, fna, fnb); 2865 1.1 mrg } 2866 1.1 mrg 2867 1.1 mrg /* Frees affine function FN. 
*/ 2868 1.1 mrg 2869 1.1 mrg static void 2870 1.1 mrg affine_fn_free (affine_fn fn) 2871 1.1 mrg { 2872 1.1 mrg fn.release (); 2873 1.1 mrg } 2874 1.1 mrg 2875 1.1 mrg /* Determine for each subscript in the data dependence relation DDR 2876 1.1 mrg the distance. */ 2877 1.1 mrg 2878 1.1 mrg static void 2879 1.1 mrg compute_subscript_distance (struct data_dependence_relation *ddr) 2880 1.1 mrg { 2881 1.1 mrg conflict_function *cf_a, *cf_b; 2882 1.1 mrg affine_fn fn_a, fn_b, diff; 2883 1.1 mrg 2884 1.1 mrg if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) 2885 1.1 mrg { 2886 1.1 mrg unsigned int i; 2887 1.1 mrg 2888 1.1 mrg for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) 2889 1.1 mrg { 2890 1.1 mrg struct subscript *subscript; 2891 1.1 mrg 2892 1.1 mrg subscript = DDR_SUBSCRIPT (ddr, i); 2893 1.1 mrg cf_a = SUB_CONFLICTS_IN_A (subscript); 2894 1.1 mrg cf_b = SUB_CONFLICTS_IN_B (subscript); 2895 1.1 mrg 2896 1.1 mrg fn_a = common_affine_function (cf_a); 2897 1.1 mrg fn_b = common_affine_function (cf_b); 2898 1.1 mrg if (!fn_a.exists () || !fn_b.exists ()) 2899 1.1 mrg { 2900 1.1 mrg SUB_DISTANCE (subscript) = chrec_dont_know; 2901 1.1 mrg return; 2902 1.1 mrg } 2903 1.1 mrg diff = affine_fn_minus (fn_a, fn_b); 2904 1.1 mrg 2905 1.1 mrg if (affine_function_constant_p (diff)) 2906 1.1 mrg SUB_DISTANCE (subscript) = affine_function_base (diff); 2907 1.1 mrg else 2908 1.1 mrg SUB_DISTANCE (subscript) = chrec_dont_know; 2909 1.1 mrg 2910 1.1 mrg affine_fn_free (diff); 2911 1.1 mrg } 2912 1.1 mrg } 2913 1.1 mrg } 2914 1.1 mrg 2915 1.1 mrg /* Returns the conflict function for "unknown". */ 2916 1.1 mrg 2917 1.1 mrg static conflict_function * 2918 1.1 mrg conflict_fn_not_known (void) 2919 1.1 mrg { 2920 1.1 mrg conflict_function *fn = XCNEW (conflict_function); 2921 1.1 mrg fn->n = NOT_KNOWN; 2922 1.1 mrg 2923 1.1 mrg return fn; 2924 1.1 mrg } 2925 1.1 mrg 2926 1.1 mrg /* Returns the conflict function for "independent". 
*/ 2927 1.1 mrg 2928 1.1 mrg static conflict_function * 2929 1.1 mrg conflict_fn_no_dependence (void) 2930 1.1 mrg { 2931 1.1 mrg conflict_function *fn = XCNEW (conflict_function); 2932 1.1 mrg fn->n = NO_DEPENDENCE; 2933 1.1 mrg 2934 1.1 mrg return fn; 2935 1.1 mrg } 2936 1.1 mrg 2937 1.1 mrg /* Returns true if the address of OBJ is invariant in LOOP. */ 2938 1.1 mrg 2939 1.1 mrg static bool 2940 1.1 mrg object_address_invariant_in_loop_p (const class loop *loop, const_tree obj) 2941 1.1 mrg { 2942 1.1 mrg while (handled_component_p (obj)) 2943 1.1 mrg { 2944 1.1 mrg if (TREE_CODE (obj) == ARRAY_REF) 2945 1.1 mrg { 2946 1.1 mrg for (int i = 1; i < 4; ++i) 2947 1.1 mrg if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i), 2948 1.1 mrg loop->num)) 2949 1.1 mrg return false; 2950 1.1 mrg } 2951 1.1 mrg else if (TREE_CODE (obj) == COMPONENT_REF) 2952 1.1 mrg { 2953 1.1 mrg if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2), 2954 1.1 mrg loop->num)) 2955 1.1 mrg return false; 2956 1.1 mrg } 2957 1.1 mrg obj = TREE_OPERAND (obj, 0); 2958 1.1 mrg } 2959 1.1 mrg 2960 1.1 mrg if (!INDIRECT_REF_P (obj) 2961 1.1 mrg && TREE_CODE (obj) != MEM_REF) 2962 1.1 mrg return true; 2963 1.1 mrg 2964 1.1 mrg return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0), 2965 1.1 mrg loop->num); 2966 1.1 mrg } 2967 1.1 mrg 2968 1.1 mrg /* Returns false if we can prove that data references A and B do not alias, 2969 1.1 mrg true otherwise. If LOOP_NEST is false no cross-iteration aliases are 2970 1.1 mrg considered. 
*/ 2971 1.1 mrg 2972 1.1 mrg bool 2973 1.1 mrg dr_may_alias_p (const struct data_reference *a, const struct data_reference *b, 2974 1.1 mrg class loop *loop_nest) 2975 1.1 mrg { 2976 1.1 mrg tree addr_a = DR_BASE_OBJECT (a); 2977 1.1 mrg tree addr_b = DR_BASE_OBJECT (b); 2978 1.1 mrg 2979 1.1 mrg /* If we are not processing a loop nest but scalar code we 2980 1.1 mrg do not need to care about possible cross-iteration dependences 2981 1.1 mrg and thus can process the full original reference. Do so, 2982 1.1 mrg similar to how loop invariant motion applies extra offset-based 2983 1.1 mrg disambiguation. */ 2984 1.1 mrg if (!loop_nest) 2985 1.1 mrg { 2986 1.1 mrg aff_tree off1, off2; 2987 1.1 mrg poly_widest_int size1, size2; 2988 1.1 mrg get_inner_reference_aff (DR_REF (a), &off1, &size1); 2989 1.1 mrg get_inner_reference_aff (DR_REF (b), &off2, &size2); 2990 1.1 mrg aff_combination_scale (&off1, -1); 2991 1.1 mrg aff_combination_add (&off2, &off1); 2992 1.1 mrg if (aff_comb_cannot_overlap_p (&off2, size1, size2)) 2993 1.1 mrg return false; 2994 1.1 mrg } 2995 1.1 mrg 2996 1.1 mrg if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF) 2997 1.1 mrg && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF) 2998 1.1 mrg /* For cross-iteration dependences the cliques must be valid for the 2999 1.1 mrg whole loop, not just individual iterations. */ 3000 1.1 mrg && (!loop_nest 3001 1.1 mrg || MR_DEPENDENCE_CLIQUE (addr_a) == 1 3002 1.1 mrg || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique) 3003 1.1 mrg && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b) 3004 1.1 mrg && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b)) 3005 1.1 mrg return false; 3006 1.1 mrg 3007 1.1 mrg /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we 3008 1.1 mrg do not know the size of the base-object. 
So we cannot do any 3009 1.1 mrg offset/overlap based analysis but have to rely on points-to 3010 1.1 mrg information only. */ 3011 1.1 mrg if (TREE_CODE (addr_a) == MEM_REF 3012 1.1 mrg && (DR_UNCONSTRAINED_BASE (a) 3013 1.1 mrg || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME)) 3014 1.1 mrg { 3015 1.1 mrg /* For true dependences we can apply TBAA. */ 3016 1.1 mrg if (flag_strict_aliasing 3017 1.1 mrg && DR_IS_WRITE (a) && DR_IS_READ (b) 3018 1.1 mrg && !alias_sets_conflict_p (get_alias_set (DR_REF (a)), 3019 1.1 mrg get_alias_set (DR_REF (b)))) 3020 1.1 mrg return false; 3021 1.1 mrg if (TREE_CODE (addr_b) == MEM_REF) 3022 1.1 mrg return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0), 3023 1.1 mrg TREE_OPERAND (addr_b, 0)); 3024 1.1 mrg else 3025 1.1 mrg return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0), 3026 1.1 mrg build_fold_addr_expr (addr_b)); 3027 1.1 mrg } 3028 1.1 mrg else if (TREE_CODE (addr_b) == MEM_REF 3029 1.1 mrg && (DR_UNCONSTRAINED_BASE (b) 3030 1.1 mrg || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME)) 3031 1.1 mrg { 3032 1.1 mrg /* For true dependences we can apply TBAA. */ 3033 1.1 mrg if (flag_strict_aliasing 3034 1.1 mrg && DR_IS_WRITE (a) && DR_IS_READ (b) 3035 1.1 mrg && !alias_sets_conflict_p (get_alias_set (DR_REF (a)), 3036 1.1 mrg get_alias_set (DR_REF (b)))) 3037 1.1 mrg return false; 3038 1.1 mrg if (TREE_CODE (addr_a) == MEM_REF) 3039 1.1 mrg return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0), 3040 1.1 mrg TREE_OPERAND (addr_b, 0)); 3041 1.1 mrg else 3042 1.1 mrg return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a), 3043 1.1 mrg TREE_OPERAND (addr_b, 0)); 3044 1.1 mrg } 3045 1.1 mrg 3046 1.1 mrg /* Otherwise DR_BASE_OBJECT is an access that covers the whole object 3047 1.1 mrg that is being subsetted in the loop nest. 
*/ 3048 1.1 mrg if (DR_IS_WRITE (a) && DR_IS_WRITE (b)) 3049 1.1 mrg return refs_output_dependent_p (addr_a, addr_b); 3050 1.1 mrg else if (DR_IS_READ (a) && DR_IS_WRITE (b)) 3051 1.1 mrg return refs_anti_dependent_p (addr_a, addr_b); 3052 1.1 mrg return refs_may_alias_p (addr_a, addr_b); 3053 1.1 mrg } 3054 1.1 mrg 3055 1.1 mrg /* REF_A and REF_B both satisfy access_fn_component_p. Return true 3056 1.1 mrg if it is meaningful to compare their associated access functions 3057 1.1 mrg when checking for dependencies. */ 3058 1.1 mrg 3059 1.1 mrg static bool 3060 1.1 mrg access_fn_components_comparable_p (tree ref_a, tree ref_b) 3061 1.1 mrg { 3062 1.1 mrg /* Allow pairs of component refs from the following sets: 3063 1.1 mrg 3064 1.1 mrg { REALPART_EXPR, IMAGPART_EXPR } 3065 1.1 mrg { COMPONENT_REF } 3066 1.1 mrg { ARRAY_REF }. */ 3067 1.1 mrg tree_code code_a = TREE_CODE (ref_a); 3068 1.1 mrg tree_code code_b = TREE_CODE (ref_b); 3069 1.1 mrg if (code_a == IMAGPART_EXPR) 3070 1.1 mrg code_a = REALPART_EXPR; 3071 1.1 mrg if (code_b == IMAGPART_EXPR) 3072 1.1 mrg code_b = REALPART_EXPR; 3073 1.1 mrg if (code_a != code_b) 3074 1.1 mrg return false; 3075 1.1 mrg 3076 1.1 mrg if (TREE_CODE (ref_a) == COMPONENT_REF) 3077 1.1 mrg /* ??? We cannot simply use the type of operand #0 of the refs here as 3078 1.1 mrg the Fortran compiler smuggles type punning into COMPONENT_REFs. 3079 1.1 mrg Use the DECL_CONTEXT of the FIELD_DECLs instead. */ 3080 1.1 mrg return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1)) 3081 1.1 mrg == DECL_CONTEXT (TREE_OPERAND (ref_b, 1))); 3082 1.1 mrg 3083 1.1 mrg return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)), 3084 1.1 mrg TREE_TYPE (TREE_OPERAND (ref_b, 0))); 3085 1.1 mrg } 3086 1.1 mrg 3087 1.1 mrg /* Initialize a data dependence relation RES in LOOP_NEST. USE_ALT_INDICES 3088 1.1 mrg is true when the main indices of A and B were not comparable so we try again 3089 1.1 mrg with alternate indices computed on an indirect reference. 
*/ 3090 1.1 mrg 3091 1.1 mrg struct data_dependence_relation * 3092 1.1 mrg initialize_data_dependence_relation (struct data_dependence_relation *res, 3093 1.1 mrg vec<loop_p> loop_nest, 3094 1.1 mrg bool use_alt_indices) 3095 1.1 mrg { 3096 1.1 mrg struct data_reference *a = DDR_A (res); 3097 1.1 mrg struct data_reference *b = DDR_B (res); 3098 1.1 mrg unsigned int i; 3099 1.1 mrg 3100 1.1 mrg struct indices *indices_a = &a->indices; 3101 1.1 mrg struct indices *indices_b = &b->indices; 3102 1.1 mrg if (use_alt_indices) 3103 1.1 mrg { 3104 1.1 mrg if (TREE_CODE (DR_REF (a)) != MEM_REF) 3105 1.1 mrg indices_a = &a->alt_indices; 3106 1.1 mrg if (TREE_CODE (DR_REF (b)) != MEM_REF) 3107 1.1 mrg indices_b = &b->alt_indices; 3108 1.1 mrg } 3109 1.1 mrg unsigned int num_dimensions_a = indices_a->access_fns.length (); 3110 1.1 mrg unsigned int num_dimensions_b = indices_b->access_fns.length (); 3111 1.1 mrg if (num_dimensions_a == 0 || num_dimensions_b == 0) 3112 1.1 mrg { 3113 1.1 mrg DDR_ARE_DEPENDENT (res) = chrec_dont_know; 3114 1.1 mrg return res; 3115 1.1 mrg } 3116 1.1 mrg 3117 1.1 mrg /* For unconstrained bases, the root (highest-indexed) subscript 3118 1.1 mrg describes a variation in the base of the original DR_REF rather 3119 1.1 mrg than a component access. We have no type that accurately describes 3120 1.1 mrg the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after* 3121 1.1 mrg applying this subscript) so limit the search to the last real 3122 1.1 mrg component access. 3123 1.1 mrg 3124 1.1 mrg E.g. for: 3125 1.1 mrg 3126 1.1 mrg void 3127 1.1 mrg f (int a[][8], int b[][8]) 3128 1.1 mrg { 3129 1.1 mrg for (int i = 0; i < 8; ++i) 3130 1.1 mrg a[i * 2][0] = b[i][0]; 3131 1.1 mrg } 3132 1.1 mrg 3133 1.1 mrg the a and b accesses have a single ARRAY_REF component reference [0] 3134 1.1 mrg but have two subscripts. 
*/ 3135 1.1 mrg if (indices_a->unconstrained_base) 3136 1.1 mrg num_dimensions_a -= 1; 3137 1.1 mrg if (indices_b->unconstrained_base) 3138 1.1 mrg num_dimensions_b -= 1; 3139 1.1 mrg 3140 1.1 mrg /* These structures describe sequences of component references in 3141 1.1 mrg DR_REF (A) and DR_REF (B). Each component reference is tied to a 3142 1.1 mrg specific access function. */ 3143 1.1 mrg struct { 3144 1.1 mrg /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and 3145 1.1 mrg DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher 3146 1.1 mrg indices. In C notation, these are the indices of the rightmost 3147 1.1 mrg component references; e.g. for a sequence .b.c.d, the start 3148 1.1 mrg index is for .d. */ 3149 1.1 mrg unsigned int start_a; 3150 1.1 mrg unsigned int start_b; 3151 1.1 mrg 3152 1.1 mrg /* The sequence contains LENGTH consecutive access functions from 3153 1.1 mrg each DR. */ 3154 1.1 mrg unsigned int length; 3155 1.1 mrg 3156 1.1 mrg /* The enclosing objects for the A and B sequences respectively, 3157 1.1 mrg i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1) 3158 1.1 mrg and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied. */ 3159 1.1 mrg tree object_a; 3160 1.1 mrg tree object_b; 3161 1.1 mrg } full_seq = {}, struct_seq = {}; 3162 1.1 mrg 3163 1.1 mrg /* Before each iteration of the loop: 3164 1.1 mrg 3165 1.1 mrg - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and 3166 1.1 mrg - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B). */ 3167 1.1 mrg unsigned int index_a = 0; 3168 1.1 mrg unsigned int index_b = 0; 3169 1.1 mrg tree ref_a = DR_REF (a); 3170 1.1 mrg tree ref_b = DR_REF (b); 3171 1.1 mrg 3172 1.1 mrg /* Now walk the component references from the final DR_REFs back up to 3173 1.1 mrg the enclosing base objects. 
Each component reference corresponds 3174 1.1 mrg to one access function in the DR, with access function 0 being for 3175 1.1 mrg the final DR_REF and the highest-indexed access function being the 3176 1.1 mrg one that is applied to the base of the DR. 3177 1.1 mrg 3178 1.1 mrg Look for a sequence of component references whose access functions 3179 1.1 mrg are comparable (see access_fn_components_comparable_p). If more 3180 1.1 mrg than one such sequence exists, pick the one nearest the base 3181 1.1 mrg (which is the leftmost sequence in C notation). Store this sequence 3182 1.1 mrg in FULL_SEQ. 3183 1.1 mrg 3184 1.1 mrg For example, if we have: 3185 1.1 mrg 3186 1.1 mrg struct foo { struct bar s; ... } (*a)[10], (*b)[10]; 3187 1.1 mrg 3188 1.1 mrg A: a[0][i].s.c.d 3189 1.1 mrg B: __real b[0][i].s.e[i].f 3190 1.1 mrg 3191 1.1 mrg (where d is the same type as the real component of f) then the access 3192 1.1 mrg functions would be: 3193 1.1 mrg 3194 1.1 mrg 0 1 2 3 3195 1.1 mrg A: .d .c .s [i] 3196 1.1 mrg 3197 1.1 mrg 0 1 2 3 4 5 3198 1.1 mrg B: __real .f [i] .e .s [i] 3199 1.1 mrg 3200 1.1 mrg The A0/B2 column isn't comparable, since .d is a COMPONENT_REF 3201 1.1 mrg and [i] is an ARRAY_REF. However, the A1/B3 column contains two 3202 1.1 mrg COMPONENT_REF accesses for struct bar, so is comparable. Likewise 3203 1.1 mrg the A2/B4 column contains two COMPONENT_REF accesses for struct foo, 3204 1.1 mrg so is comparable. The A3/B5 column contains two ARRAY_REFs that 3205 1.1 mrg index foo[10] arrays, so is again comparable. The sequence is 3206 1.1 mrg therefore: 3207 1.1 mrg 3208 1.1 mrg A: [1, 3] (i.e. [i].s.c) 3209 1.1 mrg B: [3, 5] (i.e. [i].s.e) 3210 1.1 mrg 3211 1.1 mrg Also look for sequences of component references whose access 3212 1.1 mrg functions are comparable and whose enclosing objects have the same 3213 1.1 mrg RECORD_TYPE. Store this sequence in STRUCT_SEQ. 
In the above 3214 1.1 mrg example, STRUCT_SEQ would be: 3215 1.1 mrg 3216 1.1 mrg A: [1, 2] (i.e. s.c) 3217 1.1 mrg B: [3, 4] (i.e. s.e) */ 3218 1.1 mrg while (index_a < num_dimensions_a && index_b < num_dimensions_b) 3219 1.1 mrg { 3220 1.1 mrg /* The alternate indices form always has a single dimension 3221 1.1 mrg with unconstrained base. */ 3222 1.1 mrg gcc_assert (!use_alt_indices); 3223 1.1 mrg 3224 1.1 mrg /* REF_A and REF_B must be one of the component access types 3225 1.1 mrg allowed by dr_analyze_indices. */ 3226 1.1 mrg gcc_checking_assert (access_fn_component_p (ref_a)); 3227 1.1 mrg gcc_checking_assert (access_fn_component_p (ref_b)); 3228 1.1 mrg 3229 1.1 mrg /* Get the immediately-enclosing objects for REF_A and REF_B, 3230 1.1 mrg i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A) 3231 1.1 mrg and DR_ACCESS_FN (B, INDEX_B). */ 3232 1.1 mrg tree object_a = TREE_OPERAND (ref_a, 0); 3233 1.1 mrg tree object_b = TREE_OPERAND (ref_b, 0); 3234 1.1 mrg 3235 1.1 mrg tree type_a = TREE_TYPE (object_a); 3236 1.1 mrg tree type_b = TREE_TYPE (object_b); 3237 1.1 mrg if (access_fn_components_comparable_p (ref_a, ref_b)) 3238 1.1 mrg { 3239 1.1 mrg /* This pair of component accesses is comparable for dependence 3240 1.1 mrg analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and 3241 1.1 mrg DR_ACCESS_FN (B, INDEX_B) in the sequence. */ 3242 1.1 mrg if (full_seq.start_a + full_seq.length != index_a 3243 1.1 mrg || full_seq.start_b + full_seq.length != index_b) 3244 1.1 mrg { 3245 1.1 mrg /* The accesses don't extend the current sequence, 3246 1.1 mrg so start a new one here. */ 3247 1.1 mrg full_seq.start_a = index_a; 3248 1.1 mrg full_seq.start_b = index_b; 3249 1.1 mrg full_seq.length = 0; 3250 1.1 mrg } 3251 1.1 mrg 3252 1.1 mrg /* Add this pair of references to the sequence. 
*/ 3253 1.1 mrg full_seq.length += 1; 3254 1.1 mrg full_seq.object_a = object_a; 3255 1.1 mrg full_seq.object_b = object_b; 3256 1.1 mrg 3257 1.1 mrg /* If the enclosing objects are structures (and thus have the 3258 1.1 mrg same RECORD_TYPE), record the new sequence in STRUCT_SEQ. */ 3259 1.1 mrg if (TREE_CODE (type_a) == RECORD_TYPE) 3260 1.1 mrg struct_seq = full_seq; 3261 1.1 mrg 3262 1.1 mrg /* Move to the next containing reference for both A and B. */ 3263 1.1 mrg ref_a = object_a; 3264 1.1 mrg ref_b = object_b; 3265 1.1 mrg index_a += 1; 3266 1.1 mrg index_b += 1; 3267 1.1 mrg continue; 3268 1.1 mrg } 3269 1.1 mrg 3270 1.1 mrg /* Try to approach equal type sizes. */ 3271 1.1 mrg if (!COMPLETE_TYPE_P (type_a) 3272 1.1 mrg || !COMPLETE_TYPE_P (type_b) 3273 1.1 mrg || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a)) 3274 1.1 mrg || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b))) 3275 1.1 mrg break; 3276 1.1 mrg 3277 1.1 mrg unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a)); 3278 1.1 mrg unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b)); 3279 1.1 mrg if (size_a <= size_b) 3280 1.1 mrg { 3281 1.1 mrg index_a += 1; 3282 1.1 mrg ref_a = object_a; 3283 1.1 mrg } 3284 1.1 mrg if (size_b <= size_a) 3285 1.1 mrg { 3286 1.1 mrg index_b += 1; 3287 1.1 mrg ref_b = object_b; 3288 1.1 mrg } 3289 1.1 mrg } 3290 1.1 mrg 3291 1.1 mrg /* See whether FULL_SEQ ends at the base and whether the two bases 3292 1.1 mrg are equal. We do not care about TBAA or alignment info so we can 3293 1.1 mrg use OEP_ADDRESS_OF to avoid false negatives. 
*/ 3294 1.1 mrg tree base_a = indices_a->base_object; 3295 1.1 mrg tree base_b = indices_b->base_object; 3296 1.1 mrg bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a 3297 1.1 mrg && full_seq.start_b + full_seq.length == num_dimensions_b 3298 1.1 mrg && (indices_a->unconstrained_base 3299 1.1 mrg == indices_b->unconstrained_base) 3300 1.1 mrg && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF) 3301 1.1 mrg && (types_compatible_p (TREE_TYPE (base_a), 3302 1.1 mrg TREE_TYPE (base_b)) 3303 1.1 mrg || (!base_supports_access_fn_components_p (base_a) 3304 1.1 mrg && !base_supports_access_fn_components_p (base_b) 3305 1.1 mrg && operand_equal_p 3306 1.1 mrg (TYPE_SIZE (TREE_TYPE (base_a)), 3307 1.1 mrg TYPE_SIZE (TREE_TYPE (base_b)), 0))) 3308 1.1 mrg && (!loop_nest.exists () 3309 1.1 mrg || (object_address_invariant_in_loop_p 3310 1.1 mrg (loop_nest[0], base_a)))); 3311 1.1 mrg 3312 1.1 mrg /* If the bases are the same, we can include the base variation too. 3313 1.1 mrg E.g. the b accesses in: 3314 1.1 mrg 3315 1.1 mrg for (int i = 0; i < n; ++i) 3316 1.1 mrg b[i + 4][0] = b[i][0]; 3317 1.1 mrg 3318 1.1 mrg have a definite dependence distance of 4, while for: 3319 1.1 mrg 3320 1.1 mrg for (int i = 0; i < n; ++i) 3321 1.1 mrg a[i + 4][0] = b[i][0]; 3322 1.1 mrg 3323 1.1 mrg the dependence distance depends on the gap between a and b. 3324 1.1 mrg 3325 1.1 mrg If the bases are different then we can only rely on the sequence 3326 1.1 mrg rooted at a structure access, since arrays are allowed to overlap 3327 1.1 mrg arbitrarily and change shape arbitrarily. E.g. we treat this as 3328 1.1 mrg valid code: 3329 1.1 mrg 3330 1.1 mrg int a[256]; 3331 1.1 mrg ... 3332 1.1 mrg ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0]; 3333 1.1 mrg 3334 1.1 mrg where two lvalues with the same int[4][3] type overlap, and where 3335 1.1 mrg both lvalues are distinct from the object's declared type. 
*/ 3336 1.1 mrg if (same_base_p) 3337 1.1 mrg { 3338 1.1 mrg if (indices_a->unconstrained_base) 3339 1.1 mrg full_seq.length += 1; 3340 1.1 mrg } 3341 1.1 mrg else 3342 1.1 mrg full_seq = struct_seq; 3343 1.1 mrg 3344 1.1 mrg /* Punt if we didn't find a suitable sequence. */ 3345 1.1 mrg if (full_seq.length == 0) 3346 1.1 mrg { 3347 1.1 mrg if (use_alt_indices 3348 1.1 mrg || (TREE_CODE (DR_REF (a)) == MEM_REF 3349 1.1 mrg && TREE_CODE (DR_REF (b)) == MEM_REF) 3350 1.1 mrg || may_be_nonaddressable_p (DR_REF (a)) 3351 1.1 mrg || may_be_nonaddressable_p (DR_REF (b))) 3352 1.1 mrg { 3353 1.1 mrg /* Fully exhausted possibilities. */ 3354 1.1 mrg DDR_ARE_DEPENDENT (res) = chrec_dont_know; 3355 1.1 mrg return res; 3356 1.1 mrg } 3357 1.1 mrg 3358 1.1 mrg /* Try evaluating both DRs as dereferences of pointers. */ 3359 1.1 mrg if (!a->alt_indices.base_object 3360 1.1 mrg && TREE_CODE (DR_REF (a)) != MEM_REF) 3361 1.1 mrg { 3362 1.1 mrg tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)), 3363 1.1 mrg build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)), 3364 1.1 mrg build_int_cst 3365 1.1 mrg (reference_alias_ptr_type (DR_REF (a)), 0)); 3366 1.1 mrg dr_analyze_indices (&a->alt_indices, alt_ref, 3367 1.1 mrg loop_preheader_edge (loop_nest[0]), 3368 1.1 mrg loop_containing_stmt (DR_STMT (a))); 3369 1.1 mrg } 3370 1.1 mrg if (!b->alt_indices.base_object 3371 1.1 mrg && TREE_CODE (DR_REF (b)) != MEM_REF) 3372 1.1 mrg { 3373 1.1 mrg tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)), 3374 1.1 mrg build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)), 3375 1.1 mrg build_int_cst 3376 1.1 mrg (reference_alias_ptr_type (DR_REF (b)), 0)); 3377 1.1 mrg dr_analyze_indices (&b->alt_indices, alt_ref, 3378 1.1 mrg loop_preheader_edge (loop_nest[0]), 3379 1.1 mrg loop_containing_stmt (DR_STMT (b))); 3380 1.1 mrg } 3381 1.1 mrg return initialize_data_dependence_relation (res, loop_nest, true); 3382 1.1 mrg } 3383 1.1 mrg 3384 1.1 mrg if (!same_base_p) 3385 1.1 mrg { 3386 1.1 mrg /* Partial overlap 
is possible for different bases when strict aliasing 3387 1.1 mrg is not in effect. It's also possible if either base involves a union 3388 1.1 mrg access; e.g. for: 3389 1.1 mrg 3390 1.1 mrg struct s1 { int a[2]; }; 3391 1.1 mrg struct s2 { struct s1 b; int c; }; 3392 1.1 mrg struct s3 { int d; struct s1 e; }; 3393 1.1 mrg union u { struct s2 f; struct s3 g; } *p, *q; 3394 1.1 mrg 3395 1.1 mrg the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at 3396 1.1 mrg "p->g.e" (base "p->g") and might partially overlap the s1 at 3397 1.1 mrg "q->g.e" (base "q->g"). */ 3398 1.1 mrg if (!flag_strict_aliasing 3399 1.1 mrg || ref_contains_union_access_p (full_seq.object_a) 3400 1.1 mrg || ref_contains_union_access_p (full_seq.object_b)) 3401 1.1 mrg { 3402 1.1 mrg DDR_ARE_DEPENDENT (res) = chrec_dont_know; 3403 1.1 mrg return res; 3404 1.1 mrg } 3405 1.1 mrg 3406 1.1 mrg DDR_COULD_BE_INDEPENDENT_P (res) = true; 3407 1.1 mrg if (!loop_nest.exists () 3408 1.1 mrg || (object_address_invariant_in_loop_p (loop_nest[0], 3409 1.1 mrg full_seq.object_a) 3410 1.1 mrg && object_address_invariant_in_loop_p (loop_nest[0], 3411 1.1 mrg full_seq.object_b))) 3412 1.1 mrg { 3413 1.1 mrg DDR_OBJECT_A (res) = full_seq.object_a; 3414 1.1 mrg DDR_OBJECT_B (res) = full_seq.object_b; 3415 1.1 mrg } 3416 1.1 mrg } 3417 1.1 mrg 3418 1.1 mrg DDR_AFFINE_P (res) = true; 3419 1.1 mrg DDR_ARE_DEPENDENT (res) = NULL_TREE; 3420 1.1 mrg DDR_SUBSCRIPTS (res).create (full_seq.length); 3421 1.1 mrg DDR_LOOP_NEST (res) = loop_nest; 3422 1.1 mrg DDR_SELF_REFERENCE (res) = false; 3423 1.1 mrg 3424 1.1 mrg for (i = 0; i < full_seq.length; ++i) 3425 1.1 mrg { 3426 1.1 mrg struct subscript *subscript; 3427 1.1 mrg 3428 1.1 mrg subscript = XNEW (struct subscript); 3429 1.1 mrg SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i]; 3430 1.1 mrg SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i]; 3431 1.1 mrg SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known (); 
3432 1.1 mrg SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known (); 3433 1.1 mrg SUB_LAST_CONFLICT (subscript) = chrec_dont_know; 3434 1.1 mrg SUB_DISTANCE (subscript) = chrec_dont_know; 3435 1.1 mrg DDR_SUBSCRIPTS (res).safe_push (subscript); 3436 1.1 mrg } 3437 1.1 mrg 3438 1.1 mrg return res; 3439 1.1 mrg } 3440 1.1 mrg 3441 1.1 mrg /* Initialize a data dependence relation between data accesses A and 3442 1.1 mrg B. NB_LOOPS is the number of loops surrounding the references: the 3443 1.1 mrg size of the classic distance/direction vectors. */ 3444 1.1 mrg 3445 1.1 mrg struct data_dependence_relation * 3446 1.1 mrg initialize_data_dependence_relation (struct data_reference *a, 3447 1.1 mrg struct data_reference *b, 3448 1.1 mrg vec<loop_p> loop_nest) 3449 1.1 mrg { 3450 1.1 mrg data_dependence_relation *res = XCNEW (struct data_dependence_relation); 3451 1.1 mrg DDR_A (res) = a; 3452 1.1 mrg DDR_B (res) = b; 3453 1.1 mrg DDR_LOOP_NEST (res).create (0); 3454 1.1 mrg DDR_SUBSCRIPTS (res).create (0); 3455 1.1 mrg DDR_DIR_VECTS (res).create (0); 3456 1.1 mrg DDR_DIST_VECTS (res).create (0); 3457 1.1 mrg 3458 1.1 mrg if (a == NULL || b == NULL) 3459 1.1 mrg { 3460 1.1 mrg DDR_ARE_DEPENDENT (res) = chrec_dont_know; 3461 1.1 mrg return res; 3462 1.1 mrg } 3463 1.1 mrg 3464 1.1 mrg /* If the data references do not alias, then they are independent. */ 3465 1.1 mrg if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL)) 3466 1.1 mrg { 3467 1.1 mrg DDR_ARE_DEPENDENT (res) = chrec_known; 3468 1.1 mrg return res; 3469 1.1 mrg } 3470 1.1 mrg 3471 1.1 mrg return initialize_data_dependence_relation (res, loop_nest, false); 3472 1.1 mrg } 3473 1.1 mrg 3474 1.1 mrg 3475 1.1 mrg /* Frees memory used by the conflict function F. 
*/ 3476 1.1 mrg 3477 1.1 mrg static void 3478 1.1 mrg free_conflict_function (conflict_function *f) 3479 1.1 mrg { 3480 1.1 mrg unsigned i; 3481 1.1 mrg 3482 1.1 mrg if (CF_NONTRIVIAL_P (f)) 3483 1.1 mrg { 3484 1.1 mrg for (i = 0; i < f->n; i++) 3485 1.1 mrg affine_fn_free (f->fns[i]); 3486 1.1 mrg } 3487 1.1 mrg free (f); 3488 1.1 mrg } 3489 1.1 mrg 3490 1.1 mrg /* Frees memory used by SUBSCRIPTS. */ 3491 1.1 mrg 3492 1.1 mrg static void 3493 1.1 mrg free_subscripts (vec<subscript_p> subscripts) 3494 1.1 mrg { 3495 1.1 mrg for (subscript_p s : subscripts) 3496 1.1 mrg { 3497 1.1 mrg free_conflict_function (s->conflicting_iterations_in_a); 3498 1.1 mrg free_conflict_function (s->conflicting_iterations_in_b); 3499 1.1 mrg free (s); 3500 1.1 mrg } 3501 1.1 mrg subscripts.release (); 3502 1.1 mrg } 3503 1.1 mrg 3504 1.1 mrg /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap 3505 1.1 mrg description. */ 3506 1.1 mrg 3507 1.1 mrg static inline void 3508 1.1 mrg finalize_ddr_dependent (struct data_dependence_relation *ddr, 3509 1.1 mrg tree chrec) 3510 1.1 mrg { 3511 1.1 mrg DDR_ARE_DEPENDENT (ddr) = chrec; 3512 1.1 mrg free_subscripts (DDR_SUBSCRIPTS (ddr)); 3513 1.1 mrg DDR_SUBSCRIPTS (ddr).create (0); 3514 1.1 mrg } 3515 1.1 mrg 3516 1.1 mrg /* The dependence relation DDR cannot be represented by a distance 3517 1.1 mrg vector. */ 3518 1.1 mrg 3519 1.1 mrg static inline void 3520 1.1 mrg non_affine_dependence_relation (struct data_dependence_relation *ddr) 3521 1.1 mrg { 3522 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 3523 1.1 mrg fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n"); 3524 1.1 mrg 3525 1.1 mrg DDR_AFFINE_P (ddr) = false; 3526 1.1 mrg } 3527 1.1 mrg 3528 1.1 mrg 3529 1.1 mrg 3531 1.1 mrg /* This section contains the classic Banerjee tests. 
*/ 3532 1.1 mrg 3533 1.1 mrg /* Returns true iff CHREC_A and CHREC_B are not dependent on any index 3534 1.1 mrg variables, i.e., if the ZIV (Zero Index Variable) test is true. */ 3535 1.1 mrg 3536 1.1 mrg static inline bool 3537 1.1 mrg ziv_subscript_p (const_tree chrec_a, const_tree chrec_b) 3538 1.1 mrg { 3539 1.1 mrg return (evolution_function_is_constant_p (chrec_a) 3540 1.1 mrg && evolution_function_is_constant_p (chrec_b)); 3541 1.1 mrg } 3542 1.1 mrg 3543 1.1 mrg /* Returns true iff CHREC_A and CHREC_B are dependent on an index 3544 1.1 mrg variable, i.e., if the SIV (Single Index Variable) test is true. */ 3545 1.1 mrg 3546 1.1 mrg static bool 3547 1.1 mrg siv_subscript_p (const_tree chrec_a, const_tree chrec_b) 3548 1.1 mrg { 3549 1.1 mrg if ((evolution_function_is_constant_p (chrec_a) 3550 1.1 mrg && evolution_function_is_univariate_p (chrec_b)) 3551 1.1 mrg || (evolution_function_is_constant_p (chrec_b) 3552 1.1 mrg && evolution_function_is_univariate_p (chrec_a))) 3553 1.1 mrg return true; 3554 1.1 mrg 3555 1.1 mrg if (evolution_function_is_univariate_p (chrec_a) 3556 1.1 mrg && evolution_function_is_univariate_p (chrec_b)) 3557 1.1 mrg { 3558 1.1 mrg switch (TREE_CODE (chrec_a)) 3559 1.1 mrg { 3560 1.1 mrg case POLYNOMIAL_CHREC: 3561 1.1 mrg switch (TREE_CODE (chrec_b)) 3562 1.1 mrg { 3563 1.1 mrg case POLYNOMIAL_CHREC: 3564 1.1 mrg if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b)) 3565 1.1 mrg return false; 3566 1.1 mrg /* FALLTHRU */ 3567 1.1 mrg 3568 1.1 mrg default: 3569 1.1 mrg return true; 3570 1.1 mrg } 3571 1.1 mrg 3572 1.1 mrg default: 3573 1.1 mrg return true; 3574 1.1 mrg } 3575 1.1 mrg } 3576 1.1 mrg 3577 1.1 mrg return false; 3578 1.1 mrg } 3579 1.1 mrg 3580 1.1 mrg /* Creates a conflict function with N dimensions. The affine functions 3581 1.1 mrg in each dimension follow. */ 3582 1.1 mrg 3583 1.1 mrg static conflict_function * 3584 1.1 mrg conflict_fn (unsigned n, ...) 
3585 1.1 mrg { 3586 1.1 mrg unsigned i; 3587 1.1 mrg conflict_function *ret = XCNEW (conflict_function); 3588 1.1 mrg va_list ap; 3589 1.1 mrg 3590 1.1 mrg gcc_assert (n > 0 && n <= MAX_DIM); 3591 1.1 mrg va_start (ap, n); 3592 1.1 mrg 3593 1.1 mrg ret->n = n; 3594 1.1 mrg for (i = 0; i < n; i++) 3595 1.1 mrg ret->fns[i] = va_arg (ap, affine_fn); 3596 1.1 mrg va_end (ap); 3597 1.1 mrg 3598 1.1 mrg return ret; 3599 1.1 mrg } 3600 1.1 mrg 3601 1.1 mrg /* Returns constant affine function with value CST. */ 3602 1.1 mrg 3603 1.1 mrg static affine_fn 3604 1.1 mrg affine_fn_cst (tree cst) 3605 1.1 mrg { 3606 1.1 mrg affine_fn fn; 3607 1.1 mrg fn.create (1); 3608 1.1 mrg fn.quick_push (cst); 3609 1.1 mrg return fn; 3610 1.1 mrg } 3611 1.1 mrg 3612 1.1 mrg /* Returns affine function with single variable, CST + COEF * x_DIM. */ 3613 1.1 mrg 3614 1.1 mrg static affine_fn 3615 1.1 mrg affine_fn_univar (tree cst, unsigned dim, tree coef) 3616 1.1 mrg { 3617 1.1 mrg affine_fn fn; 3618 1.1 mrg fn.create (dim + 1); 3619 1.1 mrg unsigned i; 3620 1.1 mrg 3621 1.1 mrg gcc_assert (dim > 0); 3622 1.1 mrg fn.quick_push (cst); 3623 1.1 mrg for (i = 1; i < dim; i++) 3624 1.1 mrg fn.quick_push (integer_zero_node); 3625 1.1 mrg fn.quick_push (coef); 3626 1.1 mrg return fn; 3627 1.1 mrg } 3628 1.1 mrg 3629 1.1 mrg /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and 3630 1.1 mrg *OVERLAPS_B are initialized to the functions that describe the 3631 1.1 mrg relation between the elements accessed twice by CHREC_A and 3632 1.1 mrg CHREC_B. For k >= 0, the following property is verified: 3633 1.1 mrg 3634 1.1 mrg CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). 
*/ 3635 1.1 mrg 3636 1.1 mrg static void 3637 1.1 mrg analyze_ziv_subscript (tree chrec_a, 3638 1.1 mrg tree chrec_b, 3639 1.1 mrg conflict_function **overlaps_a, 3640 1.1 mrg conflict_function **overlaps_b, 3641 1.1 mrg tree *last_conflicts) 3642 1.1 mrg { 3643 1.1 mrg tree type, difference; 3644 1.1 mrg dependence_stats.num_ziv++; 3645 1.1 mrg 3646 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 3647 1.1 mrg fprintf (dump_file, "(analyze_ziv_subscript \n"); 3648 1.1 mrg 3649 1.1 mrg type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b)); 3650 1.1 mrg chrec_a = chrec_convert (type, chrec_a, NULL); 3651 1.1 mrg chrec_b = chrec_convert (type, chrec_b, NULL); 3652 1.1 mrg difference = chrec_fold_minus (type, chrec_a, chrec_b); 3653 1.1 mrg 3654 1.1 mrg switch (TREE_CODE (difference)) 3655 1.1 mrg { 3656 1.1 mrg case INTEGER_CST: 3657 1.1 mrg if (integer_zerop (difference)) 3658 1.1 mrg { 3659 1.1 mrg /* The difference is equal to zero: the accessed index 3660 1.1 mrg overlaps for each iteration in the loop. */ 3661 1.1 mrg *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 3662 1.1 mrg *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node)); 3663 1.1 mrg *last_conflicts = chrec_dont_know; 3664 1.1 mrg dependence_stats.num_ziv_dependent++; 3665 1.1 mrg } 3666 1.1 mrg else 3667 1.1 mrg { 3668 1.1 mrg /* The accesses do not overlap. */ 3669 1.1 mrg *overlaps_a = conflict_fn_no_dependence (); 3670 1.1 mrg *overlaps_b = conflict_fn_no_dependence (); 3671 1.1 mrg *last_conflicts = integer_zero_node; 3672 1.1 mrg dependence_stats.num_ziv_independent++; 3673 1.1 mrg } 3674 1.1 mrg break; 3675 1.1 mrg 3676 1.1 mrg default: 3677 1.1 mrg /* We're not sure whether the indexes overlap. For the moment, 3678 1.1 mrg conservatively answer "don't know". 
*/ 3679 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 3680 1.1 mrg fprintf (dump_file, "ziv test failed: difference is non-integer.\n"); 3681 1.1 mrg 3682 1.1 mrg *overlaps_a = conflict_fn_not_known (); 3683 1.1 mrg *overlaps_b = conflict_fn_not_known (); 3684 1.1 mrg *last_conflicts = chrec_dont_know; 3685 1.1 mrg dependence_stats.num_ziv_unimplemented++; 3686 1.1 mrg break; 3687 1.1 mrg } 3688 1.1 mrg 3689 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 3690 1.1 mrg fprintf (dump_file, ")\n"); 3691 1.1 mrg } 3692 1.1 mrg 3693 1.1 mrg /* Similar to max_stmt_executions_int, but returns the bound as a tree, 3694 1.1 mrg and only if it fits to the int type. If this is not the case, or the 3695 1.1 mrg bound on the number of iterations of LOOP could not be derived, returns 3696 1.1 mrg chrec_dont_know. */ 3697 1.1 mrg 3698 1.1 mrg static tree 3699 1.1 mrg max_stmt_executions_tree (class loop *loop) 3700 1.1 mrg { 3701 1.1 mrg widest_int nit; 3702 1.1 mrg 3703 1.1 mrg if (!max_stmt_executions (loop, &nit)) 3704 1.1 mrg return chrec_dont_know; 3705 1.1 mrg 3706 1.1 mrg if (!wi::fits_to_tree_p (nit, unsigned_type_node)) 3707 1.1 mrg return chrec_dont_know; 3708 1.1 mrg 3709 1.1 mrg return wide_int_to_tree (unsigned_type_node, nit); 3710 1.1 mrg } 3711 1.1 mrg 3712 1.1 mrg /* Determine whether the CHREC is always positive/negative. If the expression 3713 1.1 mrg cannot be statically analyzed, return false, otherwise set the answer into 3714 1.1 mrg VALUE. */ 3715 1.1 mrg 3716 1.1 mrg static bool 3717 1.1 mrg chrec_is_positive (tree chrec, bool *value) 3718 1.1 mrg { 3719 1.1 mrg bool value0, value1, value2; 3720 1.1 mrg tree end_value, nb_iter; 3721 1.1 mrg 3722 1.1 mrg switch (TREE_CODE (chrec)) 3723 1.1 mrg { 3724 1.1 mrg case POLYNOMIAL_CHREC: 3725 1.1 mrg if (!chrec_is_positive (CHREC_LEFT (chrec), &value0) 3726 1.1 mrg || !chrec_is_positive (CHREC_RIGHT (chrec), &value1)) 3727 1.1 mrg return false; 3728 1.1 mrg 3729 1.1 mrg /* FIXME -- overflows. 
*/ 3730 1.1 mrg if (value0 == value1) 3731 1.1 mrg { 3732 1.1 mrg *value = value0; 3733 1.1 mrg return true; 3734 1.1 mrg } 3735 1.1 mrg 3736 1.1 mrg /* Otherwise the chrec is under the form: "{-197, +, 2}_1", 3737 1.1 mrg and the proof consists in showing that the sign never 3738 1.1 mrg changes during the execution of the loop, from 0 to 3739 1.1 mrg loop->nb_iterations. */ 3740 1.1 mrg if (!evolution_function_is_affine_p (chrec)) 3741 1.1 mrg return false; 3742 1.1 mrg 3743 1.1 mrg nb_iter = number_of_latch_executions (get_chrec_loop (chrec)); 3744 1.1 mrg if (chrec_contains_undetermined (nb_iter)) 3745 1.1 mrg return false; 3746 1.1 mrg 3747 1.1 mrg #if 0 3748 1.1 mrg /* TODO -- If the test is after the exit, we may decrease the number of 3749 1.1 mrg iterations by one. */ 3750 1.1 mrg if (after_exit) 3751 1.1 mrg nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1)); 3752 1.1 mrg #endif 3753 1.1 mrg 3754 1.1 mrg end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter); 3755 1.1 mrg 3756 1.1 mrg if (!chrec_is_positive (end_value, &value2)) 3757 1.1 mrg return false; 3758 1.1 mrg 3759 1.1 mrg *value = value0; 3760 1.1 mrg return value0 == value1; 3761 1.1 mrg 3762 1.1 mrg case INTEGER_CST: 3763 1.1 mrg switch (tree_int_cst_sgn (chrec)) 3764 1.1 mrg { 3765 1.1 mrg case -1: 3766 1.1 mrg *value = false; 3767 1.1 mrg break; 3768 1.1 mrg case 1: 3769 1.1 mrg *value = true; 3770 1.1 mrg break; 3771 1.1 mrg default: 3772 1.1 mrg return false; 3773 1.1 mrg } 3774 1.1 mrg return true; 3775 1.1 mrg 3776 1.1 mrg default: 3777 1.1 mrg return false; 3778 1.1 mrg } 3779 1.1 mrg } 3780 1.1 mrg 3781 1.1 mrg 3782 1.1 mrg /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a 3783 1.1 mrg constant, and CHREC_B is an affine function. *OVERLAPS_A and 3784 1.1 mrg *OVERLAPS_B are initialized to the functions that describe the 3785 1.1 mrg relation between the elements accessed twice by CHREC_A and 3786 1.1 mrg CHREC_B. 
For k >= 0, the following property is verified: 3787 1.1 mrg 3788 1.1 mrg CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */ 3789 1.1 mrg 3790 1.1 mrg static void 3791 1.1 mrg analyze_siv_subscript_cst_affine (tree chrec_a, 3792 1.1 mrg tree chrec_b, 3793 1.1 mrg conflict_function **overlaps_a, 3794 1.1 mrg conflict_function **overlaps_b, 3795 1.1 mrg tree *last_conflicts) 3796 1.1 mrg { 3797 1.1 mrg bool value0, value1, value2; 3798 1.1 mrg tree type, difference, tmp; 3799 1.1 mrg 3800 1.1 mrg type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b)); 3801 1.1 mrg chrec_a = chrec_convert (type, chrec_a, NULL); 3802 1.1 mrg chrec_b = chrec_convert (type, chrec_b, NULL); 3803 1.1 mrg difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a); 3804 1.1 mrg 3805 1.1 mrg /* Special case overlap in the first iteration. */ 3806 1.1 mrg if (integer_zerop (difference)) 3807 1.1 mrg { 3808 1.1 mrg *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 3809 1.1 mrg *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node)); 3810 1.1 mrg *last_conflicts = integer_one_node; 3811 1.1 mrg return; 3812 1.1 mrg } 3813 1.1 mrg 3814 1.1 mrg if (!chrec_is_positive (initial_condition (difference), &value0)) 3815 1.1 mrg { 3816 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 3817 1.1 mrg fprintf (dump_file, "siv test failed: chrec is not positive.\n"); 3818 1.1 mrg 3819 1.1 mrg dependence_stats.num_siv_unimplemented++; 3820 1.1 mrg *overlaps_a = conflict_fn_not_known (); 3821 1.1 mrg *overlaps_b = conflict_fn_not_known (); 3822 1.1 mrg *last_conflicts = chrec_dont_know; 3823 1.1 mrg return; 3824 1.1 mrg } 3825 1.1 mrg else 3826 1.1 mrg { 3827 1.1 mrg if (value0 == false) 3828 1.1 mrg { 3829 1.1 mrg if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC 3830 1.1 mrg || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1)) 3831 1.1 mrg { 3832 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 3833 1.1 mrg fprintf (dump_file, "siv test 
failed: chrec not positive.\n"); 3834 1.1 mrg 3835 1.1 mrg *overlaps_a = conflict_fn_not_known (); 3836 1.1 mrg *overlaps_b = conflict_fn_not_known (); 3837 1.1 mrg *last_conflicts = chrec_dont_know; 3838 1.1 mrg dependence_stats.num_siv_unimplemented++; 3839 1.1 mrg return; 3840 1.1 mrg } 3841 1.1 mrg else 3842 1.1 mrg { 3843 1.1 mrg if (value1 == true) 3844 1.1 mrg { 3845 1.1 mrg /* Example: 3846 1.1 mrg chrec_a = 12 3847 1.1 mrg chrec_b = {10, +, 1} 3848 1.1 mrg */ 3849 1.1 mrg 3850 1.1 mrg if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference)) 3851 1.1 mrg { 3852 1.1 mrg HOST_WIDE_INT numiter; 3853 1.1 mrg class loop *loop = get_chrec_loop (chrec_b); 3854 1.1 mrg 3855 1.1 mrg *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 3856 1.1 mrg tmp = fold_build2 (EXACT_DIV_EXPR, type, 3857 1.1 mrg fold_build1 (ABS_EXPR, type, difference), 3858 1.1 mrg CHREC_RIGHT (chrec_b)); 3859 1.1 mrg *overlaps_b = conflict_fn (1, affine_fn_cst (tmp)); 3860 1.1 mrg *last_conflicts = integer_one_node; 3861 1.1 mrg 3862 1.1 mrg 3863 1.1 mrg /* Perform weak-zero siv test to see if overlap is 3864 1.1 mrg outside the loop bounds. */ 3865 1.1 mrg numiter = max_stmt_executions_int (loop); 3866 1.1 mrg 3867 1.1 mrg if (numiter >= 0 3868 1.1 mrg && compare_tree_int (tmp, numiter) > 0) 3869 1.1 mrg { 3870 1.1 mrg free_conflict_function (*overlaps_a); 3871 1.1 mrg free_conflict_function (*overlaps_b); 3872 1.1 mrg *overlaps_a = conflict_fn_no_dependence (); 3873 1.1 mrg *overlaps_b = conflict_fn_no_dependence (); 3874 1.1 mrg *last_conflicts = integer_zero_node; 3875 1.1 mrg dependence_stats.num_siv_independent++; 3876 1.1 mrg return; 3877 1.1 mrg } 3878 1.1 mrg dependence_stats.num_siv_dependent++; 3879 1.1 mrg return; 3880 1.1 mrg } 3881 1.1 mrg 3882 1.1 mrg /* When the step does not divide the difference, there are 3883 1.1 mrg no overlaps. 
*/ 3884 1.1 mrg else 3885 1.1 mrg { 3886 1.1 mrg *overlaps_a = conflict_fn_no_dependence (); 3887 1.1 mrg *overlaps_b = conflict_fn_no_dependence (); 3888 1.1 mrg *last_conflicts = integer_zero_node; 3889 1.1 mrg dependence_stats.num_siv_independent++; 3890 1.1 mrg return; 3891 1.1 mrg } 3892 1.1 mrg } 3893 1.1 mrg 3894 1.1 mrg else 3895 1.1 mrg { 3896 1.1 mrg /* Example: 3897 1.1 mrg chrec_a = 12 3898 1.1 mrg chrec_b = {10, +, -1} 3899 1.1 mrg 3900 1.1 mrg In this case, chrec_a will not overlap with chrec_b. */ 3901 1.1 mrg *overlaps_a = conflict_fn_no_dependence (); 3902 1.1 mrg *overlaps_b = conflict_fn_no_dependence (); 3903 1.1 mrg *last_conflicts = integer_zero_node; 3904 1.1 mrg dependence_stats.num_siv_independent++; 3905 1.1 mrg return; 3906 1.1 mrg } 3907 1.1 mrg } 3908 1.1 mrg } 3909 1.1 mrg else 3910 1.1 mrg { 3911 1.1 mrg if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC 3912 1.1 mrg || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2)) 3913 1.1 mrg { 3914 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 3915 1.1 mrg fprintf (dump_file, "siv test failed: chrec not positive.\n"); 3916 1.1 mrg 3917 1.1 mrg *overlaps_a = conflict_fn_not_known (); 3918 1.1 mrg *overlaps_b = conflict_fn_not_known (); 3919 1.1 mrg *last_conflicts = chrec_dont_know; 3920 1.1 mrg dependence_stats.num_siv_unimplemented++; 3921 1.1 mrg return; 3922 1.1 mrg } 3923 1.1 mrg else 3924 1.1 mrg { 3925 1.1 mrg if (value2 == false) 3926 1.1 mrg { 3927 1.1 mrg /* Example: 3928 1.1 mrg chrec_a = 3 3929 1.1 mrg chrec_b = {10, +, -1} 3930 1.1 mrg */ 3931 1.1 mrg if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference)) 3932 1.1 mrg { 3933 1.1 mrg HOST_WIDE_INT numiter; 3934 1.1 mrg class loop *loop = get_chrec_loop (chrec_b); 3935 1.1 mrg 3936 1.1 mrg *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 3937 1.1 mrg tmp = fold_build2 (EXACT_DIV_EXPR, type, difference, 3938 1.1 mrg CHREC_RIGHT (chrec_b)); 3939 1.1 mrg *overlaps_b = conflict_fn (1, affine_fn_cst (tmp)); 3940 
1.1 mrg *last_conflicts = integer_one_node; 3941 1.1 mrg 3942 1.1 mrg /* Perform weak-zero siv test to see if overlap is 3943 1.1 mrg outside the loop bounds. */ 3944 1.1 mrg numiter = max_stmt_executions_int (loop); 3945 1.1 mrg 3946 1.1 mrg if (numiter >= 0 3947 1.1 mrg && compare_tree_int (tmp, numiter) > 0) 3948 1.1 mrg { 3949 1.1 mrg free_conflict_function (*overlaps_a); 3950 1.1 mrg free_conflict_function (*overlaps_b); 3951 1.1 mrg *overlaps_a = conflict_fn_no_dependence (); 3952 1.1 mrg *overlaps_b = conflict_fn_no_dependence (); 3953 1.1 mrg *last_conflicts = integer_zero_node; 3954 1.1 mrg dependence_stats.num_siv_independent++; 3955 1.1 mrg return; 3956 1.1 mrg } 3957 1.1 mrg dependence_stats.num_siv_dependent++; 3958 1.1 mrg return; 3959 1.1 mrg } 3960 1.1 mrg 3961 1.1 mrg /* When the step does not divide the difference, there 3962 1.1 mrg are no overlaps. */ 3963 1.1 mrg else 3964 1.1 mrg { 3965 1.1 mrg *overlaps_a = conflict_fn_no_dependence (); 3966 1.1 mrg *overlaps_b = conflict_fn_no_dependence (); 3967 1.1 mrg *last_conflicts = integer_zero_node; 3968 1.1 mrg dependence_stats.num_siv_independent++; 3969 1.1 mrg return; 3970 1.1 mrg } 3971 1.1 mrg } 3972 1.1 mrg else 3973 1.1 mrg { 3974 1.1 mrg /* Example: 3975 1.1 mrg chrec_a = 3 3976 1.1 mrg chrec_b = {4, +, 1} 3977 1.1 mrg 3978 1.1 mrg In this case, chrec_a will not overlap with chrec_b. */ 3979 1.1 mrg *overlaps_a = conflict_fn_no_dependence (); 3980 1.1 mrg *overlaps_b = conflict_fn_no_dependence (); 3981 1.1 mrg *last_conflicts = integer_zero_node; 3982 1.1 mrg dependence_stats.num_siv_independent++; 3983 1.1 mrg return; 3984 1.1 mrg } 3985 1.1 mrg } 3986 1.1 mrg } 3987 1.1 mrg } 3988 1.1 mrg } 3989 1.1 mrg 3990 1.1 mrg /* Helper recursive function for initializing the matrix A. Returns 3991 1.1 mrg the initial value of CHREC. 
*/ 3992 1.1 mrg 3993 1.1 mrg static tree 3994 1.1 mrg initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult) 3995 1.1 mrg { 3996 1.1 mrg gcc_assert (chrec); 3997 1.1 mrg 3998 1.1 mrg switch (TREE_CODE (chrec)) 3999 1.1 mrg { 4000 1.1 mrg case POLYNOMIAL_CHREC: 4001 1.1 mrg HOST_WIDE_INT chrec_right; 4002 1.1 mrg if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec))) 4003 1.1 mrg return chrec_dont_know; 4004 1.1 mrg chrec_right = int_cst_value (CHREC_RIGHT (chrec)); 4005 1.1 mrg /* We want to be able to negate without overflow. */ 4006 1.1 mrg if (chrec_right == HOST_WIDE_INT_MIN) 4007 1.1 mrg return chrec_dont_know; 4008 1.1 mrg A[index][0] = mult * chrec_right; 4009 1.1 mrg return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult); 4010 1.1 mrg 4011 1.1 mrg case PLUS_EXPR: 4012 1.1 mrg case MULT_EXPR: 4013 1.1 mrg case MINUS_EXPR: 4014 1.1 mrg { 4015 1.1 mrg tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult); 4016 1.1 mrg tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult); 4017 1.1 mrg 4018 1.1 mrg return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1); 4019 1.1 mrg } 4020 1.1 mrg 4021 1.1 mrg CASE_CONVERT: 4022 1.1 mrg { 4023 1.1 mrg tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult); 4024 1.1 mrg return chrec_convert (chrec_type (chrec), op, NULL); 4025 1.1 mrg } 4026 1.1 mrg 4027 1.1 mrg case BIT_NOT_EXPR: 4028 1.1 mrg { 4029 1.1 mrg /* Handle ~X as -1 - X. */ 4030 1.1 mrg tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult); 4031 1.1 mrg return chrec_fold_op (MINUS_EXPR, chrec_type (chrec), 4032 1.1 mrg build_int_cst (TREE_TYPE (chrec), -1), op); 4033 1.1 mrg } 4034 1.1 mrg 4035 1.1 mrg case INTEGER_CST: 4036 1.1 mrg return cst_and_fits_in_hwi (chrec) ? 
chrec : chrec_dont_know; 4037 1.1 mrg 4038 1.1 mrg default: 4039 1.1 mrg gcc_unreachable (); 4040 1.1 mrg return NULL_TREE; 4041 1.1 mrg } 4042 1.1 mrg } 4043 1.1 mrg 4044 1.1 mrg #define FLOOR_DIV(x,y) ((x) / (y)) 4045 1.1 mrg 4046 1.1 mrg /* Solves the special case of the Diophantine equation: 4047 1.1 mrg | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B) 4048 1.1 mrg 4049 1.1 mrg Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the 4050 1.1 mrg number of iterations that loops X and Y run. The overlaps will be 4051 1.1 mrg constructed as evolutions in dimension DIM. */ 4052 1.1 mrg 4053 1.1 mrg static void 4054 1.1 mrg compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter, 4055 1.1 mrg HOST_WIDE_INT step_a, 4056 1.1 mrg HOST_WIDE_INT step_b, 4057 1.1 mrg affine_fn *overlaps_a, 4058 1.1 mrg affine_fn *overlaps_b, 4059 1.1 mrg tree *last_conflicts, int dim) 4060 1.1 mrg { 4061 1.1 mrg if (((step_a > 0 && step_b > 0) 4062 1.1 mrg || (step_a < 0 && step_b < 0))) 4063 1.1 mrg { 4064 1.1 mrg HOST_WIDE_INT step_overlaps_a, step_overlaps_b; 4065 1.1 mrg HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2; 4066 1.1 mrg 4067 1.1 mrg gcd_steps_a_b = gcd (step_a, step_b); 4068 1.1 mrg step_overlaps_a = step_b / gcd_steps_a_b; 4069 1.1 mrg step_overlaps_b = step_a / gcd_steps_a_b; 4070 1.1 mrg 4071 1.1 mrg if (niter > 0) 4072 1.1 mrg { 4073 1.1 mrg tau2 = FLOOR_DIV (niter, step_overlaps_a); 4074 1.1 mrg tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b)); 4075 1.1 mrg last_conflict = tau2; 4076 1.1 mrg *last_conflicts = build_int_cst (NULL_TREE, last_conflict); 4077 1.1 mrg } 4078 1.1 mrg else 4079 1.1 mrg *last_conflicts = chrec_dont_know; 4080 1.1 mrg 4081 1.1 mrg *overlaps_a = affine_fn_univar (integer_zero_node, dim, 4082 1.1 mrg build_int_cst (NULL_TREE, 4083 1.1 mrg step_overlaps_a)); 4084 1.1 mrg *overlaps_b = affine_fn_univar (integer_zero_node, dim, 4085 1.1 mrg build_int_cst (NULL_TREE, 4086 1.1 mrg step_overlaps_b)); 4087 1.1 mrg } 
4088 1.1 mrg 4089 1.1 mrg else 4090 1.1 mrg { 4091 1.1 mrg *overlaps_a = affine_fn_cst (integer_zero_node); 4092 1.1 mrg *overlaps_b = affine_fn_cst (integer_zero_node); 4093 1.1 mrg *last_conflicts = integer_zero_node; 4094 1.1 mrg } 4095 1.1 mrg } 4096 1.1 mrg 4097 1.1 mrg /* Solves the special case of a Diophantine equation where CHREC_A is 4098 1.1 mrg an affine bivariate function, and CHREC_B is an affine univariate 4099 1.1 mrg function. For example, 4100 1.1 mrg 4101 1.1 mrg | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z 4102 1.1 mrg 4103 1.1 mrg has the following overlapping functions: 4104 1.1 mrg 4105 1.1 mrg | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v 4106 1.1 mrg | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v 4107 1.1 mrg | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v 4108 1.1 mrg 4109 1.1 mrg FORNOW: This is a specialized implementation for a case occurring in 4110 1.1 mrg a common benchmark. Implement the general algorithm. */ 4111 1.1 mrg 4112 1.1 mrg static void 4113 1.1 mrg compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b, 4114 1.1 mrg conflict_function **overlaps_a, 4115 1.1 mrg conflict_function **overlaps_b, 4116 1.1 mrg tree *last_conflicts) 4117 1.1 mrg { 4118 1.1 mrg bool xz_p, yz_p, xyz_p; 4119 1.1 mrg HOST_WIDE_INT step_x, step_y, step_z; 4120 1.1 mrg HOST_WIDE_INT niter_x, niter_y, niter_z, niter; 4121 1.1 mrg affine_fn overlaps_a_xz, overlaps_b_xz; 4122 1.1 mrg affine_fn overlaps_a_yz, overlaps_b_yz; 4123 1.1 mrg affine_fn overlaps_a_xyz, overlaps_b_xyz; 4124 1.1 mrg affine_fn ova1, ova2, ovb; 4125 1.1 mrg tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz; 4126 1.1 mrg 4127 1.1 mrg step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a))); 4128 1.1 mrg step_y = int_cst_value (CHREC_RIGHT (chrec_a)); 4129 1.1 mrg step_z = int_cst_value (CHREC_RIGHT (chrec_b)); 4130 1.1 mrg 4131 1.1 mrg niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a))); 4132 1.1 mrg niter_y = max_stmt_executions_int 
(get_chrec_loop (chrec_a)); 4133 1.1 mrg niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b)); 4134 1.1 mrg 4135 1.1 mrg if (niter_x < 0 || niter_y < 0 || niter_z < 0) 4136 1.1 mrg { 4137 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 4138 1.1 mrg fprintf (dump_file, "overlap steps test failed: no iteration counts.\n"); 4139 1.1 mrg 4140 1.1 mrg *overlaps_a = conflict_fn_not_known (); 4141 1.1 mrg *overlaps_b = conflict_fn_not_known (); 4142 1.1 mrg *last_conflicts = chrec_dont_know; 4143 1.1 mrg return; 4144 1.1 mrg } 4145 1.1 mrg 4146 1.1 mrg niter = MIN (niter_x, niter_z); 4147 1.1 mrg compute_overlap_steps_for_affine_univar (niter, step_x, step_z, 4148 1.1 mrg &overlaps_a_xz, 4149 1.1 mrg &overlaps_b_xz, 4150 1.1 mrg &last_conflicts_xz, 1); 4151 1.1 mrg niter = MIN (niter_y, niter_z); 4152 1.1 mrg compute_overlap_steps_for_affine_univar (niter, step_y, step_z, 4153 1.1 mrg &overlaps_a_yz, 4154 1.1 mrg &overlaps_b_yz, 4155 1.1 mrg &last_conflicts_yz, 2); 4156 1.1 mrg niter = MIN (niter_x, niter_z); 4157 1.1 mrg niter = MIN (niter_y, niter); 4158 1.1 mrg compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z, 4159 1.1 mrg &overlaps_a_xyz, 4160 1.1 mrg &overlaps_b_xyz, 4161 1.1 mrg &last_conflicts_xyz, 3); 4162 1.1 mrg 4163 1.1 mrg xz_p = !integer_zerop (last_conflicts_xz); 4164 1.1 mrg yz_p = !integer_zerop (last_conflicts_yz); 4165 1.1 mrg xyz_p = !integer_zerop (last_conflicts_xyz); 4166 1.1 mrg 4167 1.1 mrg if (xz_p || yz_p || xyz_p) 4168 1.1 mrg { 4169 1.1 mrg ova1 = affine_fn_cst (integer_zero_node); 4170 1.1 mrg ova2 = affine_fn_cst (integer_zero_node); 4171 1.1 mrg ovb = affine_fn_cst (integer_zero_node); 4172 1.1 mrg if (xz_p) 4173 1.1 mrg { 4174 1.1 mrg affine_fn t0 = ova1; 4175 1.1 mrg affine_fn t2 = ovb; 4176 1.1 mrg 4177 1.1 mrg ova1 = affine_fn_plus (ova1, overlaps_a_xz); 4178 1.1 mrg ovb = affine_fn_plus (ovb, overlaps_b_xz); 4179 1.1 mrg affine_fn_free (t0); 4180 1.1 mrg affine_fn_free (t2); 4181 1.1 mrg 
*last_conflicts = last_conflicts_xz; 4182 1.1 mrg } 4183 1.1 mrg if (yz_p) 4184 1.1 mrg { 4185 1.1 mrg affine_fn t0 = ova2; 4186 1.1 mrg affine_fn t2 = ovb; 4187 1.1 mrg 4188 1.1 mrg ova2 = affine_fn_plus (ova2, overlaps_a_yz); 4189 1.1 mrg ovb = affine_fn_plus (ovb, overlaps_b_yz); 4190 1.1 mrg affine_fn_free (t0); 4191 1.1 mrg affine_fn_free (t2); 4192 1.1 mrg *last_conflicts = last_conflicts_yz; 4193 1.1 mrg } 4194 1.1 mrg if (xyz_p) 4195 1.1 mrg { 4196 1.1 mrg affine_fn t0 = ova1; 4197 1.1 mrg affine_fn t2 = ova2; 4198 1.1 mrg affine_fn t4 = ovb; 4199 1.1 mrg 4200 1.1 mrg ova1 = affine_fn_plus (ova1, overlaps_a_xyz); 4201 1.1 mrg ova2 = affine_fn_plus (ova2, overlaps_a_xyz); 4202 1.1 mrg ovb = affine_fn_plus (ovb, overlaps_b_xyz); 4203 1.1 mrg affine_fn_free (t0); 4204 1.1 mrg affine_fn_free (t2); 4205 1.1 mrg affine_fn_free (t4); 4206 1.1 mrg *last_conflicts = last_conflicts_xyz; 4207 1.1 mrg } 4208 1.1 mrg *overlaps_a = conflict_fn (2, ova1, ova2); 4209 1.1 mrg *overlaps_b = conflict_fn (1, ovb); 4210 1.1 mrg } 4211 1.1 mrg else 4212 1.1 mrg { 4213 1.1 mrg *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 4214 1.1 mrg *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node)); 4215 1.1 mrg *last_conflicts = integer_zero_node; 4216 1.1 mrg } 4217 1.1 mrg 4218 1.1 mrg affine_fn_free (overlaps_a_xz); 4219 1.1 mrg affine_fn_free (overlaps_b_xz); 4220 1.1 mrg affine_fn_free (overlaps_a_yz); 4221 1.1 mrg affine_fn_free (overlaps_b_yz); 4222 1.1 mrg affine_fn_free (overlaps_a_xyz); 4223 1.1 mrg affine_fn_free (overlaps_b_xyz); 4224 1.1 mrg } 4225 1.1 mrg 4226 1.1 mrg /* Copy the elements of vector VEC1 with length SIZE to VEC2. */ 4227 1.1 mrg 4228 1.1 mrg static void 4229 1.1 mrg lambda_vector_copy (lambda_vector vec1, lambda_vector vec2, 4230 1.1 mrg int size) 4231 1.1 mrg { 4232 1.1 mrg memcpy (vec2, vec1, size * sizeof (*vec1)); 4233 1.1 mrg } 4234 1.1 mrg 4235 1.1 mrg /* Copy the elements of M x N matrix MAT1 to MAT2. 
*/ 4236 1.1 mrg 4237 1.1 mrg static void 4238 1.1 mrg lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2, 4239 1.1 mrg int m, int n) 4240 1.1 mrg { 4241 1.1 mrg int i; 4242 1.1 mrg 4243 1.1 mrg for (i = 0; i < m; i++) 4244 1.1 mrg lambda_vector_copy (mat1[i], mat2[i], n); 4245 1.1 mrg } 4246 1.1 mrg 4247 1.1 mrg /* Store the N x N identity matrix in MAT. */ 4248 1.1 mrg 4249 1.1 mrg static void 4250 1.1 mrg lambda_matrix_id (lambda_matrix mat, int size) 4251 1.1 mrg { 4252 1.1 mrg int i, j; 4253 1.1 mrg 4254 1.1 mrg for (i = 0; i < size; i++) 4255 1.1 mrg for (j = 0; j < size; j++) 4256 1.1 mrg mat[i][j] = (i == j) ? 1 : 0; 4257 1.1 mrg } 4258 1.1 mrg 4259 1.1 mrg /* Return the index of the first nonzero element of vector VEC1 between 4260 1.1 mrg START and N. We must have START <= N. 4261 1.1 mrg Returns N if VEC1 is the zero vector. */ 4262 1.1 mrg 4263 1.1 mrg static int 4264 1.1 mrg lambda_vector_first_nz (lambda_vector vec1, int n, int start) 4265 1.1 mrg { 4266 1.1 mrg int j = start; 4267 1.1 mrg while (j < n && vec1[j] == 0) 4268 1.1 mrg j++; 4269 1.1 mrg return j; 4270 1.1 mrg } 4271 1.1 mrg 4272 1.1 mrg /* Add a multiple of row R1 of matrix MAT with N columns to row R2: 4273 1.1 mrg R2 = R2 + CONST1 * R1. 
*/ 4274 1.1 mrg 4275 1.1 mrg static bool 4276 1.1 mrg lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, 4277 1.1 mrg lambda_int const1) 4278 1.1 mrg { 4279 1.1 mrg int i; 4280 1.1 mrg 4281 1.1 mrg if (const1 == 0) 4282 1.1 mrg return true; 4283 1.1 mrg 4284 1.1 mrg for (i = 0; i < n; i++) 4285 1.1 mrg { 4286 1.1 mrg bool ovf; 4287 1.1 mrg lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf); 4288 1.1 mrg if (ovf) 4289 1.1 mrg return false; 4290 1.1 mrg lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf); 4291 1.1 mrg if (ovf || tem2 == HOST_WIDE_INT_MIN) 4292 1.1 mrg return false; 4293 1.1 mrg mat[r2][i] = tem2; 4294 1.1 mrg } 4295 1.1 mrg 4296 1.1 mrg return true; 4297 1.1 mrg } 4298 1.1 mrg 4299 1.1 mrg /* Multiply vector VEC1 of length SIZE by a constant CONST1, 4300 1.1 mrg and store the result in VEC2. */ 4301 1.1 mrg 4302 1.1 mrg static void 4303 1.1 mrg lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2, 4304 1.1 mrg int size, lambda_int const1) 4305 1.1 mrg { 4306 1.1 mrg int i; 4307 1.1 mrg 4308 1.1 mrg if (const1 == 0) 4309 1.1 mrg lambda_vector_clear (vec2, size); 4310 1.1 mrg else 4311 1.1 mrg for (i = 0; i < size; i++) 4312 1.1 mrg vec2[i] = const1 * vec1[i]; 4313 1.1 mrg } 4314 1.1 mrg 4315 1.1 mrg /* Negate vector VEC1 with length SIZE and store it in VEC2. */ 4316 1.1 mrg 4317 1.1 mrg static void 4318 1.1 mrg lambda_vector_negate (lambda_vector vec1, lambda_vector vec2, 4319 1.1 mrg int size) 4320 1.1 mrg { 4321 1.1 mrg lambda_vector_mult_const (vec1, vec2, size, -1); 4322 1.1 mrg } 4323 1.1 mrg 4324 1.1 mrg /* Negate row R1 of matrix MAT which has N columns. */ 4325 1.1 mrg 4326 1.1 mrg static void 4327 1.1 mrg lambda_matrix_row_negate (lambda_matrix mat, int n, int r1) 4328 1.1 mrg { 4329 1.1 mrg lambda_vector_negate (mat[r1], mat[r1], n); 4330 1.1 mrg } 4331 1.1 mrg 4332 1.1 mrg /* Return true if two vectors are equal. 
*/ 4333 1.1 mrg 4334 1.1 mrg static bool 4335 1.1 mrg lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size) 4336 1.1 mrg { 4337 1.1 mrg int i; 4338 1.1 mrg for (i = 0; i < size; i++) 4339 1.1 mrg if (vec1[i] != vec2[i]) 4340 1.1 mrg return false; 4341 1.1 mrg return true; 4342 1.1 mrg } 4343 1.1 mrg 4344 1.1 mrg /* Given an M x N integer matrix A, this function determines an M x 4345 1.1 mrg M unimodular matrix U, and an M x N echelon matrix S such that 4346 1.1 mrg "U.A = S". This decomposition is also known as "right Hermite". 4347 1.1 mrg 4348 1.1 mrg Ref: Algorithm 2.1 page 33 in "Loop Transformations for 4349 1.1 mrg Restructuring Compilers" Utpal Banerjee. */ 4350 1.1 mrg 4351 1.1 mrg static bool 4352 1.1 mrg lambda_matrix_right_hermite (lambda_matrix A, int m, int n, 4353 1.1 mrg lambda_matrix S, lambda_matrix U) 4354 1.1 mrg { 4355 1.1 mrg int i, j, i0 = 0; 4356 1.1 mrg 4357 1.1 mrg lambda_matrix_copy (A, S, m, n); 4358 1.1 mrg lambda_matrix_id (U, m); 4359 1.1 mrg 4360 1.1 mrg for (j = 0; j < n; j++) 4361 1.1 mrg { 4362 1.1 mrg if (lambda_vector_first_nz (S[j], m, i0) < m) 4363 1.1 mrg { 4364 1.1 mrg ++i0; 4365 1.1 mrg for (i = m - 1; i >= i0; i--) 4366 1.1 mrg { 4367 1.1 mrg while (S[i][j] != 0) 4368 1.1 mrg { 4369 1.1 mrg lambda_int factor, a, b; 4370 1.1 mrg 4371 1.1 mrg a = S[i-1][j]; 4372 1.1 mrg b = S[i][j]; 4373 1.1 mrg gcc_assert (a != HOST_WIDE_INT_MIN); 4374 1.1 mrg factor = a / b; 4375 1.1 mrg 4376 1.1 mrg if (!lambda_matrix_row_add (S, n, i, i-1, -factor)) 4377 1.1 mrg return false; 4378 1.1 mrg std::swap (S[i], S[i-1]); 4379 1.1 mrg 4380 1.1 mrg if (!lambda_matrix_row_add (U, m, i, i-1, -factor)) 4381 1.1 mrg return false; 4382 1.1 mrg std::swap (U[i], U[i-1]); 4383 1.1 mrg } 4384 1.1 mrg } 4385 1.1 mrg } 4386 1.1 mrg } 4387 1.1 mrg 4388 1.1 mrg return true; 4389 1.1 mrg } 4390 1.1 mrg 4391 1.1 mrg /* Determines the overlapping elements due to accesses CHREC_A and 4392 1.1 mrg CHREC_B, that are affine functions. 
This function cannot handle symbolic evolution functions, ie. when
   initial conditions are parameters, because it uses lambda matrices
   of integers.  */

static void
analyze_subscript_affine_affine (tree chrec_a,
				 tree chrec_b,
				 conflict_function **overlaps_a,
				 conflict_function **overlaps_b,
				 tree *last_conflicts)
{
  unsigned nb_vars_a, nb_vars_b, dim;
  lambda_int gamma, gcd_alpha_beta;
  lambda_matrix A, U, S;
  struct obstack scratch_obstack;

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* The accessed index overlaps for each iteration in the
	 loop.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
      return;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_subscript_affine_affine \n");

  /* For determining the initial intersection, we have to solve a
     Diophantine equation.  This is the most time consuming part.

     For answering to the question: "Is there a dependence?" we have
     to prove that there exists a solution to the Diophantine
     equation, and that the solution is in the iteration domain,
     i.e. the solution is positive or zero, and that the solution
     happens before the upper bound loop.nb_iterations.  Otherwise
     there is no dependence.  This function outputs a description of
     the iterations that hold the intersections.  */

  nb_vars_a = nb_vars_in_chrec (chrec_a);
  nb_vars_b = nb_vars_in_chrec (chrec_b);

  /* All the matrices are allocated on this obstack and released in one
     shot at end_analyze_subs_aa, so every early exit below must go
     through that label.  */
  gcc_obstack_init (&scratch_obstack);

  dim = nb_vars_a + nb_vars_b;
  U = lambda_matrix_new (dim, dim, &scratch_obstack);
  A = lambda_matrix_new (dim, 1, &scratch_obstack);
  S = lambda_matrix_new (dim, 1, &scratch_obstack);

  tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
  tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
  if (init_a == chrec_dont_know
      || init_b == chrec_dont_know)
    {
      /* A step or initial value did not fit in a HOST_WIDE_INT.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: "
		 "representation issue.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }
  gamma = int_cst_value (init_b) - int_cst_value (init_a);

  /* Don't do all the hard work of solving the Diophantine equation
     when we already know the solution: for example,
     | {3, +, 1}_1
     | {3, +, 4}_2
     | gamma = 3 - 3 = 0.
     Then the first overlap occurs during the first iterations:
     | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
  */
  if (gamma == 0)
    {
      if (nb_vars_a == 1 && nb_vars_b == 1)
	{
	  HOST_WIDE_INT step_a, step_b;
	  HOST_WIDE_INT niter, niter_a, niter_b;
	  affine_fn ova, ovb;

	  niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
	  niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
	  niter = MIN (niter_a, niter_b);
	  step_a = int_cst_value (CHREC_RIGHT (chrec_a));
	  step_b = int_cst_value (CHREC_RIGHT (chrec_b));

	  compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
						   &ova, &ovb,
						   last_conflicts, 1);
	  *overlaps_a = conflict_fn (1, ova);
	  *overlaps_b = conflict_fn (1, ovb);
	}

      else if (nb_vars_a == 2 && nb_vars_b == 1)
	compute_overlap_steps_for_affine_1_2
	  (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);

      else if (nb_vars_a == 1 && nb_vars_b == 2)
	compute_overlap_steps_for_affine_1_2
	  (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);

      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: too many variables.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
      goto end_analyze_subs_aa;
    }

  /* U.A = S */
  if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
    {
      /* The decomposition overflowed.  */
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  /* Normalize the leading coefficient to be positive; mirror the
     negation on U to keep U.A = S.  */
  if (S[0][0] < 0)
    {
      S[0][0] *= -1;
      lambda_matrix_row_negate (U, dim, 0);
    }
  gcd_alpha_beta = S[0][0];

  /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
     but that is a quite strange case.  Instead of ICEing, answer
     don't know.  */
  if (gcd_alpha_beta == 0)
    {
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  /* The classic "gcd-test".  */
  if (!int_divides_p (gcd_alpha_beta, gamma))
    {
      /* The "gcd-test" has determined that there is no integer
	 solution, i.e. there is no dependence.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
    }

  /* Both access functions are univariate.  This includes SIV and MIV cases.  */
  else if (nb_vars_a == 1 && nb_vars_b == 1)
    {
      /* Both functions should have the same evolution sign.  */
      if (((A[0][0] > 0 && -A[1][0] > 0)
	   || (A[0][0] < 0 && -A[1][0] < 0)))
	{
	  /* The solutions are given by:
	     |
	     | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
	     |                           [u21 u22]    [y0]

	     For a given integer t.  Using the following variables,

	     | i0 = u11 * gamma / gcd_alpha_beta
	     | j0 = u12 * gamma / gcd_alpha_beta
	     | i1 = u21
	     | j1 = u22

	     the solutions are:

	     | x0 = i0 + i1 * t,
	     | y0 = j0 + j1 * t.  */
	  HOST_WIDE_INT i0, j0, i1, j1;

	  i0 = U[0][0] * gamma / gcd_alpha_beta;
	  j0 = U[0][1] * gamma / gcd_alpha_beta;
	  i1 = U[1][0];
	  j1 = U[1][1];

	  if ((i1 == 0 && i0 < 0)
	      || (j1 == 0 && j0 < 0))
	    {
	      /* There is no solution.
		 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
		 falls in here, but for the moment we don't look at the
		 upper bound of the iteration domain.  */
	      *overlaps_a = conflict_fn_no_dependence ();
	      *overlaps_b = conflict_fn_no_dependence ();
	      *last_conflicts = integer_zero_node;
	      goto end_analyze_subs_aa;
	    }

	  if (i1 > 0 && j1 > 0)
	    {
	      HOST_WIDE_INT niter_a
		= max_stmt_executions_int (get_chrec_loop (chrec_a));
	      HOST_WIDE_INT niter_b
		= max_stmt_executions_int (get_chrec_loop (chrec_b));
	      HOST_WIDE_INT niter = MIN (niter_a, niter_b);

	      /* (X0, Y0) is a solution of the Diophantine equation:
		 "chrec_a (X0) = chrec_b (Y0)".  */
	      HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
					CEIL (-j0, j1));
	      HOST_WIDE_INT x0 = i1 * tau1 + i0;
	      HOST_WIDE_INT y0 = j1 * tau1 + j0;

	      /* (X1, Y1) is the smallest positive solution of the eq
		 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
		 first conflict occurs.  */
	      HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
	      HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
	      HOST_WIDE_INT y1 = y0 - j1 * min_multiple;

	      if (niter > 0)
		{
		  /* If the overlap occurs outside of the bounds of the
		     loop, there is no dependence.  */
		  if (x1 >= niter_a || y1 >= niter_b)
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      goto end_analyze_subs_aa;
		    }

		  /* max stmt executions can get quite large, avoid
		     overflows by using wide ints here.  */
		  widest_int tau2
		    = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
				wi::sdiv_floor (wi::sub (niter_b, j0), j1));
		  widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
		  if (wi::min_precision (last_conflict, SIGNED)
		      <= TYPE_PRECISION (integer_type_node))
		    *last_conflicts
		       = build_int_cst (integer_type_node,
					last_conflict.to_shwi ());
		  else
		    *last_conflicts = chrec_dont_know;
		}
	      else
		*last_conflicts = chrec_dont_know;

	      *overlaps_a
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, x1),
						 1,
						 build_int_cst (NULL_TREE, i1)));
	      *overlaps_b
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, y1),
						 1,
						 build_int_cst (NULL_TREE, j1)));
	    }
	  else
	    {
	      /* FIXME: For the moment, the upper bound of the
		 iteration domain for i and j is not checked.  */
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	    }
	}

      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
    }

 end_analyze_subs_aa:
  obstack_free (&scratch_obstack, NULL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " (overlaps_a = ");
      dump_conflict_function (dump_file, *overlaps_a);
      fprintf (dump_file, ")\n (overlaps_b = ");
      dump_conflict_function (dump_file, *overlaps_b);
      fprintf (dump_file, "))\n");
    }
}

/* Returns true when analyze_subscript_affine_affine can be used for
   determining the dependence relation between chrec_a and chrec_b,
   that contain symbols.  This function modifies chrec_a and chrec_b
   such that the analysis result is the same, and such that they don't
   contain symbols, and then can safely be passed to the analyzer.
4692 1.1 mrg 4693 1.1 mrg Example: The analysis of the following tuples of evolutions produce 4694 1.1 mrg the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1 4695 1.1 mrg vs. {0, +, 1}_1 4696 1.1 mrg 4697 1.1 mrg {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1) 4698 1.1 mrg {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1) 4699 1.1 mrg */ 4700 1.1 mrg 4701 1.1 mrg static bool 4702 1.1 mrg can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b) 4703 1.1 mrg { 4704 1.1 mrg tree diff, type, left_a, left_b, right_b; 4705 1.1 mrg 4706 1.1 mrg if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a)) 4707 1.1 mrg || chrec_contains_symbols (CHREC_RIGHT (*chrec_b))) 4708 1.1 mrg /* FIXME: For the moment not handled. Might be refined later. */ 4709 1.1 mrg return false; 4710 1.1 mrg 4711 1.1 mrg type = chrec_type (*chrec_a); 4712 1.1 mrg left_a = CHREC_LEFT (*chrec_a); 4713 1.1 mrg left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL); 4714 1.1 mrg diff = chrec_fold_minus (type, left_a, left_b); 4715 1.1 mrg 4716 1.1 mrg if (!evolution_function_is_constant_p (diff)) 4717 1.1 mrg return false; 4718 1.1 mrg 4719 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 4720 1.1 mrg fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n"); 4721 1.1 mrg 4722 1.1 mrg *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a), 4723 1.1 mrg diff, CHREC_RIGHT (*chrec_a)); 4724 1.1 mrg right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL); 4725 1.1 mrg *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b), 4726 1.1 mrg build_int_cst (type, 0), 4727 1.1 mrg right_b); 4728 1.1 mrg return true; 4729 1.1 mrg } 4730 1.1 mrg 4731 1.1 mrg /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and 4732 1.1 mrg *OVERLAPS_B are initialized to the functions that describe the 4733 1.1 mrg relation between the elements accessed twice by CHREC_A and 4734 1.1 mrg CHREC_B. 
For k >= 0, the following property is verified: 4735 1.1 mrg 4736 1.1 mrg CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */ 4737 1.1 mrg 4738 1.1 mrg static void 4739 1.1 mrg analyze_siv_subscript (tree chrec_a, 4740 1.1 mrg tree chrec_b, 4741 1.1 mrg conflict_function **overlaps_a, 4742 1.1 mrg conflict_function **overlaps_b, 4743 1.1 mrg tree *last_conflicts, 4744 1.1 mrg int loop_nest_num) 4745 1.1 mrg { 4746 1.1 mrg dependence_stats.num_siv++; 4747 1.1 mrg 4748 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 4749 1.1 mrg fprintf (dump_file, "(analyze_siv_subscript \n"); 4750 1.1 mrg 4751 1.1 mrg if (evolution_function_is_constant_p (chrec_a) 4752 1.1 mrg && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num)) 4753 1.1 mrg analyze_siv_subscript_cst_affine (chrec_a, chrec_b, 4754 1.1 mrg overlaps_a, overlaps_b, last_conflicts); 4755 1.1 mrg 4756 1.1 mrg else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num) 4757 1.1 mrg && evolution_function_is_constant_p (chrec_b)) 4758 1.1 mrg analyze_siv_subscript_cst_affine (chrec_b, chrec_a, 4759 1.1 mrg overlaps_b, overlaps_a, last_conflicts); 4760 1.1 mrg 4761 1.1 mrg else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num) 4762 1.1 mrg && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num)) 4763 1.1 mrg { 4764 1.1 mrg if (!chrec_contains_symbols (chrec_a) 4765 1.1 mrg && !chrec_contains_symbols (chrec_b)) 4766 1.1 mrg { 4767 1.1 mrg analyze_subscript_affine_affine (chrec_a, chrec_b, 4768 1.1 mrg overlaps_a, overlaps_b, 4769 1.1 mrg last_conflicts); 4770 1.1 mrg 4771 1.1 mrg if (CF_NOT_KNOWN_P (*overlaps_a) 4772 1.1 mrg || CF_NOT_KNOWN_P (*overlaps_b)) 4773 1.1 mrg dependence_stats.num_siv_unimplemented++; 4774 1.1 mrg else if (CF_NO_DEPENDENCE_P (*overlaps_a) 4775 1.1 mrg || CF_NO_DEPENDENCE_P (*overlaps_b)) 4776 1.1 mrg dependence_stats.num_siv_independent++; 4777 1.1 mrg else 4778 1.1 mrg dependence_stats.num_siv_dependent++; 4779 1.1 mrg } 4780 1.1 mrg else 
if (can_use_analyze_subscript_affine_affine (&chrec_a, 4781 1.1 mrg &chrec_b)) 4782 1.1 mrg { 4783 1.1 mrg analyze_subscript_affine_affine (chrec_a, chrec_b, 4784 1.1 mrg overlaps_a, overlaps_b, 4785 1.1 mrg last_conflicts); 4786 1.1 mrg 4787 1.1 mrg if (CF_NOT_KNOWN_P (*overlaps_a) 4788 1.1 mrg || CF_NOT_KNOWN_P (*overlaps_b)) 4789 1.1 mrg dependence_stats.num_siv_unimplemented++; 4790 1.1 mrg else if (CF_NO_DEPENDENCE_P (*overlaps_a) 4791 1.1 mrg || CF_NO_DEPENDENCE_P (*overlaps_b)) 4792 1.1 mrg dependence_stats.num_siv_independent++; 4793 1.1 mrg else 4794 1.1 mrg dependence_stats.num_siv_dependent++; 4795 1.1 mrg } 4796 1.1 mrg else 4797 1.1 mrg goto siv_subscript_dontknow; 4798 1.1 mrg } 4799 1.1 mrg 4800 1.1 mrg else 4801 1.1 mrg { 4802 1.1 mrg siv_subscript_dontknow:; 4803 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 4804 1.1 mrg fprintf (dump_file, " siv test failed: unimplemented"); 4805 1.1 mrg *overlaps_a = conflict_fn_not_known (); 4806 1.1 mrg *overlaps_b = conflict_fn_not_known (); 4807 1.1 mrg *last_conflicts = chrec_dont_know; 4808 1.1 mrg dependence_stats.num_siv_unimplemented++; 4809 1.1 mrg } 4810 1.1 mrg 4811 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 4812 1.1 mrg fprintf (dump_file, ")\n"); 4813 1.1 mrg } 4814 1.1 mrg 4815 1.1 mrg /* Returns false if we can prove that the greatest common divisor of the steps 4816 1.1 mrg of CHREC does not divide CST, false otherwise. 
*/

static bool
gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
{
  HOST_WIDE_INT cd = 0, val;
  tree step;

  /* If CST does not fit in a signed HOST_WIDE_INT we cannot disprove
     divisibility; conservatively answer "may divide".  */
  if (!tree_fits_shwi_p (cst))
    return true;
  val = tree_to_shwi (cst);

  /* Accumulate the GCD of all the steps of CHREC.  */
  while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
    {
      step = CHREC_RIGHT (chrec);
      if (!tree_fits_shwi_p (step))
	return true;
      cd = gcd (cd, tree_to_shwi (step));
      chrec = CHREC_LEFT (chrec);
    }

  /* CD is zero when CHREC has no polynomial part or when all its steps
     are zero; avoid the division by zero below and answer
     conservatively.  */
  if (cd == 0)
    return true;

  return val % cd == 0;
}

/* Analyze a MIV (Multiple Index Variable) subscript with respect to
   LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
   functions that describe the relation between the elements accessed
   twice by CHREC_A and CHREC_B.  For k >= 0, the following property
   is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).
*/ 4847 1.1 mrg 4848 1.1 mrg static void 4849 1.1 mrg analyze_miv_subscript (tree chrec_a, 4850 1.1 mrg tree chrec_b, 4851 1.1 mrg conflict_function **overlaps_a, 4852 1.1 mrg conflict_function **overlaps_b, 4853 1.1 mrg tree *last_conflicts, 4854 1.1 mrg class loop *loop_nest) 4855 1.1 mrg { 4856 1.1 mrg tree type, difference; 4857 1.1 mrg 4858 1.1 mrg dependence_stats.num_miv++; 4859 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 4860 1.1 mrg fprintf (dump_file, "(analyze_miv_subscript \n"); 4861 1.1 mrg 4862 1.1 mrg type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b)); 4863 1.1 mrg chrec_a = chrec_convert (type, chrec_a, NULL); 4864 1.1 mrg chrec_b = chrec_convert (type, chrec_b, NULL); 4865 1.1 mrg difference = chrec_fold_minus (type, chrec_a, chrec_b); 4866 1.1 mrg 4867 1.1 mrg if (eq_evolutions_p (chrec_a, chrec_b)) 4868 1.1 mrg { 4869 1.1 mrg /* Access functions are the same: all the elements are accessed 4870 1.1 mrg in the same order. */ 4871 1.1 mrg *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 4872 1.1 mrg *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node)); 4873 1.1 mrg *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a)); 4874 1.1 mrg dependence_stats.num_miv_dependent++; 4875 1.1 mrg } 4876 1.1 mrg 4877 1.1 mrg else if (evolution_function_is_constant_p (difference) 4878 1.1 mrg && evolution_function_is_affine_multivariate_p (chrec_a, 4879 1.1 mrg loop_nest->num) 4880 1.1 mrg && !gcd_of_steps_may_divide_p (chrec_a, difference)) 4881 1.1 mrg { 4882 1.1 mrg /* testsuite/.../ssa-chrec-33.c 4883 1.1 mrg {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2 4884 1.1 mrg 4885 1.1 mrg The difference is 1, and all the evolution steps are multiples 4886 1.1 mrg of 2, consequently there are no overlapping elements. 
*/ 4887 1.1 mrg *overlaps_a = conflict_fn_no_dependence (); 4888 1.1 mrg *overlaps_b = conflict_fn_no_dependence (); 4889 1.1 mrg *last_conflicts = integer_zero_node; 4890 1.1 mrg dependence_stats.num_miv_independent++; 4891 1.1 mrg } 4892 1.1 mrg 4893 1.1 mrg else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num) 4894 1.1 mrg && !chrec_contains_symbols (chrec_a, loop_nest) 4895 1.1 mrg && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num) 4896 1.1 mrg && !chrec_contains_symbols (chrec_b, loop_nest)) 4897 1.1 mrg { 4898 1.1 mrg /* testsuite/.../ssa-chrec-35.c 4899 1.1 mrg {0, +, 1}_2 vs. {0, +, 1}_3 4900 1.1 mrg the overlapping elements are respectively located at iterations: 4901 1.1 mrg {0, +, 1}_x and {0, +, 1}_x, 4902 1.1 mrg in other words, we have the equality: 4903 1.1 mrg {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x) 4904 1.1 mrg 4905 1.1 mrg Other examples: 4906 1.1 mrg {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) = 4907 1.1 mrg {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y) 4908 1.1 mrg 4909 1.1 mrg {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) = 4910 1.1 mrg {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) 4911 1.1 mrg */ 4912 1.1 mrg analyze_subscript_affine_affine (chrec_a, chrec_b, 4913 1.1 mrg overlaps_a, overlaps_b, last_conflicts); 4914 1.1 mrg 4915 1.1 mrg if (CF_NOT_KNOWN_P (*overlaps_a) 4916 1.1 mrg || CF_NOT_KNOWN_P (*overlaps_b)) 4917 1.1 mrg dependence_stats.num_miv_unimplemented++; 4918 1.1 mrg else if (CF_NO_DEPENDENCE_P (*overlaps_a) 4919 1.1 mrg || CF_NO_DEPENDENCE_P (*overlaps_b)) 4920 1.1 mrg dependence_stats.num_miv_independent++; 4921 1.1 mrg else 4922 1.1 mrg dependence_stats.num_miv_dependent++; 4923 1.1 mrg } 4924 1.1 mrg 4925 1.1 mrg else 4926 1.1 mrg { 4927 1.1 mrg /* When the analysis is too difficult, answer "don't know". 
*/ 4928 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 4929 1.1 mrg fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n"); 4930 1.1 mrg 4931 1.1 mrg *overlaps_a = conflict_fn_not_known (); 4932 1.1 mrg *overlaps_b = conflict_fn_not_known (); 4933 1.1 mrg *last_conflicts = chrec_dont_know; 4934 1.1 mrg dependence_stats.num_miv_unimplemented++; 4935 1.1 mrg } 4936 1.1 mrg 4937 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 4938 1.1 mrg fprintf (dump_file, ")\n"); 4939 1.1 mrg } 4940 1.1 mrg 4941 1.1 mrg /* Determines the iterations for which CHREC_A is equal to CHREC_B in 4942 1.1 mrg with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and 4943 1.1 mrg OVERLAP_ITERATIONS_B are initialized with two functions that 4944 1.1 mrg describe the iterations that contain conflicting elements. 4945 1.1 mrg 4946 1.1 mrg Remark: For an integer k >= 0, the following equality is true: 4947 1.1 mrg 4948 1.1 mrg CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)). 
4949 1.1 mrg */ 4950 1.1 mrg 4951 1.1 mrg static void 4952 1.1 mrg analyze_overlapping_iterations (tree chrec_a, 4953 1.1 mrg tree chrec_b, 4954 1.1 mrg conflict_function **overlap_iterations_a, 4955 1.1 mrg conflict_function **overlap_iterations_b, 4956 1.1 mrg tree *last_conflicts, class loop *loop_nest) 4957 1.1 mrg { 4958 1.1 mrg unsigned int lnn = loop_nest->num; 4959 1.1 mrg 4960 1.1 mrg dependence_stats.num_subscript_tests++; 4961 1.1 mrg 4962 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 4963 1.1 mrg { 4964 1.1 mrg fprintf (dump_file, "(analyze_overlapping_iterations \n"); 4965 1.1 mrg fprintf (dump_file, " (chrec_a = "); 4966 1.1 mrg print_generic_expr (dump_file, chrec_a); 4967 1.1 mrg fprintf (dump_file, ")\n (chrec_b = "); 4968 1.1 mrg print_generic_expr (dump_file, chrec_b); 4969 1.1 mrg fprintf (dump_file, ")\n"); 4970 1.1 mrg } 4971 1.1 mrg 4972 1.1 mrg if (chrec_a == NULL_TREE 4973 1.1 mrg || chrec_b == NULL_TREE 4974 1.1 mrg || chrec_contains_undetermined (chrec_a) 4975 1.1 mrg || chrec_contains_undetermined (chrec_b)) 4976 1.1 mrg { 4977 1.1 mrg dependence_stats.num_subscript_undetermined++; 4978 1.1 mrg 4979 1.1 mrg *overlap_iterations_a = conflict_fn_not_known (); 4980 1.1 mrg *overlap_iterations_b = conflict_fn_not_known (); 4981 1.1 mrg } 4982 1.1 mrg 4983 1.1 mrg /* If they are the same chrec, and are affine, they overlap 4984 1.1 mrg on every iteration. 
*/ 4985 1.1 mrg else if (eq_evolutions_p (chrec_a, chrec_b) 4986 1.1 mrg && (evolution_function_is_affine_multivariate_p (chrec_a, lnn) 4987 1.1 mrg || operand_equal_p (chrec_a, chrec_b, 0))) 4988 1.1 mrg { 4989 1.1 mrg dependence_stats.num_same_subscript_function++; 4990 1.1 mrg *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 4991 1.1 mrg *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node)); 4992 1.1 mrg *last_conflicts = chrec_dont_know; 4993 1.1 mrg } 4994 1.1 mrg 4995 1.1 mrg /* If they aren't the same, and aren't affine, we can't do anything 4996 1.1 mrg yet. */ 4997 1.1 mrg else if ((chrec_contains_symbols (chrec_a) 4998 1.1 mrg || chrec_contains_symbols (chrec_b)) 4999 1.1 mrg && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn) 5000 1.1 mrg || !evolution_function_is_affine_multivariate_p (chrec_b, lnn))) 5001 1.1 mrg { 5002 1.1 mrg dependence_stats.num_subscript_undetermined++; 5003 1.1 mrg *overlap_iterations_a = conflict_fn_not_known (); 5004 1.1 mrg *overlap_iterations_b = conflict_fn_not_known (); 5005 1.1 mrg } 5006 1.1 mrg 5007 1.1 mrg else if (ziv_subscript_p (chrec_a, chrec_b)) 5008 1.1 mrg analyze_ziv_subscript (chrec_a, chrec_b, 5009 1.1 mrg overlap_iterations_a, overlap_iterations_b, 5010 1.1 mrg last_conflicts); 5011 1.1 mrg 5012 1.1 mrg else if (siv_subscript_p (chrec_a, chrec_b)) 5013 1.1 mrg analyze_siv_subscript (chrec_a, chrec_b, 5014 1.1 mrg overlap_iterations_a, overlap_iterations_b, 5015 1.1 mrg last_conflicts, lnn); 5016 1.1 mrg 5017 1.1 mrg else 5018 1.1 mrg analyze_miv_subscript (chrec_a, chrec_b, 5019 1.1 mrg overlap_iterations_a, overlap_iterations_b, 5020 1.1 mrg last_conflicts, loop_nest); 5021 1.1 mrg 5022 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 5023 1.1 mrg { 5024 1.1 mrg fprintf (dump_file, " (overlap_iterations_a = "); 5025 1.1 mrg dump_conflict_function (dump_file, *overlap_iterations_a); 5026 1.1 mrg fprintf (dump_file, ")\n (overlap_iterations_b = "); 
5027 1.1 mrg dump_conflict_function (dump_file, *overlap_iterations_b); 5028 1.1 mrg fprintf (dump_file, "))\n"); 5029 1.1 mrg } 5030 1.1 mrg } 5031 1.1 mrg 5032 1.1 mrg /* Helper function for uniquely inserting distance vectors. */ 5033 1.1 mrg 5034 1.1 mrg static void 5035 1.1 mrg save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v) 5036 1.1 mrg { 5037 1.1 mrg for (lambda_vector v : DDR_DIST_VECTS (ddr)) 5038 1.1 mrg if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr))) 5039 1.1 mrg return; 5040 1.1 mrg 5041 1.1 mrg DDR_DIST_VECTS (ddr).safe_push (dist_v); 5042 1.1 mrg } 5043 1.1 mrg 5044 1.1 mrg /* Helper function for uniquely inserting direction vectors. */ 5045 1.1 mrg 5046 1.1 mrg static void 5047 1.1 mrg save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v) 5048 1.1 mrg { 5049 1.1 mrg for (lambda_vector v : DDR_DIR_VECTS (ddr)) 5050 1.1 mrg if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr))) 5051 1.1 mrg return; 5052 1.1 mrg 5053 1.1 mrg DDR_DIR_VECTS (ddr).safe_push (dir_v); 5054 1.1 mrg } 5055 1.1 mrg 5056 1.1 mrg /* Add a distance of 1 on all the loops outer than INDEX. If we 5057 1.1 mrg haven't yet determined a distance for this outer loop, push a new 5058 1.1 mrg distance vector composed of the previous distance, and a distance 5059 1.1 mrg of 1 for this outer loop. Example: 5060 1.1 mrg 5061 1.1 mrg | loop_1 5062 1.1 mrg | loop_2 5063 1.1 mrg | A[10] 5064 1.1 mrg | endloop_2 5065 1.1 mrg | endloop_1 5066 1.1 mrg 5067 1.1 mrg Saved vectors are of the form (dist_in_1, dist_in_2). First, we 5068 1.1 mrg save (0, 1), then we have to save (1, 0). */ 5069 1.1 mrg 5070 1.1 mrg static void 5071 1.1 mrg add_outer_distances (struct data_dependence_relation *ddr, 5072 1.1 mrg lambda_vector dist_v, int index) 5073 1.1 mrg { 5074 1.1 mrg /* For each outer loop where init_v is not set, the accesses are 5075 1.1 mrg in dependence of distance 1 in the loop. 
*/ 5076 1.1 mrg while (--index >= 0) 5077 1.1 mrg { 5078 1.1 mrg lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 5079 1.1 mrg lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr)); 5080 1.1 mrg save_v[index] = 1; 5081 1.1 mrg save_dist_v (ddr, save_v); 5082 1.1 mrg } 5083 1.1 mrg } 5084 1.1 mrg 5085 1.1 mrg /* Return false when fail to represent the data dependence as a 5086 1.1 mrg distance vector. A_INDEX is the index of the first reference 5087 1.1 mrg (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the 5088 1.1 mrg second reference. INIT_B is set to true when a component has been 5089 1.1 mrg added to the distance vector DIST_V. INDEX_CARRY is then set to 5090 1.1 mrg the index in DIST_V that carries the dependence. */ 5091 1.1 mrg 5092 1.1 mrg static bool 5093 1.1 mrg build_classic_dist_vector_1 (struct data_dependence_relation *ddr, 5094 1.1 mrg unsigned int a_index, unsigned int b_index, 5095 1.1 mrg lambda_vector dist_v, bool *init_b, 5096 1.1 mrg int *index_carry) 5097 1.1 mrg { 5098 1.1 mrg unsigned i; 5099 1.1 mrg lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 5100 1.1 mrg class loop *loop = DDR_LOOP_NEST (ddr)[0]; 5101 1.1 mrg 5102 1.1 mrg for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) 5103 1.1 mrg { 5104 1.1 mrg tree access_fn_a, access_fn_b; 5105 1.1 mrg struct subscript *subscript = DDR_SUBSCRIPT (ddr, i); 5106 1.1 mrg 5107 1.1 mrg if (chrec_contains_undetermined (SUB_DISTANCE (subscript))) 5108 1.1 mrg { 5109 1.1 mrg non_affine_dependence_relation (ddr); 5110 1.1 mrg return false; 5111 1.1 mrg } 5112 1.1 mrg 5113 1.1 mrg access_fn_a = SUB_ACCESS_FN (subscript, a_index); 5114 1.1 mrg access_fn_b = SUB_ACCESS_FN (subscript, b_index); 5115 1.1 mrg 5116 1.1 mrg if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC 5117 1.1 mrg && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC) 5118 1.1 mrg { 5119 1.1 mrg HOST_WIDE_INT dist; 5120 1.1 mrg int index; 5121 1.1 mrg int var_a = CHREC_VARIABLE (access_fn_a); 5122 1.1 mrg int 
var_b = CHREC_VARIABLE (access_fn_b); 5123 1.1 mrg 5124 1.1 mrg if (var_a != var_b 5125 1.1 mrg || chrec_contains_undetermined (SUB_DISTANCE (subscript))) 5126 1.1 mrg { 5127 1.1 mrg non_affine_dependence_relation (ddr); 5128 1.1 mrg return false; 5129 1.1 mrg } 5130 1.1 mrg 5131 1.1 mrg /* When data references are collected in a loop while data 5132 1.1 mrg dependences are analyzed in loop nest nested in the loop, we 5133 1.1 mrg would have more number of access functions than number of 5134 1.1 mrg loops. Skip access functions of loops not in the loop nest. 5135 1.1 mrg 5136 1.1 mrg See PR89725 for more information. */ 5137 1.1 mrg if (flow_loop_nested_p (get_loop (cfun, var_a), loop)) 5138 1.1 mrg continue; 5139 1.1 mrg 5140 1.1 mrg dist = int_cst_value (SUB_DISTANCE (subscript)); 5141 1.1 mrg index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr)); 5142 1.1 mrg *index_carry = MIN (index, *index_carry); 5143 1.1 mrg 5144 1.1 mrg /* This is the subscript coupling test. If we have already 5145 1.1 mrg recorded a distance for this loop (a distance coming from 5146 1.1 mrg another subscript), it should be the same. For example, 5147 1.1 mrg in the following code, there is no dependence: 5148 1.1 mrg 5149 1.1 mrg | loop i = 0, N, 1 5150 1.1 mrg | T[i+1][i] = ... 5151 1.1 mrg | ... = T[i][i] 5152 1.1 mrg | endloop 5153 1.1 mrg */ 5154 1.1 mrg if (init_v[index] != 0 && dist_v[index] != dist) 5155 1.1 mrg { 5156 1.1 mrg finalize_ddr_dependent (ddr, chrec_known); 5157 1.1 mrg return false; 5158 1.1 mrg } 5159 1.1 mrg 5160 1.1 mrg dist_v[index] = dist; 5161 1.1 mrg init_v[index] = 1; 5162 1.1 mrg *init_b = true; 5163 1.1 mrg } 5164 1.1 mrg else if (!operand_equal_p (access_fn_a, access_fn_b, 0)) 5165 1.1 mrg { 5166 1.1 mrg /* This can be for example an affine vs. constant dependence 5167 1.1 mrg (T[i] vs. T[3]) that is not an affine dependence and is 5168 1.1 mrg not representable as a distance vector. 
*/ 5169 1.1 mrg non_affine_dependence_relation (ddr); 5170 1.1 mrg return false; 5171 1.1 mrg } 5172 1.1 mrg } 5173 1.1 mrg 5174 1.1 mrg return true; 5175 1.1 mrg } 5176 1.1 mrg 5177 1.1 mrg /* Return true when the DDR contains only invariant access functions wrto. loop 5178 1.1 mrg number LNUM. */ 5179 1.1 mrg 5180 1.1 mrg static bool 5181 1.1 mrg invariant_access_functions (const struct data_dependence_relation *ddr, 5182 1.1 mrg int lnum) 5183 1.1 mrg { 5184 1.1 mrg for (subscript *sub : DDR_SUBSCRIPTS (ddr)) 5185 1.1 mrg if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum) 5186 1.1 mrg || !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum)) 5187 1.1 mrg return false; 5188 1.1 mrg 5189 1.1 mrg return true; 5190 1.1 mrg } 5191 1.1 mrg 5192 1.1 mrg /* Helper function for the case where DDR_A and DDR_B are the same 5193 1.1 mrg multivariate access function with a constant step. For an example 5194 1.1 mrg see pr34635-1.c. */ 5195 1.1 mrg 5196 1.1 mrg static void 5197 1.1 mrg add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2) 5198 1.1 mrg { 5199 1.1 mrg int x_1, x_2; 5200 1.1 mrg tree c_1 = CHREC_LEFT (c_2); 5201 1.1 mrg tree c_0 = CHREC_LEFT (c_1); 5202 1.1 mrg lambda_vector dist_v; 5203 1.1 mrg HOST_WIDE_INT v1, v2, cd; 5204 1.1 mrg 5205 1.1 mrg /* Polynomials with more than 2 variables are not handled yet. When 5206 1.1 mrg the evolution steps are parameters, it is not possible to 5207 1.1 mrg represent the dependence using classical distance vectors. 
*/ 5208 1.1 mrg if (TREE_CODE (c_0) != INTEGER_CST 5209 1.1 mrg || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST 5210 1.1 mrg || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST) 5211 1.1 mrg { 5212 1.1 mrg DDR_AFFINE_P (ddr) = false; 5213 1.1 mrg return; 5214 1.1 mrg } 5215 1.1 mrg 5216 1.1 mrg x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr)); 5217 1.1 mrg x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr)); 5218 1.1 mrg 5219 1.1 mrg /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */ 5220 1.1 mrg dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 5221 1.1 mrg v1 = int_cst_value (CHREC_RIGHT (c_1)); 5222 1.1 mrg v2 = int_cst_value (CHREC_RIGHT (c_2)); 5223 1.1 mrg cd = gcd (v1, v2); 5224 1.1 mrg v1 /= cd; 5225 1.1 mrg v2 /= cd; 5226 1.1 mrg 5227 1.1 mrg if (v2 < 0) 5228 1.1 mrg { 5229 1.1 mrg v2 = -v2; 5230 1.1 mrg v1 = -v1; 5231 1.1 mrg } 5232 1.1 mrg 5233 1.1 mrg dist_v[x_1] = v2; 5234 1.1 mrg dist_v[x_2] = -v1; 5235 1.1 mrg save_dist_v (ddr, dist_v); 5236 1.1 mrg 5237 1.1 mrg add_outer_distances (ddr, dist_v, x_1); 5238 1.1 mrg } 5239 1.1 mrg 5240 1.1 mrg /* Helper function for the case where DDR_A and DDR_B are the same 5241 1.1 mrg access functions. 
*/ 5242 1.1 mrg 5243 1.1 mrg static void 5244 1.1 mrg add_other_self_distances (struct data_dependence_relation *ddr) 5245 1.1 mrg { 5246 1.1 mrg lambda_vector dist_v; 5247 1.1 mrg unsigned i; 5248 1.1 mrg int index_carry = DDR_NB_LOOPS (ddr); 5249 1.1 mrg subscript *sub; 5250 1.1 mrg class loop *loop = DDR_LOOP_NEST (ddr)[0]; 5251 1.1 mrg 5252 1.1 mrg FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub) 5253 1.1 mrg { 5254 1.1 mrg tree access_fun = SUB_ACCESS_FN (sub, 0); 5255 1.1 mrg 5256 1.1 mrg if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC) 5257 1.1 mrg { 5258 1.1 mrg if (!evolution_function_is_univariate_p (access_fun, loop->num)) 5259 1.1 mrg { 5260 1.1 mrg if (DDR_NUM_SUBSCRIPTS (ddr) != 1) 5261 1.1 mrg { 5262 1.1 mrg DDR_ARE_DEPENDENT (ddr) = chrec_dont_know; 5263 1.1 mrg return; 5264 1.1 mrg } 5265 1.1 mrg 5266 1.1 mrg access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0); 5267 1.1 mrg 5268 1.1 mrg if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC) 5269 1.1 mrg add_multivariate_self_dist (ddr, access_fun); 5270 1.1 mrg else 5271 1.1 mrg /* The evolution step is not constant: it varies in 5272 1.1 mrg the outer loop, so this cannot be represented by a 5273 1.1 mrg distance vector. For example in pr34635.c the 5274 1.1 mrg evolution is {0, +, {0, +, 4}_1}_2. */ 5275 1.1 mrg DDR_AFFINE_P (ddr) = false; 5276 1.1 mrg 5277 1.1 mrg return; 5278 1.1 mrg } 5279 1.1 mrg 5280 1.1 mrg /* When data references are collected in a loop while data 5281 1.1 mrg dependences are analyzed in loop nest nested in the loop, we 5282 1.1 mrg would have more number of access functions than number of 5283 1.1 mrg loops. Skip access functions of loops not in the loop nest. 5284 1.1 mrg 5285 1.1 mrg See PR89725 for more information. 
*/ 5286 1.1 mrg if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)), 5287 1.1 mrg loop)) 5288 1.1 mrg continue; 5289 1.1 mrg 5290 1.1 mrg index_carry = MIN (index_carry, 5291 1.1 mrg index_in_loop_nest (CHREC_VARIABLE (access_fun), 5292 1.1 mrg DDR_LOOP_NEST (ddr))); 5293 1.1 mrg } 5294 1.1 mrg } 5295 1.1 mrg 5296 1.1 mrg dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 5297 1.1 mrg add_outer_distances (ddr, dist_v, index_carry); 5298 1.1 mrg } 5299 1.1 mrg 5300 1.1 mrg static void 5301 1.1 mrg insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr) 5302 1.1 mrg { 5303 1.1 mrg lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 5304 1.1 mrg 5305 1.1 mrg dist_v[0] = 1; 5306 1.1 mrg save_dist_v (ddr, dist_v); 5307 1.1 mrg } 5308 1.1 mrg 5309 1.1 mrg /* Adds a unit distance vector to DDR when there is a 0 overlap. This 5310 1.1 mrg is the case for example when access functions are the same and 5311 1.1 mrg equal to a constant, as in: 5312 1.1 mrg 5313 1.1 mrg | loop_1 5314 1.1 mrg | A[3] = ... 5315 1.1 mrg | ... = A[3] 5316 1.1 mrg | endloop_1 5317 1.1 mrg 5318 1.1 mrg in which case the distance vectors are (0) and (1). 
*/ 5319 1.1 mrg 5320 1.1 mrg static void 5321 1.1 mrg add_distance_for_zero_overlaps (struct data_dependence_relation *ddr) 5322 1.1 mrg { 5323 1.1 mrg unsigned i, j; 5324 1.1 mrg 5325 1.1 mrg for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) 5326 1.1 mrg { 5327 1.1 mrg subscript_p sub = DDR_SUBSCRIPT (ddr, i); 5328 1.1 mrg conflict_function *ca = SUB_CONFLICTS_IN_A (sub); 5329 1.1 mrg conflict_function *cb = SUB_CONFLICTS_IN_B (sub); 5330 1.1 mrg 5331 1.1 mrg for (j = 0; j < ca->n; j++) 5332 1.1 mrg if (affine_function_zero_p (ca->fns[j])) 5333 1.1 mrg { 5334 1.1 mrg insert_innermost_unit_dist_vector (ddr); 5335 1.1 mrg return; 5336 1.1 mrg } 5337 1.1 mrg 5338 1.1 mrg for (j = 0; j < cb->n; j++) 5339 1.1 mrg if (affine_function_zero_p (cb->fns[j])) 5340 1.1 mrg { 5341 1.1 mrg insert_innermost_unit_dist_vector (ddr); 5342 1.1 mrg return; 5343 1.1 mrg } 5344 1.1 mrg } 5345 1.1 mrg } 5346 1.1 mrg 5347 1.1 mrg /* Return true when the DDR contains two data references that have the 5348 1.1 mrg same access functions. */ 5349 1.1 mrg 5350 1.1 mrg static inline bool 5351 1.1 mrg same_access_functions (const struct data_dependence_relation *ddr) 5352 1.1 mrg { 5353 1.1 mrg for (subscript *sub : DDR_SUBSCRIPTS (ddr)) 5354 1.1 mrg if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0), 5355 1.1 mrg SUB_ACCESS_FN (sub, 1))) 5356 1.1 mrg return false; 5357 1.1 mrg 5358 1.1 mrg return true; 5359 1.1 mrg } 5360 1.1 mrg 5361 1.1 mrg /* Compute the classic per loop distance vector. DDR is the data 5362 1.1 mrg dependence relation to build a vector from. Return false when fail 5363 1.1 mrg to represent the data dependence as a distance vector. 
*/ 5364 1.1 mrg 5365 1.1 mrg static bool 5366 1.1 mrg build_classic_dist_vector (struct data_dependence_relation *ddr, 5367 1.1 mrg class loop *loop_nest) 5368 1.1 mrg { 5369 1.1 mrg bool init_b = false; 5370 1.1 mrg int index_carry = DDR_NB_LOOPS (ddr); 5371 1.1 mrg lambda_vector dist_v; 5372 1.1 mrg 5373 1.1 mrg if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE) 5374 1.1 mrg return false; 5375 1.1 mrg 5376 1.1 mrg if (same_access_functions (ddr)) 5377 1.1 mrg { 5378 1.1 mrg /* Save the 0 vector. */ 5379 1.1 mrg dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 5380 1.1 mrg save_dist_v (ddr, dist_v); 5381 1.1 mrg 5382 1.1 mrg if (invariant_access_functions (ddr, loop_nest->num)) 5383 1.1 mrg add_distance_for_zero_overlaps (ddr); 5384 1.1 mrg 5385 1.1 mrg if (DDR_NB_LOOPS (ddr) > 1) 5386 1.1 mrg add_other_self_distances (ddr); 5387 1.1 mrg 5388 1.1 mrg return true; 5389 1.1 mrg } 5390 1.1 mrg 5391 1.1 mrg dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 5392 1.1 mrg if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry)) 5393 1.1 mrg return false; 5394 1.1 mrg 5395 1.1 mrg /* Save the distance vector if we initialized one. */ 5396 1.1 mrg if (init_b) 5397 1.1 mrg { 5398 1.1 mrg /* Verify a basic constraint: classic distance vectors should 5399 1.1 mrg always be lexicographically positive. 5400 1.1 mrg 5401 1.1 mrg Data references are collected in the order of execution of 5402 1.1 mrg the program, thus for the following loop 5403 1.1 mrg 5404 1.1 mrg | for (i = 1; i < 100; i++) 5405 1.1 mrg | for (j = 1; j < 100; j++) 5406 1.1 mrg | { 5407 1.1 mrg | t = T[j+1][i-1]; // A 5408 1.1 mrg | T[j][i] = t + 2; // B 5409 1.1 mrg | } 5410 1.1 mrg 5411 1.1 mrg references are collected following the direction of the wind: 5412 1.1 mrg A then B. The data dependence tests are performed also 5413 1.1 mrg following this order, such that we're looking at the distance 5414 1.1 mrg separating the elements accessed by A from the elements later 5415 1.1 mrg accessed by B. 
But in this example, the distance returned by 5416 1.1 mrg test_dep (A, B) is lexicographically negative (-1, 1), that 5417 1.1 mrg means that the access A occurs later than B with respect to 5418 1.1 mrg the outer loop, ie. we're actually looking upwind. In this 5419 1.1 mrg case we solve test_dep (B, A) looking downwind to the 5420 1.1 mrg lexicographically positive solution, that returns the 5421 1.1 mrg distance vector (1, -1). */ 5422 1.1 mrg if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr))) 5423 1.1 mrg { 5424 1.1 mrg lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 5425 1.1 mrg if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest)) 5426 1.1 mrg return false; 5427 1.1 mrg compute_subscript_distance (ddr); 5428 1.1 mrg if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b, 5429 1.1 mrg &index_carry)) 5430 1.1 mrg return false; 5431 1.1 mrg save_dist_v (ddr, save_v); 5432 1.1 mrg DDR_REVERSED_P (ddr) = true; 5433 1.1 mrg 5434 1.1 mrg /* In this case there is a dependence forward for all the 5435 1.1 mrg outer loops: 5436 1.1 mrg 5437 1.1 mrg | for (k = 1; k < 100; k++) 5438 1.1 mrg | for (i = 1; i < 100; i++) 5439 1.1 mrg | for (j = 1; j < 100; j++) 5440 1.1 mrg | { 5441 1.1 mrg | t = T[j+1][i-1]; // A 5442 1.1 mrg | T[j][i] = t + 2; // B 5443 1.1 mrg | } 5444 1.1 mrg 5445 1.1 mrg the vectors are: 5446 1.1 mrg (0, 1, -1) 5447 1.1 mrg (1, 1, -1) 5448 1.1 mrg (1, -1, 1) 5449 1.1 mrg */ 5450 1.1 mrg if (DDR_NB_LOOPS (ddr) > 1) 5451 1.1 mrg { 5452 1.1 mrg add_outer_distances (ddr, save_v, index_carry); 5453 1.1 mrg add_outer_distances (ddr, dist_v, index_carry); 5454 1.1 mrg } 5455 1.1 mrg } 5456 1.1 mrg else 5457 1.1 mrg { 5458 1.1 mrg lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 5459 1.1 mrg lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr)); 5460 1.1 mrg 5461 1.1 mrg if (DDR_NB_LOOPS (ddr) > 1) 5462 1.1 mrg { 5463 1.1 mrg lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 5464 1.1 mrg 5465 
1.1 mrg if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest)) 5466 1.1 mrg return false; 5467 1.1 mrg compute_subscript_distance (ddr); 5468 1.1 mrg if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b, 5469 1.1 mrg &index_carry)) 5470 1.1 mrg return false; 5471 1.1 mrg 5472 1.1 mrg save_dist_v (ddr, save_v); 5473 1.1 mrg add_outer_distances (ddr, dist_v, index_carry); 5474 1.1 mrg add_outer_distances (ddr, opposite_v, index_carry); 5475 1.1 mrg } 5476 1.1 mrg else 5477 1.1 mrg save_dist_v (ddr, save_v); 5478 1.1 mrg } 5479 1.1 mrg } 5480 1.1 mrg else 5481 1.1 mrg { 5482 1.1 mrg /* There is a distance of 1 on all the outer loops: Example: 5483 1.1 mrg there is a dependence of distance 1 on loop_1 for the array A. 5484 1.1 mrg 5485 1.1 mrg | loop_1 5486 1.1 mrg | A[5] = ... 5487 1.1 mrg | endloop 5488 1.1 mrg */ 5489 1.1 mrg add_outer_distances (ddr, dist_v, 5490 1.1 mrg lambda_vector_first_nz (dist_v, 5491 1.1 mrg DDR_NB_LOOPS (ddr), 0)); 5492 1.1 mrg } 5493 1.1 mrg 5494 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 5495 1.1 mrg { 5496 1.1 mrg unsigned i; 5497 1.1 mrg 5498 1.1 mrg fprintf (dump_file, "(build_classic_dist_vector\n"); 5499 1.1 mrg for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++) 5500 1.1 mrg { 5501 1.1 mrg fprintf (dump_file, " dist_vector = ("); 5502 1.1 mrg print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i), 5503 1.1 mrg DDR_NB_LOOPS (ddr)); 5504 1.1 mrg fprintf (dump_file, " )\n"); 5505 1.1 mrg } 5506 1.1 mrg fprintf (dump_file, ")\n"); 5507 1.1 mrg } 5508 1.1 mrg 5509 1.1 mrg return true; 5510 1.1 mrg } 5511 1.1 mrg 5512 1.1 mrg /* Return the direction for a given distance. 5513 1.1 mrg FIXME: Computing dir this way is suboptimal, since dir can catch 5514 1.1 mrg cases that dist is unable to represent. 
*/ 5515 1.1 mrg 5516 1.1 mrg static inline enum data_dependence_direction 5517 1.1 mrg dir_from_dist (int dist) 5518 1.1 mrg { 5519 1.1 mrg if (dist > 0) 5520 1.1 mrg return dir_positive; 5521 1.1 mrg else if (dist < 0) 5522 1.1 mrg return dir_negative; 5523 1.1 mrg else 5524 1.1 mrg return dir_equal; 5525 1.1 mrg } 5526 1.1 mrg 5527 1.1 mrg /* Compute the classic per loop direction vector. DDR is the data 5528 1.1 mrg dependence relation to build a vector from. */ 5529 1.1 mrg 5530 1.1 mrg static void 5531 1.1 mrg build_classic_dir_vector (struct data_dependence_relation *ddr) 5532 1.1 mrg { 5533 1.1 mrg unsigned i, j; 5534 1.1 mrg lambda_vector dist_v; 5535 1.1 mrg 5536 1.1 mrg FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v) 5537 1.1 mrg { 5538 1.1 mrg lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 5539 1.1 mrg 5540 1.1 mrg for (j = 0; j < DDR_NB_LOOPS (ddr); j++) 5541 1.1 mrg dir_v[j] = dir_from_dist (dist_v[j]); 5542 1.1 mrg 5543 1.1 mrg save_dir_v (ddr, dir_v); 5544 1.1 mrg } 5545 1.1 mrg } 5546 1.1 mrg 5547 1.1 mrg /* Helper function. Returns true when there is a dependence between the 5548 1.1 mrg data references. A_INDEX is the index of the first reference (0 for 5549 1.1 mrg DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference. 
*/ 5550 1.1 mrg 5551 1.1 mrg static bool 5552 1.1 mrg subscript_dependence_tester_1 (struct data_dependence_relation *ddr, 5553 1.1 mrg unsigned int a_index, unsigned int b_index, 5554 1.1 mrg class loop *loop_nest) 5555 1.1 mrg { 5556 1.1 mrg unsigned int i; 5557 1.1 mrg tree last_conflicts; 5558 1.1 mrg struct subscript *subscript; 5559 1.1 mrg tree res = NULL_TREE; 5560 1.1 mrg 5561 1.1 mrg for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++) 5562 1.1 mrg { 5563 1.1 mrg conflict_function *overlaps_a, *overlaps_b; 5564 1.1 mrg 5565 1.1 mrg analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index), 5566 1.1 mrg SUB_ACCESS_FN (subscript, b_index), 5567 1.1 mrg &overlaps_a, &overlaps_b, 5568 1.1 mrg &last_conflicts, loop_nest); 5569 1.1 mrg 5570 1.1 mrg if (SUB_CONFLICTS_IN_A (subscript)) 5571 1.1 mrg free_conflict_function (SUB_CONFLICTS_IN_A (subscript)); 5572 1.1 mrg if (SUB_CONFLICTS_IN_B (subscript)) 5573 1.1 mrg free_conflict_function (SUB_CONFLICTS_IN_B (subscript)); 5574 1.1 mrg 5575 1.1 mrg SUB_CONFLICTS_IN_A (subscript) = overlaps_a; 5576 1.1 mrg SUB_CONFLICTS_IN_B (subscript) = overlaps_b; 5577 1.1 mrg SUB_LAST_CONFLICT (subscript) = last_conflicts; 5578 1.1 mrg 5579 1.1 mrg /* If there is any undetermined conflict function we have to 5580 1.1 mrg give a conservative answer in case we cannot prove that 5581 1.1 mrg no dependence exists when analyzing another subscript. */ 5582 1.1 mrg if (CF_NOT_KNOWN_P (overlaps_a) 5583 1.1 mrg || CF_NOT_KNOWN_P (overlaps_b)) 5584 1.1 mrg { 5585 1.1 mrg res = chrec_dont_know; 5586 1.1 mrg continue; 5587 1.1 mrg } 5588 1.1 mrg 5589 1.1 mrg /* When there is a subscript with no dependence we can stop. 
*/ 5590 1.1 mrg else if (CF_NO_DEPENDENCE_P (overlaps_a) 5591 1.1 mrg || CF_NO_DEPENDENCE_P (overlaps_b)) 5592 1.1 mrg { 5593 1.1 mrg res = chrec_known; 5594 1.1 mrg break; 5595 1.1 mrg } 5596 1.1 mrg } 5597 1.1 mrg 5598 1.1 mrg if (res == NULL_TREE) 5599 1.1 mrg return true; 5600 1.1 mrg 5601 1.1 mrg if (res == chrec_known) 5602 1.1 mrg dependence_stats.num_dependence_independent++; 5603 1.1 mrg else 5604 1.1 mrg dependence_stats.num_dependence_undetermined++; 5605 1.1 mrg finalize_ddr_dependent (ddr, res); 5606 1.1 mrg return false; 5607 1.1 mrg } 5608 1.1 mrg 5609 1.1 mrg /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */ 5610 1.1 mrg 5611 1.1 mrg static void 5612 1.1 mrg subscript_dependence_tester (struct data_dependence_relation *ddr, 5613 1.1 mrg class loop *loop_nest) 5614 1.1 mrg { 5615 1.1 mrg if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest)) 5616 1.1 mrg dependence_stats.num_dependence_dependent++; 5617 1.1 mrg 5618 1.1 mrg compute_subscript_distance (ddr); 5619 1.1 mrg if (build_classic_dist_vector (ddr, loop_nest)) 5620 1.1 mrg build_classic_dir_vector (ddr); 5621 1.1 mrg } 5622 1.1 mrg 5623 1.1 mrg /* Returns true when all the access functions of A are affine or 5624 1.1 mrg constant with respect to LOOP_NEST. */ 5625 1.1 mrg 5626 1.1 mrg static bool 5627 1.1 mrg access_functions_are_affine_or_constant_p (const struct data_reference *a, 5628 1.1 mrg const class loop *loop_nest) 5629 1.1 mrg { 5630 1.1 mrg vec<tree> fns = DR_ACCESS_FNS (a); 5631 1.1 mrg for (tree t : fns) 5632 1.1 mrg if (!evolution_function_is_invariant_p (t, loop_nest->num) 5633 1.1 mrg && !evolution_function_is_affine_multivariate_p (t, loop_nest->num)) 5634 1.1 mrg return false; 5635 1.1 mrg 5636 1.1 mrg return true; 5637 1.1 mrg } 5638 1.1 mrg 5639 1.1 mrg /* This computes the affine dependence relation between A and B with 5640 1.1 mrg respect to LOOP_NEST. 
CHREC_KNOWN is used for representing the 5641 1.1 mrg independence between two accesses, while CHREC_DONT_KNOW is used 5642 1.1 mrg for representing the unknown relation. 5643 1.1 mrg 5644 1.1 mrg Note that it is possible to stop the computation of the dependence 5645 1.1 mrg relation the first time we detect a CHREC_KNOWN element for a given 5646 1.1 mrg subscript. */ 5647 1.1 mrg 5648 1.1 mrg void 5649 1.1 mrg compute_affine_dependence (struct data_dependence_relation *ddr, 5650 1.1 mrg class loop *loop_nest) 5651 1.1 mrg { 5652 1.1 mrg struct data_reference *dra = DDR_A (ddr); 5653 1.1 mrg struct data_reference *drb = DDR_B (ddr); 5654 1.1 mrg 5655 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 5656 1.1 mrg { 5657 1.1 mrg fprintf (dump_file, "(compute_affine_dependence\n"); 5658 1.1 mrg fprintf (dump_file, " ref_a: "); 5659 1.1 mrg print_generic_expr (dump_file, DR_REF (dra)); 5660 1.1 mrg fprintf (dump_file, ", stmt_a: "); 5661 1.1 mrg print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM); 5662 1.1 mrg fprintf (dump_file, " ref_b: "); 5663 1.1 mrg print_generic_expr (dump_file, DR_REF (drb)); 5664 1.1 mrg fprintf (dump_file, ", stmt_b: "); 5665 1.1 mrg print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM); 5666 1.1 mrg } 5667 1.1 mrg 5668 1.1 mrg /* Analyze only when the dependence relation is not yet known. */ 5669 1.1 mrg if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) 5670 1.1 mrg { 5671 1.1 mrg dependence_stats.num_dependence_tests++; 5672 1.1 mrg 5673 1.1 mrg if (access_functions_are_affine_or_constant_p (dra, loop_nest) 5674 1.1 mrg && access_functions_are_affine_or_constant_p (drb, loop_nest)) 5675 1.1 mrg subscript_dependence_tester (ddr, loop_nest); 5676 1.1 mrg 5677 1.1 mrg /* As a last case, if the dependence cannot be determined, or if 5678 1.1 mrg the dependence is considered too difficult to determine, answer 5679 1.1 mrg "don't know". 
*/ 5680 1.1 mrg else 5681 1.1 mrg { 5682 1.1 mrg dependence_stats.num_dependence_undetermined++; 5683 1.1 mrg 5684 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 5685 1.1 mrg { 5686 1.1 mrg fprintf (dump_file, "Data ref a:\n"); 5687 1.1 mrg dump_data_reference (dump_file, dra); 5688 1.1 mrg fprintf (dump_file, "Data ref b:\n"); 5689 1.1 mrg dump_data_reference (dump_file, drb); 5690 1.1 mrg fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n"); 5691 1.1 mrg } 5692 1.1 mrg finalize_ddr_dependent (ddr, chrec_dont_know); 5693 1.1 mrg } 5694 1.1 mrg } 5695 1.1 mrg 5696 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 5697 1.1 mrg { 5698 1.1 mrg if (DDR_ARE_DEPENDENT (ddr) == chrec_known) 5699 1.1 mrg fprintf (dump_file, ") -> no dependence\n"); 5700 1.1 mrg else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) 5701 1.1 mrg fprintf (dump_file, ") -> dependence analysis failed\n"); 5702 1.1 mrg else 5703 1.1 mrg fprintf (dump_file, ")\n"); 5704 1.1 mrg } 5705 1.1 mrg } 5706 1.1 mrg 5707 1.1 mrg /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all 5708 1.1 mrg the data references in DATAREFS, in the LOOP_NEST. When 5709 1.1 mrg COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self 5710 1.1 mrg relations. Return true when successful, i.e. data references number 5711 1.1 mrg is small enough to be handled. 
*/ 5712 1.1 mrg 5713 1.1 mrg bool 5714 1.1 mrg compute_all_dependences (const vec<data_reference_p> &datarefs, 5715 1.1 mrg vec<ddr_p> *dependence_relations, 5716 1.1 mrg const vec<loop_p> &loop_nest, 5717 1.1 mrg bool compute_self_and_rr) 5718 1.1 mrg { 5719 1.1 mrg struct data_dependence_relation *ddr; 5720 1.1 mrg struct data_reference *a, *b; 5721 1.1 mrg unsigned int i, j; 5722 1.1 mrg 5723 1.1 mrg if ((int) datarefs.length () 5724 1.1 mrg > param_loop_max_datarefs_for_datadeps) 5725 1.1 mrg { 5726 1.1 mrg struct data_dependence_relation *ddr; 5727 1.1 mrg 5728 1.1 mrg /* Insert a single relation into dependence_relations: 5729 1.1 mrg chrec_dont_know. */ 5730 1.1 mrg ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest); 5731 1.1 mrg dependence_relations->safe_push (ddr); 5732 1.1 mrg return false; 5733 1.1 mrg } 5734 1.1 mrg 5735 1.1 mrg FOR_EACH_VEC_ELT (datarefs, i, a) 5736 1.1 mrg for (j = i + 1; datarefs.iterate (j, &b); j++) 5737 1.1 mrg if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr) 5738 1.1 mrg { 5739 1.1 mrg ddr = initialize_data_dependence_relation (a, b, loop_nest); 5740 1.1 mrg dependence_relations->safe_push (ddr); 5741 1.1 mrg if (loop_nest.exists ()) 5742 1.1 mrg compute_affine_dependence (ddr, loop_nest[0]); 5743 1.1 mrg } 5744 1.1 mrg 5745 1.1 mrg if (compute_self_and_rr) 5746 1.1 mrg FOR_EACH_VEC_ELT (datarefs, i, a) 5747 1.1 mrg { 5748 1.1 mrg ddr = initialize_data_dependence_relation (a, a, loop_nest); 5749 1.1 mrg dependence_relations->safe_push (ddr); 5750 1.1 mrg if (loop_nest.exists ()) 5751 1.1 mrg compute_affine_dependence (ddr, loop_nest[0]); 5752 1.1 mrg } 5753 1.1 mrg 5754 1.1 mrg return true; 5755 1.1 mrg } 5756 1.1 mrg 5757 1.1 mrg /* Describes a location of a memory reference. */ 5758 1.1 mrg 5759 1.1 mrg struct data_ref_loc 5760 1.1 mrg { 5761 1.1 mrg /* The memory reference. */ 5762 1.1 mrg tree ref; 5763 1.1 mrg 5764 1.1 mrg /* True if the memory reference is read. 
*/ 5765 1.1 mrg bool is_read; 5766 1.1 mrg 5767 1.1 mrg /* True if the data reference is conditional within the containing 5768 1.1 mrg statement, i.e. if it might not occur even when the statement 5769 1.1 mrg is executed and runs to completion. */ 5770 1.1 mrg bool is_conditional_in_stmt; 5771 1.1 mrg }; 5772 1.1 mrg 5773 1.1 mrg 5774 1.1 mrg /* Stores the locations of memory references in STMT to REFERENCES. Returns 5775 1.1 mrg true if STMT clobbers memory, false otherwise. */ 5776 1.1 mrg 5777 1.1 mrg static bool 5778 1.1 mrg get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references) 5779 1.1 mrg { 5780 1.1 mrg bool clobbers_memory = false; 5781 1.1 mrg data_ref_loc ref; 5782 1.1 mrg tree op0, op1; 5783 1.1 mrg enum gimple_code stmt_code = gimple_code (stmt); 5784 1.1 mrg 5785 1.1 mrg /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects. 5786 1.1 mrg As we cannot model data-references to not spelled out 5787 1.1 mrg accesses give up if they may occur. */ 5788 1.1 mrg if (stmt_code == GIMPLE_CALL 5789 1.1 mrg && !(gimple_call_flags (stmt) & ECF_CONST)) 5790 1.1 mrg { 5791 1.1 mrg /* Allow IFN_GOMP_SIMD_LANE in their own loops. 
*/ 5792 1.1 mrg if (gimple_call_internal_p (stmt)) 5793 1.1 mrg switch (gimple_call_internal_fn (stmt)) 5794 1.1 mrg { 5795 1.1 mrg case IFN_GOMP_SIMD_LANE: 5796 1.1 mrg { 5797 1.1 mrg class loop *loop = gimple_bb (stmt)->loop_father; 5798 1.1 mrg tree uid = gimple_call_arg (stmt, 0); 5799 1.1 mrg gcc_assert (TREE_CODE (uid) == SSA_NAME); 5800 1.1 mrg if (loop == NULL 5801 1.1 mrg || loop->simduid != SSA_NAME_VAR (uid)) 5802 1.1 mrg clobbers_memory = true; 5803 1.1 mrg break; 5804 1.1 mrg } 5805 1.1 mrg case IFN_MASK_LOAD: 5806 1.1 mrg case IFN_MASK_STORE: 5807 1.1 mrg break; 5808 1.1 mrg default: 5809 1.1 mrg clobbers_memory = true; 5810 1.1 mrg break; 5811 1.1 mrg } 5812 1.1 mrg else 5813 1.1 mrg clobbers_memory = true; 5814 1.1 mrg } 5815 1.1 mrg else if (stmt_code == GIMPLE_ASM 5816 1.1 mrg && (gimple_asm_volatile_p (as_a <gasm *> (stmt)) 5817 1.1 mrg || gimple_vuse (stmt))) 5818 1.1 mrg clobbers_memory = true; 5819 1.1 mrg 5820 1.1 mrg if (!gimple_vuse (stmt)) 5821 1.1 mrg return clobbers_memory; 5822 1.1 mrg 5823 1.1 mrg if (stmt_code == GIMPLE_ASSIGN) 5824 1.1 mrg { 5825 1.1 mrg tree base; 5826 1.1 mrg op0 = gimple_assign_lhs (stmt); 5827 1.1 mrg op1 = gimple_assign_rhs1 (stmt); 5828 1.1 mrg 5829 1.1 mrg if (DECL_P (op1) 5830 1.1 mrg || (REFERENCE_CLASS_P (op1) 5831 1.1 mrg && (base = get_base_address (op1)) 5832 1.1 mrg && TREE_CODE (base) != SSA_NAME 5833 1.1 mrg && !is_gimple_min_invariant (base))) 5834 1.1 mrg { 5835 1.1 mrg ref.ref = op1; 5836 1.1 mrg ref.is_read = true; 5837 1.1 mrg ref.is_conditional_in_stmt = false; 5838 1.1 mrg references->safe_push (ref); 5839 1.1 mrg } 5840 1.1 mrg } 5841 1.1 mrg else if (stmt_code == GIMPLE_CALL) 5842 1.1 mrg { 5843 1.1 mrg unsigned i, n; 5844 1.1 mrg tree ptr, type; 5845 1.1 mrg unsigned int align; 5846 1.1 mrg 5847 1.1 mrg ref.is_read = false; 5848 1.1 mrg if (gimple_call_internal_p (stmt)) 5849 1.1 mrg switch (gimple_call_internal_fn (stmt)) 5850 1.1 mrg { 5851 1.1 mrg case IFN_MASK_LOAD: 5852 1.1 mrg if 
(gimple_call_lhs (stmt) == NULL_TREE) 5853 1.1 mrg break; 5854 1.1 mrg ref.is_read = true; 5855 1.1 mrg /* FALLTHRU */ 5856 1.1 mrg case IFN_MASK_STORE: 5857 1.1 mrg ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0); 5858 1.1 mrg align = tree_to_shwi (gimple_call_arg (stmt, 1)); 5859 1.1 mrg if (ref.is_read) 5860 1.1 mrg type = TREE_TYPE (gimple_call_lhs (stmt)); 5861 1.1 mrg else 5862 1.1 mrg type = TREE_TYPE (gimple_call_arg (stmt, 3)); 5863 1.1 mrg if (TYPE_ALIGN (type) != align) 5864 1.1 mrg type = build_aligned_type (type, align); 5865 1.1 mrg ref.is_conditional_in_stmt = true; 5866 1.1 mrg ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0), 5867 1.1 mrg ptr); 5868 1.1 mrg references->safe_push (ref); 5869 1.1 mrg return false; 5870 1.1 mrg default: 5871 1.1 mrg break; 5872 1.1 mrg } 5873 1.1 mrg 5874 1.1 mrg op0 = gimple_call_lhs (stmt); 5875 1.1 mrg n = gimple_call_num_args (stmt); 5876 1.1 mrg for (i = 0; i < n; i++) 5877 1.1 mrg { 5878 1.1 mrg op1 = gimple_call_arg (stmt, i); 5879 1.1 mrg 5880 1.1 mrg if (DECL_P (op1) 5881 1.1 mrg || (REFERENCE_CLASS_P (op1) && get_base_address (op1))) 5882 1.1 mrg { 5883 1.1 mrg ref.ref = op1; 5884 1.1 mrg ref.is_read = true; 5885 1.1 mrg ref.is_conditional_in_stmt = false; 5886 1.1 mrg references->safe_push (ref); 5887 1.1 mrg } 5888 1.1 mrg } 5889 1.1 mrg } 5890 1.1 mrg else 5891 1.1 mrg return clobbers_memory; 5892 1.1 mrg 5893 1.1 mrg if (op0 5894 1.1 mrg && (DECL_P (op0) 5895 1.1 mrg || (REFERENCE_CLASS_P (op0) && get_base_address (op0)))) 5896 1.1 mrg { 5897 1.1 mrg ref.ref = op0; 5898 1.1 mrg ref.is_read = false; 5899 1.1 mrg ref.is_conditional_in_stmt = false; 5900 1.1 mrg references->safe_push (ref); 5901 1.1 mrg } 5902 1.1 mrg return clobbers_memory; 5903 1.1 mrg } 5904 1.1 mrg 5905 1.1 mrg 5906 1.1 mrg /* Returns true if the loop-nest has any data reference. 
*/ 5907 1.1 mrg 5908 1.1 mrg bool 5909 1.1 mrg loop_nest_has_data_refs (loop_p loop) 5910 1.1 mrg { 5911 1.1 mrg basic_block *bbs = get_loop_body (loop); 5912 1.1 mrg auto_vec<data_ref_loc, 3> references; 5913 1.1 mrg 5914 1.1 mrg for (unsigned i = 0; i < loop->num_nodes; i++) 5915 1.1 mrg { 5916 1.1 mrg basic_block bb = bbs[i]; 5917 1.1 mrg gimple_stmt_iterator bsi; 5918 1.1 mrg 5919 1.1 mrg for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) 5920 1.1 mrg { 5921 1.1 mrg gimple *stmt = gsi_stmt (bsi); 5922 1.1 mrg get_references_in_stmt (stmt, &references); 5923 1.1 mrg if (references.length ()) 5924 1.1 mrg { 5925 1.1 mrg free (bbs); 5926 1.1 mrg return true; 5927 1.1 mrg } 5928 1.1 mrg } 5929 1.1 mrg } 5930 1.1 mrg free (bbs); 5931 1.1 mrg return false; 5932 1.1 mrg } 5933 1.1 mrg 5934 1.1 mrg /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable 5935 1.1 mrg reference, returns false, otherwise returns true. NEST is the outermost 5936 1.1 mrg loop of the loop nest in which the references should be analyzed. */ 5937 1.1 mrg 5938 1.1 mrg opt_result 5939 1.1 mrg find_data_references_in_stmt (class loop *nest, gimple *stmt, 5940 1.1 mrg vec<data_reference_p> *datarefs) 5941 1.1 mrg { 5942 1.1 mrg auto_vec<data_ref_loc, 2> references; 5943 1.1 mrg data_reference_p dr; 5944 1.1 mrg 5945 1.1 mrg if (get_references_in_stmt (stmt, &references)) 5946 1.1 mrg return opt_result::failure_at (stmt, "statement clobbers memory: %G", 5947 1.1 mrg stmt); 5948 1.1 mrg 5949 1.1 mrg for (const data_ref_loc &ref : references) 5950 1.1 mrg { 5951 1.1 mrg dr = create_data_ref (nest ? 
loop_preheader_edge (nest) : NULL, 5952 1.1 mrg loop_containing_stmt (stmt), ref.ref, 5953 1.1 mrg stmt, ref.is_read, ref.is_conditional_in_stmt); 5954 1.1 mrg gcc_assert (dr != NULL); 5955 1.1 mrg datarefs->safe_push (dr); 5956 1.1 mrg } 5957 1.1 mrg 5958 1.1 mrg return opt_result::success (); 5959 1.1 mrg } 5960 1.1 mrg 5961 1.1 mrg /* Stores the data references in STMT to DATAREFS. If there is an 5962 1.1 mrg unanalyzable reference, returns false, otherwise returns true. 5963 1.1 mrg NEST is the outermost loop of the loop nest in which the references 5964 1.1 mrg should be instantiated, LOOP is the loop in which the references 5965 1.1 mrg should be analyzed. */ 5966 1.1 mrg 5967 1.1 mrg bool 5968 1.1 mrg graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt, 5969 1.1 mrg vec<data_reference_p> *datarefs) 5970 1.1 mrg { 5971 1.1 mrg auto_vec<data_ref_loc, 2> references; 5972 1.1 mrg bool ret = true; 5973 1.1 mrg data_reference_p dr; 5974 1.1 mrg 5975 1.1 mrg if (get_references_in_stmt (stmt, &references)) 5976 1.1 mrg return false; 5977 1.1 mrg 5978 1.1 mrg for (const data_ref_loc &ref : references) 5979 1.1 mrg { 5980 1.1 mrg dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read, 5981 1.1 mrg ref.is_conditional_in_stmt); 5982 1.1 mrg gcc_assert (dr != NULL); 5983 1.1 mrg datarefs->safe_push (dr); 5984 1.1 mrg } 5985 1.1 mrg 5986 1.1 mrg return ret; 5987 1.1 mrg } 5988 1.1 mrg 5989 1.1 mrg /* Search the data references in LOOP, and record the information into 5990 1.1 mrg DATAREFS. Returns chrec_dont_know when failing to analyze a 5991 1.1 mrg difficult case, returns NULL_TREE otherwise. 
*/ 5992 1.1 mrg 5993 1.1 mrg tree 5994 1.1 mrg find_data_references_in_bb (class loop *loop, basic_block bb, 5995 1.1 mrg vec<data_reference_p> *datarefs) 5996 1.1 mrg { 5997 1.1 mrg gimple_stmt_iterator bsi; 5998 1.1 mrg 5999 1.1 mrg for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) 6000 1.1 mrg { 6001 1.1 mrg gimple *stmt = gsi_stmt (bsi); 6002 1.1 mrg 6003 1.1 mrg if (!find_data_references_in_stmt (loop, stmt, datarefs)) 6004 1.1 mrg { 6005 1.1 mrg struct data_reference *res; 6006 1.1 mrg res = XCNEW (struct data_reference); 6007 1.1 mrg datarefs->safe_push (res); 6008 1.1 mrg 6009 1.1 mrg return chrec_dont_know; 6010 1.1 mrg } 6011 1.1 mrg } 6012 1.1 mrg 6013 1.1 mrg return NULL_TREE; 6014 1.1 mrg } 6015 1.1 mrg 6016 1.1 mrg /* Search the data references in LOOP, and record the information into 6017 1.1 mrg DATAREFS. Returns chrec_dont_know when failing to analyze a 6018 1.1 mrg difficult case, returns NULL_TREE otherwise. 6019 1.1 mrg 6020 1.1 mrg TODO: This function should be made smarter so that it can handle address 6021 1.1 mrg arithmetic as if they were array accesses, etc. */ 6022 1.1 mrg 6023 1.1 mrg tree 6024 1.1 mrg find_data_references_in_loop (class loop *loop, 6025 1.1 mrg vec<data_reference_p> *datarefs) 6026 1.1 mrg { 6027 1.1 mrg basic_block bb, *bbs; 6028 1.1 mrg unsigned int i; 6029 1.1 mrg 6030 1.1 mrg bbs = get_loop_body_in_dom_order (loop); 6031 1.1 mrg 6032 1.1 mrg for (i = 0; i < loop->num_nodes; i++) 6033 1.1 mrg { 6034 1.1 mrg bb = bbs[i]; 6035 1.1 mrg 6036 1.1 mrg if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know) 6037 1.1 mrg { 6038 1.1 mrg free (bbs); 6039 1.1 mrg return chrec_dont_know; 6040 1.1 mrg } 6041 1.1 mrg } 6042 1.1 mrg free (bbs); 6043 1.1 mrg 6044 1.1 mrg return NULL_TREE; 6045 1.1 mrg } 6046 1.1 mrg 6047 1.1 mrg /* Return the alignment in bytes that DRB is guaranteed to have at all 6048 1.1 mrg times. 
*/ 6049 1.1 mrg 6050 1.1 mrg unsigned int 6051 1.1 mrg dr_alignment (innermost_loop_behavior *drb) 6052 1.1 mrg { 6053 1.1 mrg /* Get the alignment of BASE_ADDRESS + INIT. */ 6054 1.1 mrg unsigned int alignment = drb->base_alignment; 6055 1.1 mrg unsigned int misalignment = (drb->base_misalignment 6056 1.1 mrg + TREE_INT_CST_LOW (drb->init)); 6057 1.1 mrg if (misalignment != 0) 6058 1.1 mrg alignment = MIN (alignment, misalignment & -misalignment); 6059 1.1 mrg 6060 1.1 mrg /* Cap it to the alignment of OFFSET. */ 6061 1.1 mrg if (!integer_zerop (drb->offset)) 6062 1.1 mrg alignment = MIN (alignment, drb->offset_alignment); 6063 1.1 mrg 6064 1.1 mrg /* Cap it to the alignment of STEP. */ 6065 1.1 mrg if (!integer_zerop (drb->step)) 6066 1.1 mrg alignment = MIN (alignment, drb->step_alignment); 6067 1.1 mrg 6068 1.1 mrg return alignment; 6069 1.1 mrg } 6070 1.1 mrg 6071 1.1 mrg /* If BASE is a pointer-typed SSA name, try to find the object that it 6072 1.1 mrg is based on. Return this object X on success and store the alignment 6073 1.1 mrg in bytes of BASE - &X in *ALIGNMENT_OUT. */ 6074 1.1 mrg 6075 1.1 mrg static tree 6076 1.1 mrg get_base_for_alignment_1 (tree base, unsigned int *alignment_out) 6077 1.1 mrg { 6078 1.1 mrg if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base))) 6079 1.1 mrg return NULL_TREE; 6080 1.1 mrg 6081 1.1 mrg gimple *def = SSA_NAME_DEF_STMT (base); 6082 1.1 mrg base = analyze_scalar_evolution (loop_containing_stmt (def), base); 6083 1.1 mrg 6084 1.1 mrg /* Peel chrecs and record the minimum alignment preserved by 6085 1.1 mrg all steps. 
*/ 6086 1.1 mrg unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT; 6087 1.1 mrg while (TREE_CODE (base) == POLYNOMIAL_CHREC) 6088 1.1 mrg { 6089 1.1 mrg unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base)); 6090 1.1 mrg alignment = MIN (alignment, step_alignment); 6091 1.1 mrg base = CHREC_LEFT (base); 6092 1.1 mrg } 6093 1.1 mrg 6094 1.1 mrg /* Punt if the expression is too complicated to handle. */ 6095 1.1 mrg if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base))) 6096 1.1 mrg return NULL_TREE; 6097 1.1 mrg 6098 1.1 mrg /* The only useful cases are those for which a dereference folds to something 6099 1.1 mrg other than an INDIRECT_REF. */ 6100 1.1 mrg tree ref_type = TREE_TYPE (TREE_TYPE (base)); 6101 1.1 mrg tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base); 6102 1.1 mrg if (!ref) 6103 1.1 mrg return NULL_TREE; 6104 1.1 mrg 6105 1.1 mrg /* Analyze the base to which the steps we peeled were applied. */ 6106 1.1 mrg poly_int64 bitsize, bitpos, bytepos; 6107 1.1 mrg machine_mode mode; 6108 1.1 mrg int unsignedp, reversep, volatilep; 6109 1.1 mrg tree offset; 6110 1.1 mrg base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode, 6111 1.1 mrg &unsignedp, &reversep, &volatilep); 6112 1.1 mrg if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos)) 6113 1.1 mrg return NULL_TREE; 6114 1.1 mrg 6115 1.1 mrg /* Restrict the alignment to that guaranteed by the offsets. 
*/ 6116 1.1 mrg unsigned int bytepos_alignment = known_alignment (bytepos); 6117 1.1 mrg if (bytepos_alignment != 0) 6118 1.1 mrg alignment = MIN (alignment, bytepos_alignment); 6119 1.1 mrg if (offset) 6120 1.1 mrg { 6121 1.1 mrg unsigned int offset_alignment = highest_pow2_factor (offset); 6122 1.1 mrg alignment = MIN (alignment, offset_alignment); 6123 1.1 mrg } 6124 1.1 mrg 6125 1.1 mrg *alignment_out = alignment; 6126 1.1 mrg return base; 6127 1.1 mrg } 6128 1.1 mrg 6129 1.1 mrg /* Return the object whose alignment would need to be changed in order 6130 1.1 mrg to increase the alignment of ADDR. Store the maximum achievable 6131 1.1 mrg alignment in *MAX_ALIGNMENT. */ 6132 1.1 mrg 6133 1.1 mrg tree 6134 1.1 mrg get_base_for_alignment (tree addr, unsigned int *max_alignment) 6135 1.1 mrg { 6136 1.1 mrg tree base = get_base_for_alignment_1 (addr, max_alignment); 6137 1.1 mrg if (base) 6138 1.1 mrg return base; 6139 1.1 mrg 6140 1.1 mrg if (TREE_CODE (addr) == ADDR_EXPR) 6141 1.1 mrg addr = TREE_OPERAND (addr, 0); 6142 1.1 mrg *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT; 6143 1.1 mrg return addr; 6144 1.1 mrg } 6145 1.1 mrg 6146 1.1 mrg /* Recursive helper function. */ 6147 1.1 mrg 6148 1.1 mrg static bool 6149 1.1 mrg find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest) 6150 1.1 mrg { 6151 1.1 mrg /* Inner loops of the nest should not contain siblings. Example: 6152 1.1 mrg when there are two consecutive loops, 6153 1.1 mrg 6154 1.1 mrg | loop_0 6155 1.1 mrg | loop_1 6156 1.1 mrg | A[{0, +, 1}_1] 6157 1.1 mrg | endloop_1 6158 1.1 mrg | loop_2 6159 1.1 mrg | A[{0, +, 1}_2] 6160 1.1 mrg | endloop_2 6161 1.1 mrg | endloop_0 6162 1.1 mrg 6163 1.1 mrg the dependence relation cannot be captured by the distance 6164 1.1 mrg abstraction. 
*/ 6165 1.1 mrg if (loop->next) 6166 1.1 mrg return false; 6167 1.1 mrg 6168 1.1 mrg loop_nest->safe_push (loop); 6169 1.1 mrg if (loop->inner) 6170 1.1 mrg return find_loop_nest_1 (loop->inner, loop_nest); 6171 1.1 mrg return true; 6172 1.1 mrg } 6173 1.1 mrg 6174 1.1 mrg /* Return false when the LOOP is not well nested. Otherwise return 6175 1.1 mrg true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will 6176 1.1 mrg contain the loops from the outermost to the innermost, as they will 6177 1.1 mrg appear in the classic distance vector. */ 6178 1.1 mrg 6179 1.1 mrg bool 6180 1.1 mrg find_loop_nest (class loop *loop, vec<loop_p> *loop_nest) 6181 1.1 mrg { 6182 1.1 mrg loop_nest->safe_push (loop); 6183 1.1 mrg if (loop->inner) 6184 1.1 mrg return find_loop_nest_1 (loop->inner, loop_nest); 6185 1.1 mrg return true; 6186 1.1 mrg } 6187 1.1 mrg 6188 1.1 mrg /* Returns true when the data dependences have been computed, false otherwise. 6189 1.1 mrg Given a loop nest LOOP, the following vectors are returned: 6190 1.1 mrg DATAREFS is initialized to all the array elements contained in this loop, 6191 1.1 mrg DEPENDENCE_RELATIONS contains the relations between the data references. 6192 1.1 mrg Compute read-read and self relations if 6193 1.1 mrg COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */ 6194 1.1 mrg 6195 1.1 mrg bool 6196 1.1 mrg compute_data_dependences_for_loop (class loop *loop, 6197 1.1 mrg bool compute_self_and_read_read_dependences, 6198 1.1 mrg vec<loop_p> *loop_nest, 6199 1.1 mrg vec<data_reference_p> *datarefs, 6200 1.1 mrg vec<ddr_p> *dependence_relations) 6201 1.1 mrg { 6202 1.1 mrg bool res = true; 6203 1.1 mrg 6204 1.1 mrg memset (&dependence_stats, 0, sizeof (dependence_stats)); 6205 1.1 mrg 6206 1.1 mrg /* If the loop nest is not well formed, or one of the data references 6207 1.1 mrg is not computable, give up without spending time to compute other 6208 1.1 mrg dependences. 
*/ 6209 1.1 mrg if (!loop 6210 1.1 mrg || !find_loop_nest (loop, loop_nest) 6211 1.1 mrg || find_data_references_in_loop (loop, datarefs) == chrec_dont_know 6212 1.1 mrg || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest, 6213 1.1 mrg compute_self_and_read_read_dependences)) 6214 1.1 mrg res = false; 6215 1.1 mrg 6216 1.1 mrg if (dump_file && (dump_flags & TDF_STATS)) 6217 1.1 mrg { 6218 1.1 mrg fprintf (dump_file, "Dependence tester statistics:\n"); 6219 1.1 mrg 6220 1.1 mrg fprintf (dump_file, "Number of dependence tests: %d\n", 6221 1.1 mrg dependence_stats.num_dependence_tests); 6222 1.1 mrg fprintf (dump_file, "Number of dependence tests classified dependent: %d\n", 6223 1.1 mrg dependence_stats.num_dependence_dependent); 6224 1.1 mrg fprintf (dump_file, "Number of dependence tests classified independent: %d\n", 6225 1.1 mrg dependence_stats.num_dependence_independent); 6226 1.1 mrg fprintf (dump_file, "Number of undetermined dependence tests: %d\n", 6227 1.1 mrg dependence_stats.num_dependence_undetermined); 6228 1.1 mrg 6229 1.1 mrg fprintf (dump_file, "Number of subscript tests: %d\n", 6230 1.1 mrg dependence_stats.num_subscript_tests); 6231 1.1 mrg fprintf (dump_file, "Number of undetermined subscript tests: %d\n", 6232 1.1 mrg dependence_stats.num_subscript_undetermined); 6233 1.1 mrg fprintf (dump_file, "Number of same subscript function: %d\n", 6234 1.1 mrg dependence_stats.num_same_subscript_function); 6235 1.1 mrg 6236 1.1 mrg fprintf (dump_file, "Number of ziv tests: %d\n", 6237 1.1 mrg dependence_stats.num_ziv); 6238 1.1 mrg fprintf (dump_file, "Number of ziv tests returning dependent: %d\n", 6239 1.1 mrg dependence_stats.num_ziv_dependent); 6240 1.1 mrg fprintf (dump_file, "Number of ziv tests returning independent: %d\n", 6241 1.1 mrg dependence_stats.num_ziv_independent); 6242 1.1 mrg fprintf (dump_file, "Number of ziv tests unimplemented: %d\n", 6243 1.1 mrg dependence_stats.num_ziv_unimplemented); 6244 1.1 mrg 6245 1.1 mrg 
fprintf (dump_file, "Number of siv tests: %d\n", 6246 1.1 mrg dependence_stats.num_siv); 6247 1.1 mrg fprintf (dump_file, "Number of siv tests returning dependent: %d\n", 6248 1.1 mrg dependence_stats.num_siv_dependent); 6249 1.1 mrg fprintf (dump_file, "Number of siv tests returning independent: %d\n", 6250 1.1 mrg dependence_stats.num_siv_independent); 6251 1.1 mrg fprintf (dump_file, "Number of siv tests unimplemented: %d\n", 6252 1.1 mrg dependence_stats.num_siv_unimplemented); 6253 1.1 mrg 6254 1.1 mrg fprintf (dump_file, "Number of miv tests: %d\n", 6255 1.1 mrg dependence_stats.num_miv); 6256 1.1 mrg fprintf (dump_file, "Number of miv tests returning dependent: %d\n", 6257 1.1 mrg dependence_stats.num_miv_dependent); 6258 1.1 mrg fprintf (dump_file, "Number of miv tests returning independent: %d\n", 6259 1.1 mrg dependence_stats.num_miv_independent); 6260 1.1 mrg fprintf (dump_file, "Number of miv tests unimplemented: %d\n", 6261 1.1 mrg dependence_stats.num_miv_unimplemented); 6262 1.1 mrg } 6263 1.1 mrg 6264 1.1 mrg return res; 6265 1.1 mrg } 6266 1.1 mrg 6267 1.1 mrg /* Free the memory used by a data dependence relation DDR. */ 6268 1.1 mrg 6269 1.1 mrg void 6270 1.1 mrg free_dependence_relation (struct data_dependence_relation *ddr) 6271 1.1 mrg { 6272 1.1 mrg if (ddr == NULL) 6273 1.1 mrg return; 6274 1.1 mrg 6275 1.1 mrg if (DDR_SUBSCRIPTS (ddr).exists ()) 6276 1.1 mrg free_subscripts (DDR_SUBSCRIPTS (ddr)); 6277 1.1 mrg DDR_DIST_VECTS (ddr).release (); 6278 1.1 mrg DDR_DIR_VECTS (ddr).release (); 6279 1.1 mrg 6280 1.1 mrg free (ddr); 6281 1.1 mrg } 6282 1.1 mrg 6283 1.1 mrg /* Free the memory used by the data dependence relations from 6284 1.1 mrg DEPENDENCE_RELATIONS. 
*/ 6285 1.1 mrg 6286 1.1 mrg void 6287 1.1 mrg free_dependence_relations (vec<ddr_p>& dependence_relations) 6288 1.1 mrg { 6289 1.1 mrg for (data_dependence_relation *ddr : dependence_relations) 6290 1.1 mrg if (ddr) 6291 1.1 mrg free_dependence_relation (ddr); 6292 1.1 mrg 6293 1.1 mrg dependence_relations.release (); 6294 1.1 mrg } 6295 1.1 mrg 6296 1.1 mrg /* Free the memory used by the data references from DATAREFS. */ 6297 1.1 mrg 6298 1.1 mrg void 6299 1.1 mrg free_data_refs (vec<data_reference_p>& datarefs) 6300 1.1 mrg { 6301 1.1 mrg for (data_reference *dr : datarefs) 6302 1.1 mrg free_data_ref (dr); 6303 1.1 mrg datarefs.release (); 6304 1.1 mrg } 6305 1.1 mrg 6306 1.1 mrg /* Common routine implementing both dr_direction_indicator and 6307 1.1 mrg dr_zero_step_indicator. Return USEFUL_MIN if the indicator is known 6308 1.1 mrg to be >= USEFUL_MIN and -1 if the indicator is known to be negative. 6309 1.1 mrg Return the step as the indicator otherwise. */ 6310 1.1 mrg 6311 1.1 mrg static tree 6312 1.1 mrg dr_step_indicator (struct data_reference *dr, int useful_min) 6313 1.1 mrg { 6314 1.1 mrg tree step = DR_STEP (dr); 6315 1.1 mrg if (!step) 6316 1.1 mrg return NULL_TREE; 6317 1.1 mrg STRIP_NOPS (step); 6318 1.1 mrg /* Look for cases where the step is scaled by a positive constant 6319 1.1 mrg integer, which will often be the access size. If the multiplication 6320 1.1 mrg doesn't change the sign (due to overflow effects) then we can 6321 1.1 mrg test the unscaled value instead. */ 6322 1.1 mrg if (TREE_CODE (step) == MULT_EXPR 6323 1.1 mrg && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST 6324 1.1 mrg && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0) 6325 1.1 mrg { 6326 1.1 mrg tree factor = TREE_OPERAND (step, 1); 6327 1.1 mrg step = TREE_OPERAND (step, 0); 6328 1.1 mrg 6329 1.1 mrg /* Strip widening and truncating conversions as well as nops. 
*/ 6330 1.1 mrg if (CONVERT_EXPR_P (step) 6331 1.1 mrg && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0)))) 6332 1.1 mrg step = TREE_OPERAND (step, 0); 6333 1.1 mrg tree type = TREE_TYPE (step); 6334 1.1 mrg 6335 1.1 mrg /* Get the range of step values that would not cause overflow. */ 6336 1.1 mrg widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype)) 6337 1.1 mrg / wi::to_widest (factor)); 6338 1.1 mrg widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype)) 6339 1.1 mrg / wi::to_widest (factor)); 6340 1.1 mrg 6341 1.1 mrg /* Get the range of values that the unconverted step actually has. */ 6342 1.1 mrg wide_int step_min, step_max; 6343 1.1 mrg value_range vr; 6344 1.1 mrg if (TREE_CODE (step) != SSA_NAME 6345 1.1 mrg || !get_range_query (cfun)->range_of_expr (vr, step) 6346 1.1 mrg || vr.kind () != VR_RANGE) 6347 1.1 mrg { 6348 1.1 mrg step_min = wi::to_wide (TYPE_MIN_VALUE (type)); 6349 1.1 mrg step_max = wi::to_wide (TYPE_MAX_VALUE (type)); 6350 1.1 mrg } 6351 1.1 mrg else 6352 1.1 mrg { 6353 1.1 mrg step_min = vr.lower_bound (); 6354 1.1 mrg step_max = vr.upper_bound (); 6355 1.1 mrg } 6356 1.1 mrg 6357 1.1 mrg /* Check whether the unconverted step has an acceptable range. */ 6358 1.1 mrg signop sgn = TYPE_SIGN (type); 6359 1.1 mrg if (wi::les_p (minv, widest_int::from (step_min, sgn)) 6360 1.1 mrg && wi::ges_p (maxv, widest_int::from (step_max, sgn))) 6361 1.1 mrg { 6362 1.1 mrg if (wi::ge_p (step_min, useful_min, sgn)) 6363 1.1 mrg return ssize_int (useful_min); 6364 1.1 mrg else if (wi::lt_p (step_max, 0, sgn)) 6365 1.1 mrg return ssize_int (-1); 6366 1.1 mrg else 6367 1.1 mrg return fold_convert (ssizetype, step); 6368 1.1 mrg } 6369 1.1 mrg } 6370 1.1 mrg return DR_STEP (dr); 6371 1.1 mrg } 6372 1.1 mrg 6373 1.1 mrg /* Return a value that is negative iff DR has a negative step. 
*/ 6374 1.1 mrg 6375 1.1 mrg tree 6376 1.1 mrg dr_direction_indicator (struct data_reference *dr) 6377 1.1 mrg { 6378 1.1 mrg return dr_step_indicator (dr, 0); 6379 1.1 mrg } 6380 1.1 mrg 6381 1.1 mrg /* Return a value that is zero iff DR has a zero step. */ 6382 1.1 mrg 6383 1.1 mrg tree 6384 1.1 mrg dr_zero_step_indicator (struct data_reference *dr) 6385 1.1 mrg { 6386 1.1 mrg return dr_step_indicator (dr, 1); 6387 1.1 mrg } 6388 1.1 mrg 6389 1.1 mrg /* Return true if DR is known to have a nonnegative (but possibly zero) 6390 1.1 mrg step. */ 6391 1.1 mrg 6392 1.1 mrg bool 6393 1.1 mrg dr_known_forward_stride_p (struct data_reference *dr) 6394 1.1 mrg { 6395 1.1 mrg tree indicator = dr_direction_indicator (dr); 6396 1.1 mrg tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node, 6397 1.1 mrg fold_convert (ssizetype, indicator), 6398 1.1 mrg ssize_int (0)); 6399 return neg_step_val && integer_zerop (neg_step_val); 6400 } 6401