/* Data references and dependences detectors.
   Copyright (C) 2003-2022 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop (at) cri.ensmp.fr>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass walks a given loop structure searching for array
   references.  The information about the array accesses is recorded
   in DATA_REFERENCE structures.

   The basic test for determining the dependences is:
   given two access functions chrec1 and chrec2 to a same array, and
   x and y two vectors from the iteration domain, the same element of
   the array is accessed twice at iterations x and y if and only if:
   | chrec1 (x) == chrec2 (y).

   The goals of this analysis are:

   - to determine the independence: the relation between two
     independent accesses is qualified with the chrec_known (this
     information allows a loop parallelization),

   - when two data references access the same data, to qualify the
     dependence relation with classic dependence representations:

       - distance vectors
       - direction vectors
       - loop carried level dependence
       - polyhedron dependence
     or with the chains of recurrences based representation,

   - to define a knowledge base for storing the data dependence
     information,

   - to define an interface to access this data.


   Definitions:

   - subscript: given two array accesses a subscript is the tuple
   composed of the access functions for a given dimension.  Example:
   Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
   (f1, g1), (f2, g2), (f3, g3).

   - Diophantine equation: an equation whose coefficients and
   solutions are integer constants, for example the equation
   | 3*x + 2*y = 1
   has an integer solution x = 1 and y = -1.

   References:

   - "Advanced Compilation for High Performance Computing" by Randy
   Allen and Ken Kennedy.
   http://citeseer.ist.psu.edu/goff91practical.html

   - "Loop Transformations for Restructuring Compilers - The Foundations"
   by Utpal Banerjee.
72 1.1 mrg 73 1.1 mrg 74 1.1 mrg */ 75 1.1 mrg 76 1.1 mrg #define INCLUDE_ALGORITHM 77 1.1 mrg #include "config.h" 78 1.1 mrg #include "system.h" 79 1.1 mrg #include "coretypes.h" 80 1.1 mrg #include "backend.h" 81 1.1 mrg #include "rtl.h" 82 1.1 mrg #include "tree.h" 83 1.1 mrg #include "gimple.h" 84 1.1 mrg #include "gimple-pretty-print.h" 85 1.1 mrg #include "alias.h" 86 1.1 mrg #include "fold-const.h" 87 1.1 mrg #include "expr.h" 88 1.1 mrg #include "gimple-iterator.h" 89 1.1 mrg #include "tree-ssa-loop-niter.h" 90 1.1 mrg #include "tree-ssa-loop.h" 91 1.1 mrg #include "tree-ssa.h" 92 1.1 mrg #include "cfgloop.h" 93 1.1 mrg #include "tree-data-ref.h" 94 1.1 mrg #include "tree-scalar-evolution.h" 95 1.1 mrg #include "dumpfile.h" 96 1.1 mrg #include "tree-affine.h" 97 1.1 mrg #include "builtins.h" 98 1.1 mrg #include "tree-eh.h" 99 1.1 mrg #include "ssa.h" 100 1.1 mrg #include "internal-fn.h" 101 1.1 mrg #include "vr-values.h" 102 1.1 mrg #include "range-op.h" 103 1.1 mrg #include "tree-ssa-loop-ivopts.h" 104 1.1 mrg 105 1.1 mrg static struct datadep_stats 106 1.1 mrg { 107 1.1 mrg int num_dependence_tests; 108 1.1 mrg int num_dependence_dependent; 109 1.1 mrg int num_dependence_independent; 110 1.1 mrg int num_dependence_undetermined; 111 1.1 mrg 112 1.1 mrg int num_subscript_tests; 113 1.1 mrg int num_subscript_undetermined; 114 1.1 mrg int num_same_subscript_function; 115 1.1 mrg 116 1.1 mrg int num_ziv; 117 1.1 mrg int num_ziv_independent; 118 1.1 mrg int num_ziv_dependent; 119 1.1 mrg int num_ziv_unimplemented; 120 1.1 mrg 121 1.1 mrg int num_siv; 122 1.1 mrg int num_siv_independent; 123 1.1 mrg int num_siv_dependent; 124 1.1 mrg int num_siv_unimplemented; 125 1.1 mrg 126 1.1 mrg int num_miv; 127 1.1 mrg int num_miv_independent; 128 1.1 mrg int num_miv_dependent; 129 1.1 mrg int num_miv_unimplemented; 130 1.1 mrg } dependence_stats; 131 1.1 mrg 132 1.1 mrg static bool subscript_dependence_tester_1 (struct data_dependence_relation *, 133 1.1 mrg unsigned int, 
unsigned int, 134 1.1 mrg class loop *); 135 1.1 mrg /* Returns true iff A divides B. */ 136 1.1 mrg 137 1.1 mrg static inline bool 138 1.1 mrg tree_fold_divides_p (const_tree a, const_tree b) 139 1.1 mrg { 140 1.1 mrg gcc_assert (TREE_CODE (a) == INTEGER_CST); 141 1.1 mrg gcc_assert (TREE_CODE (b) == INTEGER_CST); 142 1.1 mrg return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a)); 143 1.1 mrg } 144 1.1 mrg 145 1.1 mrg /* Returns true iff A divides B. */ 146 1.1 mrg 147 1.1 mrg static inline bool 148 1.1 mrg int_divides_p (lambda_int a, lambda_int b) 149 1.1 mrg { 150 1.1 mrg return ((b % a) == 0); 151 1.1 mrg } 152 1.1 mrg 153 1.1 mrg /* Return true if reference REF contains a union access. */ 154 1.1 mrg 155 1.1 mrg static bool 156 1.1 mrg ref_contains_union_access_p (tree ref) 157 1.1 mrg { 158 1.1 mrg while (handled_component_p (ref)) 159 1.1 mrg { 160 1.1 mrg ref = TREE_OPERAND (ref, 0); 161 1.1 mrg if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE 162 1.1 mrg || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE) 163 1.1 mrg return true; 164 1.1 mrg } 165 1.1 mrg return false; 166 1.1 mrg } 167 1.1 mrg 168 1.1 mrg 169 1.1 mrg 171 1.1 mrg /* Dump into FILE all the data references from DATAREFS. */ 172 1.1 mrg 173 1.1 mrg static void 174 1.1 mrg dump_data_references (FILE *file, vec<data_reference_p> datarefs) 175 1.1 mrg { 176 1.1 mrg for (data_reference *dr : datarefs) 177 1.1 mrg dump_data_reference (file, dr); 178 1.1 mrg } 179 1.1 mrg 180 1.1 mrg /* Unified dump into FILE all the data references from DATAREFS. 
*/ 181 1.1 mrg 182 1.1 mrg DEBUG_FUNCTION void 183 1.1 mrg debug (vec<data_reference_p> &ref) 184 1.1 mrg { 185 1.1 mrg dump_data_references (stderr, ref); 186 1.1 mrg } 187 1.1 mrg 188 1.1 mrg DEBUG_FUNCTION void 189 1.1 mrg debug (vec<data_reference_p> *ptr) 190 1.1 mrg { 191 1.1 mrg if (ptr) 192 1.1 mrg debug (*ptr); 193 1.1 mrg else 194 1.1 mrg fprintf (stderr, "<nil>\n"); 195 1.1 mrg } 196 1.1 mrg 197 1.1 mrg 198 1.1 mrg /* Dump into STDERR all the data references from DATAREFS. */ 199 1.1 mrg 200 1.1 mrg DEBUG_FUNCTION void 201 1.1 mrg debug_data_references (vec<data_reference_p> datarefs) 202 1.1 mrg { 203 1.1 mrg dump_data_references (stderr, datarefs); 204 1.1 mrg } 205 1.1 mrg 206 1.1 mrg /* Print to STDERR the data_reference DR. */ 207 1.1 mrg 208 1.1 mrg DEBUG_FUNCTION void 209 1.1 mrg debug_data_reference (struct data_reference *dr) 210 1.1 mrg { 211 1.1 mrg dump_data_reference (stderr, dr); 212 1.1 mrg } 213 1.1 mrg 214 1.1 mrg /* Dump function for a DATA_REFERENCE structure. */ 215 1.1 mrg 216 1.1 mrg void 217 1.1 mrg dump_data_reference (FILE *outf, 218 1.1 mrg struct data_reference *dr) 219 1.1 mrg { 220 1.1 mrg unsigned int i; 221 1.1 mrg 222 1.1 mrg fprintf (outf, "#(Data Ref: \n"); 223 1.1 mrg fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index); 224 1.1 mrg fprintf (outf, "# stmt: "); 225 1.1 mrg print_gimple_stmt (outf, DR_STMT (dr), 0); 226 1.1 mrg fprintf (outf, "# ref: "); 227 1.1 mrg print_generic_stmt (outf, DR_REF (dr)); 228 1.1 mrg fprintf (outf, "# base_object: "); 229 1.1 mrg print_generic_stmt (outf, DR_BASE_OBJECT (dr)); 230 1.1 mrg 231 1.1 mrg for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++) 232 1.1 mrg { 233 1.1 mrg fprintf (outf, "# Access function %d: ", i); 234 1.1 mrg print_generic_stmt (outf, DR_ACCESS_FN (dr, i)); 235 1.1 mrg } 236 1.1 mrg fprintf (outf, "#)\n"); 237 1.1 mrg } 238 1.1 mrg 239 1.1 mrg /* Unified dump function for a DATA_REFERENCE structure. 
*/ 240 1.1 mrg 241 1.1 mrg DEBUG_FUNCTION void 242 1.1 mrg debug (data_reference &ref) 243 1.1 mrg { 244 1.1 mrg dump_data_reference (stderr, &ref); 245 1.1 mrg } 246 1.1 mrg 247 1.1 mrg DEBUG_FUNCTION void 248 1.1 mrg debug (data_reference *ptr) 249 1.1 mrg { 250 1.1 mrg if (ptr) 251 1.1 mrg debug (*ptr); 252 1.1 mrg else 253 1.1 mrg fprintf (stderr, "<nil>\n"); 254 1.1 mrg } 255 1.1 mrg 256 1.1 mrg 257 1.1 mrg /* Dumps the affine function described by FN to the file OUTF. */ 258 1.1 mrg 259 1.1 mrg DEBUG_FUNCTION void 260 1.1 mrg dump_affine_function (FILE *outf, affine_fn fn) 261 1.1 mrg { 262 1.1 mrg unsigned i; 263 1.1 mrg tree coef; 264 1.1 mrg 265 1.1 mrg print_generic_expr (outf, fn[0], TDF_SLIM); 266 1.1 mrg for (i = 1; fn.iterate (i, &coef); i++) 267 1.1 mrg { 268 1.1 mrg fprintf (outf, " + "); 269 1.1 mrg print_generic_expr (outf, coef, TDF_SLIM); 270 1.1 mrg fprintf (outf, " * x_%u", i); 271 1.1 mrg } 272 1.1 mrg } 273 1.1 mrg 274 1.1 mrg /* Dumps the conflict function CF to the file OUTF. */ 275 1.1 mrg 276 1.1 mrg DEBUG_FUNCTION void 277 1.1 mrg dump_conflict_function (FILE *outf, conflict_function *cf) 278 1.1 mrg { 279 1.1 mrg unsigned i; 280 1.1 mrg 281 1.1 mrg if (cf->n == NO_DEPENDENCE) 282 1.1 mrg fprintf (outf, "no dependence"); 283 1.1 mrg else if (cf->n == NOT_KNOWN) 284 1.1 mrg fprintf (outf, "not known"); 285 1.1 mrg else 286 1.1 mrg { 287 1.1 mrg for (i = 0; i < cf->n; i++) 288 1.1 mrg { 289 1.1 mrg if (i != 0) 290 1.1 mrg fprintf (outf, " "); 291 1.1 mrg fprintf (outf, "["); 292 1.1 mrg dump_affine_function (outf, cf->fns[i]); 293 1.1 mrg fprintf (outf, "]"); 294 1.1 mrg } 295 1.1 mrg } 296 1.1 mrg } 297 1.1 mrg 298 1.1 mrg /* Dump function for a SUBSCRIPT structure. 
*/

DEBUG_FUNCTION void
dump_subscript (FILE *outf, struct subscript *subscript)
{
  conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);

  fprintf (outf, "\n (subscript \n");
  fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n  last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  cf = SUB_CONFLICTS_IN_B (subscript);
  fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n  last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  fprintf (outf, "\n (Subscript distance: ");
  print_generic_expr (outf, SUB_DISTANCE (subscript));
  fprintf (outf, " ))\n");
}

/* Print the classic direction vector DIRV to OUTF.
*/ 331 1.1 mrg 332 1.1 mrg DEBUG_FUNCTION void 333 1.1 mrg print_direction_vector (FILE *outf, 334 1.1 mrg lambda_vector dirv, 335 1.1 mrg int length) 336 1.1 mrg { 337 1.1 mrg int eq; 338 1.1 mrg 339 1.1 mrg for (eq = 0; eq < length; eq++) 340 1.1 mrg { 341 1.1 mrg enum data_dependence_direction dir = ((enum data_dependence_direction) 342 1.1 mrg dirv[eq]); 343 1.1 mrg 344 1.1 mrg switch (dir) 345 1.1 mrg { 346 1.1 mrg case dir_positive: 347 1.1 mrg fprintf (outf, " +"); 348 1.1 mrg break; 349 1.1 mrg case dir_negative: 350 1.1 mrg fprintf (outf, " -"); 351 1.1 mrg break; 352 1.1 mrg case dir_equal: 353 1.1 mrg fprintf (outf, " ="); 354 1.1 mrg break; 355 1.1 mrg case dir_positive_or_equal: 356 1.1 mrg fprintf (outf, " +="); 357 1.1 mrg break; 358 1.1 mrg case dir_positive_or_negative: 359 1.1 mrg fprintf (outf, " +-"); 360 1.1 mrg break; 361 1.1 mrg case dir_negative_or_equal: 362 1.1 mrg fprintf (outf, " -="); 363 1.1 mrg break; 364 1.1 mrg case dir_star: 365 1.1 mrg fprintf (outf, " *"); 366 1.1 mrg break; 367 1.1 mrg default: 368 1.1 mrg fprintf (outf, "indep"); 369 1.1 mrg break; 370 1.1 mrg } 371 1.1 mrg } 372 1.1 mrg fprintf (outf, "\n"); 373 1.1 mrg } 374 1.1 mrg 375 1.1 mrg /* Print a vector of direction vectors. */ 376 1.1 mrg 377 1.1 mrg DEBUG_FUNCTION void 378 1.1 mrg print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects, 379 1.1 mrg int length) 380 1.1 mrg { 381 1.1 mrg for (lambda_vector v : dir_vects) 382 1.1 mrg print_direction_vector (outf, v, length); 383 1.1 mrg } 384 1.1 mrg 385 1.1 mrg /* Print out a vector VEC of length N to OUTFILE. */ 386 1.1 mrg 387 1.1 mrg DEBUG_FUNCTION void 388 1.1 mrg print_lambda_vector (FILE * outfile, lambda_vector vector, int n) 389 1.1 mrg { 390 1.1 mrg int i; 391 1.1 mrg 392 1.1 mrg for (i = 0; i < n; i++) 393 1.1 mrg fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]); 394 1.1 mrg fprintf (outfile, "\n"); 395 1.1 mrg } 396 1.1 mrg 397 1.1 mrg /* Print a vector of distance vectors. 
*/ 398 1.1 mrg 399 1.1 mrg DEBUG_FUNCTION void 400 1.1 mrg print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects, 401 1.1 mrg int length) 402 1.1 mrg { 403 1.1 mrg for (lambda_vector v : dist_vects) 404 1.1 mrg print_lambda_vector (outf, v, length); 405 1.1 mrg } 406 1.1 mrg 407 1.1 mrg /* Dump function for a DATA_DEPENDENCE_RELATION structure. */ 408 1.1 mrg 409 1.1 mrg DEBUG_FUNCTION void 410 1.1 mrg dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr) 411 1.1 mrg { 412 1.1 mrg struct data_reference *dra, *drb; 413 1.1 mrg 414 1.1 mrg fprintf (outf, "(Data Dep: \n"); 415 1.1 mrg 416 1.1 mrg if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) 417 1.1 mrg { 418 1.1 mrg if (ddr) 419 1.1 mrg { 420 1.1 mrg dra = DDR_A (ddr); 421 1.1 mrg drb = DDR_B (ddr); 422 1.1 mrg if (dra) 423 1.1 mrg dump_data_reference (outf, dra); 424 1.1 mrg else 425 1.1 mrg fprintf (outf, " (nil)\n"); 426 1.1 mrg if (drb) 427 1.1 mrg dump_data_reference (outf, drb); 428 1.1 mrg else 429 1.1 mrg fprintf (outf, " (nil)\n"); 430 1.1 mrg } 431 1.1 mrg fprintf (outf, " (don't know)\n)\n"); 432 1.1 mrg return; 433 1.1 mrg } 434 1.1 mrg 435 1.1 mrg dra = DDR_A (ddr); 436 1.1 mrg drb = DDR_B (ddr); 437 1.1 mrg dump_data_reference (outf, dra); 438 1.1 mrg dump_data_reference (outf, drb); 439 1.1 mrg 440 1.1 mrg if (DDR_ARE_DEPENDENT (ddr) == chrec_known) 441 1.1 mrg fprintf (outf, " (no dependence)\n"); 442 1.1 mrg 443 1.1 mrg else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) 444 1.1 mrg { 445 1.1 mrg unsigned int i; 446 1.1 mrg class loop *loopi; 447 1.1 mrg 448 1.1 mrg subscript *sub; 449 1.1 mrg FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub) 450 1.1 mrg { 451 1.1 mrg fprintf (outf, " access_fn_A: "); 452 1.1 mrg print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0)); 453 1.1 mrg fprintf (outf, " access_fn_B: "); 454 1.1 mrg print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1)); 455 1.1 mrg dump_subscript (outf, sub); 456 1.1 mrg } 457 1.1 mrg 458 1.1 mrg fprintf (outf, " 
loop nest: ("); 459 1.1 mrg FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi) 460 1.1 mrg fprintf (outf, "%d ", loopi->num); 461 1.1 mrg fprintf (outf, ")\n"); 462 1.1 mrg 463 1.1 mrg for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++) 464 1.1 mrg { 465 1.1 mrg fprintf (outf, " distance_vector: "); 466 1.1 mrg print_lambda_vector (outf, DDR_DIST_VECT (ddr, i), 467 1.1 mrg DDR_NB_LOOPS (ddr)); 468 1.1 mrg } 469 1.1 mrg 470 1.1 mrg for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++) 471 1.1 mrg { 472 1.1 mrg fprintf (outf, " direction_vector: "); 473 1.1 mrg print_direction_vector (outf, DDR_DIR_VECT (ddr, i), 474 1.1 mrg DDR_NB_LOOPS (ddr)); 475 1.1 mrg } 476 1.1 mrg } 477 1.1 mrg 478 1.1 mrg fprintf (outf, ")\n"); 479 1.1 mrg } 480 1.1 mrg 481 1.1 mrg /* Debug version. */ 482 1.1 mrg 483 1.1 mrg DEBUG_FUNCTION void 484 1.1 mrg debug_data_dependence_relation (const struct data_dependence_relation *ddr) 485 1.1 mrg { 486 1.1 mrg dump_data_dependence_relation (stderr, ddr); 487 1.1 mrg } 488 1.1 mrg 489 1.1 mrg /* Dump into FILE all the dependence relations from DDRS. */ 490 1.1 mrg 491 1.1 mrg DEBUG_FUNCTION void 492 1.1 mrg dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs) 493 1.1 mrg { 494 1.1 mrg for (auto ddr : ddrs) 495 1.1 mrg dump_data_dependence_relation (file, ddr); 496 1.1 mrg } 497 1.1 mrg 498 1.1 mrg DEBUG_FUNCTION void 499 1.1 mrg debug (vec<ddr_p> &ref) 500 1.1 mrg { 501 1.1 mrg dump_data_dependence_relations (stderr, ref); 502 1.1 mrg } 503 1.1 mrg 504 1.1 mrg DEBUG_FUNCTION void 505 1.1 mrg debug (vec<ddr_p> *ptr) 506 1.1 mrg { 507 1.1 mrg if (ptr) 508 1.1 mrg debug (*ptr); 509 1.1 mrg else 510 1.1 mrg fprintf (stderr, "<nil>\n"); 511 1.1 mrg } 512 1.1 mrg 513 1.1 mrg 514 1.1 mrg /* Dump to STDERR all the dependence relations from DDRS. 
*/ 515 1.1 mrg 516 1.1 mrg DEBUG_FUNCTION void 517 1.1 mrg debug_data_dependence_relations (vec<ddr_p> ddrs) 518 1.1 mrg { 519 1.1 mrg dump_data_dependence_relations (stderr, ddrs); 520 1.1 mrg } 521 1.1 mrg 522 1.1 mrg /* Dumps the distance and direction vectors in FILE. DDRS contains 523 1.1 mrg the dependence relations, and VECT_SIZE is the size of the 524 1.1 mrg dependence vectors, or in other words the number of loops in the 525 1.1 mrg considered nest. */ 526 1.1 mrg 527 1.1 mrg DEBUG_FUNCTION void 528 1.1 mrg dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs) 529 1.1 mrg { 530 1.1 mrg for (data_dependence_relation *ddr : ddrs) 531 1.1 mrg if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr)) 532 1.1 mrg { 533 1.1 mrg for (lambda_vector v : DDR_DIST_VECTS (ddr)) 534 1.1 mrg { 535 1.1 mrg fprintf (file, "DISTANCE_V ("); 536 1.1 mrg print_lambda_vector (file, v, DDR_NB_LOOPS (ddr)); 537 1.1 mrg fprintf (file, ")\n"); 538 1.1 mrg } 539 1.1 mrg 540 1.1 mrg for (lambda_vector v : DDR_DIR_VECTS (ddr)) 541 1.1 mrg { 542 1.1 mrg fprintf (file, "DIRECTION_V ("); 543 1.1 mrg print_direction_vector (file, v, DDR_NB_LOOPS (ddr)); 544 1.1 mrg fprintf (file, ")\n"); 545 1.1 mrg } 546 1.1 mrg } 547 1.1 mrg 548 1.1 mrg fprintf (file, "\n\n"); 549 1.1 mrg } 550 1.1 mrg 551 1.1 mrg /* Dumps the data dependence relations DDRS in FILE. 
*/ 552 1.1 mrg 553 1.1 mrg DEBUG_FUNCTION void 554 1.1 mrg dump_ddrs (FILE *file, vec<ddr_p> ddrs) 555 1.1 mrg { 556 1.1 mrg for (data_dependence_relation *ddr : ddrs) 557 1.1 mrg dump_data_dependence_relation (file, ddr); 558 1.1 mrg 559 1.1 mrg fprintf (file, "\n\n"); 560 1.1 mrg } 561 1.1 mrg 562 1.1 mrg DEBUG_FUNCTION void 563 1.1 mrg debug_ddrs (vec<ddr_p> ddrs) 564 1.1 mrg { 565 1.1 mrg dump_ddrs (stderr, ddrs); 566 1.1 mrg } 567 1.1 mrg 568 1.1 mrg /* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of 569 1.1 mrg OP0 CODE OP1, where: 570 1.1 mrg 571 1.1 mrg - OP0 CODE OP1 has integral type TYPE 572 1.1 mrg - the range of OP0 is given by OP0_RANGE and 573 1.1 mrg - the range of OP1 is given by OP1_RANGE. 574 1.1 mrg 575 1.1 mrg Independently of RESULT_RANGE, try to compute: 576 1.1 mrg 577 1.1 mrg DELTA = ((sizetype) OP0 CODE (sizetype) OP1) 578 1.1 mrg - (sizetype) (OP0 CODE OP1) 579 1.1 mrg 580 1.1 mrg as a constant and subtract DELTA from the ssizetype constant in *OFF. 581 1.1 mrg Return true on success, or false if DELTA is not known at compile time. 582 1.1 mrg 583 1.1 mrg Truncation and sign changes are known to distribute over CODE, i.e. 584 1.1 mrg 585 1.1 mrg (itype) (A CODE B) == (itype) A CODE (itype) B 586 1.1 mrg 587 1.1 mrg for any integral type ITYPE whose precision is no greater than the 588 1.1 mrg precision of A and B. 
*/

static bool
compute_distributive_range (tree type, value_range &op0_range,
			    tree_code code, value_range &op1_range,
			    tree *off, value_range *result_range)
{
  gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
  if (result_range)
    {
      range_operator *op = range_op_handler (code, type);
      op->fold_range (*result_range, type, op0_range, op1_range);
    }

  /* The distributive property guarantees that if TYPE is no narrower
     than SIZETYPE,

       (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1

     and so we can treat DELTA as zero.  */
  if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
    return true;

  /* If overflow is undefined, we can assume that:

       X == (ssizetype) OP0 CODE (ssizetype) OP1

     is within the range of TYPE, i.e.:

       X == (ssizetype) (TYPE) X

     Distributing the (TYPE) truncation over X gives:

       X == (ssizetype) (OP0 CODE OP1)

     Casting both sides to sizetype and distributing the sizetype cast
     over X gives:

       (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)

     and so we can treat DELTA as zero.  */
  if (TYPE_OVERFLOW_UNDEFINED (type))
    return true;

  /* Compute the range of:

       (ssizetype) OP0 CODE (ssizetype) OP1

     The distributive property guarantees that this has the same bitpattern as:

       (sizetype) OP0 CODE (sizetype) OP1

     but its range is more conducive to analysis.  */
  range_cast (op0_range, ssizetype);
  range_cast (op1_range, ssizetype);
  value_range wide_range;
  range_operator *op = range_op_handler (code, ssizetype);
  /* Fold the wide operation under wrapping semantics so that any
     overflow is represented modulo the ssizetype precision.  */
  bool saved_flag_wrapv = flag_wrapv;
  flag_wrapv = 1;
  op->fold_range (wide_range, ssizetype, op0_range, op1_range);
  flag_wrapv = saved_flag_wrapv;
  if (wide_range.num_pairs () != 1 || !range_int_cst_p (&wide_range))
    return false;

  wide_int lb = wide_range.lower_bound ();
  wide_int ub = wide_range.upper_bound ();

  /* Calculate the number of times that each end of the range overflows or
     underflows TYPE.  We can only calculate DELTA if the numbers match.  */
  unsigned int precision = TYPE_PRECISION (type);
  if (!TYPE_UNSIGNED (type))
    {
      wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
      lb -= type_min;
      ub -= type_min;
    }
  wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
  lb &= upper_bits;
  ub &= upper_bits;
  if (lb != ub)
    return false;

  /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
     negative values indicating underflow.  The low PRECISION bits of LB
     are clear, so DELTA is therefore LB (== UB).  */
  *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
  return true;
}

/* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
   given that OP has type FROM_TYPE and range RANGE.  Both TO_TYPE and
   FROM_TYPE are integral types.
*/ 680 1.1 mrg 681 1.1 mrg static bool 682 1.1 mrg nop_conversion_for_offset_p (tree to_type, tree from_type, value_range &range) 683 1.1 mrg { 684 1.1 mrg gcc_assert (INTEGRAL_TYPE_P (to_type) 685 1.1 mrg && INTEGRAL_TYPE_P (from_type) 686 1.1 mrg && !TYPE_OVERFLOW_TRAPS (to_type) 687 1.1 mrg && !TYPE_OVERFLOW_TRAPS (from_type)); 688 1.1 mrg 689 1.1 mrg /* Converting to something no narrower than sizetype and then to sizetype 690 1.1 mrg is equivalent to converting directly to sizetype. */ 691 1.1 mrg if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype)) 692 1.1 mrg return true; 693 1.1 mrg 694 1.1 mrg /* Check whether TO_TYPE can represent all values that FROM_TYPE can. */ 695 1.1 mrg if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type) 696 1.1 mrg && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type))) 697 1.1 mrg return true; 698 1.1 mrg 699 1.1 mrg /* For narrowing conversions, we could in principle test whether 700 1.1 mrg the bits in FROM_TYPE but not in TO_TYPE have a fixed value 701 1.1 mrg and apply a constant adjustment. 702 1.1 mrg 703 1.1 mrg For other conversions (which involve a sign change) we could 704 1.1 mrg check that the signs are always equal, and apply a constant 705 1.1 mrg adjustment if the signs are negative. 706 1.1 mrg 707 1.1 mrg However, both cases should be rare. */ 708 1.1 mrg return range_fits_type_p (&range, TYPE_PRECISION (to_type), 709 1.1 mrg TYPE_SIGN (to_type)); 710 1.1 mrg } 711 1.1 mrg 712 1.1 mrg static void 713 1.1 mrg split_constant_offset (tree type, tree *var, tree *off, 714 1.1 mrg value_range *result_range, 715 1.1 mrg hash_map<tree, std::pair<tree, tree> > &cache, 716 1.1 mrg unsigned *limit); 717 1.1 mrg 718 1.1 mrg /* Helper function for split_constant_offset. 
If TYPE is a pointer type, 719 1.1 mrg try to express OP0 CODE OP1 as: 720 1.1 mrg 721 1.1 mrg POINTER_PLUS <*VAR, (sizetype) *OFF> 722 1.1 mrg 723 1.1 mrg where: 724 1.1 mrg 725 1.1 mrg - *VAR has type TYPE 726 1.1 mrg - *OFF is a constant of type ssizetype. 727 1.1 mrg 728 1.1 mrg If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as: 729 1.1 mrg 730 1.1 mrg *VAR + (sizetype) *OFF 731 1.1 mrg 732 1.1 mrg where: 733 1.1 mrg 734 1.1 mrg - *VAR has type sizetype 735 1.1 mrg - *OFF is a constant of type ssizetype. 736 1.1 mrg 737 1.1 mrg In both cases, OP0 CODE OP1 has type TYPE. 738 1.1 mrg 739 1.1 mrg Return true on success. A false return value indicates that we can't 740 1.1 mrg do better than set *OFF to zero. 741 1.1 mrg 742 1.1 mrg When returning true, set RESULT_RANGE to the range of OP0 CODE OP1, 743 1.1 mrg if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING. 744 1.1 mrg 745 1.1 mrg CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously 746 1.1 mrg visited. LIMIT counts down the number of SSA names that we are 747 1.1 mrg allowed to process before giving up. 
*/

static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
			 tree *var, tree *off, value_range *result_range,
			 hash_map<tree, std::pair<tree, tree> > &cache,
			 unsigned *limit)
{
  tree var0, var1;
  tree off0, off1;
  value_range op0_range, op1_range;

  *var = NULL_TREE;
  *off = NULL_TREE;

  if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
    return false;

  /* Refuse to split expressions that involve SSA names occurring in
     abnormal PHIs; new uses of them cannot be created.  */
  if (TREE_CODE (op0) == SSA_NAME
      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
    return false;
  if (op1
      && TREE_CODE (op1) == SSA_NAME
      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op1))
    return false;

  switch (code)
    {
    case INTEGER_CST:
      /* A constant is all offset; the variable part is zero.  */
      *var = size_int (0);
      *off = fold_convert (ssizetype, op0);
      if (result_range)
	result_range->set (op0, op0);
      return true;

    case POINTER_PLUS_EXPR:
      /* Pointer arithmetic cannot overflow, so no range tracking is
	 needed; simply combine the two splits.  */
      split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
      split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
      *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
      *off = size_binop (PLUS_EXPR, off0, off1);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
      *off = size_binop (code, off0, off1);
      if (!compute_distributive_range (type, op0_range, code, op1_range,
				       off, result_range))
	return false;
      *var = fold_build2 (code, sizetype, var0, var1);
      return true;

    case MULT_EXPR:
      if (TREE_CODE (op1) != INTEGER_CST)
	return false;

      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      op1_range.set (op1, op1);
      *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
      if (!compute_distributive_range (type, op0_range, code, op1_range,
				       off, result_range))
	return false;
      *var = fold_build2 (MULT_EXPR, sizetype, var0,
			  fold_convert (sizetype, op1));
      return true;

    case ADDR_EXPR:
      {
	tree base, poffset;
	poly_int64 pbitsize, pbitpos, pbytepos;
	machine_mode pmode;
	int punsignedp, preversep, pvolatilep;

	op0 = TREE_OPERAND (op0, 0);
	base
	  = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
				 &punsignedp, &preversep, &pvolatilep);

	/* Punt on references that are not byte-aligned.  */
	if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
	  return false;
	base = build_fold_addr_expr (base);
	off0 = ssize_int (pbytepos);

	if (poffset)
	  {
	    split_constant_offset (poffset, &poffset, &off1, nullptr,
				   cache, limit);
	    off0 = size_binop (PLUS_EXPR, off0, off1);
	    base = fold_build_pointer_plus (base, poffset);
	  }

	var0 = fold_convert (type, base);

	/* If variable length types are involved, punt, otherwise casts
	   might be converted into ARRAY_REFs in gimplify_conversion.
	   To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
	   possibly no longer appears in current GIMPLE, might resurface.
	   This perhaps could run
	   if (CONVERT_EXPR_P (var0))
	     {
	       gimplify_conversion (&var0);
	       // Attempt to fill in any within var0 found ARRAY_REF's
	       // element size from corresponding op embedded ARRAY_REF,
	       // if unsuccessful, just punt.
	     } */
	while (POINTER_TYPE_P (type))
	  type = TREE_TYPE (type);
	if (int_size_in_bytes (type) < 0)
	  return false;

	*var = var0;
	*off = off0;
	return true;
      }

    case SSA_NAME:
      {
	gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
	enum tree_code subcode;

	if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
	  return false;

	subcode = gimple_assign_rhs_code (def_stmt);

	/* We are using a cache to avoid un-CSEing large amounts of code.  */
	bool use_cache = false;
	if (!has_single_use (op0)
	    && (subcode == POINTER_PLUS_EXPR
		|| subcode == PLUS_EXPR
		|| subcode == MINUS_EXPR
		|| subcode == MULT_EXPR
		|| subcode == ADDR_EXPR
		|| CONVERT_EXPR_CODE_P (subcode)))
	  {
	    use_cache = true;
	    bool existed;
	    std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
	    if (existed)
	      {
		/* A zero cached offset marks an SSA name that is being
		   visited (cycle guard) or known not to split.  */
		if (integer_zerop (e.second))
		  return false;
		*var = e.first;
		*off = e.second;
		/* The caller sets the range in this case.  */
		return true;
	      }
	    e = std::make_pair (op0, ssize_int (0));
	  }

	if (*limit == 0)
	  return false;
	--*limit;

	var0 = gimple_assign_rhs1 (def_stmt);
	var1 = gimple_assign_rhs2 (def_stmt);

	bool res = split_constant_offset_1 (type, var0, subcode, var1,
					    var, off, nullptr, cache, limit);
	if (res && use_cache)
	  *cache.get (op0) = std::make_pair (*var, *off);
	/* The caller sets the range in this case.  */
	return res;
      }
    CASE_CONVERT:
      {
	/* We can only handle the following conversions:

	   - Conversions from one pointer type to another pointer type.

	   - Conversions from one non-trapping integral type to another
	     non-trapping integral type.  In this case, the recursive
	     call makes sure that:

	       (sizetype) OP0

	     can be expressed as a sizetype operation involving VAR and OFF,
	     and all we need to do is check whether:

	       (sizetype) OP0 == (sizetype) (TYPE) OP0

	   - Conversions from a non-trapping sizetype-size integral type to
	     a like-sized pointer type.  In this case, the recursive call
	     makes sure that:

	       (sizetype) OP0 == *VAR + (sizetype) *OFF

	     and we can convert that to:

	       POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>

	   - Conversions from a sizetype-sized pointer type to a like-sized
	     non-trapping integral type.  In this case, the recursive call
	     makes sure that:

	       OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>

	     where the POINTER_PLUS and *VAR have the same precision as
	     TYPE (and the same precision as sizetype).  Then:

	       (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF.  */
	tree itype = TREE_TYPE (op0);
	if ((POINTER_TYPE_P (itype)
	     || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
	    && (POINTER_TYPE_P (type)
		|| (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
	    && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
		|| (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
		    && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
	  {
	    if (POINTER_TYPE_P (type))
	      {
		split_constant_offset (op0, var, off, nullptr, cache, limit);
		*var = fold_convert (type, *var);
	      }
	    else if (POINTER_TYPE_P (itype))
	      {
		split_constant_offset (op0, var, off, nullptr, cache, limit);
		*var = fold_convert (sizetype, *var);
	      }
	    else
	      {
		split_constant_offset (op0, var, off, &op0_range,
				       cache, limit);
		if (!nop_conversion_for_offset_p (type, itype, op0_range))
		  return false;
		if (result_range)
		  {
		    *result_range = op0_range;
		    range_cast (*result_range, type);
		  }
	      }
	    return true;
	  }
	return false;
      }

    default:
      return false;
    }
}

/* If EXP has pointer type, try to express it as:

     POINTER_PLUS <*VAR, (sizetype) *OFF>

   where:

   - *VAR has the same type as EXP
- *OFF is a constant of type ssizetype. 998 1.1 mrg 999 1.1 mrg If EXP has an integral type, try to express (sizetype) EXP as: 1000 1.1 mrg 1001 1.1 mrg *VAR + (sizetype) *OFF 1002 1.1 mrg 1003 1.1 mrg where: 1004 1.1 mrg 1005 1.1 mrg - *VAR has type sizetype 1006 1.1 mrg - *OFF is a constant of type ssizetype. 1007 1.1 mrg 1008 1.1 mrg If EXP_RANGE is nonnull, set it to the range of EXP. 1009 1.1 mrg 1010 1.1 mrg CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously 1011 1.1 mrg visited. LIMIT counts down the number of SSA names that we are 1012 1.1 mrg allowed to process before giving up. */ 1013 1.1 mrg 1014 1.1 mrg static void 1015 1.1 mrg split_constant_offset (tree exp, tree *var, tree *off, value_range *exp_range, 1016 1.1 mrg hash_map<tree, std::pair<tree, tree> > &cache, 1017 1.1 mrg unsigned *limit) 1018 1.1 mrg { 1019 1.1 mrg tree type = TREE_TYPE (exp), op0, op1; 1020 1.1 mrg enum tree_code code; 1021 1.1 mrg 1022 1.1 mrg code = TREE_CODE (exp); 1023 1.1 mrg if (exp_range) 1024 1.1 mrg { 1025 1.1 mrg *exp_range = type; 1026 1.1 mrg if (code == SSA_NAME) 1027 1.1 mrg { 1028 1.1 mrg value_range vr; 1029 1.1 mrg get_range_query (cfun)->range_of_expr (vr, exp); 1030 1.1 mrg if (vr.undefined_p ()) 1031 1.1 mrg vr.set_varying (TREE_TYPE (exp)); 1032 1.1 mrg wide_int var_min = wi::to_wide (vr.min ()); 1033 1.1 mrg wide_int var_max = wi::to_wide (vr.max ()); 1034 1.1 mrg value_range_kind vr_kind = vr.kind (); 1035 1.1 mrg wide_int var_nonzero = get_nonzero_bits (exp); 1036 1.1 mrg vr_kind = intersect_range_with_nonzero_bits (vr_kind, 1037 1.1 mrg &var_min, &var_max, 1038 1.1 mrg var_nonzero, 1039 1.1 mrg TYPE_SIGN (type)); 1040 1.1 mrg /* This check for VR_VARYING is here because the old code 1041 1.1 mrg using get_range_info would return VR_RANGE for the entire 1042 1.1 mrg domain, instead of VR_VARYING. The new code normalizes 1043 1.1 mrg full-domain ranges to VR_VARYING. 
*/ 1044 1.1 mrg if (vr_kind == VR_RANGE || vr_kind == VR_VARYING) 1045 1.1 mrg *exp_range = value_range (type, var_min, var_max); 1046 1.1 mrg } 1047 1.1 mrg } 1048 1.1 mrg 1049 1.1 mrg if (!tree_is_chrec (exp) 1050 1.1 mrg && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS) 1051 1.1 mrg { 1052 1.1 mrg extract_ops_from_tree (exp, &code, &op0, &op1); 1053 1.1 mrg if (split_constant_offset_1 (type, op0, code, op1, var, off, 1054 1.1 mrg exp_range, cache, limit)) 1055 1.1 mrg return; 1056 1.1 mrg } 1057 1.1 mrg 1058 1.1 mrg *var = exp; 1059 1.1 mrg if (INTEGRAL_TYPE_P (type)) 1060 1.1 mrg *var = fold_convert (sizetype, *var); 1061 1.1 mrg *off = ssize_int (0); 1062 1.1 mrg 1063 1.1 mrg value_range r; 1064 1.1 mrg if (exp_range && code != SSA_NAME 1065 1.1 mrg && get_range_query (cfun)->range_of_expr (r, exp) 1066 1.1 mrg && !r.undefined_p ()) 1067 1.1 mrg *exp_range = r; 1068 1.1 mrg } 1069 1.1 mrg 1070 1.1 mrg /* Expresses EXP as VAR + OFF, where OFF is a constant. VAR has the same 1071 1.1 mrg type as EXP while OFF has type ssizetype. */ 1072 1.1 mrg 1073 1.1 mrg void 1074 1.1 mrg split_constant_offset (tree exp, tree *var, tree *off) 1075 1.1 mrg { 1076 1.1 mrg unsigned limit = param_ssa_name_def_chain_limit; 1077 1.1 mrg static hash_map<tree, std::pair<tree, tree> > *cache; 1078 1.1 mrg if (!cache) 1079 1.1 mrg cache = new hash_map<tree, std::pair<tree, tree> > (37); 1080 1.1 mrg split_constant_offset (exp, var, off, nullptr, *cache, &limit); 1081 1.1 mrg *var = fold_convert (TREE_TYPE (exp), *var); 1082 1.1 mrg cache->empty (); 1083 1.1 mrg } 1084 1.1 mrg 1085 1.1 mrg /* Returns the address ADDR of an object in a canonical shape (without nop 1086 1.1 mrg casts, and with type of pointer to the object). 
*/ 1087 1.1 mrg 1088 1.1 mrg static tree 1089 1.1 mrg canonicalize_base_object_address (tree addr) 1090 1.1 mrg { 1091 1.1 mrg tree orig = addr; 1092 1.1 mrg 1093 1.1 mrg STRIP_NOPS (addr); 1094 1.1 mrg 1095 1.1 mrg /* The base address may be obtained by casting from integer, in that case 1096 1.1 mrg keep the cast. */ 1097 1.1 mrg if (!POINTER_TYPE_P (TREE_TYPE (addr))) 1098 1.1 mrg return orig; 1099 1.1 mrg 1100 1.1 mrg if (TREE_CODE (addr) != ADDR_EXPR) 1101 1.1 mrg return addr; 1102 1.1 mrg 1103 1.1 mrg return build_fold_addr_expr (TREE_OPERAND (addr, 0)); 1104 1.1 mrg } 1105 1.1 mrg 1106 1.1 mrg /* Analyze the behavior of memory reference REF within STMT. 1107 1.1 mrg There are two modes: 1108 1.1 mrg 1109 1.1 mrg - BB analysis. In this case we simply split the address into base, 1110 1.1 mrg init and offset components, without reference to any containing loop. 1111 1.1 mrg The resulting base and offset are general expressions and they can 1112 1.1 mrg vary arbitrarily from one iteration of the containing loop to the next. 1113 1.1 mrg The step is always zero. 1114 1.1 mrg 1115 1.1 mrg - loop analysis. In this case we analyze the reference both wrt LOOP 1116 1.1 mrg and on the basis that the reference occurs (is "used") in LOOP; 1117 1.1 mrg see the comment above analyze_scalar_evolution_in_loop for more 1118 1.1 mrg information about this distinction. The base, init, offset and 1119 1.1 mrg step fields are all invariant in LOOP. 1120 1.1 mrg 1121 1.1 mrg Perform BB analysis if LOOP is null, or if LOOP is the function's 1122 1.1 mrg dummy outermost loop. In other cases perform loop analysis. 1123 1.1 mrg 1124 1.1 mrg Return true if the analysis succeeded and store the results in DRB if so. 1125 1.1 mrg BB analysis can only fail for bitfield or reversed-storage accesses. 
*/

opt_result
dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
		      class loop *loop, const gimple *stmt)
{
  poly_int64 pbitsize, pbitpos;
  tree base, poffset;
  machine_mode pmode;
  int punsignedp, preversep, pvolatilep;
  affine_iv base_iv, offset_iv;
  tree init, dinit, step;
  bool in_loop = (loop && loop->num);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "analyze_innermost: ");

  base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
			      &punsignedp, &preversep, &pvolatilep);
  gcc_assert (base != NULL_TREE);

  poly_int64 pbytepos;
  if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
    return opt_result::failure_at (stmt,
				   "failed: bit offset alignment.\n");

  if (preversep)
    return opt_result::failure_at (stmt,
				   "failed: reverse storage order.\n");

  /* Calculate the alignment and misalignment for the inner reference.  */
  unsigned HOST_WIDE_INT bit_base_misalignment;
  unsigned int bit_base_alignment;
  get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);

  /* There are no bitfield references remaining in BASE, so the values
     we got back must be whole bytes.  */
  gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
	      && bit_base_misalignment % BITS_PER_UNIT == 0);
  unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
  poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;

  if (TREE_CODE (base) == MEM_REF)
    {
      if (!integer_zerop (TREE_OPERAND (base, 1)))
	{
	  /* Subtract MOFF from the base and add it to POFFSET instead.
	     Adjust the misalignment to reflect the amount we subtracted.  */
	  poly_offset_int moff = mem_ref_offset (base);
	  base_misalignment -= moff.force_shwi ();
	  tree mofft = wide_int_to_tree (sizetype, moff);
	  if (!poffset)
	    poffset = mofft;
	  else
	    poffset = size_binop (PLUS_EXPR, poffset, mofft);
	}
      base = TREE_OPERAND (base, 0);
    }
  else
    base = build_fold_addr_expr (base);

  if (in_loop)
    {
      if (!simple_iv (loop, loop, base, &base_iv, true))
	return opt_result::failure_at
	  (stmt, "failed: evolution of base is not affine.\n");
    }
  else
    {
      base_iv.base = base;
      base_iv.step = ssize_int (0);
      base_iv.no_overflow = true;
    }

  if (!poffset)
    {
      offset_iv.base = ssize_int (0);
      offset_iv.step = ssize_int (0);
    }
  else
    {
      if (!in_loop)
	{
	  offset_iv.base = poffset;
	  offset_iv.step = ssize_int (0);
	}
      else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
	return opt_result::failure_at
	  (stmt, "failed: evolution of offset is not affine.\n");
    }

  init = ssize_int (pbytepos);

  /* Subtract any constant component from the base and add it to INIT instead.
     Adjust the misalignment to reflect the amount we subtracted.  */
  split_constant_offset (base_iv.base, &base_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);
  base_misalignment -= TREE_INT_CST_LOW (dinit);

  split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);

  step = size_binop (PLUS_EXPR,
		     fold_convert (ssizetype, base_iv.step),
		     fold_convert (ssizetype, offset_iv.step));

  base = canonicalize_base_object_address (base_iv.base);

  /* See if get_pointer_alignment can guarantee a higher alignment than
     the one we calculated above.  */
  unsigned HOST_WIDE_INT alt_misalignment;
  unsigned int alt_alignment;
  get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);

  /* As above, these values must be whole bytes.  */
  gcc_assert (alt_alignment % BITS_PER_UNIT == 0
	      && alt_misalignment % BITS_PER_UNIT == 0);
  alt_alignment /= BITS_PER_UNIT;
  alt_misalignment /= BITS_PER_UNIT;

  if (base_alignment < alt_alignment)
    {
      base_alignment = alt_alignment;
      base_misalignment = alt_misalignment;
    }

  drb->base_address = base;
  drb->offset = fold_convert (ssizetype, offset_iv.base);
  drb->init = init;
  drb->step = step;
  if (known_misalignment (base_misalignment, base_alignment,
			  &drb->base_misalignment))
    drb->base_alignment = base_alignment;
  else
    {
      drb->base_alignment = known_alignment (base_misalignment);
      drb->base_misalignment = 0;
    }
  drb->offset_alignment = highest_pow2_factor (offset_iv.base);
  drb->step_alignment = highest_pow2_factor (step);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "success.\n");

  return opt_result::success ();
}

/* Return true if OP is a valid component reference for a DR access
   function.  This accepts a subset of what handled_component_p accepts.
*/ 1274 1.1 mrg 1275 1.1 mrg static bool 1276 1.1 mrg access_fn_component_p (tree op) 1277 1.1 mrg { 1278 1.1 mrg switch (TREE_CODE (op)) 1279 1.1 mrg { 1280 1.1 mrg case REALPART_EXPR: 1281 1.1 mrg case IMAGPART_EXPR: 1282 1.1 mrg case ARRAY_REF: 1283 1.1 mrg return true; 1284 1.1 mrg 1285 1.1 mrg case COMPONENT_REF: 1286 1.1 mrg return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE; 1287 1.1 mrg 1288 1.1 mrg default: 1289 1.1 mrg return false; 1290 1.1 mrg } 1291 1.1 mrg } 1292 1.1 mrg 1293 1.1 mrg /* Returns whether BASE can have a access_fn_component_p with BASE 1294 1.1 mrg as base. */ 1295 1.1 mrg 1296 1.1 mrg static bool 1297 1.1 mrg base_supports_access_fn_components_p (tree base) 1298 1.1 mrg { 1299 1.1 mrg switch (TREE_CODE (TREE_TYPE (base))) 1300 1.1 mrg { 1301 1.1 mrg case COMPLEX_TYPE: 1302 1.1 mrg case ARRAY_TYPE: 1303 1.1 mrg case RECORD_TYPE: 1304 1.1 mrg return true; 1305 1.1 mrg default: 1306 1.1 mrg return false; 1307 1.1 mrg } 1308 1.1 mrg } 1309 1.1 mrg 1310 1.1 mrg /* Determines the base object and the list of indices of memory reference 1311 1.1 mrg DR, analyzed in LOOP and instantiated before NEST. */ 1312 1.1 mrg 1313 1.1 mrg static void 1314 1.1 mrg dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop) 1315 1.1 mrg { 1316 1.1 mrg /* If analyzing a basic-block there are no indices to analyze 1317 1.1 mrg and thus no access functions. */ 1318 1.1 mrg if (!nest) 1319 1.1 mrg { 1320 1.1 mrg dri->base_object = ref; 1321 1.1 mrg dri->access_fns.create (0); 1322 1.1 mrg return; 1323 1.1 mrg } 1324 1.1 mrg 1325 1.1 mrg vec<tree> access_fns = vNULL; 1326 1.1 mrg 1327 1.1 mrg /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses 1328 1.1 mrg into a two element array with a constant index. The base is 1329 1.1 mrg then just the immediate underlying object. 
*/ 1330 1.1 mrg if (TREE_CODE (ref) == REALPART_EXPR) 1331 1.1 mrg { 1332 1.1 mrg ref = TREE_OPERAND (ref, 0); 1333 1.1 mrg access_fns.safe_push (integer_zero_node); 1334 1.1 mrg } 1335 1.1 mrg else if (TREE_CODE (ref) == IMAGPART_EXPR) 1336 1.1 mrg { 1337 1.1 mrg ref = TREE_OPERAND (ref, 0); 1338 1.1 mrg access_fns.safe_push (integer_one_node); 1339 1.1 mrg } 1340 1.1 mrg 1341 1.1 mrg /* Analyze access functions of dimensions we know to be independent. 1342 1.1 mrg The list of component references handled here should be kept in 1343 1.1 mrg sync with access_fn_component_p. */ 1344 1.1 mrg while (handled_component_p (ref)) 1345 1.1 mrg { 1346 1.1 mrg if (TREE_CODE (ref) == ARRAY_REF) 1347 1.1 mrg { 1348 1.1 mrg tree op = TREE_OPERAND (ref, 1); 1349 1.1 mrg tree access_fn = analyze_scalar_evolution (loop, op); 1350 1.1 mrg access_fn = instantiate_scev (nest, loop, access_fn); 1351 1.1 mrg access_fns.safe_push (access_fn); 1352 1.1 mrg } 1353 1.1 mrg else if (TREE_CODE (ref) == COMPONENT_REF 1354 1.1 mrg && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE) 1355 1.1 mrg { 1356 1.1 mrg /* For COMPONENT_REFs of records (but not unions!) use the 1357 1.1 mrg FIELD_DECL offset as constant access function so we can 1358 1.1 mrg disambiguate a[i].f1 and a[i].f2. */ 1359 1.1 mrg tree off = component_ref_field_offset (ref); 1360 1.1 mrg off = size_binop (PLUS_EXPR, 1361 1.1 mrg size_binop (MULT_EXPR, 1362 1.1 mrg fold_convert (bitsizetype, off), 1363 1.1 mrg bitsize_int (BITS_PER_UNIT)), 1364 1.1 mrg DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1))); 1365 1.1 mrg access_fns.safe_push (off); 1366 1.1 mrg } 1367 1.1 mrg else 1368 1.1 mrg /* If we have an unhandled component we could not translate 1369 1.1 mrg to an access function stop analyzing. We have determined 1370 1.1 mrg our base object in this case. 
*/ 1371 1.1 mrg break; 1372 1.1 mrg 1373 1.1 mrg ref = TREE_OPERAND (ref, 0); 1374 1.1 mrg } 1375 1.1 mrg 1376 1.1 mrg /* If the address operand of a MEM_REF base has an evolution in the 1377 1.1 mrg analyzed nest, add it as an additional independent access-function. */ 1378 1.1 mrg if (TREE_CODE (ref) == MEM_REF) 1379 1.1 mrg { 1380 1.1 mrg tree op = TREE_OPERAND (ref, 0); 1381 1.1 mrg tree access_fn = analyze_scalar_evolution (loop, op); 1382 1.1 mrg access_fn = instantiate_scev (nest, loop, access_fn); 1383 1.1 mrg STRIP_NOPS (access_fn); 1384 1.1 mrg if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC) 1385 1.1 mrg { 1386 1.1 mrg tree memoff = TREE_OPERAND (ref, 1); 1387 1.1 mrg tree base = initial_condition (access_fn); 1388 1.1 mrg tree orig_type = TREE_TYPE (base); 1389 1.1 mrg STRIP_USELESS_TYPE_CONVERSION (base); 1390 1.1 mrg tree off; 1391 1.1 mrg split_constant_offset (base, &base, &off); 1392 1.1 mrg STRIP_USELESS_TYPE_CONVERSION (base); 1393 1.1 mrg /* Fold the MEM_REF offset into the evolutions initial 1394 1.1 mrg value to make more bases comparable. */ 1395 1.1 mrg if (!integer_zerop (memoff)) 1396 1.1 mrg { 1397 1.1 mrg off = size_binop (PLUS_EXPR, off, 1398 1.1 mrg fold_convert (ssizetype, memoff)); 1399 1.1 mrg memoff = build_int_cst (TREE_TYPE (memoff), 0); 1400 1.1 mrg } 1401 1.1 mrg /* Adjust the offset so it is a multiple of the access type 1402 1.1 mrg size and thus we separate bases that can possibly be used 1403 1.1 mrg to produce partial overlaps (which the access_fn machinery 1404 1.1 mrg cannot handle). 
*/ 1405 1.1 mrg wide_int rem; 1406 1.1 mrg if (TYPE_SIZE_UNIT (TREE_TYPE (ref)) 1407 1.1 mrg && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST 1408 1.1 mrg && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref)))) 1409 1.1 mrg rem = wi::mod_trunc 1410 1.1 mrg (wi::to_wide (off), 1411 1.1 mrg wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))), 1412 1.1 mrg SIGNED); 1413 1.1 mrg else 1414 1.1 mrg /* If we can't compute the remainder simply force the initial 1415 1.1 mrg condition to zero. */ 1416 1.1 mrg rem = wi::to_wide (off); 1417 1.1 mrg off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem); 1418 1.1 mrg memoff = wide_int_to_tree (TREE_TYPE (memoff), rem); 1419 1.1 mrg /* And finally replace the initial condition. */ 1420 1.1 mrg access_fn = chrec_replace_initial_condition 1421 1.1 mrg (access_fn, fold_convert (orig_type, off)); 1422 1.1 mrg /* ??? This is still not a suitable base object for 1423 1.1 mrg dr_may_alias_p - the base object needs to be an 1424 1.1 mrg access that covers the object as whole. With 1425 1.1 mrg an evolution in the pointer this cannot be 1426 1.1 mrg guaranteed. 1427 1.1 mrg As a band-aid, mark the access so we can special-case 1428 1.1 mrg it in dr_may_alias_p. */ 1429 1.1 mrg tree old = ref; 1430 1.1 mrg ref = fold_build2_loc (EXPR_LOCATION (ref), 1431 1.1 mrg MEM_REF, TREE_TYPE (ref), 1432 1.1 mrg base, memoff); 1433 1.1 mrg MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old); 1434 1.1 mrg MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old); 1435 1.1 mrg dri->unconstrained_base = true; 1436 1.1 mrg access_fns.safe_push (access_fn); 1437 1.1 mrg } 1438 1.1 mrg } 1439 1.1 mrg else if (DECL_P (ref)) 1440 1.1 mrg { 1441 1.1 mrg /* Canonicalize DR_BASE_OBJECT to MEM_REF form. 
*/ 1442 1.1 mrg ref = build2 (MEM_REF, TREE_TYPE (ref), 1443 1.1 mrg build_fold_addr_expr (ref), 1444 1.1 mrg build_int_cst (reference_alias_ptr_type (ref), 0)); 1445 1.1 mrg } 1446 1.1 mrg 1447 1.1 mrg dri->base_object = ref; 1448 1.1 mrg dri->access_fns = access_fns; 1449 1.1 mrg } 1450 1.1 mrg 1451 1.1 mrg /* Extracts the alias analysis information from the memory reference DR. */ 1452 1.1 mrg 1453 1.1 mrg static void 1454 1.1 mrg dr_analyze_alias (struct data_reference *dr) 1455 1.1 mrg { 1456 1.1 mrg tree ref = DR_REF (dr); 1457 1.1 mrg tree base = get_base_address (ref), addr; 1458 1.1 mrg 1459 1.1 mrg if (INDIRECT_REF_P (base) 1460 1.1 mrg || TREE_CODE (base) == MEM_REF) 1461 1.1 mrg { 1462 1.1 mrg addr = TREE_OPERAND (base, 0); 1463 1.1 mrg if (TREE_CODE (addr) == SSA_NAME) 1464 1.1 mrg DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr); 1465 1.1 mrg } 1466 1.1 mrg } 1467 1.1 mrg 1468 1.1 mrg /* Frees data reference DR. */ 1469 1.1 mrg 1470 1.1 mrg void 1471 1.1 mrg free_data_ref (data_reference_p dr) 1472 1.1 mrg { 1473 1.1 mrg DR_ACCESS_FNS (dr).release (); 1474 1.1 mrg if (dr->alt_indices.base_object) 1475 1.1 mrg dr->alt_indices.access_fns.release (); 1476 1.1 mrg free (dr); 1477 1.1 mrg } 1478 1.1 mrg 1479 1.1 mrg /* Analyze memory reference MEMREF, which is accessed in STMT. 1480 1.1 mrg The reference is a read if IS_READ is true, otherwise it is a write. 1481 1.1 mrg IS_CONDITIONAL_IN_STMT indicates that the reference is conditional 1482 1.1 mrg within STMT, i.e. that it might not occur even if STMT is executed 1483 1.1 mrg and runs to completion. 1484 1.1 mrg 1485 1.1 mrg Return the data_reference description of MEMREF. NEST is the outermost 1486 1.1 mrg loop in which the reference should be instantiated, LOOP is the loop 1487 1.1 mrg in which the data reference should be analyzed. 
*/ 1488 1.1 mrg 1489 1.1 mrg struct data_reference * 1490 1.1 mrg create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt, 1491 1.1 mrg bool is_read, bool is_conditional_in_stmt) 1492 1.1 mrg { 1493 1.1 mrg struct data_reference *dr; 1494 1.1 mrg 1495 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 1496 1.1 mrg { 1497 1.1 mrg fprintf (dump_file, "Creating dr for "); 1498 1.1 mrg print_generic_expr (dump_file, memref, TDF_SLIM); 1499 1.1 mrg fprintf (dump_file, "\n"); 1500 1.1 mrg } 1501 1.1 mrg 1502 1.1 mrg dr = XCNEW (struct data_reference); 1503 1.1 mrg DR_STMT (dr) = stmt; 1504 1.1 mrg DR_REF (dr) = memref; 1505 1.1 mrg DR_IS_READ (dr) = is_read; 1506 1.1 mrg DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt; 1507 1.1 mrg 1508 1.1 mrg dr_analyze_innermost (&DR_INNERMOST (dr), memref, 1509 1.1 mrg nest != NULL ? loop : NULL, stmt); 1510 1.1 mrg dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop); 1511 1.1 mrg dr_analyze_alias (dr); 1512 1.1 mrg 1513 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 1514 1.1 mrg { 1515 1.1 mrg unsigned i; 1516 1.1 mrg fprintf (dump_file, "\tbase_address: "); 1517 1.1 mrg print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM); 1518 1.1 mrg fprintf (dump_file, "\n\toffset from base address: "); 1519 1.1 mrg print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM); 1520 1.1 mrg fprintf (dump_file, "\n\tconstant offset from base address: "); 1521 1.1 mrg print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM); 1522 1.1 mrg fprintf (dump_file, "\n\tstep: "); 1523 1.1 mrg print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM); 1524 1.1 mrg fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr)); 1525 1.1 mrg fprintf (dump_file, "\n\tbase misalignment: %d", 1526 1.1 mrg DR_BASE_MISALIGNMENT (dr)); 1527 1.1 mrg fprintf (dump_file, "\n\toffset alignment: %d", 1528 1.1 mrg DR_OFFSET_ALIGNMENT (dr)); 1529 1.1 mrg fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr)); 1530 
1.1 mrg fprintf (dump_file, "\n\tbase_object: "); 1531 1.1 mrg print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM); 1532 1.1 mrg fprintf (dump_file, "\n"); 1533 1.1 mrg for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++) 1534 1.1 mrg { 1535 1.1 mrg fprintf (dump_file, "\tAccess function %d: ", i); 1536 1.1 mrg print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM); 1537 1.1 mrg } 1538 1.1 mrg } 1539 1.1 mrg 1540 1.1 mrg return dr; 1541 1.1 mrg } 1542 1.1 mrg 1543 1.1 mrg /* A helper function computes order between two tree expressions T1 and T2. 1544 1.1 mrg This is used in comparator functions sorting objects based on the order 1545 1.1 mrg of tree expressions. The function returns -1, 0, or 1. */ 1546 1.1 mrg 1547 1.1 mrg int 1548 1.1 mrg data_ref_compare_tree (tree t1, tree t2) 1549 1.1 mrg { 1550 1.1 mrg int i, cmp; 1551 1.1 mrg enum tree_code code; 1552 1.1 mrg char tclass; 1553 1.1 mrg 1554 1.1 mrg if (t1 == t2) 1555 1.1 mrg return 0; 1556 1.1 mrg if (t1 == NULL) 1557 1.1 mrg return -1; 1558 1.1 mrg if (t2 == NULL) 1559 1.1 mrg return 1; 1560 1.1 mrg 1561 1.1 mrg STRIP_USELESS_TYPE_CONVERSION (t1); 1562 1.1 mrg STRIP_USELESS_TYPE_CONVERSION (t2); 1563 1.1 mrg if (t1 == t2) 1564 1.1 mrg return 0; 1565 1.1 mrg 1566 1.1 mrg if (TREE_CODE (t1) != TREE_CODE (t2) 1567 1.1 mrg && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2))) 1568 1.1 mrg return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1; 1569 1.1 mrg 1570 1.1 mrg code = TREE_CODE (t1); 1571 1.1 mrg switch (code) 1572 1.1 mrg { 1573 1.1 mrg case INTEGER_CST: 1574 1.1 mrg return tree_int_cst_compare (t1, t2); 1575 1.1 mrg 1576 1.1 mrg case STRING_CST: 1577 1.1 mrg if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2)) 1578 1.1 mrg return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? 
-1 : 1; 1579 1.1 mrg return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2), 1580 1.1 mrg TREE_STRING_LENGTH (t1)); 1581 1.1 mrg 1582 1.1 mrg case SSA_NAME: 1583 1.1 mrg if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2)) 1584 1.1 mrg return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1; 1585 1.1 mrg break; 1586 1.1 mrg 1587 1.1 mrg default: 1588 1.1 mrg if (POLY_INT_CST_P (t1)) 1589 1.1 mrg return compare_sizes_for_sort (wi::to_poly_widest (t1), 1590 1.1 mrg wi::to_poly_widest (t2)); 1591 1.1 mrg 1592 1.1 mrg tclass = TREE_CODE_CLASS (code); 1593 1.1 mrg 1594 1.1 mrg /* For decls, compare their UIDs. */ 1595 1.1 mrg if (tclass == tcc_declaration) 1596 1.1 mrg { 1597 1.1 mrg if (DECL_UID (t1) != DECL_UID (t2)) 1598 1.1 mrg return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1; 1599 1.1 mrg break; 1600 1.1 mrg } 1601 1.1 mrg /* For expressions, compare their operands recursively. */ 1602 1.1 mrg else if (IS_EXPR_CODE_CLASS (tclass)) 1603 1.1 mrg { 1604 1.1 mrg for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i) 1605 1.1 mrg { 1606 1.1 mrg cmp = data_ref_compare_tree (TREE_OPERAND (t1, i), 1607 1.1 mrg TREE_OPERAND (t2, i)); 1608 1.1 mrg if (cmp != 0) 1609 1.1 mrg return cmp; 1610 1.1 mrg } 1611 1.1 mrg } 1612 1.1 mrg else 1613 1.1 mrg gcc_unreachable (); 1614 1.1 mrg } 1615 1.1 mrg 1616 1.1 mrg return 0; 1617 1.1 mrg } 1618 1.1 mrg 1619 1.1 mrg /* Return TRUE it's possible to resolve data dependence DDR by runtime alias 1620 1.1 mrg check. 
*/ 1621 1.1 mrg 1622 1.1 mrg opt_result 1623 1.1 mrg runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p) 1624 1.1 mrg { 1625 1.1 mrg if (dump_enabled_p ()) 1626 1.1 mrg dump_printf (MSG_NOTE, 1627 1.1 mrg "consider run-time aliasing test between %T and %T\n", 1628 1.1 mrg DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr))); 1629 1.1 mrg 1630 1.1 mrg if (!speed_p) 1631 1.1 mrg return opt_result::failure_at (DR_STMT (DDR_A (ddr)), 1632 1.1 mrg "runtime alias check not supported when" 1633 1.1 mrg " optimizing for size.\n"); 1634 1.1 mrg 1635 1.1 mrg /* FORNOW: We don't support versioning with outer-loop in either 1636 1.1 mrg vectorization or loop distribution. */ 1637 1.1 mrg if (loop != NULL && loop->inner != NULL) 1638 1.1 mrg return opt_result::failure_at (DR_STMT (DDR_A (ddr)), 1639 1.1 mrg "runtime alias check not supported for" 1640 1.1 mrg " outer loop.\n"); 1641 1.1 mrg 1642 1.1 mrg /* FORNOW: We don't support handling different address spaces. */ 1643 1.1 mrg if (TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_A (ddr))))) 1644 1.1 mrg != TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_B (ddr)))))) 1645 1.1 mrg return opt_result::failure_at (DR_STMT (DDR_A (ddr)), 1646 1.1 mrg "runtime alias check between different " 1647 1.1 mrg "address spaces not supported.\n"); 1648 1.1 mrg 1649 1.1 mrg return opt_result::success (); 1650 1.1 mrg } 1651 1.1 mrg 1652 1.1 mrg /* Operator == between two dr_with_seg_len objects. 1653 1.1 mrg 1654 1.1 mrg This equality operator is used to make sure two data refs 1655 1.1 mrg are the same one so that we will consider to combine the 1656 1.1 mrg aliasing checks of those two pairs of data dependent data 1657 1.1 mrg refs. 
*/ 1658 1.1 mrg 1659 1.1 mrg static bool 1660 1.1 mrg operator == (const dr_with_seg_len& d1, 1661 1.1 mrg const dr_with_seg_len& d2) 1662 1.1 mrg { 1663 1.1 mrg return (operand_equal_p (DR_BASE_ADDRESS (d1.dr), 1664 1.1 mrg DR_BASE_ADDRESS (d2.dr), 0) 1665 1.1 mrg && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0 1666 1.1 mrg && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0 1667 1.1 mrg && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0 1668 1.1 mrg && known_eq (d1.access_size, d2.access_size) 1669 1.1 mrg && d1.align == d2.align); 1670 1.1 mrg } 1671 1.1 mrg 1672 1.1 mrg /* Comparison function for sorting objects of dr_with_seg_len_pair_t 1673 1.1 mrg so that we can combine aliasing checks in one scan. */ 1674 1.1 mrg 1675 1.1 mrg static int 1676 1.1 mrg comp_dr_with_seg_len_pair (const void *pa_, const void *pb_) 1677 1.1 mrg { 1678 1.1 mrg const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_; 1679 1.1 mrg const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_; 1680 1.1 mrg const dr_with_seg_len &a1 = pa->first, &a2 = pa->second; 1681 1.1 mrg const dr_with_seg_len &b1 = pb->first, &b2 = pb->second; 1682 1.1 mrg 1683 1.1 mrg /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks 1684 1.1 mrg if a and c have the same basic address snd step, and b and d have the same 1685 1.1 mrg address and step. Therefore, if any a&c or b&d don't have the same address 1686 1.1 mrg and step, we don't care the order of those two pairs after sorting. 
*/ 1687 1.1 mrg int comp_res; 1688 1.1 mrg 1689 1.1 mrg if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr), 1690 1.1 mrg DR_BASE_ADDRESS (b1.dr))) != 0) 1691 1.1 mrg return comp_res; 1692 1.1 mrg if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr), 1693 1.1 mrg DR_BASE_ADDRESS (b2.dr))) != 0) 1694 1.1 mrg return comp_res; 1695 1.1 mrg if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr), 1696 1.1 mrg DR_STEP (b1.dr))) != 0) 1697 1.1 mrg return comp_res; 1698 1.1 mrg if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr), 1699 1.1 mrg DR_STEP (b2.dr))) != 0) 1700 1.1 mrg return comp_res; 1701 1.1 mrg if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr), 1702 1.1 mrg DR_OFFSET (b1.dr))) != 0) 1703 1.1 mrg return comp_res; 1704 1.1 mrg if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr), 1705 1.1 mrg DR_INIT (b1.dr))) != 0) 1706 1.1 mrg return comp_res; 1707 1.1 mrg if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr), 1708 1.1 mrg DR_OFFSET (b2.dr))) != 0) 1709 1.1 mrg return comp_res; 1710 1.1 mrg if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr), 1711 1.1 mrg DR_INIT (b2.dr))) != 0) 1712 1.1 mrg return comp_res; 1713 1.1 mrg 1714 1.1 mrg return 0; 1715 1.1 mrg } 1716 1.1 mrg 1717 1.1 mrg /* Dump information about ALIAS_PAIR, indenting each line by INDENT. */ 1718 1.1 mrg 1719 1.1 mrg static void 1720 1.1 mrg dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent) 1721 1.1 mrg { 1722 1.1 mrg dump_printf (MSG_NOTE, "%sreference: %T vs. %T\n", indent, 1723 1.1 mrg DR_REF (alias_pair->first.dr), 1724 1.1 mrg DR_REF (alias_pair->second.dr)); 1725 1.1 mrg 1726 1.1 mrg dump_printf (MSG_NOTE, "%ssegment length: %T", indent, 1727 1.1 mrg alias_pair->first.seg_len); 1728 1.1 mrg if (!operand_equal_p (alias_pair->first.seg_len, 1729 1.1 mrg alias_pair->second.seg_len, 0)) 1730 1.1 mrg dump_printf (MSG_NOTE, " vs. 
%T", alias_pair->second.seg_len); 1731 1.1 mrg 1732 1.1 mrg dump_printf (MSG_NOTE, "\n%saccess size: ", indent); 1733 1.1 mrg dump_dec (MSG_NOTE, alias_pair->first.access_size); 1734 1.1 mrg if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size)) 1735 1.1 mrg { 1736 1.1 mrg dump_printf (MSG_NOTE, " vs. "); 1737 1.1 mrg dump_dec (MSG_NOTE, alias_pair->second.access_size); 1738 1.1 mrg } 1739 1.1 mrg 1740 1.1 mrg dump_printf (MSG_NOTE, "\n%salignment: %d", indent, 1741 1.1 mrg alias_pair->first.align); 1742 1.1 mrg if (alias_pair->first.align != alias_pair->second.align) 1743 1.1 mrg dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align); 1744 1.1 mrg 1745 1.1 mrg dump_printf (MSG_NOTE, "\n%sflags: ", indent); 1746 1.1 mrg if (alias_pair->flags & DR_ALIAS_RAW) 1747 1.1 mrg dump_printf (MSG_NOTE, " RAW"); 1748 1.1 mrg if (alias_pair->flags & DR_ALIAS_WAR) 1749 1.1 mrg dump_printf (MSG_NOTE, " WAR"); 1750 1.1 mrg if (alias_pair->flags & DR_ALIAS_WAW) 1751 1.1 mrg dump_printf (MSG_NOTE, " WAW"); 1752 1.1 mrg if (alias_pair->flags & DR_ALIAS_ARBITRARY) 1753 1.1 mrg dump_printf (MSG_NOTE, " ARBITRARY"); 1754 1.1 mrg if (alias_pair->flags & DR_ALIAS_SWAPPED) 1755 1.1 mrg dump_printf (MSG_NOTE, " SWAPPED"); 1756 1.1 mrg if (alias_pair->flags & DR_ALIAS_UNSWAPPED) 1757 1.1 mrg dump_printf (MSG_NOTE, " UNSWAPPED"); 1758 1.1 mrg if (alias_pair->flags & DR_ALIAS_MIXED_STEPS) 1759 1.1 mrg dump_printf (MSG_NOTE, " MIXED_STEPS"); 1760 1.1 mrg if (alias_pair->flags == 0) 1761 1.1 mrg dump_printf (MSG_NOTE, " <none>"); 1762 1.1 mrg dump_printf (MSG_NOTE, "\n"); 1763 1.1 mrg } 1764 1.1 mrg 1765 1.1 mrg /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones. 1766 1.1 mrg FACTOR is number of iterations that each data reference is accessed. 
1767 1.1 mrg 1768 1.1 mrg Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0, 1769 1.1 mrg we create an expression: 1770 1.1 mrg 1771 1.1 mrg ((store_ptr_0 + store_segment_length_0) <= load_ptr_0) 1772 1.1 mrg || (load_ptr_0 + load_segment_length_0) <= store_ptr_0)) 1773 1.1 mrg 1774 1.1 mrg for aliasing checks. However, in some cases we can decrease the number 1775 1.1 mrg of checks by combining two checks into one. For example, suppose we have 1776 1.1 mrg another pair of data refs store_ptr_0 & load_ptr_1, and if the following 1777 1.1 mrg condition is satisfied: 1778 1.1 mrg 1779 1.1 mrg load_ptr_0 < load_ptr_1 && 1780 1.1 mrg load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0 1781 1.1 mrg 1782 1.1 mrg (this condition means, in each iteration of vectorized loop, the accessed 1783 1.1 mrg memory of store_ptr_0 cannot be between the memory of load_ptr_0 and 1784 1.1 mrg load_ptr_1.) 1785 1.1 mrg 1786 1.1 mrg we then can use only the following expression to finish the alising checks 1787 1.1 mrg between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1: 1788 1.1 mrg 1789 1.1 mrg ((store_ptr_0 + store_segment_length_0) <= load_ptr_0) 1790 1.1 mrg || (load_ptr_1 + load_segment_length_1 <= store_ptr_0)) 1791 1.1 mrg 1792 1.1 mrg Note that we only consider that load_ptr_0 and load_ptr_1 have the same 1793 1.1 mrg basic address. */ 1794 1.1 mrg 1795 1.1 mrg void 1796 1.1 mrg prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs, 1797 1.1 mrg poly_uint64) 1798 1.1 mrg { 1799 1.1 mrg if (alias_pairs->is_empty ()) 1800 1.1 mrg return; 1801 1.1 mrg 1802 1.1 mrg /* Canonicalize each pair so that the base components are ordered wrt 1803 1.1 mrg data_ref_compare_tree. This allows the loop below to merge more 1804 1.1 mrg cases. 
*/ 1805 1.1 mrg unsigned int i; 1806 1.1 mrg dr_with_seg_len_pair_t *alias_pair; 1807 1.1 mrg FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair) 1808 1.1 mrg { 1809 1.1 mrg data_reference_p dr_a = alias_pair->first.dr; 1810 1.1 mrg data_reference_p dr_b = alias_pair->second.dr; 1811 1.1 mrg int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a), 1812 1.1 mrg DR_BASE_ADDRESS (dr_b)); 1813 1.1 mrg if (comp_res == 0) 1814 1.1 mrg comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b)); 1815 1.1 mrg if (comp_res == 0) 1816 1.1 mrg comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b)); 1817 1.1 mrg if (comp_res > 0) 1818 1.1 mrg { 1819 1.1 mrg std::swap (alias_pair->first, alias_pair->second); 1820 1.1 mrg alias_pair->flags |= DR_ALIAS_SWAPPED; 1821 1.1 mrg } 1822 1.1 mrg else 1823 1.1 mrg alias_pair->flags |= DR_ALIAS_UNSWAPPED; 1824 1.1 mrg } 1825 1.1 mrg 1826 1.1 mrg /* Sort the collected data ref pairs so that we can scan them once to 1827 1.1 mrg combine all possible aliasing checks. */ 1828 1.1 mrg alias_pairs->qsort (comp_dr_with_seg_len_pair); 1829 1.1 mrg 1830 1.1 mrg /* Scan the sorted dr pairs and check if we can combine alias checks 1831 1.1 mrg of two neighboring dr pairs. */ 1832 1.1 mrg unsigned int last = 0; 1833 1.1 mrg for (i = 1; i < alias_pairs->length (); ++i) 1834 1.1 mrg { 1835 1.1 mrg /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */ 1836 1.1 mrg dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last]; 1837 1.1 mrg dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i]; 1838 1.1 mrg 1839 1.1 mrg dr_with_seg_len *dr_a1 = &alias_pair1->first; 1840 1.1 mrg dr_with_seg_len *dr_b1 = &alias_pair1->second; 1841 1.1 mrg dr_with_seg_len *dr_a2 = &alias_pair2->first; 1842 1.1 mrg dr_with_seg_len *dr_b2 = &alias_pair2->second; 1843 1.1 mrg 1844 1.1 mrg /* Remove duplicate data ref pairs. 
*/ 1845 1.1 mrg if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2) 1846 1.1 mrg { 1847 1.1 mrg if (dump_enabled_p ()) 1848 1.1 mrg dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n", 1849 1.1 mrg DR_REF (dr_a1->dr), DR_REF (dr_b1->dr), 1850 1.1 mrg DR_REF (dr_a2->dr), DR_REF (dr_b2->dr)); 1851 1.1 mrg alias_pair1->flags |= alias_pair2->flags; 1852 1.1 mrg continue; 1853 1.1 mrg } 1854 1.1 mrg 1855 1.1 mrg /* Assume that we won't be able to merge the pairs, then correct 1856 1.1 mrg if we do. */ 1857 1.1 mrg last += 1; 1858 1.1 mrg if (last != i) 1859 1.1 mrg (*alias_pairs)[last] = (*alias_pairs)[i]; 1860 1.1 mrg 1861 1.1 mrg if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2) 1862 1.1 mrg { 1863 1.1 mrg /* We consider the case that DR_B1 and DR_B2 are same memrefs, 1864 1.1 mrg and DR_A1 and DR_A2 are two consecutive memrefs. */ 1865 1.1 mrg if (*dr_a1 == *dr_a2) 1866 1.1 mrg { 1867 1.1 mrg std::swap (dr_a1, dr_b1); 1868 1.1 mrg std::swap (dr_a2, dr_b2); 1869 1.1 mrg } 1870 1.1 mrg 1871 1.1 mrg poly_int64 init_a1, init_a2; 1872 1.1 mrg /* Only consider cases in which the distance between the initial 1873 1.1 mrg DR_A1 and the initial DR_A2 is known at compile time. */ 1874 1.1 mrg if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr), 1875 1.1 mrg DR_BASE_ADDRESS (dr_a2->dr), 0) 1876 1.1 mrg || !operand_equal_p (DR_OFFSET (dr_a1->dr), 1877 1.1 mrg DR_OFFSET (dr_a2->dr), 0) 1878 1.1 mrg || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1) 1879 1.1 mrg || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2)) 1880 1.1 mrg continue; 1881 1.1 mrg 1882 1.1 mrg /* Don't combine if we can't tell which one comes first. */ 1883 1.1 mrg if (!ordered_p (init_a1, init_a2)) 1884 1.1 mrg continue; 1885 1.1 mrg 1886 1.1 mrg /* Work out what the segment length would be if we did combine 1887 1.1 mrg DR_A1 and DR_A2: 1888 1.1 mrg 1889 1.1 mrg - If DR_A1 and DR_A2 have equal lengths, that length is 1890 1.1 mrg also the combined length. 
1891 1.1 mrg 1892 1.1 mrg - If DR_A1 and DR_A2 both have negative "lengths", the combined 1893 1.1 mrg length is the lower bound on those lengths. 1894 1.1 mrg 1895 1.1 mrg - If DR_A1 and DR_A2 both have positive lengths, the combined 1896 1.1 mrg length is the upper bound on those lengths. 1897 1.1 mrg 1898 1.1 mrg Other cases are unlikely to give a useful combination. 1899 1.1 mrg 1900 1.1 mrg The lengths both have sizetype, so the sign is taken from 1901 1.1 mrg the step instead. */ 1902 1.1 mrg poly_uint64 new_seg_len = 0; 1903 1.1 mrg bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len, 1904 1.1 mrg dr_a2->seg_len, 0); 1905 1.1 mrg if (new_seg_len_p) 1906 1.1 mrg { 1907 1.1 mrg poly_uint64 seg_len_a1, seg_len_a2; 1908 1.1 mrg if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1) 1909 1.1 mrg || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2)) 1910 1.1 mrg continue; 1911 1.1 mrg 1912 1.1 mrg tree indicator_a = dr_direction_indicator (dr_a1->dr); 1913 1.1 mrg if (TREE_CODE (indicator_a) != INTEGER_CST) 1914 1.1 mrg continue; 1915 1.1 mrg 1916 1.1 mrg tree indicator_b = dr_direction_indicator (dr_a2->dr); 1917 1.1 mrg if (TREE_CODE (indicator_b) != INTEGER_CST) 1918 1.1 mrg continue; 1919 1.1 mrg 1920 1.1 mrg int sign_a = tree_int_cst_sgn (indicator_a); 1921 1.1 mrg int sign_b = tree_int_cst_sgn (indicator_b); 1922 1.1 mrg 1923 1.1 mrg if (sign_a <= 0 && sign_b <= 0) 1924 1.1 mrg new_seg_len = lower_bound (seg_len_a1, seg_len_a2); 1925 1.1 mrg else if (sign_a >= 0 && sign_b >= 0) 1926 1.1 mrg new_seg_len = upper_bound (seg_len_a1, seg_len_a2); 1927 1.1 mrg else 1928 1.1 mrg continue; 1929 1.1 mrg } 1930 1.1 mrg /* At this point we're committed to merging the refs. */ 1931 1.1 mrg 1932 1.1 mrg /* Make sure dr_a1 starts left of dr_a2. 
*/ 1933 1.1 mrg if (maybe_gt (init_a1, init_a2)) 1934 1.1 mrg { 1935 1.1 mrg std::swap (*dr_a1, *dr_a2); 1936 1.1 mrg std::swap (init_a1, init_a2); 1937 1.1 mrg } 1938 1.1 mrg 1939 1.1 mrg /* The DR_Bs are equal, so only the DR_As can introduce 1940 1.1 mrg mixed steps. */ 1941 1.1 mrg if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0)) 1942 1.1 mrg alias_pair1->flags |= DR_ALIAS_MIXED_STEPS; 1943 1.1 mrg 1944 1.1 mrg if (new_seg_len_p) 1945 1.1 mrg { 1946 1.1 mrg dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len), 1947 1.1 mrg new_seg_len); 1948 1.1 mrg dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len)); 1949 1.1 mrg } 1950 1.1 mrg 1951 1.1 mrg /* This is always positive due to the swap above. */ 1952 1.1 mrg poly_uint64 diff = init_a2 - init_a1; 1953 1.1 mrg 1954 1.1 mrg /* The new check will start at DR_A1. Make sure that its access 1955 1.1 mrg size encompasses the initial DR_A2. */ 1956 1.1 mrg if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size)) 1957 1.1 mrg { 1958 1.1 mrg dr_a1->access_size = upper_bound (dr_a1->access_size, 1959 1.1 mrg diff + dr_a2->access_size); 1960 1.1 mrg unsigned int new_align = known_alignment (dr_a1->access_size); 1961 1.1 mrg dr_a1->align = MIN (dr_a1->align, new_align); 1962 1.1 mrg } 1963 1.1 mrg if (dump_enabled_p ()) 1964 1.1 mrg dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n", 1965 1.1 mrg DR_REF (dr_a1->dr), DR_REF (dr_b1->dr), 1966 1.1 mrg DR_REF (dr_a2->dr), DR_REF (dr_b2->dr)); 1967 1.1 mrg alias_pair1->flags |= alias_pair2->flags; 1968 1.1 mrg last -= 1; 1969 1.1 mrg } 1970 1.1 mrg } 1971 1.1 mrg alias_pairs->truncate (last + 1); 1972 1.1 mrg 1973 1.1 mrg /* Try to restore the original dr_with_seg_len order within each 1974 1.1 mrg dr_with_seg_len_pair_t. If we ended up combining swapped and 1975 1.1 mrg unswapped pairs into the same check, we have to invalidate any 1976 1.1 mrg RAW, WAR and WAW information for it. 
*/ 1977 1.1 mrg if (dump_enabled_p ()) 1978 1.1 mrg dump_printf (MSG_NOTE, "merged alias checks:\n"); 1979 1.1 mrg FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair) 1980 1.1 mrg { 1981 1.1 mrg unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED); 1982 1.1 mrg unsigned int swapped = (alias_pair->flags & swap_mask); 1983 1.1 mrg if (swapped == DR_ALIAS_SWAPPED) 1984 1.1 mrg std::swap (alias_pair->first, alias_pair->second); 1985 1.1 mrg else if (swapped != DR_ALIAS_UNSWAPPED) 1986 1.1 mrg alias_pair->flags |= DR_ALIAS_ARBITRARY; 1987 1.1 mrg alias_pair->flags &= ~swap_mask; 1988 1.1 mrg if (dump_enabled_p ()) 1989 1.1 mrg dump_alias_pair (alias_pair, " "); 1990 1.1 mrg } 1991 1.1 mrg } 1992 1.1 mrg 1993 1.1 mrg /* A subroutine of create_intersect_range_checks, with a subset of the 1994 1.1 mrg same arguments. Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS 1995 1.1 mrg to optimize cases in which the references form a simple RAW, WAR or 1996 1.1 mrg WAR dependence. */ 1997 1.1 mrg 1998 1.1 mrg static bool 1999 1.1 mrg create_ifn_alias_checks (tree *cond_expr, 2000 1.1 mrg const dr_with_seg_len_pair_t &alias_pair) 2001 1.1 mrg { 2002 1.1 mrg const dr_with_seg_len& dr_a = alias_pair.first; 2003 1.1 mrg const dr_with_seg_len& dr_b = alias_pair.second; 2004 1.1 mrg 2005 1.1 mrg /* Check for cases in which: 2006 1.1 mrg 2007 1.1 mrg (a) we have a known RAW, WAR or WAR dependence 2008 1.1 mrg (b) the accesses are well-ordered in both the original and new code 2009 1.1 mrg (see the comment above the DR_ALIAS_* flags for details); and 2010 1.1 mrg (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */ 2011 1.1 mrg if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW)) 2012 1.1 mrg return false; 2013 1.1 mrg 2014 1.1 mrg /* Make sure that both DRs access the same pattern of bytes, 2015 1.1 mrg with a constant length and step. 
*/ 2016 1.1 mrg poly_uint64 seg_len; 2017 1.1 mrg if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0) 2018 1.1 mrg || !poly_int_tree_p (dr_a.seg_len, &seg_len) 2019 1.1 mrg || maybe_ne (dr_a.access_size, dr_b.access_size) 2020 1.1 mrg || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0) 2021 1.1 mrg || !tree_fits_uhwi_p (DR_STEP (dr_a.dr))) 2022 1.1 mrg return false; 2023 1.1 mrg 2024 1.1 mrg unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr)); 2025 1.1 mrg tree addr_a = DR_BASE_ADDRESS (dr_a.dr); 2026 1.1 mrg tree addr_b = DR_BASE_ADDRESS (dr_b.dr); 2027 1.1 mrg 2028 1.1 mrg /* See whether the target suports what we want to do. WAW checks are 2029 1.1 mrg equivalent to WAR checks here. */ 2030 1.1 mrg internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW 2031 1.1 mrg ? IFN_CHECK_RAW_PTRS 2032 1.1 mrg : IFN_CHECK_WAR_PTRS); 2033 1.1 mrg unsigned int align = MIN (dr_a.align, dr_b.align); 2034 1.1 mrg poly_uint64 full_length = seg_len + bytes; 2035 1.1 mrg if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a), 2036 1.1 mrg full_length, align)) 2037 1.1 mrg { 2038 1.1 mrg full_length = seg_len + dr_a.access_size; 2039 1.1 mrg if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a), 2040 1.1 mrg full_length, align)) 2041 1.1 mrg return false; 2042 1.1 mrg } 2043 1.1 mrg 2044 1.1 mrg /* Commit to using this form of test. 
*/ 2045 1.1 mrg addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr)); 2046 1.1 mrg addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr)); 2047 1.1 mrg 2048 1.1 mrg addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr)); 2049 1.1 mrg addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr)); 2050 1.1 mrg 2051 1.1 mrg *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION, 2052 1.1 mrg ifn, boolean_type_node, 2053 1.1 mrg 4, addr_a, addr_b, 2054 1.1 mrg size_int (full_length), 2055 1.1 mrg size_int (align)); 2056 1.1 mrg 2057 1.1 mrg if (dump_enabled_p ()) 2058 1.1 mrg { 2059 1.1 mrg if (ifn == IFN_CHECK_RAW_PTRS) 2060 1.1 mrg dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n"); 2061 1.1 mrg else 2062 1.1 mrg dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n"); 2063 1.1 mrg } 2064 1.1 mrg return true; 2065 1.1 mrg } 2066 1.1 mrg 2067 1.1 mrg /* Try to generate a runtime condition that is true if ALIAS_PAIR is 2068 1.1 mrg free of aliases, using a condition based on index values instead 2069 1.1 mrg of a condition based on addresses. Return true on success, 2070 1.1 mrg storing the condition in *COND_EXPR. 2071 1.1 mrg 2072 1.1 mrg This can only be done if the two data references in ALIAS_PAIR access 2073 1.1 mrg the same array object and the index is the only difference. For example, 2074 1.1 mrg if the two data references are DR_A and DR_B: 2075 1.1 mrg 2076 1.1 mrg DR_A DR_B 2077 1.1 mrg data-ref arr[i] arr[j] 2078 1.1 mrg base_object arr arr 2079 1.1 mrg index {i_0, +, 1}_loop {j_0, +, 1}_loop 2080 1.1 mrg 2081 1.1 mrg The addresses and their index are like: 2082 1.1 mrg 2083 1.1 mrg |<- ADDR_A ->| |<- ADDR_B ->| 2084 1.1 mrg -------------------------------------------------------> 2085 1.1 mrg | | | | | | | | | | 2086 1.1 mrg -------------------------------------------------------> 2087 1.1 mrg i_0 ... i_0+4 j_0 ... 
j_0+4 2088 1.1 mrg 2089 1.1 mrg We can create expression based on index rather than address: 2090 1.1 mrg 2091 1.1 mrg (unsigned) (i_0 - j_0 + 3) <= 6 2092 1.1 mrg 2093 1.1 mrg i.e. the indices are less than 4 apart. 2094 1.1 mrg 2095 1.1 mrg Note evolution step of index needs to be considered in comparison. */ 2096 1.1 mrg 2097 1.1 mrg static bool 2098 1.1 mrg create_intersect_range_checks_index (class loop *loop, tree *cond_expr, 2099 1.1 mrg const dr_with_seg_len_pair_t &alias_pair) 2100 1.1 mrg { 2101 1.1 mrg const dr_with_seg_len &dr_a = alias_pair.first; 2102 1.1 mrg const dr_with_seg_len &dr_b = alias_pair.second; 2103 1.1 mrg if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS) 2104 1.1 mrg || integer_zerop (DR_STEP (dr_a.dr)) 2105 1.1 mrg || integer_zerop (DR_STEP (dr_b.dr)) 2106 1.1 mrg || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr)) 2107 1.1 mrg return false; 2108 1.1 mrg 2109 1.1 mrg poly_uint64 seg_len1, seg_len2; 2110 1.1 mrg if (!poly_int_tree_p (dr_a.seg_len, &seg_len1) 2111 1.1 mrg || !poly_int_tree_p (dr_b.seg_len, &seg_len2)) 2112 1.1 mrg return false; 2113 1.1 mrg 2114 1.1 mrg if (!tree_fits_shwi_p (DR_STEP (dr_a.dr))) 2115 1.1 mrg return false; 2116 1.1 mrg 2117 1.1 mrg if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0)) 2118 1.1 mrg return false; 2119 1.1 mrg 2120 1.1 mrg if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)) 2121 1.1 mrg return false; 2122 1.1 mrg 2123 1.1 mrg gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST); 2124 1.1 mrg 2125 1.1 mrg bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0; 2126 1.1 mrg unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr)); 2127 1.1 mrg if (neg_step) 2128 1.1 mrg { 2129 1.1 mrg abs_step = -abs_step; 2130 1.1 mrg seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi (); 2131 1.1 mrg seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi (); 2132 1.1 mrg } 2133 1.1 mrg 2134 1.1 mrg /* Infer the number of 
iterations with which the memory segment is accessed 2135 1.1 mrg by DR. In other words, alias is checked if memory segment accessed by 2136 1.1 mrg DR_A in some iterations intersect with memory segment accessed by DR_B 2137 1.1 mrg in the same amount iterations. 2138 1.1 mrg Note segnment length is a linear function of number of iterations with 2139 1.1 mrg DR_STEP as the coefficient. */ 2140 1.1 mrg poly_uint64 niter_len1, niter_len2; 2141 1.1 mrg if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1) 2142 1.1 mrg || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2)) 2143 1.1 mrg return false; 2144 1.1 mrg 2145 1.1 mrg /* Divide each access size by the byte step, rounding up. */ 2146 1.1 mrg poly_uint64 niter_access1, niter_access2; 2147 1.1 mrg if (!can_div_trunc_p (dr_a.access_size + abs_step - 1, 2148 1.1 mrg abs_step, &niter_access1) 2149 1.1 mrg || !can_div_trunc_p (dr_b.access_size + abs_step - 1, 2150 1.1 mrg abs_step, &niter_access2)) 2151 1.1 mrg return false; 2152 1.1 mrg 2153 1.1 mrg bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0; 2154 1.1 mrg 2155 1.1 mrg int found = -1; 2156 1.1 mrg for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++) 2157 1.1 mrg { 2158 1.1 mrg tree access1 = DR_ACCESS_FN (dr_a.dr, i); 2159 1.1 mrg tree access2 = DR_ACCESS_FN (dr_b.dr, i); 2160 1.1 mrg /* Two indices must be the same if they are not scev, or not scev wrto 2161 1.1 mrg current loop being vecorized. 
*/ 2162 1.1 mrg if (TREE_CODE (access1) != POLYNOMIAL_CHREC 2163 1.1 mrg || TREE_CODE (access2) != POLYNOMIAL_CHREC 2164 1.1 mrg || CHREC_VARIABLE (access1) != (unsigned)loop->num 2165 1.1 mrg || CHREC_VARIABLE (access2) != (unsigned)loop->num) 2166 1.1 mrg { 2167 1.1 mrg if (operand_equal_p (access1, access2, 0)) 2168 1.1 mrg continue; 2169 1.1 mrg 2170 1.1 mrg return false; 2171 1.1 mrg } 2172 1.1 mrg if (found >= 0) 2173 1.1 mrg return false; 2174 1.1 mrg found = i; 2175 1.1 mrg } 2176 1.1 mrg 2177 1.1 mrg /* Ought not to happen in practice, since if all accesses are equal then the 2178 1.1 mrg alias should be decidable at compile time. */ 2179 1.1 mrg if (found < 0) 2180 1.1 mrg return false; 2181 1.1 mrg 2182 1.1 mrg /* The two indices must have the same step. */ 2183 1.1 mrg tree access1 = DR_ACCESS_FN (dr_a.dr, found); 2184 1.1 mrg tree access2 = DR_ACCESS_FN (dr_b.dr, found); 2185 1.1 mrg if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0)) 2186 1.1 mrg return false; 2187 1.1 mrg 2188 1.1 mrg tree idx_step = CHREC_RIGHT (access1); 2189 1.1 mrg /* Index must have const step, otherwise DR_STEP won't be constant. */ 2190 1.1 mrg gcc_assert (TREE_CODE (idx_step) == INTEGER_CST); 2191 1.1 mrg /* Index must evaluate in the same direction as DR. */ 2192 1.1 mrg gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1); 2193 1.1 mrg 2194 1.1 mrg tree min1 = CHREC_LEFT (access1); 2195 1.1 mrg tree min2 = CHREC_LEFT (access2); 2196 1.1 mrg if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2))) 2197 1.1 mrg return false; 2198 1.1 mrg 2199 1.1 mrg /* Ideally, alias can be checked against loop's control IV, but we 2200 1.1 mrg need to prove linear mapping between control IV and reference 2201 1.1 mrg index. Although that should be true, we check against (array) 2202 1.1 mrg index of data reference. 
Like segment length, index length is 2203 1.1 mrg linear function of the number of iterations with index_step as 2204 1.1 mrg the coefficient, i.e, niter_len * idx_step. */ 2205 1.1 mrg offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step), 2206 1.1 mrg SIGNED); 2207 1.1 mrg if (neg_step) 2208 1.1 mrg abs_idx_step = -abs_idx_step; 2209 1.1 mrg poly_offset_int idx_len1 = abs_idx_step * niter_len1; 2210 1.1 mrg poly_offset_int idx_len2 = abs_idx_step * niter_len2; 2211 1.1 mrg poly_offset_int idx_access1 = abs_idx_step * niter_access1; 2212 1.1 mrg poly_offset_int idx_access2 = abs_idx_step * niter_access2; 2213 1.1 mrg 2214 1.1 mrg gcc_assert (known_ge (idx_len1, 0) 2215 1.1 mrg && known_ge (idx_len2, 0) 2216 1.1 mrg && known_ge (idx_access1, 0) 2217 1.1 mrg && known_ge (idx_access2, 0)); 2218 1.1 mrg 2219 1.1 mrg /* Each access has the following pattern, with lengths measured 2220 1.1 mrg in units of INDEX: 2221 1.1 mrg 2222 1.1 mrg <-- idx_len --> 2223 1.1 mrg <--- A: -ve step ---> 2224 1.1 mrg +-----+-------+-----+-------+-----+ 2225 1.1 mrg | n-1 | ..... | 0 | ..... | n-1 | 2226 1.1 mrg +-----+-------+-----+-------+-----+ 2227 1.1 mrg <--- B: +ve step ---> 2228 1.1 mrg <-- idx_len --> 2229 1.1 mrg | 2230 1.1 mrg min 2231 1.1 mrg 2232 1.1 mrg where "n" is the number of scalar iterations covered by the segment 2233 1.1 mrg and where each access spans idx_access units. 2234 1.1 mrg 2235 1.1 mrg A is the range of bytes accessed when the step is negative, 2236 1.1 mrg B is the range when the step is positive. 2237 1.1 mrg 2238 1.1 mrg When checking for general overlap, we need to test whether 2239 1.1 mrg the range: 2240 1.1 mrg 2241 1.1 mrg [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1] 2242 1.1 mrg 2243 1.1 mrg overlaps: 2244 1.1 mrg 2245 1.1 mrg [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1] 2246 1.1 mrg 2247 1.1 mrg where: 2248 1.1 mrg 2249 1.1 mrg low_offsetN = +ve step ? 
0 : -idx_lenN; 2250 1.1 mrg high_offsetN = +ve step ? idx_lenN : 0; 2251 1.1 mrg 2252 1.1 mrg This is equivalent to testing whether: 2253 1.1 mrg 2254 1.1 mrg min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1 2255 1.1 mrg && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1 2256 1.1 mrg 2257 1.1 mrg Converting this into a single test, there is an overlap if: 2258 1.1 mrg 2259 1.1 mrg 0 <= min2 - min1 + bias <= limit 2260 1.1 mrg 2261 1.1 mrg where bias = high_offset2 + idx_access2 - 1 - low_offset1 2262 1.1 mrg limit = (high_offset1 - low_offset1 + idx_access1 - 1) 2263 1.1 mrg + (high_offset2 - low_offset2 + idx_access2 - 1) 2264 1.1 mrg i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1 2265 1.1 mrg 2266 1.1 mrg Combining the tests requires limit to be computable in an unsigned 2267 1.1 mrg form of the index type; if it isn't, we fall back to the usual 2268 1.1 mrg pointer-based checks. 2269 1.1 mrg 2270 1.1 mrg We can do better if DR_B is a write and if DR_A and DR_B are 2271 1.1 mrg well-ordered in both the original and the new code (see the 2272 1.1 mrg comment above the DR_ALIAS_* flags for details). In this case 2273 1.1 mrg we know that for each i in [0, n-1], the write performed by 2274 1.1 mrg access i of DR_B occurs after access numbers j<=i of DR_A in 2275 1.1 mrg both the original and the new code. Any write or anti 2276 1.1 mrg dependencies wrt those DR_A accesses are therefore maintained. 2277 1.1 mrg 2278 1.1 mrg We just need to make sure that each individual write in DR_B does not 2279 1.1 mrg overlap any higher-indexed access in DR_A; such DR_A accesses happen 2280 1.1 mrg after the DR_B access in the original code but happen before it in 2281 1.1 mrg the new code. 2282 1.1 mrg 2283 1.1 mrg We know the steps for both accesses are equal, so by induction, we 2284 1.1 mrg just need to test whether the first write of DR_B overlaps a later 2285 1.1 mrg access of DR_A. 
In other words, we need to move min1 along by 2286 1.1 mrg one iteration: 2287 1.1 mrg 2288 1.1 mrg min1' = min1 + idx_step 2289 1.1 mrg 2290 1.1 mrg and use the ranges: 2291 1.1 mrg 2292 1.1 mrg [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1] 2293 1.1 mrg 2294 1.1 mrg and: 2295 1.1 mrg 2296 1.1 mrg [min2, min2 + idx_access2 - 1] 2297 1.1 mrg 2298 1.1 mrg where: 2299 1.1 mrg 2300 1.1 mrg low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|) 2301 1.1 mrg high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0. */ 2302 1.1 mrg if (waw_or_war_p) 2303 1.1 mrg idx_len1 -= abs_idx_step; 2304 1.1 mrg 2305 1.1 mrg poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1; 2306 1.1 mrg if (!waw_or_war_p) 2307 1.1 mrg limit += idx_len2; 2308 1.1 mrg 2309 1.1 mrg tree utype = unsigned_type_for (TREE_TYPE (min1)); 2310 1.1 mrg if (!wi::fits_to_tree_p (limit, utype)) 2311 1.1 mrg return false; 2312 1.1 mrg 2313 1.1 mrg poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0; 2314 1.1 mrg poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2; 2315 1.1 mrg poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1; 2316 1.1 mrg /* Equivalent to adding IDX_STEP to MIN1. 
*/ 2317 1.1 mrg if (waw_or_war_p) 2318 1.1 mrg bias -= wi::to_offset (idx_step); 2319 1.1 mrg 2320 1.1 mrg tree subject = fold_build2 (MINUS_EXPR, utype, 2321 1.1 mrg fold_convert (utype, min2), 2322 1.1 mrg fold_convert (utype, min1)); 2323 1.1 mrg subject = fold_build2 (PLUS_EXPR, utype, subject, 2324 1.1 mrg wide_int_to_tree (utype, bias)); 2325 1.1 mrg tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, 2326 1.1 mrg wide_int_to_tree (utype, limit)); 2327 1.1 mrg if (*cond_expr) 2328 1.1 mrg *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, 2329 1.1 mrg *cond_expr, part_cond_expr); 2330 1.1 mrg else 2331 1.1 mrg *cond_expr = part_cond_expr; 2332 1.1 mrg if (dump_enabled_p ()) 2333 1.1 mrg { 2334 1.1 mrg if (waw_or_war_p) 2335 1.1 mrg dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n"); 2336 1.1 mrg else 2337 1.1 mrg dump_printf (MSG_NOTE, "using an index-based overlap test\n"); 2338 1.1 mrg } 2339 1.1 mrg return true; 2340 1.1 mrg } 2341 1.1 mrg 2342 1.1 mrg /* A subroutine of create_intersect_range_checks, with a subset of the 2343 1.1 mrg same arguments. Try to optimize cases in which the second access 2344 1.1 mrg is a write and in which some overlap is valid. */ 2345 1.1 mrg 2346 1.1 mrg static bool 2347 1.1 mrg create_waw_or_war_checks (tree *cond_expr, 2348 1.1 mrg const dr_with_seg_len_pair_t &alias_pair) 2349 1.1 mrg { 2350 1.1 mrg const dr_with_seg_len& dr_a = alias_pair.first; 2351 1.1 mrg const dr_with_seg_len& dr_b = alias_pair.second; 2352 1.1 mrg 2353 1.1 mrg /* Check for cases in which: 2354 1.1 mrg 2355 1.1 mrg (a) DR_B is always a write; 2356 1.1 mrg (b) the accesses are well-ordered in both the original and new code 2357 1.1 mrg (see the comment above the DR_ALIAS_* flags for details); and 2358 1.1 mrg (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. 
*/ 2359 1.1 mrg if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) 2360 1.1 mrg return false; 2361 1.1 mrg 2362 1.1 mrg /* Check for equal (but possibly variable) steps. */ 2363 1.1 mrg tree step = DR_STEP (dr_a.dr); 2364 1.1 mrg if (!operand_equal_p (step, DR_STEP (dr_b.dr))) 2365 1.1 mrg return false; 2366 1.1 mrg 2367 1.1 mrg /* Make sure that we can operate on sizetype without loss of precision. */ 2368 1.1 mrg tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr)); 2369 1.1 mrg if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype)) 2370 1.1 mrg return false; 2371 1.1 mrg 2372 1.1 mrg /* All addresses involved are known to have a common alignment ALIGN. 2373 1.1 mrg We can therefore subtract ALIGN from an exclusive endpoint to get 2374 1.1 mrg an inclusive endpoint. In the best (and common) case, ALIGN is the 2375 1.1 mrg same as the access sizes of both DRs, and so subtracting ALIGN 2376 1.1 mrg cancels out the addition of an access size. */ 2377 1.1 mrg unsigned int align = MIN (dr_a.align, dr_b.align); 2378 1.1 mrg poly_uint64 last_chunk_a = dr_a.access_size - align; 2379 1.1 mrg poly_uint64 last_chunk_b = dr_b.access_size - align; 2380 1.1 mrg 2381 1.1 mrg /* Get a boolean expression that is true when the step is negative. */ 2382 1.1 mrg tree indicator = dr_direction_indicator (dr_a.dr); 2383 1.1 mrg tree neg_step = fold_build2 (LT_EXPR, boolean_type_node, 2384 1.1 mrg fold_convert (ssizetype, indicator), 2385 1.1 mrg ssize_int (0)); 2386 1.1 mrg 2387 1.1 mrg /* Get lengths in sizetype. */ 2388 1.1 mrg tree seg_len_a 2389 1.1 mrg = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len)); 2390 1.1 mrg step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step)); 2391 1.1 mrg 2392 1.1 mrg /* Each access has the following pattern: 2393 1.1 mrg 2394 1.1 mrg <- |seg_len| -> 2395 1.1 mrg <--- A: -ve step ---> 2396 1.1 mrg +-----+-------+-----+-------+-----+ 2397 1.1 mrg | n-1 | ..... | 0 | ..... 
| n-1 | 2398 1.1 mrg +-----+-------+-----+-------+-----+ 2399 1.1 mrg <--- B: +ve step ---> 2400 1.1 mrg <- |seg_len| -> 2401 1.1 mrg | 2402 1.1 mrg base address 2403 1.1 mrg 2404 1.1 mrg where "n" is the number of scalar iterations covered by the segment. 2405 1.1 mrg 2406 1.1 mrg A is the range of bytes accessed when the step is negative, 2407 1.1 mrg B is the range when the step is positive. 2408 1.1 mrg 2409 1.1 mrg We know that DR_B is a write. We also know (from checking that 2410 1.1 mrg DR_A and DR_B are well-ordered) that for each i in [0, n-1], 2411 1.1 mrg the write performed by access i of DR_B occurs after access numbers 2412 1.1 mrg j<=i of DR_A in both the original and the new code. Any write or 2413 1.1 mrg anti dependencies wrt those DR_A accesses are therefore maintained. 2414 1.1 mrg 2415 1.1 mrg We just need to make sure that each individual write in DR_B does not 2416 1.1 mrg overlap any higher-indexed access in DR_A; such DR_A accesses happen 2417 1.1 mrg after the DR_B access in the original code but happen before it in 2418 1.1 mrg the new code. 2419 1.1 mrg 2420 1.1 mrg We know the steps for both accesses are equal, so by induction, we 2421 1.1 mrg just need to test whether the first write of DR_B overlaps a later 2422 1.1 mrg access of DR_A. In other words, we need to move addr_a along by 2423 1.1 mrg one iteration: 2424 1.1 mrg 2425 1.1 mrg addr_a' = addr_a + step 2426 1.1 mrg 2427 1.1 mrg and check whether: 2428 1.1 mrg 2429 1.1 mrg [addr_b, addr_b + last_chunk_b] 2430 1.1 mrg 2431 1.1 mrg overlaps: 2432 1.1 mrg 2433 1.1 mrg [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a] 2434 1.1 mrg 2435 1.1 mrg where [low_offset_a, high_offset_a] spans accesses [1, n-1]. I.e.: 2436 1.1 mrg 2437 1.1 mrg low_offset_a = +ve step ? 0 : seg_len_a - step 2438 1.1 mrg high_offset_a = +ve step ? 
seg_len_a - step : 0 2439 1.1 mrg 2440 1.1 mrg This is equivalent to testing whether: 2441 1.1 mrg 2442 1.1 mrg addr_a' + low_offset_a <= addr_b + last_chunk_b 2443 1.1 mrg && addr_b <= addr_a' + high_offset_a + last_chunk_a 2444 1.1 mrg 2445 1.1 mrg Converting this into a single test, there is an overlap if: 2446 1.1 mrg 2447 1.1 mrg 0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit 2448 1.1 mrg 2449 1.1 mrg where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b 2450 1.1 mrg 2451 1.1 mrg If DR_A is performed, limit + |step| - last_chunk_b is known to be 2452 1.1 mrg less than the size of the object underlying DR_A. We also know 2453 1.1 mrg that last_chunk_b <= |step|; this is checked elsewhere if it isn't 2454 1.1 mrg guaranteed at compile time. There can therefore be no overflow if 2455 1.1 mrg "limit" is calculated in an unsigned type with pointer precision. */ 2456 1.1 mrg tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr), 2457 1.1 mrg DR_OFFSET (dr_a.dr)); 2458 1.1 mrg addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr)); 2459 1.1 mrg 2460 1.1 mrg tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr), 2461 1.1 mrg DR_OFFSET (dr_b.dr)); 2462 1.1 mrg addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr)); 2463 1.1 mrg 2464 1.1 mrg /* Advance ADDR_A by one iteration and adjust the length to compensate. 
*/ 2465 1.1 mrg addr_a = fold_build_pointer_plus (addr_a, step); 2466 1.1 mrg tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype, 2467 1.1 mrg seg_len_a, step); 2468 1.1 mrg if (!CONSTANT_CLASS_P (seg_len_a_minus_step)) 2469 1.1 mrg seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step); 2470 1.1 mrg 2471 1.1 mrg tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step, 2472 1.1 mrg seg_len_a_minus_step, size_zero_node); 2473 1.1 mrg if (!CONSTANT_CLASS_P (low_offset_a)) 2474 1.1 mrg low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a); 2475 1.1 mrg 2476 1.1 mrg /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>, 2477 1.1 mrg but it's usually more efficient to reuse the LOW_OFFSET_A result. */ 2478 1.1 mrg tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step, 2479 1.1 mrg low_offset_a); 2480 1.1 mrg 2481 1.1 mrg /* The amount added to addr_b - addr_a'. */ 2482 1.1 mrg tree bias = fold_build2 (MINUS_EXPR, sizetype, 2483 1.1 mrg size_int (last_chunk_b), low_offset_a); 2484 1.1 mrg 2485 1.1 mrg tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a); 2486 1.1 mrg limit = fold_build2 (PLUS_EXPR, sizetype, limit, 2487 1.1 mrg size_int (last_chunk_a + last_chunk_b)); 2488 1.1 mrg 2489 1.1 mrg tree subject = fold_build2 (MINUS_EXPR, sizetype, 2490 1.1 mrg fold_convert (sizetype, addr_b), 2491 1.1 mrg fold_convert (sizetype, addr_a)); 2492 1.1 mrg subject = fold_build2 (PLUS_EXPR, sizetype, subject, bias); 2493 1.1 mrg 2494 1.1 mrg *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit); 2495 1.1 mrg if (dump_enabled_p ()) 2496 1.1 mrg dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n"); 2497 1.1 mrg return true; 2498 1.1 mrg } 2499 1.1 mrg 2500 1.1 mrg /* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for 2501 1.1 mrg every address ADDR accessed by D: 2502 1.1 mrg 2503 1.1 mrg *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= 
*SEQ_MAX_OUT 2504 1.1 mrg 2505 1.1 mrg In this case, every element accessed by D is aligned to at least 2506 1.1 mrg ALIGN bytes. 2507 1.1 mrg 2508 1.1 mrg If ALIGN is zero then instead set *SEG_MAX_OUT so that: 2509 1.1 mrg 2510 1.1 mrg *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT. */ 2511 1.1 mrg 2512 1.1 mrg static void 2513 1.1 mrg get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out, 2514 1.1 mrg tree *seg_max_out, HOST_WIDE_INT align) 2515 1.1 mrg { 2516 1.1 mrg /* Each access has the following pattern: 2517 1.1 mrg 2518 1.1 mrg <- |seg_len| -> 2519 1.1 mrg <--- A: -ve step ---> 2520 1.1 mrg +-----+-------+-----+-------+-----+ 2521 1.1 mrg | n-1 | ,.... | 0 | ..... | n-1 | 2522 1.1 mrg +-----+-------+-----+-------+-----+ 2523 1.1 mrg <--- B: +ve step ---> 2524 1.1 mrg <- |seg_len| -> 2525 1.1 mrg | 2526 1.1 mrg base address 2527 1.1 mrg 2528 1.1 mrg where "n" is the number of scalar iterations covered by the segment. 2529 1.1 mrg (This should be VF for a particular pair if we know that both steps 2530 1.1 mrg are the same, otherwise it will be the full number of scalar loop 2531 1.1 mrg iterations.) 2532 1.1 mrg 2533 1.1 mrg A is the range of bytes accessed when the step is negative, 2534 1.1 mrg B is the range when the step is positive. 2535 1.1 mrg 2536 1.1 mrg If the access size is "access_size" bytes, the lowest addressed byte is: 2537 1.1 mrg 2538 1.1 mrg base + (step < 0 ? seg_len : 0) [LB] 2539 1.1 mrg 2540 1.1 mrg and the highest addressed byte is always below: 2541 1.1 mrg 2542 1.1 mrg base + (step < 0 ? 0 : seg_len) + access_size [UB] 2543 1.1 mrg 2544 1.1 mrg Thus: 2545 1.1 mrg 2546 1.1 mrg LB <= ADDR < UB 2547 1.1 mrg 2548 1.1 mrg If ALIGN is nonzero, all three values are aligned to at least ALIGN 2549 1.1 mrg bytes, so: 2550 1.1 mrg 2551 1.1 mrg LB <= ADDR <= UB - ALIGN 2552 1.1 mrg 2553 1.1 mrg where "- ALIGN" folds naturally with the "+ access_size" and often 2554 1.1 mrg cancels it out. 
2555 1.1 mrg 2556 1.1 mrg We don't try to simplify LB and UB beyond this (e.g. by using 2557 1.1 mrg MIN and MAX based on whether seg_len rather than the stride is 2558 1.1 mrg negative) because it is possible for the absolute size of the 2559 1.1 mrg segment to overflow the range of a ssize_t. 2560 1.1 mrg 2561 1.1 mrg Keeping the pointer_plus outside of the cond_expr should allow 2562 1.1 mrg the cond_exprs to be shared with other alias checks. */ 2563 1.1 mrg tree indicator = dr_direction_indicator (d.dr); 2564 1.1 mrg tree neg_step = fold_build2 (LT_EXPR, boolean_type_node, 2565 1.1 mrg fold_convert (ssizetype, indicator), 2566 1.1 mrg ssize_int (0)); 2567 1.1 mrg tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr), 2568 1.1 mrg DR_OFFSET (d.dr)); 2569 1.1 mrg addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr)); 2570 1.1 mrg tree seg_len 2571 1.1 mrg = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len)); 2572 1.1 mrg 2573 1.1 mrg tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step, 2574 1.1 mrg seg_len, size_zero_node); 2575 1.1 mrg tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step, 2576 1.1 mrg size_zero_node, seg_len); 2577 1.1 mrg max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach, 2578 1.1 mrg size_int (d.access_size - align)); 2579 1.1 mrg 2580 1.1 mrg *seg_min_out = fold_build_pointer_plus (addr_base, min_reach); 2581 1.1 mrg *seg_max_out = fold_build_pointer_plus (addr_base, max_reach); 2582 1.1 mrg } 2583 1.1 mrg 2584 1.1 mrg /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases, 2585 1.1 mrg storing the condition in *COND_EXPR. The fallback is to generate a 2586 1.1 mrg a test that the two accesses do not overlap: 2587 1.1 mrg 2588 1.1 mrg end_a <= start_b || end_b <= start_a. 
*/ 2589 1.1 mrg 2590 1.1 mrg static void 2591 1.1 mrg create_intersect_range_checks (class loop *loop, tree *cond_expr, 2592 1.1 mrg const dr_with_seg_len_pair_t &alias_pair) 2593 1.1 mrg { 2594 1.1 mrg const dr_with_seg_len& dr_a = alias_pair.first; 2595 1.1 mrg const dr_with_seg_len& dr_b = alias_pair.second; 2596 1.1 mrg *cond_expr = NULL_TREE; 2597 1.1 mrg if (create_intersect_range_checks_index (loop, cond_expr, alias_pair)) 2598 1.1 mrg return; 2599 1.1 mrg 2600 1.1 mrg if (create_ifn_alias_checks (cond_expr, alias_pair)) 2601 1.1 mrg return; 2602 1.1 mrg 2603 1.1 mrg if (create_waw_or_war_checks (cond_expr, alias_pair)) 2604 1.1 mrg return; 2605 1.1 mrg 2606 1.1 mrg unsigned HOST_WIDE_INT min_align; 2607 1.1 mrg tree_code cmp_code; 2608 1.1 mrg /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions 2609 1.1 mrg are equivalent. This is just an optimization heuristic. */ 2610 1.1 mrg if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST 2611 1.1 mrg && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST) 2612 1.1 mrg { 2613 1.1 mrg /* In this case adding access_size to seg_len is likely to give 2614 1.1 mrg a simple X * step, where X is either the number of scalar 2615 1.1 mrg iterations or the vectorization factor. We're better off 2616 1.1 mrg keeping that, rather than subtracting an alignment from it. 2617 1.1 mrg 2618 1.1 mrg In this case the maximum values are exclusive and so there is 2619 1.1 mrg no alias if the maximum of one segment equals the minimum 2620 1.1 mrg of another. */ 2621 1.1 mrg min_align = 0; 2622 1.1 mrg cmp_code = LE_EXPR; 2623 1.1 mrg } 2624 1.1 mrg else 2625 1.1 mrg { 2626 1.1 mrg /* Calculate the minimum alignment shared by all four pointers, 2627 1.1 mrg then arrange for this alignment to be subtracted from the 2628 1.1 mrg exclusive maximum values to get inclusive maximum values. 2629 1.1 mrg This "- min_align" is cumulative with a "+ access_size" 2630 1.1 mrg in the calculation of the maximum values. 
In the best 2631 1.1 mrg (and common) case, the two cancel each other out, leaving 2632 1.1 mrg us with an inclusive bound based only on seg_len. In the 2633 1.1 mrg worst case we're simply adding a smaller number than before. 2634 1.1 mrg 2635 1.1 mrg Because the maximum values are inclusive, there is an alias 2636 1.1 mrg if the maximum value of one segment is equal to the minimum 2637 1.1 mrg value of the other. */ 2638 1.1 mrg min_align = std::min (dr_a.align, dr_b.align); 2639 1.1 mrg cmp_code = LT_EXPR; 2640 1.1 mrg } 2641 1.1 mrg 2642 1.1 mrg tree seg_a_min, seg_a_max, seg_b_min, seg_b_max; 2643 1.1 mrg get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align); 2644 1.1 mrg get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align); 2645 1.1 mrg 2646 1.1 mrg *cond_expr 2647 1.1 mrg = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, 2648 1.1 mrg fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min), 2649 1.1 mrg fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min)); 2650 1.1 mrg if (dump_enabled_p ()) 2651 1.1 mrg dump_printf (MSG_NOTE, "using an address-based overlap test\n"); 2652 1.1 mrg } 2653 1.1 mrg 2654 1.1 mrg /* Create a conditional expression that represents the run-time checks for 2655 1.1 mrg overlapping of address ranges represented by a list of data references 2656 1.1 mrg pairs passed in ALIAS_PAIRS. Data references are in LOOP. The returned 2657 1.1 mrg COND_EXPR is the conditional expression to be used in the if statement 2658 1.1 mrg that controls which version of the loop gets executed at runtime. 
*/ 2659 1.1 mrg 2660 1.1 mrg void 2661 1.1 mrg create_runtime_alias_checks (class loop *loop, 2662 1.1 mrg const vec<dr_with_seg_len_pair_t> *alias_pairs, 2663 1.1 mrg tree * cond_expr) 2664 1.1 mrg { 2665 1.1 mrg tree part_cond_expr; 2666 1.1 mrg 2667 1.1 mrg fold_defer_overflow_warnings (); 2668 1.1 mrg for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs) 2669 1.1 mrg { 2670 1.1 mrg gcc_assert (alias_pair.flags); 2671 1.1 mrg if (dump_enabled_p ()) 2672 1.1 mrg dump_printf (MSG_NOTE, 2673 1.1 mrg "create runtime check for data references %T and %T\n", 2674 1.1 mrg DR_REF (alias_pair.first.dr), 2675 1.1 mrg DR_REF (alias_pair.second.dr)); 2676 1.1 mrg 2677 1.1 mrg /* Create condition expression for each pair data references. */ 2678 1.1 mrg create_intersect_range_checks (loop, &part_cond_expr, alias_pair); 2679 1.1 mrg if (*cond_expr) 2680 1.1 mrg *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, 2681 1.1 mrg *cond_expr, part_cond_expr); 2682 1.1 mrg else 2683 1.1 mrg *cond_expr = part_cond_expr; 2684 1.1 mrg } 2685 1.1 mrg fold_undefer_and_ignore_overflow_warnings (); 2686 1.1 mrg } 2687 1.1 mrg 2688 1.1 mrg /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical 2689 1.1 mrg expressions. 
*/ 2690 1.1 mrg static bool 2691 1.1 mrg dr_equal_offsets_p1 (tree offset1, tree offset2) 2692 1.1 mrg { 2693 1.1 mrg bool res; 2694 1.1 mrg 2695 1.1 mrg STRIP_NOPS (offset1); 2696 1.1 mrg STRIP_NOPS (offset2); 2697 1.1 mrg 2698 1.1 mrg if (offset1 == offset2) 2699 1.1 mrg return true; 2700 1.1 mrg 2701 1.1 mrg if (TREE_CODE (offset1) != TREE_CODE (offset2) 2702 1.1 mrg || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1))) 2703 1.1 mrg return false; 2704 1.1 mrg 2705 1.1 mrg res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0), 2706 1.1 mrg TREE_OPERAND (offset2, 0)); 2707 1.1 mrg 2708 1.1 mrg if (!res || !BINARY_CLASS_P (offset1)) 2709 1.1 mrg return res; 2710 1.1 mrg 2711 1.1 mrg res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1), 2712 1.1 mrg TREE_OPERAND (offset2, 1)); 2713 1.1 mrg 2714 1.1 mrg return res; 2715 1.1 mrg } 2716 1.1 mrg 2717 1.1 mrg /* Check if DRA and DRB have equal offsets. */ 2718 1.1 mrg bool 2719 1.1 mrg dr_equal_offsets_p (struct data_reference *dra, 2720 1.1 mrg struct data_reference *drb) 2721 1.1 mrg { 2722 1.1 mrg tree offset1, offset2; 2723 1.1 mrg 2724 1.1 mrg offset1 = DR_OFFSET (dra); 2725 1.1 mrg offset2 = DR_OFFSET (drb); 2726 1.1 mrg 2727 1.1 mrg return dr_equal_offsets_p1 (offset1, offset2); 2728 1.1 mrg } 2729 1.1 mrg 2730 1.1 mrg /* Returns true if FNA == FNB. */ 2731 1.1 mrg 2732 1.1 mrg static bool 2733 1.1 mrg affine_function_equal_p (affine_fn fna, affine_fn fnb) 2734 1.1 mrg { 2735 1.1 mrg unsigned i, n = fna.length (); 2736 1.1 mrg 2737 1.1 mrg if (n != fnb.length ()) 2738 1.1 mrg return false; 2739 1.1 mrg 2740 1.1 mrg for (i = 0; i < n; i++) 2741 1.1 mrg if (!operand_equal_p (fna[i], fnb[i], 0)) 2742 1.1 mrg return false; 2743 1.1 mrg 2744 1.1 mrg return true; 2745 1.1 mrg } 2746 1.1 mrg 2747 1.1 mrg /* If all the functions in CF are the same, returns one of them, 2748 1.1 mrg otherwise returns NULL. 
*/ 2749 1.1 mrg 2750 1.1 mrg static affine_fn 2751 1.1 mrg common_affine_function (conflict_function *cf) 2752 1.1 mrg { 2753 1.1 mrg unsigned i; 2754 1.1 mrg affine_fn comm; 2755 1.1 mrg 2756 1.1 mrg if (!CF_NONTRIVIAL_P (cf)) 2757 1.1 mrg return affine_fn (); 2758 1.1 mrg 2759 1.1 mrg comm = cf->fns[0]; 2760 1.1 mrg 2761 1.1 mrg for (i = 1; i < cf->n; i++) 2762 1.1 mrg if (!affine_function_equal_p (comm, cf->fns[i])) 2763 1.1 mrg return affine_fn (); 2764 1.1 mrg 2765 1.1 mrg return comm; 2766 1.1 mrg } 2767 1.1 mrg 2768 1.1 mrg /* Returns the base of the affine function FN. */ 2769 1.1 mrg 2770 1.1 mrg static tree 2771 1.1 mrg affine_function_base (affine_fn fn) 2772 1.1 mrg { 2773 1.1 mrg return fn[0]; 2774 1.1 mrg } 2775 1.1 mrg 2776 1.1 mrg /* Returns true if FN is a constant. */ 2777 1.1 mrg 2778 1.1 mrg static bool 2779 1.1 mrg affine_function_constant_p (affine_fn fn) 2780 1.1 mrg { 2781 1.1 mrg unsigned i; 2782 1.1 mrg tree coef; 2783 1.1 mrg 2784 1.1 mrg for (i = 1; fn.iterate (i, &coef); i++) 2785 1.1 mrg if (!integer_zerop (coef)) 2786 1.1 mrg return false; 2787 1.1 mrg 2788 1.1 mrg return true; 2789 1.1 mrg } 2790 1.1 mrg 2791 1.1 mrg /* Returns true if FN is the zero constant function. */ 2792 1.1 mrg 2793 1.1 mrg static bool 2794 1.1 mrg affine_function_zero_p (affine_fn fn) 2795 1.1 mrg { 2796 1.1 mrg return (integer_zerop (affine_function_base (fn)) 2797 1.1 mrg && affine_function_constant_p (fn)); 2798 1.1 mrg } 2799 1.1 mrg 2800 1.1 mrg /* Returns a signed integer type with the largest precision from TA 2801 1.1 mrg and TB. */ 2802 1.1 mrg 2803 1.1 mrg static tree 2804 1.1 mrg signed_type_for_types (tree ta, tree tb) 2805 1.1 mrg { 2806 1.1 mrg if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb)) 2807 1.1 mrg return signed_type_for (ta); 2808 1.1 mrg else 2809 1.1 mrg return signed_type_for (tb); 2810 1.1 mrg } 2811 1.1 mrg 2812 1.1 mrg /* Applies operation OP on affine functions FNA and FNB, and returns the 2813 1.1 mrg result. 
*/ 2814 1.1 mrg 2815 1.1 mrg static affine_fn 2816 1.1 mrg affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb) 2817 1.1 mrg { 2818 1.1 mrg unsigned i, n, m; 2819 1.1 mrg affine_fn ret; 2820 1.1 mrg tree coef; 2821 1.1 mrg 2822 1.1 mrg if (fnb.length () > fna.length ()) 2823 1.1 mrg { 2824 1.1 mrg n = fna.length (); 2825 1.1 mrg m = fnb.length (); 2826 1.1 mrg } 2827 1.1 mrg else 2828 1.1 mrg { 2829 1.1 mrg n = fnb.length (); 2830 1.1 mrg m = fna.length (); 2831 1.1 mrg } 2832 1.1 mrg 2833 1.1 mrg ret.create (m); 2834 1.1 mrg for (i = 0; i < n; i++) 2835 1.1 mrg { 2836 1.1 mrg tree type = signed_type_for_types (TREE_TYPE (fna[i]), 2837 1.1 mrg TREE_TYPE (fnb[i])); 2838 1.1 mrg ret.quick_push (fold_build2 (op, type, fna[i], fnb[i])); 2839 1.1 mrg } 2840 1.1 mrg 2841 1.1 mrg for (; fna.iterate (i, &coef); i++) 2842 1.1 mrg ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)), 2843 1.1 mrg coef, integer_zero_node)); 2844 1.1 mrg for (; fnb.iterate (i, &coef); i++) 2845 1.1 mrg ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)), 2846 1.1 mrg integer_zero_node, coef)); 2847 1.1 mrg 2848 1.1 mrg return ret; 2849 1.1 mrg } 2850 1.1 mrg 2851 1.1 mrg /* Returns the sum of affine functions FNA and FNB. */ 2852 1.1 mrg 2853 1.1 mrg static affine_fn 2854 1.1 mrg affine_fn_plus (affine_fn fna, affine_fn fnb) 2855 1.1 mrg { 2856 1.1 mrg return affine_fn_op (PLUS_EXPR, fna, fnb); 2857 1.1 mrg } 2858 1.1 mrg 2859 1.1 mrg /* Returns the difference of affine functions FNA and FNB. */ 2860 1.1 mrg 2861 1.1 mrg static affine_fn 2862 1.1 mrg affine_fn_minus (affine_fn fna, affine_fn fnb) 2863 1.1 mrg { 2864 1.1 mrg return affine_fn_op (MINUS_EXPR, fna, fnb); 2865 1.1 mrg } 2866 1.1 mrg 2867 1.1 mrg /* Frees affine function FN. 
*/ 2868 1.1 mrg 2869 1.1 mrg static void 2870 1.1 mrg affine_fn_free (affine_fn fn) 2871 1.1 mrg { 2872 1.1 mrg fn.release (); 2873 1.1 mrg } 2874 1.1 mrg 2875 1.1 mrg /* Determine for each subscript in the data dependence relation DDR 2876 1.1 mrg the distance. */ 2877 1.1 mrg 2878 1.1 mrg static void 2879 1.1 mrg compute_subscript_distance (struct data_dependence_relation *ddr) 2880 1.1 mrg { 2881 1.1 mrg conflict_function *cf_a, *cf_b; 2882 1.1 mrg affine_fn fn_a, fn_b, diff; 2883 1.1 mrg 2884 1.1 mrg if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) 2885 1.1 mrg { 2886 1.1 mrg unsigned int i; 2887 1.1 mrg 2888 1.1 mrg for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) 2889 1.1 mrg { 2890 1.1 mrg struct subscript *subscript; 2891 1.1 mrg 2892 1.1 mrg subscript = DDR_SUBSCRIPT (ddr, i); 2893 1.1 mrg cf_a = SUB_CONFLICTS_IN_A (subscript); 2894 1.1 mrg cf_b = SUB_CONFLICTS_IN_B (subscript); 2895 1.1 mrg 2896 1.1 mrg fn_a = common_affine_function (cf_a); 2897 1.1 mrg fn_b = common_affine_function (cf_b); 2898 1.1 mrg if (!fn_a.exists () || !fn_b.exists ()) 2899 1.1 mrg { 2900 1.1 mrg SUB_DISTANCE (subscript) = chrec_dont_know; 2901 1.1 mrg return; 2902 1.1 mrg } 2903 1.1 mrg diff = affine_fn_minus (fn_a, fn_b); 2904 1.1 mrg 2905 1.1 mrg if (affine_function_constant_p (diff)) 2906 1.1 mrg SUB_DISTANCE (subscript) = affine_function_base (diff); 2907 1.1 mrg else 2908 1.1 mrg SUB_DISTANCE (subscript) = chrec_dont_know; 2909 1.1 mrg 2910 1.1 mrg affine_fn_free (diff); 2911 1.1 mrg } 2912 1.1 mrg } 2913 1.1 mrg } 2914 1.1 mrg 2915 1.1 mrg /* Returns the conflict function for "unknown". */ 2916 1.1 mrg 2917 1.1 mrg static conflict_function * 2918 1.1 mrg conflict_fn_not_known (void) 2919 1.1 mrg { 2920 1.1 mrg conflict_function *fn = XCNEW (conflict_function); 2921 1.1 mrg fn->n = NOT_KNOWN; 2922 1.1 mrg 2923 1.1 mrg return fn; 2924 1.1 mrg } 2925 1.1 mrg 2926 1.1 mrg /* Returns the conflict function for "independent". 
*/ 2927 1.1 mrg 2928 1.1 mrg static conflict_function * 2929 1.1 mrg conflict_fn_no_dependence (void) 2930 1.1 mrg { 2931 1.1 mrg conflict_function *fn = XCNEW (conflict_function); 2932 1.1 mrg fn->n = NO_DEPENDENCE; 2933 1.1 mrg 2934 1.1 mrg return fn; 2935 1.1 mrg } 2936 1.1 mrg 2937 1.1 mrg /* Returns true if the address of OBJ is invariant in LOOP. */ 2938 1.1 mrg 2939 1.1 mrg static bool 2940 1.1 mrg object_address_invariant_in_loop_p (const class loop *loop, const_tree obj) 2941 1.1 mrg { 2942 1.1 mrg while (handled_component_p (obj)) 2943 1.1 mrg { 2944 1.1 mrg if (TREE_CODE (obj) == ARRAY_REF) 2945 1.1 mrg { 2946 1.1 mrg for (int i = 1; i < 4; ++i) 2947 1.1 mrg if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i), 2948 1.1 mrg loop->num)) 2949 1.1 mrg return false; 2950 1.1 mrg } 2951 1.1 mrg else if (TREE_CODE (obj) == COMPONENT_REF) 2952 1.1 mrg { 2953 1.1 mrg if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2), 2954 1.1 mrg loop->num)) 2955 1.1 mrg return false; 2956 1.1 mrg } 2957 1.1 mrg obj = TREE_OPERAND (obj, 0); 2958 1.1 mrg } 2959 1.1 mrg 2960 1.1 mrg if (!INDIRECT_REF_P (obj) 2961 1.1 mrg && TREE_CODE (obj) != MEM_REF) 2962 1.1 mrg return true; 2963 1.1 mrg 2964 1.1 mrg return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0), 2965 1.1 mrg loop->num); 2966 1.1 mrg } 2967 1.1 mrg 2968 1.1 mrg /* Returns false if we can prove that data references A and B do not alias, 2969 1.1 mrg true otherwise. If LOOP_NEST is false no cross-iteration aliases are 2970 1.1 mrg considered. 
*/ 2971 1.1 mrg 2972 1.1 mrg bool 2973 1.1 mrg dr_may_alias_p (const struct data_reference *a, const struct data_reference *b, 2974 1.1 mrg class loop *loop_nest) 2975 1.1 mrg { 2976 1.1 mrg tree addr_a = DR_BASE_OBJECT (a); 2977 1.1 mrg tree addr_b = DR_BASE_OBJECT (b); 2978 1.1 mrg 2979 1.1 mrg /* If we are not processing a loop nest but scalar code we 2980 1.1 mrg do not need to care about possible cross-iteration dependences 2981 1.1 mrg and thus can process the full original reference. Do so, 2982 1.1 mrg similar to how loop invariant motion applies extra offset-based 2983 1.1 mrg disambiguation. */ 2984 1.1 mrg if (!loop_nest) 2985 1.1 mrg { 2986 1.1 mrg aff_tree off1, off2; 2987 1.1 mrg poly_widest_int size1, size2; 2988 1.1 mrg get_inner_reference_aff (DR_REF (a), &off1, &size1); 2989 1.1 mrg get_inner_reference_aff (DR_REF (b), &off2, &size2); 2990 1.1 mrg aff_combination_scale (&off1, -1); 2991 1.1 mrg aff_combination_add (&off2, &off1); 2992 1.1 mrg if (aff_comb_cannot_overlap_p (&off2, size1, size2)) 2993 1.1 mrg return false; 2994 1.1 mrg } 2995 1.1 mrg 2996 1.1 mrg if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF) 2997 1.1 mrg && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF) 2998 1.1 mrg /* For cross-iteration dependences the cliques must be valid for the 2999 1.1 mrg whole loop, not just individual iterations. */ 3000 1.1 mrg && (!loop_nest 3001 1.1 mrg || MR_DEPENDENCE_CLIQUE (addr_a) == 1 3002 1.1 mrg || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique) 3003 1.1 mrg && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b) 3004 1.1 mrg && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b)) 3005 1.1 mrg return false; 3006 1.1 mrg 3007 1.1 mrg /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we 3008 1.1 mrg do not know the size of the base-object. 
So we cannot do any 3009 1.1 mrg offset/overlap based analysis but have to rely on points-to 3010 1.1 mrg information only. */ 3011 1.1 mrg if (TREE_CODE (addr_a) == MEM_REF 3012 1.1 mrg && (DR_UNCONSTRAINED_BASE (a) 3013 1.1 mrg || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME)) 3014 1.1 mrg { 3015 1.1 mrg /* For true dependences we can apply TBAA. */ 3016 1.1 mrg if (flag_strict_aliasing 3017 1.1 mrg && DR_IS_WRITE (a) && DR_IS_READ (b) 3018 1.1 mrg && !alias_sets_conflict_p (get_alias_set (DR_REF (a)), 3019 1.1 mrg get_alias_set (DR_REF (b)))) 3020 1.1 mrg return false; 3021 1.1 mrg if (TREE_CODE (addr_b) == MEM_REF) 3022 1.1 mrg return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0), 3023 1.1 mrg TREE_OPERAND (addr_b, 0)); 3024 1.1 mrg else 3025 1.1 mrg return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0), 3026 1.1 mrg build_fold_addr_expr (addr_b)); 3027 1.1 mrg } 3028 1.1 mrg else if (TREE_CODE (addr_b) == MEM_REF 3029 1.1 mrg && (DR_UNCONSTRAINED_BASE (b) 3030 1.1 mrg || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME)) 3031 1.1 mrg { 3032 1.1 mrg /* For true dependences we can apply TBAA. */ 3033 1.1 mrg if (flag_strict_aliasing 3034 1.1 mrg && DR_IS_WRITE (a) && DR_IS_READ (b) 3035 1.1 mrg && !alias_sets_conflict_p (get_alias_set (DR_REF (a)), 3036 1.1 mrg get_alias_set (DR_REF (b)))) 3037 1.1 mrg return false; 3038 1.1 mrg if (TREE_CODE (addr_a) == MEM_REF) 3039 1.1 mrg return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0), 3040 1.1 mrg TREE_OPERAND (addr_b, 0)); 3041 1.1 mrg else 3042 1.1 mrg return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a), 3043 1.1 mrg TREE_OPERAND (addr_b, 0)); 3044 1.1 mrg } 3045 1.1 mrg 3046 1.1 mrg /* Otherwise DR_BASE_OBJECT is an access that covers the whole object 3047 1.1 mrg that is being subsetted in the loop nest. 
*/ 3048 1.1 mrg if (DR_IS_WRITE (a) && DR_IS_WRITE (b)) 3049 1.1 mrg return refs_output_dependent_p (addr_a, addr_b); 3050 1.1 mrg else if (DR_IS_READ (a) && DR_IS_WRITE (b)) 3051 1.1 mrg return refs_anti_dependent_p (addr_a, addr_b); 3052 1.1 mrg return refs_may_alias_p (addr_a, addr_b); 3053 1.1 mrg } 3054 1.1 mrg 3055 1.1 mrg /* REF_A and REF_B both satisfy access_fn_component_p. Return true 3056 1.1 mrg if it is meaningful to compare their associated access functions 3057 1.1 mrg when checking for dependencies. */ 3058 1.1 mrg 3059 1.1 mrg static bool 3060 1.1 mrg access_fn_components_comparable_p (tree ref_a, tree ref_b) 3061 1.1 mrg { 3062 1.1 mrg /* Allow pairs of component refs from the following sets: 3063 1.1 mrg 3064 1.1 mrg { REALPART_EXPR, IMAGPART_EXPR } 3065 1.1 mrg { COMPONENT_REF } 3066 1.1 mrg { ARRAY_REF }. */ 3067 1.1 mrg tree_code code_a = TREE_CODE (ref_a); 3068 1.1 mrg tree_code code_b = TREE_CODE (ref_b); 3069 1.1 mrg if (code_a == IMAGPART_EXPR) 3070 1.1 mrg code_a = REALPART_EXPR; 3071 1.1 mrg if (code_b == IMAGPART_EXPR) 3072 1.1 mrg code_b = REALPART_EXPR; 3073 1.1 mrg if (code_a != code_b) 3074 1.1 mrg return false; 3075 1.1 mrg 3076 1.1 mrg if (TREE_CODE (ref_a) == COMPONENT_REF) 3077 1.1 mrg /* ??? We cannot simply use the type of operand #0 of the refs here as 3078 1.1 mrg the Fortran compiler smuggles type punning into COMPONENT_REFs. 3079 1.1 mrg Use the DECL_CONTEXT of the FIELD_DECLs instead. */ 3080 1.1 mrg return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1)) 3081 1.1 mrg == DECL_CONTEXT (TREE_OPERAND (ref_b, 1))); 3082 1.1 mrg 3083 1.1 mrg return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)), 3084 1.1 mrg TREE_TYPE (TREE_OPERAND (ref_b, 0))); 3085 1.1 mrg } 3086 1.1 mrg 3087 1.1 mrg /* Initialize a data dependence relation RES in LOOP_NEST. USE_ALT_INDICES 3088 1.1 mrg is true when the main indices of A and B were not comparable so we try again 3089 1.1 mrg with alternate indices computed on an indirect reference. 
*/ 3090 1.1 mrg 3091 1.1 mrg struct data_dependence_relation * 3092 1.1 mrg initialize_data_dependence_relation (struct data_dependence_relation *res, 3093 1.1 mrg vec<loop_p> loop_nest, 3094 1.1 mrg bool use_alt_indices) 3095 1.1 mrg { 3096 1.1 mrg struct data_reference *a = DDR_A (res); 3097 1.1 mrg struct data_reference *b = DDR_B (res); 3098 1.1 mrg unsigned int i; 3099 1.1 mrg 3100 1.1 mrg struct indices *indices_a = &a->indices; 3101 1.1 mrg struct indices *indices_b = &b->indices; 3102 1.1 mrg if (use_alt_indices) 3103 1.1 mrg { 3104 1.1 mrg if (TREE_CODE (DR_REF (a)) != MEM_REF) 3105 1.1 mrg indices_a = &a->alt_indices; 3106 1.1 mrg if (TREE_CODE (DR_REF (b)) != MEM_REF) 3107 1.1 mrg indices_b = &b->alt_indices; 3108 1.1 mrg } 3109 1.1 mrg unsigned int num_dimensions_a = indices_a->access_fns.length (); 3110 1.1 mrg unsigned int num_dimensions_b = indices_b->access_fns.length (); 3111 1.1 mrg if (num_dimensions_a == 0 || num_dimensions_b == 0) 3112 1.1 mrg { 3113 1.1 mrg DDR_ARE_DEPENDENT (res) = chrec_dont_know; 3114 1.1 mrg return res; 3115 1.1 mrg } 3116 1.1 mrg 3117 1.1 mrg /* For unconstrained bases, the root (highest-indexed) subscript 3118 1.1 mrg describes a variation in the base of the original DR_REF rather 3119 1.1 mrg than a component access. We have no type that accurately describes 3120 1.1 mrg the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after* 3121 1.1 mrg applying this subscript) so limit the search to the last real 3122 1.1 mrg component access. 3123 1.1 mrg 3124 1.1 mrg E.g. for: 3125 1.1 mrg 3126 1.1 mrg void 3127 1.1 mrg f (int a[][8], int b[][8]) 3128 1.1 mrg { 3129 1.1 mrg for (int i = 0; i < 8; ++i) 3130 1.1 mrg a[i * 2][0] = b[i][0]; 3131 1.1 mrg } 3132 1.1 mrg 3133 1.1 mrg the a and b accesses have a single ARRAY_REF component reference [0] 3134 1.1 mrg but have two subscripts. 
*/ 3135 1.1 mrg if (indices_a->unconstrained_base) 3136 1.1 mrg num_dimensions_a -= 1; 3137 1.1 mrg if (indices_b->unconstrained_base) 3138 1.1 mrg num_dimensions_b -= 1; 3139 1.1 mrg 3140 1.1 mrg /* These structures describe sequences of component references in 3141 1.1 mrg DR_REF (A) and DR_REF (B). Each component reference is tied to a 3142 1.1 mrg specific access function. */ 3143 1.1 mrg struct { 3144 1.1 mrg /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and 3145 1.1 mrg DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher 3146 1.1 mrg indices. In C notation, these are the indices of the rightmost 3147 1.1 mrg component references; e.g. for a sequence .b.c.d, the start 3148 1.1 mrg index is for .d. */ 3149 1.1 mrg unsigned int start_a; 3150 1.1 mrg unsigned int start_b; 3151 1.1 mrg 3152 1.1 mrg /* The sequence contains LENGTH consecutive access functions from 3153 1.1 mrg each DR. */ 3154 1.1 mrg unsigned int length; 3155 1.1 mrg 3156 1.1 mrg /* The enclosing objects for the A and B sequences respectively, 3157 1.1 mrg i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1) 3158 1.1 mrg and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied. */ 3159 1.1 mrg tree object_a; 3160 1.1 mrg tree object_b; 3161 1.1 mrg } full_seq = {}, struct_seq = {}; 3162 1.1 mrg 3163 1.1 mrg /* Before each iteration of the loop: 3164 1.1 mrg 3165 1.1 mrg - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and 3166 1.1 mrg - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B). */ 3167 1.1 mrg unsigned int index_a = 0; 3168 1.1 mrg unsigned int index_b = 0; 3169 1.1 mrg tree ref_a = DR_REF (a); 3170 1.1 mrg tree ref_b = DR_REF (b); 3171 1.1 mrg 3172 1.1 mrg /* Now walk the component references from the final DR_REFs back up to 3173 1.1 mrg the enclosing base objects. 
Each component reference corresponds 3174 1.1 mrg to one access function in the DR, with access function 0 being for 3175 1.1 mrg the final DR_REF and the highest-indexed access function being the 3176 1.1 mrg one that is applied to the base of the DR. 3177 1.1 mrg 3178 1.1 mrg Look for a sequence of component references whose access functions 3179 1.1 mrg are comparable (see access_fn_components_comparable_p). If more 3180 1.1 mrg than one such sequence exists, pick the one nearest the base 3181 1.1 mrg (which is the leftmost sequence in C notation). Store this sequence 3182 1.1 mrg in FULL_SEQ. 3183 1.1 mrg 3184 1.1 mrg For example, if we have: 3185 1.1 mrg 3186 1.1 mrg struct foo { struct bar s; ... } (*a)[10], (*b)[10]; 3187 1.1 mrg 3188 1.1 mrg A: a[0][i].s.c.d 3189 1.1 mrg B: __real b[0][i].s.e[i].f 3190 1.1 mrg 3191 1.1 mrg (where d is the same type as the real component of f) then the access 3192 1.1 mrg functions would be: 3193 1.1 mrg 3194 1.1 mrg 0 1 2 3 3195 1.1 mrg A: .d .c .s [i] 3196 1.1 mrg 3197 1.1 mrg 0 1 2 3 4 5 3198 1.1 mrg B: __real .f [i] .e .s [i] 3199 1.1 mrg 3200 1.1 mrg The A0/B2 column isn't comparable, since .d is a COMPONENT_REF 3201 1.1 mrg and [i] is an ARRAY_REF. However, the A1/B3 column contains two 3202 1.1 mrg COMPONENT_REF accesses for struct bar, so is comparable. Likewise 3203 1.1 mrg the A2/B4 column contains two COMPONENT_REF accesses for struct foo, 3204 1.1 mrg so is comparable. The A3/B5 column contains two ARRAY_REFs that 3205 1.1 mrg index foo[10] arrays, so is again comparable. The sequence is 3206 1.1 mrg therefore: 3207 1.1 mrg 3208 1.1 mrg A: [1, 3] (i.e. [i].s.c) 3209 1.1 mrg B: [3, 5] (i.e. [i].s.e) 3210 1.1 mrg 3211 1.1 mrg Also look for sequences of component references whose access 3212 1.1 mrg functions are comparable and whose enclosing objects have the same 3213 1.1 mrg RECORD_TYPE. Store this sequence in STRUCT_SEQ. 
In the above 3214 1.1 mrg example, STRUCT_SEQ would be: 3215 1.1 mrg 3216 1.1 mrg A: [1, 2] (i.e. s.c) 3217 1.1 mrg B: [3, 4] (i.e. s.e) */ 3218 1.1 mrg while (index_a < num_dimensions_a && index_b < num_dimensions_b) 3219 1.1 mrg { 3220 1.1 mrg /* The alternate indices form always has a single dimension 3221 1.1 mrg with unconstrained base. */ 3222 1.1 mrg gcc_assert (!use_alt_indices); 3223 1.1 mrg 3224 1.1 mrg /* REF_A and REF_B must be one of the component access types 3225 1.1 mrg allowed by dr_analyze_indices. */ 3226 1.1 mrg gcc_checking_assert (access_fn_component_p (ref_a)); 3227 1.1 mrg gcc_checking_assert (access_fn_component_p (ref_b)); 3228 1.1 mrg 3229 1.1 mrg /* Get the immediately-enclosing objects for REF_A and REF_B, 3230 1.1 mrg i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A) 3231 1.1 mrg and DR_ACCESS_FN (B, INDEX_B). */ 3232 1.1 mrg tree object_a = TREE_OPERAND (ref_a, 0); 3233 1.1 mrg tree object_b = TREE_OPERAND (ref_b, 0); 3234 1.1 mrg 3235 1.1 mrg tree type_a = TREE_TYPE (object_a); 3236 1.1 mrg tree type_b = TREE_TYPE (object_b); 3237 1.1 mrg if (access_fn_components_comparable_p (ref_a, ref_b)) 3238 1.1 mrg { 3239 1.1 mrg /* This pair of component accesses is comparable for dependence 3240 1.1 mrg analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and 3241 1.1 mrg DR_ACCESS_FN (B, INDEX_B) in the sequence. */ 3242 1.1 mrg if (full_seq.start_a + full_seq.length != index_a 3243 1.1 mrg || full_seq.start_b + full_seq.length != index_b) 3244 1.1 mrg { 3245 1.1 mrg /* The accesses don't extend the current sequence, 3246 1.1 mrg so start a new one here. */ 3247 1.1 mrg full_seq.start_a = index_a; 3248 1.1 mrg full_seq.start_b = index_b; 3249 1.1 mrg full_seq.length = 0; 3250 1.1 mrg } 3251 1.1 mrg 3252 1.1 mrg /* Add this pair of references to the sequence. 
*/ 3253 1.1 mrg full_seq.length += 1; 3254 1.1 mrg full_seq.object_a = object_a; 3255 1.1 mrg full_seq.object_b = object_b; 3256 1.1 mrg 3257 1.1 mrg /* If the enclosing objects are structures (and thus have the 3258 1.1 mrg same RECORD_TYPE), record the new sequence in STRUCT_SEQ. */ 3259 1.1 mrg if (TREE_CODE (type_a) == RECORD_TYPE) 3260 1.1 mrg struct_seq = full_seq; 3261 1.1 mrg 3262 1.1 mrg /* Move to the next containing reference for both A and B. */ 3263 1.1 mrg ref_a = object_a; 3264 1.1 mrg ref_b = object_b; 3265 1.1 mrg index_a += 1; 3266 1.1 mrg index_b += 1; 3267 1.1 mrg continue; 3268 1.1 mrg } 3269 1.1 mrg 3270 1.1 mrg /* Try to approach equal type sizes. */ 3271 1.1 mrg if (!COMPLETE_TYPE_P (type_a) 3272 1.1 mrg || !COMPLETE_TYPE_P (type_b) 3273 1.1 mrg || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a)) 3274 1.1 mrg || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b))) 3275 1.1 mrg break; 3276 1.1 mrg 3277 1.1 mrg unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a)); 3278 1.1 mrg unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b)); 3279 1.1 mrg if (size_a <= size_b) 3280 1.1 mrg { 3281 1.1 mrg index_a += 1; 3282 1.1 mrg ref_a = object_a; 3283 1.1 mrg } 3284 1.1 mrg if (size_b <= size_a) 3285 1.1 mrg { 3286 1.1 mrg index_b += 1; 3287 1.1 mrg ref_b = object_b; 3288 1.1 mrg } 3289 1.1 mrg } 3290 1.1 mrg 3291 1.1 mrg /* See whether FULL_SEQ ends at the base and whether the two bases 3292 1.1 mrg are equal. We do not care about TBAA or alignment info so we can 3293 1.1 mrg use OEP_ADDRESS_OF to avoid false negatives. 
*/ 3294 1.1 mrg tree base_a = indices_a->base_object; 3295 1.1 mrg tree base_b = indices_b->base_object; 3296 1.1 mrg bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a 3297 1.1 mrg && full_seq.start_b + full_seq.length == num_dimensions_b 3298 1.1 mrg && (indices_a->unconstrained_base 3299 1.1 mrg == indices_b->unconstrained_base) 3300 1.1 mrg && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF) 3301 1.1 mrg && (types_compatible_p (TREE_TYPE (base_a), 3302 1.1 mrg TREE_TYPE (base_b)) 3303 1.1 mrg || (!base_supports_access_fn_components_p (base_a) 3304 1.1 mrg && !base_supports_access_fn_components_p (base_b) 3305 1.1 mrg && operand_equal_p 3306 1.1 mrg (TYPE_SIZE (TREE_TYPE (base_a)), 3307 1.1 mrg TYPE_SIZE (TREE_TYPE (base_b)), 0))) 3308 1.1 mrg && (!loop_nest.exists () 3309 1.1 mrg || (object_address_invariant_in_loop_p 3310 1.1 mrg (loop_nest[0], base_a)))); 3311 1.1 mrg 3312 1.1 mrg /* If the bases are the same, we can include the base variation too. 3313 1.1 mrg E.g. the b accesses in: 3314 1.1 mrg 3315 1.1 mrg for (int i = 0; i < n; ++i) 3316 1.1 mrg b[i + 4][0] = b[i][0]; 3317 1.1 mrg 3318 1.1 mrg have a definite dependence distance of 4, while for: 3319 1.1 mrg 3320 1.1 mrg for (int i = 0; i < n; ++i) 3321 1.1 mrg a[i + 4][0] = b[i][0]; 3322 1.1 mrg 3323 1.1 mrg the dependence distance depends on the gap between a and b. 3324 1.1 mrg 3325 1.1 mrg If the bases are different then we can only rely on the sequence 3326 1.1 mrg rooted at a structure access, since arrays are allowed to overlap 3327 1.1 mrg arbitrarily and change shape arbitrarily. E.g. we treat this as 3328 1.1 mrg valid code: 3329 1.1 mrg 3330 1.1 mrg int a[256]; 3331 1.1 mrg ... 3332 1.1 mrg ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0]; 3333 1.1 mrg 3334 1.1 mrg where two lvalues with the same int[4][3] type overlap, and where 3335 1.1 mrg both lvalues are distinct from the object's declared type. 
*/ 3336 1.1 mrg if (same_base_p) 3337 1.1 mrg { 3338 1.1 mrg if (indices_a->unconstrained_base) 3339 1.1 mrg full_seq.length += 1; 3340 1.1 mrg } 3341 1.1 mrg else 3342 1.1 mrg full_seq = struct_seq; 3343 1.1 mrg 3344 1.1 mrg /* Punt if we didn't find a suitable sequence. */ 3345 1.1 mrg if (full_seq.length == 0) 3346 1.1 mrg { 3347 1.1 mrg if (use_alt_indices 3348 1.1 mrg || (TREE_CODE (DR_REF (a)) == MEM_REF 3349 1.1 mrg && TREE_CODE (DR_REF (b)) == MEM_REF) 3350 1.1 mrg || may_be_nonaddressable_p (DR_REF (a)) 3351 1.1 mrg || may_be_nonaddressable_p (DR_REF (b))) 3352 1.1 mrg { 3353 1.1 mrg /* Fully exhausted possibilities. */ 3354 1.1 mrg DDR_ARE_DEPENDENT (res) = chrec_dont_know; 3355 1.1 mrg return res; 3356 1.1 mrg } 3357 1.1 mrg 3358 1.1 mrg /* Try evaluating both DRs as dereferences of pointers. */ 3359 1.1 mrg if (!a->alt_indices.base_object 3360 1.1 mrg && TREE_CODE (DR_REF (a)) != MEM_REF) 3361 1.1 mrg { 3362 1.1 mrg tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)), 3363 1.1 mrg build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)), 3364 1.1 mrg build_int_cst 3365 1.1 mrg (reference_alias_ptr_type (DR_REF (a)), 0)); 3366 1.1 mrg dr_analyze_indices (&a->alt_indices, alt_ref, 3367 1.1 mrg loop_preheader_edge (loop_nest[0]), 3368 1.1 mrg loop_containing_stmt (DR_STMT (a))); 3369 1.1 mrg } 3370 1.1 mrg if (!b->alt_indices.base_object 3371 1.1 mrg && TREE_CODE (DR_REF (b)) != MEM_REF) 3372 1.1 mrg { 3373 1.1 mrg tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)), 3374 1.1 mrg build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)), 3375 1.1 mrg build_int_cst 3376 1.1 mrg (reference_alias_ptr_type (DR_REF (b)), 0)); 3377 1.1 mrg dr_analyze_indices (&b->alt_indices, alt_ref, 3378 1.1 mrg loop_preheader_edge (loop_nest[0]), 3379 1.1 mrg loop_containing_stmt (DR_STMT (b))); 3380 1.1 mrg } 3381 1.1 mrg return initialize_data_dependence_relation (res, loop_nest, true); 3382 1.1 mrg } 3383 1.1 mrg 3384 1.1 mrg if (!same_base_p) 3385 1.1 mrg { 3386 1.1 mrg /* Partial overlap 
is possible for different bases when strict aliasing 3387 1.1 mrg is not in effect. It's also possible if either base involves a union 3388 1.1 mrg access; e.g. for: 3389 1.1 mrg 3390 1.1 mrg struct s1 { int a[2]; }; 3391 1.1 mrg struct s2 { struct s1 b; int c; }; 3392 1.1 mrg struct s3 { int d; struct s1 e; }; 3393 1.1 mrg union u { struct s2 f; struct s3 g; } *p, *q; 3394 1.1 mrg 3395 1.1 mrg the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at 3396 1.1 mrg "p->g.e" (base "p->g") and might partially overlap the s1 at 3397 1.1 mrg "q->g.e" (base "q->g"). */ 3398 1.1 mrg if (!flag_strict_aliasing 3399 1.1 mrg || ref_contains_union_access_p (full_seq.object_a) 3400 1.1 mrg || ref_contains_union_access_p (full_seq.object_b)) 3401 1.1 mrg { 3402 1.1 mrg DDR_ARE_DEPENDENT (res) = chrec_dont_know; 3403 1.1 mrg return res; 3404 1.1 mrg } 3405 1.1 mrg 3406 1.1 mrg DDR_COULD_BE_INDEPENDENT_P (res) = true; 3407 1.1 mrg if (!loop_nest.exists () 3408 1.1 mrg || (object_address_invariant_in_loop_p (loop_nest[0], 3409 1.1 mrg full_seq.object_a) 3410 1.1 mrg && object_address_invariant_in_loop_p (loop_nest[0], 3411 1.1 mrg full_seq.object_b))) 3412 1.1 mrg { 3413 1.1 mrg DDR_OBJECT_A (res) = full_seq.object_a; 3414 1.1 mrg DDR_OBJECT_B (res) = full_seq.object_b; 3415 1.1 mrg } 3416 1.1 mrg } 3417 1.1 mrg 3418 1.1 mrg DDR_AFFINE_P (res) = true; 3419 1.1 mrg DDR_ARE_DEPENDENT (res) = NULL_TREE; 3420 1.1 mrg DDR_SUBSCRIPTS (res).create (full_seq.length); 3421 1.1 mrg DDR_LOOP_NEST (res) = loop_nest; 3422 1.1 mrg DDR_SELF_REFERENCE (res) = false; 3423 1.1 mrg 3424 1.1 mrg for (i = 0; i < full_seq.length; ++i) 3425 1.1 mrg { 3426 1.1 mrg struct subscript *subscript; 3427 1.1 mrg 3428 1.1 mrg subscript = XNEW (struct subscript); 3429 1.1 mrg SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i]; 3430 1.1 mrg SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i]; 3431 1.1 mrg SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known (); 
3432 1.1 mrg SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known (); 3433 1.1 mrg SUB_LAST_CONFLICT (subscript) = chrec_dont_know; 3434 1.1 mrg SUB_DISTANCE (subscript) = chrec_dont_know; 3435 1.1 mrg DDR_SUBSCRIPTS (res).safe_push (subscript); 3436 1.1 mrg } 3437 1.1 mrg 3438 1.1 mrg return res; 3439 1.1 mrg } 3440 1.1 mrg 3441 1.1 mrg /* Initialize a data dependence relation between data accesses A and 3442 1.1 mrg B. NB_LOOPS is the number of loops surrounding the references: the 3443 1.1 mrg size of the classic distance/direction vectors. */ 3444 1.1 mrg 3445 1.1 mrg struct data_dependence_relation * 3446 1.1 mrg initialize_data_dependence_relation (struct data_reference *a, 3447 1.1 mrg struct data_reference *b, 3448 1.1 mrg vec<loop_p> loop_nest) 3449 1.1 mrg { 3450 1.1 mrg data_dependence_relation *res = XCNEW (struct data_dependence_relation); 3451 1.1 mrg DDR_A (res) = a; 3452 1.1 mrg DDR_B (res) = b; 3453 1.1 mrg DDR_LOOP_NEST (res).create (0); 3454 1.1 mrg DDR_SUBSCRIPTS (res).create (0); 3455 1.1 mrg DDR_DIR_VECTS (res).create (0); 3456 1.1 mrg DDR_DIST_VECTS (res).create (0); 3457 1.1 mrg 3458 1.1 mrg if (a == NULL || b == NULL) 3459 1.1 mrg { 3460 1.1 mrg DDR_ARE_DEPENDENT (res) = chrec_dont_know; 3461 1.1 mrg return res; 3462 1.1 mrg } 3463 1.1 mrg 3464 1.1 mrg /* If the data references do not alias, then they are independent. */ 3465 1.1 mrg if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL)) 3466 1.1 mrg { 3467 1.1 mrg DDR_ARE_DEPENDENT (res) = chrec_known; 3468 1.1 mrg return res; 3469 1.1 mrg } 3470 1.1 mrg 3471 1.1 mrg return initialize_data_dependence_relation (res, loop_nest, false); 3472 1.1 mrg } 3473 1.1 mrg 3474 1.1 mrg 3475 1.1 mrg /* Frees memory used by the conflict function F. 
*/ 3476 1.1 mrg 3477 1.1 mrg static void 3478 1.1 mrg free_conflict_function (conflict_function *f) 3479 1.1 mrg { 3480 1.1 mrg unsigned i; 3481 1.1 mrg 3482 1.1 mrg if (CF_NONTRIVIAL_P (f)) 3483 1.1 mrg { 3484 1.1 mrg for (i = 0; i < f->n; i++) 3485 1.1 mrg affine_fn_free (f->fns[i]); 3486 1.1 mrg } 3487 1.1 mrg free (f); 3488 1.1 mrg } 3489 1.1 mrg 3490 1.1 mrg /* Frees memory used by SUBSCRIPTS. */ 3491 1.1 mrg 3492 1.1 mrg static void 3493 1.1 mrg free_subscripts (vec<subscript_p> subscripts) 3494 1.1 mrg { 3495 1.1 mrg for (subscript_p s : subscripts) 3496 1.1 mrg { 3497 1.1 mrg free_conflict_function (s->conflicting_iterations_in_a); 3498 1.1 mrg free_conflict_function (s->conflicting_iterations_in_b); 3499 1.1 mrg free (s); 3500 1.1 mrg } 3501 1.1 mrg subscripts.release (); 3502 1.1 mrg } 3503 1.1 mrg 3504 1.1 mrg /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap 3505 1.1 mrg description. */ 3506 1.1 mrg 3507 1.1 mrg static inline void 3508 1.1 mrg finalize_ddr_dependent (struct data_dependence_relation *ddr, 3509 1.1 mrg tree chrec) 3510 1.1 mrg { 3511 1.1 mrg DDR_ARE_DEPENDENT (ddr) = chrec; 3512 1.1 mrg free_subscripts (DDR_SUBSCRIPTS (ddr)); 3513 1.1 mrg DDR_SUBSCRIPTS (ddr).create (0); 3514 1.1 mrg } 3515 1.1 mrg 3516 1.1 mrg /* The dependence relation DDR cannot be represented by a distance 3517 1.1 mrg vector. */ 3518 1.1 mrg 3519 1.1 mrg static inline void 3520 1.1 mrg non_affine_dependence_relation (struct data_dependence_relation *ddr) 3521 1.1 mrg { 3522 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 3523 1.1 mrg fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n"); 3524 1.1 mrg 3525 1.1 mrg DDR_AFFINE_P (ddr) = false; 3526 1.1 mrg } 3527 1.1 mrg 3528 1.1 mrg 3529 1.1 mrg 3531 1.1 mrg /* This section contains the classic Banerjee tests. 
*/ 3532 1.1 mrg 3533 1.1 mrg /* Returns true iff CHREC_A and CHREC_B are not dependent on any index 3534 1.1 mrg variables, i.e., if the ZIV (Zero Index Variable) test is true. */ 3535 1.1 mrg 3536 1.1 mrg static inline bool 3537 1.1 mrg ziv_subscript_p (const_tree chrec_a, const_tree chrec_b) 3538 1.1 mrg { 3539 1.1 mrg return (evolution_function_is_constant_p (chrec_a) 3540 1.1 mrg && evolution_function_is_constant_p (chrec_b)); 3541 1.1 mrg } 3542 1.1 mrg 3543 1.1 mrg /* Returns true iff CHREC_A and CHREC_B are dependent on an index 3544 1.1 mrg variable, i.e., if the SIV (Single Index Variable) test is true. */ 3545 1.1 mrg 3546 1.1 mrg static bool 3547 1.1 mrg siv_subscript_p (const_tree chrec_a, const_tree chrec_b) 3548 1.1 mrg { 3549 1.1 mrg if ((evolution_function_is_constant_p (chrec_a) 3550 1.1 mrg && evolution_function_is_univariate_p (chrec_b)) 3551 1.1 mrg || (evolution_function_is_constant_p (chrec_b) 3552 1.1 mrg && evolution_function_is_univariate_p (chrec_a))) 3553 1.1 mrg return true; 3554 1.1 mrg 3555 1.1 mrg if (evolution_function_is_univariate_p (chrec_a) 3556 1.1 mrg && evolution_function_is_univariate_p (chrec_b)) 3557 1.1 mrg { 3558 1.1 mrg switch (TREE_CODE (chrec_a)) 3559 1.1 mrg { 3560 1.1 mrg case POLYNOMIAL_CHREC: 3561 1.1 mrg switch (TREE_CODE (chrec_b)) 3562 1.1 mrg { 3563 1.1 mrg case POLYNOMIAL_CHREC: 3564 1.1 mrg if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b)) 3565 1.1 mrg return false; 3566 1.1 mrg /* FALLTHRU */ 3567 1.1 mrg 3568 1.1 mrg default: 3569 1.1 mrg return true; 3570 1.1 mrg } 3571 1.1 mrg 3572 1.1 mrg default: 3573 1.1 mrg return true; 3574 1.1 mrg } 3575 1.1 mrg } 3576 1.1 mrg 3577 1.1 mrg return false; 3578 1.1 mrg } 3579 1.1 mrg 3580 1.1 mrg /* Creates a conflict function with N dimensions. The affine functions 3581 1.1 mrg in each dimension follow. */ 3582 1.1 mrg 3583 1.1 mrg static conflict_function * 3584 1.1 mrg conflict_fn (unsigned n, ...) 
3585 1.1 mrg { 3586 1.1 mrg unsigned i; 3587 1.1 mrg conflict_function *ret = XCNEW (conflict_function); 3588 1.1 mrg va_list ap; 3589 1.1 mrg 3590 1.1 mrg gcc_assert (n > 0 && n <= MAX_DIM); 3591 1.1 mrg va_start (ap, n); 3592 1.1 mrg 3593 1.1 mrg ret->n = n; 3594 1.1 mrg for (i = 0; i < n; i++) 3595 1.1 mrg ret->fns[i] = va_arg (ap, affine_fn); 3596 1.1 mrg va_end (ap); 3597 1.1 mrg 3598 1.1 mrg return ret; 3599 1.1 mrg } 3600 1.1 mrg 3601 1.1 mrg /* Returns constant affine function with value CST. */ 3602 1.1 mrg 3603 1.1 mrg static affine_fn 3604 1.1 mrg affine_fn_cst (tree cst) 3605 1.1 mrg { 3606 1.1 mrg affine_fn fn; 3607 1.1 mrg fn.create (1); 3608 1.1 mrg fn.quick_push (cst); 3609 1.1 mrg return fn; 3610 1.1 mrg } 3611 1.1 mrg 3612 1.1 mrg /* Returns affine function with single variable, CST + COEF * x_DIM. */ 3613 1.1 mrg 3614 1.1 mrg static affine_fn 3615 1.1 mrg affine_fn_univar (tree cst, unsigned dim, tree coef) 3616 1.1 mrg { 3617 1.1 mrg affine_fn fn; 3618 1.1 mrg fn.create (dim + 1); 3619 1.1 mrg unsigned i; 3620 1.1 mrg 3621 1.1 mrg gcc_assert (dim > 0); 3622 1.1 mrg fn.quick_push (cst); 3623 1.1 mrg for (i = 1; i < dim; i++) 3624 1.1 mrg fn.quick_push (integer_zero_node); 3625 1.1 mrg fn.quick_push (coef); 3626 1.1 mrg return fn; 3627 1.1 mrg } 3628 1.1 mrg 3629 1.1 mrg /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and 3630 1.1 mrg *OVERLAPS_B are initialized to the functions that describe the 3631 1.1 mrg relation between the elements accessed twice by CHREC_A and 3632 1.1 mrg CHREC_B. For k >= 0, the following property is verified: 3633 1.1 mrg 3634 1.1 mrg CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). 
*/ 3635 1.1 mrg 3636 1.1 mrg static void 3637 1.1 mrg analyze_ziv_subscript (tree chrec_a, 3638 1.1 mrg tree chrec_b, 3639 1.1 mrg conflict_function **overlaps_a, 3640 1.1 mrg conflict_function **overlaps_b, 3641 1.1 mrg tree *last_conflicts) 3642 1.1 mrg { 3643 1.1 mrg tree type, difference; 3644 1.1 mrg dependence_stats.num_ziv++; 3645 1.1 mrg 3646 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 3647 1.1 mrg fprintf (dump_file, "(analyze_ziv_subscript \n"); 3648 1.1 mrg 3649 1.1 mrg type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b)); 3650 1.1 mrg chrec_a = chrec_convert (type, chrec_a, NULL); 3651 1.1 mrg chrec_b = chrec_convert (type, chrec_b, NULL); 3652 1.1 mrg difference = chrec_fold_minus (type, chrec_a, chrec_b); 3653 1.1 mrg 3654 1.1 mrg switch (TREE_CODE (difference)) 3655 1.1 mrg { 3656 1.1 mrg case INTEGER_CST: 3657 1.1 mrg if (integer_zerop (difference)) 3658 1.1 mrg { 3659 1.1 mrg /* The difference is equal to zero: the accessed index 3660 1.1 mrg overlaps for each iteration in the loop. */ 3661 1.1 mrg *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 3662 1.1 mrg *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node)); 3663 1.1 mrg *last_conflicts = chrec_dont_know; 3664 1.1 mrg dependence_stats.num_ziv_dependent++; 3665 1.1 mrg } 3666 1.1 mrg else 3667 1.1 mrg { 3668 1.1 mrg /* The accesses do not overlap. */ 3669 1.1 mrg *overlaps_a = conflict_fn_no_dependence (); 3670 1.1 mrg *overlaps_b = conflict_fn_no_dependence (); 3671 1.1 mrg *last_conflicts = integer_zero_node; 3672 1.1 mrg dependence_stats.num_ziv_independent++; 3673 1.1 mrg } 3674 1.1 mrg break; 3675 1.1 mrg 3676 1.1 mrg default: 3677 1.1 mrg /* We're not sure whether the indexes overlap. For the moment, 3678 1.1 mrg conservatively answer "don't know". 
*/ 3679 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 3680 1.1 mrg fprintf (dump_file, "ziv test failed: difference is non-integer.\n"); 3681 1.1 mrg 3682 1.1 mrg *overlaps_a = conflict_fn_not_known (); 3683 1.1 mrg *overlaps_b = conflict_fn_not_known (); 3684 1.1 mrg *last_conflicts = chrec_dont_know; 3685 1.1 mrg dependence_stats.num_ziv_unimplemented++; 3686 1.1 mrg break; 3687 1.1 mrg } 3688 1.1 mrg 3689 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 3690 1.1 mrg fprintf (dump_file, ")\n"); 3691 1.1 mrg } 3692 1.1 mrg 3693 1.1 mrg /* Similar to max_stmt_executions_int, but returns the bound as a tree, 3694 1.1 mrg and only if it fits to the int type. If this is not the case, or the 3695 1.1 mrg bound on the number of iterations of LOOP could not be derived, returns 3696 1.1 mrg chrec_dont_know. */ 3697 1.1 mrg 3698 1.1 mrg static tree 3699 1.1 mrg max_stmt_executions_tree (class loop *loop) 3700 1.1 mrg { 3701 1.1 mrg widest_int nit; 3702 1.1 mrg 3703 1.1 mrg if (!max_stmt_executions (loop, &nit)) 3704 1.1 mrg return chrec_dont_know; 3705 1.1 mrg 3706 1.1 mrg if (!wi::fits_to_tree_p (nit, unsigned_type_node)) 3707 1.1 mrg return chrec_dont_know; 3708 1.1 mrg 3709 1.1 mrg return wide_int_to_tree (unsigned_type_node, nit); 3710 1.1 mrg } 3711 1.1 mrg 3712 1.1 mrg /* Determine whether the CHREC is always positive/negative. If the expression 3713 1.1 mrg cannot be statically analyzed, return false, otherwise set the answer into 3714 1.1 mrg VALUE. */ 3715 1.1 mrg 3716 1.1 mrg static bool 3717 1.1 mrg chrec_is_positive (tree chrec, bool *value) 3718 1.1 mrg { 3719 1.1 mrg bool value0, value1, value2; 3720 1.1 mrg tree end_value, nb_iter; 3721 1.1 mrg 3722 1.1 mrg switch (TREE_CODE (chrec)) 3723 1.1 mrg { 3724 1.1 mrg case POLYNOMIAL_CHREC: 3725 1.1 mrg if (!chrec_is_positive (CHREC_LEFT (chrec), &value0) 3726 1.1 mrg || !chrec_is_positive (CHREC_RIGHT (chrec), &value1)) 3727 1.1 mrg return false; 3728 1.1 mrg 3729 1.1 mrg /* FIXME -- overflows. 
*/ 3730 1.1 mrg if (value0 == value1) 3731 1.1 mrg { 3732 1.1 mrg *value = value0; 3733 1.1 mrg return true; 3734 1.1 mrg } 3735 1.1 mrg 3736 1.1 mrg /* Otherwise the chrec is under the form: "{-197, +, 2}_1", 3737 1.1 mrg and the proof consists in showing that the sign never 3738 1.1 mrg changes during the execution of the loop, from 0 to 3739 1.1 mrg loop->nb_iterations. */ 3740 1.1 mrg if (!evolution_function_is_affine_p (chrec)) 3741 1.1 mrg return false; 3742 1.1 mrg 3743 1.1 mrg nb_iter = number_of_latch_executions (get_chrec_loop (chrec)); 3744 1.1 mrg if (chrec_contains_undetermined (nb_iter)) 3745 1.1 mrg return false; 3746 1.1 mrg 3747 1.1 mrg #if 0 3748 1.1 mrg /* TODO -- If the test is after the exit, we may decrease the number of 3749 1.1 mrg iterations by one. */ 3750 1.1 mrg if (after_exit) 3751 1.1 mrg nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1)); 3752 1.1 mrg #endif 3753 1.1 mrg 3754 1.1 mrg end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter); 3755 1.1 mrg 3756 1.1 mrg if (!chrec_is_positive (end_value, &value2)) 3757 1.1 mrg return false; 3758 1.1 mrg 3759 1.1 mrg *value = value0; 3760 1.1 mrg return value0 == value1; 3761 1.1 mrg 3762 1.1 mrg case INTEGER_CST: 3763 1.1 mrg switch (tree_int_cst_sgn (chrec)) 3764 1.1 mrg { 3765 1.1 mrg case -1: 3766 1.1 mrg *value = false; 3767 1.1 mrg break; 3768 1.1 mrg case 1: 3769 1.1 mrg *value = true; 3770 1.1 mrg break; 3771 1.1 mrg default: 3772 1.1 mrg return false; 3773 1.1 mrg } 3774 1.1 mrg return true; 3775 1.1 mrg 3776 1.1 mrg default: 3777 1.1 mrg return false; 3778 1.1 mrg } 3779 1.1 mrg } 3780 1.1 mrg 3781 1.1 mrg 3782 1.1 mrg /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a 3783 1.1 mrg constant, and CHREC_B is an affine function. *OVERLAPS_A and 3784 1.1 mrg *OVERLAPS_B are initialized to the functions that describe the 3785 1.1 mrg relation between the elements accessed twice by CHREC_A and 3786 1.1 mrg CHREC_B. 
For k >= 0, the following property is verified: 3787 1.1 mrg 3788 1.1 mrg CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */ 3789 1.1 mrg 3790 1.1 mrg static void 3791 1.1 mrg analyze_siv_subscript_cst_affine (tree chrec_a, 3792 1.1 mrg tree chrec_b, 3793 1.1 mrg conflict_function **overlaps_a, 3794 1.1 mrg conflict_function **overlaps_b, 3795 1.1 mrg tree *last_conflicts) 3796 1.1 mrg { 3797 1.1 mrg bool value0, value1, value2; 3798 1.1 mrg tree type, difference, tmp; 3799 1.1 mrg 3800 1.1 mrg type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b)); 3801 1.1 mrg chrec_a = chrec_convert (type, chrec_a, NULL); 3802 1.1 mrg chrec_b = chrec_convert (type, chrec_b, NULL); 3803 1.1 mrg difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a); 3804 1.1 mrg 3805 1.1 mrg /* Special case overlap in the first iteration. */ 3806 1.1 mrg if (integer_zerop (difference)) 3807 1.1 mrg { 3808 1.1 mrg *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 3809 1.1 mrg *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node)); 3810 1.1 mrg *last_conflicts = integer_one_node; 3811 1.1 mrg return; 3812 1.1 mrg } 3813 1.1 mrg 3814 1.1 mrg if (!chrec_is_positive (initial_condition (difference), &value0)) 3815 1.1 mrg { 3816 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 3817 1.1 mrg fprintf (dump_file, "siv test failed: chrec is not positive.\n"); 3818 1.1 mrg 3819 1.1 mrg dependence_stats.num_siv_unimplemented++; 3820 1.1 mrg *overlaps_a = conflict_fn_not_known (); 3821 1.1 mrg *overlaps_b = conflict_fn_not_known (); 3822 1.1 mrg *last_conflicts = chrec_dont_know; 3823 1.1 mrg return; 3824 1.1 mrg } 3825 1.1 mrg else 3826 1.1 mrg { 3827 1.1 mrg if (value0 == false) 3828 1.1 mrg { 3829 1.1 mrg if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC 3830 1.1 mrg || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1)) 3831 1.1 mrg { 3832 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 3833 1.1 mrg fprintf (dump_file, "siv test 
failed: chrec not positive.\n"); 3834 1.1 mrg 3835 1.1 mrg *overlaps_a = conflict_fn_not_known (); 3836 1.1 mrg *overlaps_b = conflict_fn_not_known (); 3837 1.1 mrg *last_conflicts = chrec_dont_know; 3838 1.1 mrg dependence_stats.num_siv_unimplemented++; 3839 1.1 mrg return; 3840 1.1 mrg } 3841 1.1 mrg else 3842 1.1 mrg { 3843 1.1 mrg if (value1 == true) 3844 1.1 mrg { 3845 1.1 mrg /* Example: 3846 1.1 mrg chrec_a = 12 3847 1.1 mrg chrec_b = {10, +, 1} 3848 1.1 mrg */ 3849 1.1 mrg 3850 1.1 mrg if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference)) 3851 1.1 mrg { 3852 1.1 mrg HOST_WIDE_INT numiter; 3853 1.1 mrg class loop *loop = get_chrec_loop (chrec_b); 3854 1.1 mrg 3855 1.1 mrg *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 3856 1.1 mrg tmp = fold_build2 (EXACT_DIV_EXPR, type, 3857 1.1 mrg fold_build1 (ABS_EXPR, type, difference), 3858 1.1 mrg CHREC_RIGHT (chrec_b)); 3859 1.1 mrg *overlaps_b = conflict_fn (1, affine_fn_cst (tmp)); 3860 1.1 mrg *last_conflicts = integer_one_node; 3861 1.1 mrg 3862 1.1 mrg 3863 1.1 mrg /* Perform weak-zero siv test to see if overlap is 3864 1.1 mrg outside the loop bounds. */ 3865 1.1 mrg numiter = max_stmt_executions_int (loop); 3866 1.1 mrg 3867 1.1 mrg if (numiter >= 0 3868 1.1 mrg && compare_tree_int (tmp, numiter) > 0) 3869 1.1 mrg { 3870 1.1 mrg free_conflict_function (*overlaps_a); 3871 1.1 mrg free_conflict_function (*overlaps_b); 3872 1.1 mrg *overlaps_a = conflict_fn_no_dependence (); 3873 1.1 mrg *overlaps_b = conflict_fn_no_dependence (); 3874 1.1 mrg *last_conflicts = integer_zero_node; 3875 1.1 mrg dependence_stats.num_siv_independent++; 3876 1.1 mrg return; 3877 1.1 mrg } 3878 1.1 mrg dependence_stats.num_siv_dependent++; 3879 1.1 mrg return; 3880 1.1 mrg } 3881 1.1 mrg 3882 1.1 mrg /* When the step does not divide the difference, there are 3883 1.1 mrg no overlaps. 
*/ 3884 1.1 mrg else 3885 1.1 mrg { 3886 1.1 mrg *overlaps_a = conflict_fn_no_dependence (); 3887 1.1 mrg *overlaps_b = conflict_fn_no_dependence (); 3888 1.1 mrg *last_conflicts = integer_zero_node; 3889 1.1 mrg dependence_stats.num_siv_independent++; 3890 1.1 mrg return; 3891 1.1 mrg } 3892 1.1 mrg } 3893 1.1 mrg 3894 1.1 mrg else 3895 1.1 mrg { 3896 1.1 mrg /* Example: 3897 1.1 mrg chrec_a = 12 3898 1.1 mrg chrec_b = {10, +, -1} 3899 1.1 mrg 3900 1.1 mrg In this case, chrec_a will not overlap with chrec_b. */ 3901 1.1 mrg *overlaps_a = conflict_fn_no_dependence (); 3902 1.1 mrg *overlaps_b = conflict_fn_no_dependence (); 3903 1.1 mrg *last_conflicts = integer_zero_node; 3904 1.1 mrg dependence_stats.num_siv_independent++; 3905 1.1 mrg return; 3906 1.1 mrg } 3907 1.1 mrg } 3908 1.1 mrg } 3909 1.1 mrg else 3910 1.1 mrg { 3911 1.1 mrg if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC 3912 1.1 mrg || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2)) 3913 1.1 mrg { 3914 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 3915 1.1 mrg fprintf (dump_file, "siv test failed: chrec not positive.\n"); 3916 1.1 mrg 3917 1.1 mrg *overlaps_a = conflict_fn_not_known (); 3918 1.1 mrg *overlaps_b = conflict_fn_not_known (); 3919 1.1 mrg *last_conflicts = chrec_dont_know; 3920 1.1 mrg dependence_stats.num_siv_unimplemented++; 3921 1.1 mrg return; 3922 1.1 mrg } 3923 1.1 mrg else 3924 1.1 mrg { 3925 1.1 mrg if (value2 == false) 3926 1.1 mrg { 3927 1.1 mrg /* Example: 3928 1.1 mrg chrec_a = 3 3929 1.1 mrg chrec_b = {10, +, -1} 3930 1.1 mrg */ 3931 1.1 mrg if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference)) 3932 1.1 mrg { 3933 1.1 mrg HOST_WIDE_INT numiter; 3934 1.1 mrg class loop *loop = get_chrec_loop (chrec_b); 3935 1.1 mrg 3936 1.1 mrg *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 3937 1.1 mrg tmp = fold_build2 (EXACT_DIV_EXPR, type, difference, 3938 1.1 mrg CHREC_RIGHT (chrec_b)); 3939 1.1 mrg *overlaps_b = conflict_fn (1, affine_fn_cst (tmp)); 3940 
1.1 mrg *last_conflicts = integer_one_node; 3941 1.1 mrg 3942 1.1 mrg /* Perform weak-zero siv test to see if overlap is 3943 1.1 mrg outside the loop bounds. */ 3944 1.1 mrg numiter = max_stmt_executions_int (loop); 3945 1.1 mrg 3946 1.1 mrg if (numiter >= 0 3947 1.1 mrg && compare_tree_int (tmp, numiter) > 0) 3948 1.1 mrg { 3949 1.1 mrg free_conflict_function (*overlaps_a); 3950 1.1 mrg free_conflict_function (*overlaps_b); 3951 1.1 mrg *overlaps_a = conflict_fn_no_dependence (); 3952 1.1 mrg *overlaps_b = conflict_fn_no_dependence (); 3953 1.1 mrg *last_conflicts = integer_zero_node; 3954 1.1 mrg dependence_stats.num_siv_independent++; 3955 1.1 mrg return; 3956 1.1 mrg } 3957 1.1 mrg dependence_stats.num_siv_dependent++; 3958 1.1 mrg return; 3959 1.1 mrg } 3960 1.1 mrg 3961 1.1 mrg /* When the step does not divide the difference, there 3962 1.1 mrg are no overlaps. */ 3963 1.1 mrg else 3964 1.1 mrg { 3965 1.1 mrg *overlaps_a = conflict_fn_no_dependence (); 3966 1.1 mrg *overlaps_b = conflict_fn_no_dependence (); 3967 1.1 mrg *last_conflicts = integer_zero_node; 3968 1.1 mrg dependence_stats.num_siv_independent++; 3969 1.1 mrg return; 3970 1.1 mrg } 3971 1.1 mrg } 3972 1.1 mrg else 3973 1.1 mrg { 3974 1.1 mrg /* Example: 3975 1.1 mrg chrec_a = 3 3976 1.1 mrg chrec_b = {4, +, 1} 3977 1.1 mrg 3978 1.1 mrg In this case, chrec_a will not overlap with chrec_b. */ 3979 1.1 mrg *overlaps_a = conflict_fn_no_dependence (); 3980 1.1 mrg *overlaps_b = conflict_fn_no_dependence (); 3981 1.1 mrg *last_conflicts = integer_zero_node; 3982 1.1 mrg dependence_stats.num_siv_independent++; 3983 1.1 mrg return; 3984 1.1 mrg } 3985 1.1 mrg } 3986 1.1 mrg } 3987 1.1 mrg } 3988 1.1 mrg } 3989 1.1 mrg 3990 1.1 mrg /* Helper recursive function for initializing the matrix A. Returns 3991 1.1 mrg the initial value of CHREC. 
*/ 3992 1.1 mrg 3993 1.1 mrg static tree 3994 1.1 mrg initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult) 3995 1.1 mrg { 3996 1.1 mrg gcc_assert (chrec); 3997 1.1 mrg 3998 1.1 mrg switch (TREE_CODE (chrec)) 3999 1.1 mrg { 4000 1.1 mrg case POLYNOMIAL_CHREC: 4001 1.1 mrg HOST_WIDE_INT chrec_right; 4002 1.1 mrg if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec))) 4003 1.1 mrg return chrec_dont_know; 4004 1.1 mrg chrec_right = int_cst_value (CHREC_RIGHT (chrec)); 4005 1.1 mrg /* We want to be able to negate without overflow. */ 4006 1.1 mrg if (chrec_right == HOST_WIDE_INT_MIN) 4007 1.1 mrg return chrec_dont_know; 4008 1.1 mrg A[index][0] = mult * chrec_right; 4009 1.1 mrg return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult); 4010 1.1 mrg 4011 1.1 mrg case PLUS_EXPR: 4012 1.1 mrg case MULT_EXPR: 4013 1.1 mrg case MINUS_EXPR: 4014 1.1 mrg { 4015 1.1 mrg tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult); 4016 1.1 mrg tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult); 4017 1.1 mrg 4018 1.1 mrg return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1); 4019 1.1 mrg } 4020 1.1 mrg 4021 1.1 mrg CASE_CONVERT: 4022 1.1 mrg { 4023 1.1 mrg tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult); 4024 1.1 mrg return chrec_convert (chrec_type (chrec), op, NULL); 4025 1.1 mrg } 4026 1.1 mrg 4027 1.1 mrg case BIT_NOT_EXPR: 4028 1.1 mrg { 4029 1.1 mrg /* Handle ~X as -1 - X. */ 4030 1.1 mrg tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult); 4031 1.1 mrg return chrec_fold_op (MINUS_EXPR, chrec_type (chrec), 4032 1.1 mrg build_int_cst (TREE_TYPE (chrec), -1), op); 4033 1.1 mrg } 4034 1.1 mrg 4035 1.1 mrg case INTEGER_CST: 4036 1.1 mrg return cst_and_fits_in_hwi (chrec) ? 
chrec : chrec_dont_know; 4037 1.1 mrg 4038 1.1 mrg default: 4039 1.1 mrg gcc_unreachable (); 4040 1.1 mrg return NULL_TREE; 4041 1.1 mrg } 4042 1.1 mrg } 4043 1.1 mrg 4044 1.1 mrg #define FLOOR_DIV(x,y) ((x) / (y)) 4045 1.1 mrg 4046 1.1 mrg /* Solves the special case of the Diophantine equation: 4047 1.1 mrg | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B) 4048 1.1 mrg 4049 1.1 mrg Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the 4050 1.1 mrg number of iterations that loops X and Y run. The overlaps will be 4051 1.1 mrg constructed as evolutions in dimension DIM. */ 4052 1.1 mrg 4053 1.1 mrg static void 4054 1.1 mrg compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter, 4055 1.1 mrg HOST_WIDE_INT step_a, 4056 1.1 mrg HOST_WIDE_INT step_b, 4057 1.1 mrg affine_fn *overlaps_a, 4058 1.1 mrg affine_fn *overlaps_b, 4059 1.1 mrg tree *last_conflicts, int dim) 4060 1.1 mrg { 4061 1.1 mrg if (((step_a > 0 && step_b > 0) 4062 1.1 mrg || (step_a < 0 && step_b < 0))) 4063 1.1 mrg { 4064 1.1 mrg HOST_WIDE_INT step_overlaps_a, step_overlaps_b; 4065 1.1 mrg HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2; 4066 1.1 mrg 4067 1.1 mrg gcd_steps_a_b = gcd (step_a, step_b); 4068 1.1 mrg step_overlaps_a = step_b / gcd_steps_a_b; 4069 1.1 mrg step_overlaps_b = step_a / gcd_steps_a_b; 4070 1.1 mrg 4071 1.1 mrg if (niter > 0) 4072 1.1 mrg { 4073 1.1 mrg tau2 = FLOOR_DIV (niter, step_overlaps_a); 4074 1.1 mrg tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b)); 4075 1.1 mrg last_conflict = tau2; 4076 1.1 mrg *last_conflicts = build_int_cst (NULL_TREE, last_conflict); 4077 1.1 mrg } 4078 1.1 mrg else 4079 1.1 mrg *last_conflicts = chrec_dont_know; 4080 1.1 mrg 4081 1.1 mrg *overlaps_a = affine_fn_univar (integer_zero_node, dim, 4082 1.1 mrg build_int_cst (NULL_TREE, 4083 1.1 mrg step_overlaps_a)); 4084 1.1 mrg *overlaps_b = affine_fn_univar (integer_zero_node, dim, 4085 1.1 mrg build_int_cst (NULL_TREE, 4086 1.1 mrg step_overlaps_b)); 4087 1.1 mrg } 
4088 1.1 mrg 4089 1.1 mrg else 4090 1.1 mrg { 4091 1.1 mrg *overlaps_a = affine_fn_cst (integer_zero_node); 4092 1.1 mrg *overlaps_b = affine_fn_cst (integer_zero_node); 4093 1.1 mrg *last_conflicts = integer_zero_node; 4094 1.1 mrg } 4095 1.1 mrg } 4096 1.1 mrg 4097 1.1 mrg /* Solves the special case of a Diophantine equation where CHREC_A is 4098 1.1 mrg an affine bivariate function, and CHREC_B is an affine univariate 4099 1.1 mrg function. For example, 4100 1.1 mrg 4101 1.1 mrg | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z 4102 1.1 mrg 4103 1.1 mrg has the following overlapping functions: 4104 1.1 mrg 4105 1.1 mrg | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v 4106 1.1 mrg | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v 4107 1.1 mrg | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v 4108 1.1 mrg 4109 1.1 mrg FORNOW: This is a specialized implementation for a case occurring in 4110 1.1 mrg a common benchmark. Implement the general algorithm. */ 4111 1.1 mrg 4112 1.1 mrg static void 4113 1.1 mrg compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b, 4114 1.1 mrg conflict_function **overlaps_a, 4115 1.1 mrg conflict_function **overlaps_b, 4116 1.1 mrg tree *last_conflicts) 4117 1.1 mrg { 4118 1.1 mrg bool xz_p, yz_p, xyz_p; 4119 1.1 mrg HOST_WIDE_INT step_x, step_y, step_z; 4120 1.1 mrg HOST_WIDE_INT niter_x, niter_y, niter_z, niter; 4121 1.1 mrg affine_fn overlaps_a_xz, overlaps_b_xz; 4122 1.1 mrg affine_fn overlaps_a_yz, overlaps_b_yz; 4123 1.1 mrg affine_fn overlaps_a_xyz, overlaps_b_xyz; 4124 1.1 mrg affine_fn ova1, ova2, ovb; 4125 1.1 mrg tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz; 4126 1.1 mrg 4127 1.1 mrg step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a))); 4128 1.1 mrg step_y = int_cst_value (CHREC_RIGHT (chrec_a)); 4129 1.1 mrg step_z = int_cst_value (CHREC_RIGHT (chrec_b)); 4130 1.1 mrg 4131 1.1 mrg niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a))); 4132 1.1 mrg niter_y = max_stmt_executions_int 
(get_chrec_loop (chrec_a)); 4133 1.1 mrg niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b)); 4134 1.1 mrg 4135 1.1 mrg if (niter_x < 0 || niter_y < 0 || niter_z < 0) 4136 1.1 mrg { 4137 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 4138 1.1 mrg fprintf (dump_file, "overlap steps test failed: no iteration counts.\n"); 4139 1.1 mrg 4140 1.1 mrg *overlaps_a = conflict_fn_not_known (); 4141 1.1 mrg *overlaps_b = conflict_fn_not_known (); 4142 1.1 mrg *last_conflicts = chrec_dont_know; 4143 1.1 mrg return; 4144 1.1 mrg } 4145 1.1 mrg 4146 1.1 mrg niter = MIN (niter_x, niter_z); 4147 1.1 mrg compute_overlap_steps_for_affine_univar (niter, step_x, step_z, 4148 1.1 mrg &overlaps_a_xz, 4149 1.1 mrg &overlaps_b_xz, 4150 1.1 mrg &last_conflicts_xz, 1); 4151 1.1 mrg niter = MIN (niter_y, niter_z); 4152 1.1 mrg compute_overlap_steps_for_affine_univar (niter, step_y, step_z, 4153 1.1 mrg &overlaps_a_yz, 4154 1.1 mrg &overlaps_b_yz, 4155 1.1 mrg &last_conflicts_yz, 2); 4156 1.1 mrg niter = MIN (niter_x, niter_z); 4157 1.1 mrg niter = MIN (niter_y, niter); 4158 1.1 mrg compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z, 4159 1.1 mrg &overlaps_a_xyz, 4160 1.1 mrg &overlaps_b_xyz, 4161 1.1 mrg &last_conflicts_xyz, 3); 4162 1.1 mrg 4163 1.1 mrg xz_p = !integer_zerop (last_conflicts_xz); 4164 1.1 mrg yz_p = !integer_zerop (last_conflicts_yz); 4165 1.1 mrg xyz_p = !integer_zerop (last_conflicts_xyz); 4166 1.1 mrg 4167 1.1 mrg if (xz_p || yz_p || xyz_p) 4168 1.1 mrg { 4169 1.1 mrg ova1 = affine_fn_cst (integer_zero_node); 4170 1.1 mrg ova2 = affine_fn_cst (integer_zero_node); 4171 1.1 mrg ovb = affine_fn_cst (integer_zero_node); 4172 1.1 mrg if (xz_p) 4173 1.1 mrg { 4174 1.1 mrg affine_fn t0 = ova1; 4175 1.1 mrg affine_fn t2 = ovb; 4176 1.1 mrg 4177 1.1 mrg ova1 = affine_fn_plus (ova1, overlaps_a_xz); 4178 1.1 mrg ovb = affine_fn_plus (ovb, overlaps_b_xz); 4179 1.1 mrg affine_fn_free (t0); 4180 1.1 mrg affine_fn_free (t2); 4181 1.1 mrg 
*last_conflicts = last_conflicts_xz; 4182 1.1 mrg } 4183 1.1 mrg if (yz_p) 4184 1.1 mrg { 4185 1.1 mrg affine_fn t0 = ova2; 4186 1.1 mrg affine_fn t2 = ovb; 4187 1.1 mrg 4188 1.1 mrg ova2 = affine_fn_plus (ova2, overlaps_a_yz); 4189 1.1 mrg ovb = affine_fn_plus (ovb, overlaps_b_yz); 4190 1.1 mrg affine_fn_free (t0); 4191 1.1 mrg affine_fn_free (t2); 4192 1.1 mrg *last_conflicts = last_conflicts_yz; 4193 1.1 mrg } 4194 1.1 mrg if (xyz_p) 4195 1.1 mrg { 4196 1.1 mrg affine_fn t0 = ova1; 4197 1.1 mrg affine_fn t2 = ova2; 4198 1.1 mrg affine_fn t4 = ovb; 4199 1.1 mrg 4200 1.1 mrg ova1 = affine_fn_plus (ova1, overlaps_a_xyz); 4201 1.1 mrg ova2 = affine_fn_plus (ova2, overlaps_a_xyz); 4202 1.1 mrg ovb = affine_fn_plus (ovb, overlaps_b_xyz); 4203 1.1 mrg affine_fn_free (t0); 4204 1.1 mrg affine_fn_free (t2); 4205 1.1 mrg affine_fn_free (t4); 4206 1.1 mrg *last_conflicts = last_conflicts_xyz; 4207 1.1 mrg } 4208 1.1 mrg *overlaps_a = conflict_fn (2, ova1, ova2); 4209 1.1 mrg *overlaps_b = conflict_fn (1, ovb); 4210 1.1 mrg } 4211 1.1 mrg else 4212 1.1 mrg { 4213 1.1 mrg *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 4214 1.1 mrg *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node)); 4215 1.1 mrg *last_conflicts = integer_zero_node; 4216 1.1 mrg } 4217 1.1 mrg 4218 1.1 mrg affine_fn_free (overlaps_a_xz); 4219 1.1 mrg affine_fn_free (overlaps_b_xz); 4220 1.1 mrg affine_fn_free (overlaps_a_yz); 4221 1.1 mrg affine_fn_free (overlaps_b_yz); 4222 1.1 mrg affine_fn_free (overlaps_a_xyz); 4223 1.1 mrg affine_fn_free (overlaps_b_xyz); 4224 1.1 mrg } 4225 1.1 mrg 4226 1.1 mrg /* Copy the elements of vector VEC1 with length SIZE to VEC2. */ 4227 1.1 mrg 4228 1.1 mrg static void 4229 1.1 mrg lambda_vector_copy (lambda_vector vec1, lambda_vector vec2, 4230 1.1 mrg int size) 4231 1.1 mrg { 4232 1.1 mrg memcpy (vec2, vec1, size * sizeof (*vec1)); 4233 1.1 mrg } 4234 1.1 mrg 4235 1.1 mrg /* Copy the elements of M x N matrix MAT1 to MAT2. 
*/ 4236 1.1 mrg 4237 1.1 mrg static void 4238 1.1 mrg lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2, 4239 1.1 mrg int m, int n) 4240 1.1 mrg { 4241 1.1 mrg int i; 4242 1.1 mrg 4243 1.1 mrg for (i = 0; i < m; i++) 4244 1.1 mrg lambda_vector_copy (mat1[i], mat2[i], n); 4245 1.1 mrg } 4246 1.1 mrg 4247 1.1 mrg /* Store the N x N identity matrix in MAT. */ 4248 1.1 mrg 4249 1.1 mrg static void 4250 1.1 mrg lambda_matrix_id (lambda_matrix mat, int size) 4251 1.1 mrg { 4252 1.1 mrg int i, j; 4253 1.1 mrg 4254 1.1 mrg for (i = 0; i < size; i++) 4255 1.1 mrg for (j = 0; j < size; j++) 4256 1.1 mrg mat[i][j] = (i == j) ? 1 : 0; 4257 1.1 mrg } 4258 1.1 mrg 4259 1.1 mrg /* Return the index of the first nonzero element of vector VEC1 between 4260 1.1 mrg START and N. We must have START <= N. 4261 1.1 mrg Returns N if VEC1 is the zero vector. */ 4262 1.1 mrg 4263 1.1 mrg static int 4264 1.1 mrg lambda_vector_first_nz (lambda_vector vec1, int n, int start) 4265 1.1 mrg { 4266 1.1 mrg int j = start; 4267 1.1 mrg while (j < n && vec1[j] == 0) 4268 1.1 mrg j++; 4269 1.1 mrg return j; 4270 1.1 mrg } 4271 1.1 mrg 4272 1.1 mrg /* Add a multiple of row R1 of matrix MAT with N columns to row R2: 4273 1.1 mrg R2 = R2 + CONST1 * R1. 
*/ 4274 1.1 mrg 4275 1.1 mrg static bool 4276 1.1 mrg lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, 4277 1.1 mrg lambda_int const1) 4278 1.1 mrg { 4279 1.1 mrg int i; 4280 1.1 mrg 4281 1.1 mrg if (const1 == 0) 4282 1.1 mrg return true; 4283 1.1 mrg 4284 1.1 mrg for (i = 0; i < n; i++) 4285 1.1 mrg { 4286 1.1 mrg bool ovf; 4287 1.1 mrg lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf); 4288 1.1 mrg if (ovf) 4289 1.1 mrg return false; 4290 1.1 mrg lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf); 4291 1.1 mrg if (ovf || tem2 == HOST_WIDE_INT_MIN) 4292 1.1 mrg return false; 4293 1.1 mrg mat[r2][i] = tem2; 4294 1.1 mrg } 4295 1.1 mrg 4296 1.1 mrg return true; 4297 1.1 mrg } 4298 1.1 mrg 4299 1.1 mrg /* Multiply vector VEC1 of length SIZE by a constant CONST1, 4300 1.1 mrg and store the result in VEC2. */ 4301 1.1 mrg 4302 1.1 mrg static void 4303 1.1 mrg lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2, 4304 1.1 mrg int size, lambda_int const1) 4305 1.1 mrg { 4306 1.1 mrg int i; 4307 1.1 mrg 4308 1.1 mrg if (const1 == 0) 4309 1.1 mrg lambda_vector_clear (vec2, size); 4310 1.1 mrg else 4311 1.1 mrg for (i = 0; i < size; i++) 4312 1.1 mrg vec2[i] = const1 * vec1[i]; 4313 1.1 mrg } 4314 1.1 mrg 4315 1.1 mrg /* Negate vector VEC1 with length SIZE and store it in VEC2. */ 4316 1.1 mrg 4317 1.1 mrg static void 4318 1.1 mrg lambda_vector_negate (lambda_vector vec1, lambda_vector vec2, 4319 1.1 mrg int size) 4320 1.1 mrg { 4321 1.1 mrg lambda_vector_mult_const (vec1, vec2, size, -1); 4322 1.1 mrg } 4323 1.1 mrg 4324 1.1 mrg /* Negate row R1 of matrix MAT which has N columns. */ 4325 1.1 mrg 4326 1.1 mrg static void 4327 1.1 mrg lambda_matrix_row_negate (lambda_matrix mat, int n, int r1) 4328 1.1 mrg { 4329 1.1 mrg lambda_vector_negate (mat[r1], mat[r1], n); 4330 1.1 mrg } 4331 1.1 mrg 4332 1.1 mrg /* Return true if two vectors are equal. 
*/ 4333 1.1 mrg 4334 1.1 mrg static bool 4335 1.1 mrg lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size) 4336 1.1 mrg { 4337 1.1 mrg int i; 4338 1.1 mrg for (i = 0; i < size; i++) 4339 1.1 mrg if (vec1[i] != vec2[i]) 4340 1.1 mrg return false; 4341 1.1 mrg return true; 4342 1.1 mrg } 4343 1.1 mrg 4344 1.1 mrg /* Given an M x N integer matrix A, this function determines an M x 4345 1.1 mrg M unimodular matrix U, and an M x N echelon matrix S such that 4346 1.1 mrg "U.A = S". This decomposition is also known as "right Hermite". 4347 1.1 mrg 4348 1.1 mrg Ref: Algorithm 2.1 page 33 in "Loop Transformations for 4349 1.1 mrg Restructuring Compilers" Utpal Banerjee. */ 4350 1.1 mrg 4351 1.1 mrg static bool 4352 1.1 mrg lambda_matrix_right_hermite (lambda_matrix A, int m, int n, 4353 1.1 mrg lambda_matrix S, lambda_matrix U) 4354 1.1 mrg { 4355 1.1 mrg int i, j, i0 = 0; 4356 1.1 mrg 4357 1.1 mrg lambda_matrix_copy (A, S, m, n); 4358 1.1 mrg lambda_matrix_id (U, m); 4359 1.1 mrg 4360 1.1 mrg for (j = 0; j < n; j++) 4361 1.1 mrg { 4362 1.1 mrg if (lambda_vector_first_nz (S[j], m, i0) < m) 4363 1.1 mrg { 4364 1.1 mrg ++i0; 4365 1.1 mrg for (i = m - 1; i >= i0; i--) 4366 1.1 mrg { 4367 1.1 mrg while (S[i][j] != 0) 4368 1.1 mrg { 4369 1.1 mrg lambda_int factor, a, b; 4370 1.1 mrg 4371 1.1 mrg a = S[i-1][j]; 4372 1.1 mrg b = S[i][j]; 4373 1.1 mrg gcc_assert (a != HOST_WIDE_INT_MIN); 4374 1.1 mrg factor = a / b; 4375 1.1 mrg 4376 1.1 mrg if (!lambda_matrix_row_add (S, n, i, i-1, -factor)) 4377 1.1 mrg return false; 4378 1.1 mrg std::swap (S[i], S[i-1]); 4379 1.1 mrg 4380 1.1 mrg if (!lambda_matrix_row_add (U, m, i, i-1, -factor)) 4381 1.1 mrg return false; 4382 1.1 mrg std::swap (U[i], U[i-1]); 4383 1.1 mrg } 4384 1.1 mrg } 4385 1.1 mrg } 4386 1.1 mrg } 4387 1.1 mrg 4388 1.1 mrg return true; 4389 1.1 mrg } 4390 1.1 mrg 4391 1.1 mrg /* Determines the overlapping elements due to accesses CHREC_A and 4392 1.1 mrg CHREC_B, that are affine functions. 
This function cannot handle symbolic evolution functions, ie. when
   initial conditions are parameters, because it uses lambda matrices
   of integers.  */

static void
analyze_subscript_affine_affine (tree chrec_a,
				 tree chrec_b,
				 conflict_function **overlaps_a,
				 conflict_function **overlaps_b,
				 tree *last_conflicts)
{
  unsigned nb_vars_a, nb_vars_b, dim;
  lambda_int gamma, gcd_alpha_beta;
  lambda_matrix A, U, S;
  struct obstack scratch_obstack;

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* The accessed index overlaps for each iteration in the
	 loop.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
      return;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_subscript_affine_affine \n");

  /* For determining the initial intersection, we have to solve a
     Diophantine equation.  This is the most time consuming part.

     For answering to the question: "Is there a dependence?" we have
     to prove that there exists a solution to the Diophantine
     equation, and that the solution is in the iteration domain,
     i.e. the solution is positive or zero, and that the solution
     happens before the upper bound loop.nb_iterations.  Otherwise
     there is no dependence.  This function outputs a description of
     the iterations that hold the intersections.  */

  nb_vars_a = nb_vars_in_chrec (chrec_a);
  nb_vars_b = nb_vars_in_chrec (chrec_b);

  /* All the matrices are allocated on this obstack and released in one
     shot at end_analyze_subs_aa, so every early exit below must go
     through that label.  */
  gcc_obstack_init (&scratch_obstack);

  dim = nb_vars_a + nb_vars_b;
  U = lambda_matrix_new (dim, dim, &scratch_obstack);
  A = lambda_matrix_new (dim, 1, &scratch_obstack);
  S = lambda_matrix_new (dim, 1, &scratch_obstack);

  tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
  tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
  if (init_a == chrec_dont_know
      || init_b == chrec_dont_know)
    {
      /* A step or initial value did not fit in a HOST_WIDE_INT.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: "
		 "representation issue.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }
  gamma = int_cst_value (init_b) - int_cst_value (init_a);

  /* Don't do all the hard work of solving the Diophantine equation
     when we already know the solution: for example,
     | {3, +, 1}_1
     | {3, +, 4}_2
     | gamma = 3 - 3 = 0.
     Then the first overlap occurs during the first iterations:
     | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
  */
  if (gamma == 0)
    {
      if (nb_vars_a == 1 && nb_vars_b == 1)
	{
	  HOST_WIDE_INT step_a, step_b;
	  HOST_WIDE_INT niter, niter_a, niter_b;
	  affine_fn ova, ovb;

	  niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
	  niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
	  niter = MIN (niter_a, niter_b);
	  step_a = int_cst_value (CHREC_RIGHT (chrec_a));
	  step_b = int_cst_value (CHREC_RIGHT (chrec_b));

	  compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
						   &ova, &ovb,
						   last_conflicts, 1);
	  *overlaps_a = conflict_fn (1, ova);
	  *overlaps_b = conflict_fn (1, ovb);
	}

      else if (nb_vars_a == 2 && nb_vars_b == 1)
	compute_overlap_steps_for_affine_1_2
	  (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);

      else if (nb_vars_a == 1 && nb_vars_b == 2)
	compute_overlap_steps_for_affine_1_2
	  (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);

      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: too many variables.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
      goto end_analyze_subs_aa;
    }

  /* U.A = S */
  if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
    {
      /* The decomposition overflowed.  */
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  /* Normalize the leading coefficient to be positive; mirror the
     negation on U to keep U.A = S.  */
  if (S[0][0] < 0)
    {
      S[0][0] *= -1;
      lambda_matrix_row_negate (U, dim, 0);
    }
  gcd_alpha_beta = S[0][0];

  /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
     but that is a quite strange case.  Instead of ICEing, answer
     don't know.  */
  if (gcd_alpha_beta == 0)
    {
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  /* The classic "gcd-test".  */
  if (!int_divides_p (gcd_alpha_beta, gamma))
    {
      /* The "gcd-test" has determined that there is no integer
	 solution, i.e. there is no dependence.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
    }

  /* Both access functions are univariate.  This includes SIV and MIV cases.  */
  else if (nb_vars_a == 1 && nb_vars_b == 1)
    {
      /* Both functions should have the same evolution sign.  */
      if (((A[0][0] > 0 && -A[1][0] > 0)
	   || (A[0][0] < 0 && -A[1][0] < 0)))
	{
	  /* The solutions are given by:
	     |
	     | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
	     |                           [u21 u22]    [y0]

	     For a given integer t.  Using the following variables,

	     | i0 = u11 * gamma / gcd_alpha_beta
	     | j0 = u12 * gamma / gcd_alpha_beta
	     | i1 = u21
	     | j1 = u22

	     the solutions are:

	     | x0 = i0 + i1 * t,
	     | y0 = j0 + j1 * t.  */
	  HOST_WIDE_INT i0, j0, i1, j1;

	  i0 = U[0][0] * gamma / gcd_alpha_beta;
	  j0 = U[0][1] * gamma / gcd_alpha_beta;
	  i1 = U[1][0];
	  j1 = U[1][1];

	  if ((i1 == 0 && i0 < 0)
	      || (j1 == 0 && j0 < 0))
	    {
	      /* There is no solution.
		 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
		 falls in here, but for the moment we don't look at the
		 upper bound of the iteration domain.  */
	      *overlaps_a = conflict_fn_no_dependence ();
	      *overlaps_b = conflict_fn_no_dependence ();
	      *last_conflicts = integer_zero_node;
	      goto end_analyze_subs_aa;
	    }

	  if (i1 > 0 && j1 > 0)
	    {
	      HOST_WIDE_INT niter_a
		= max_stmt_executions_int (get_chrec_loop (chrec_a));
	      HOST_WIDE_INT niter_b
		= max_stmt_executions_int (get_chrec_loop (chrec_b));
	      HOST_WIDE_INT niter = MIN (niter_a, niter_b);

	      /* (X0, Y0) is a solution of the Diophantine equation:
		 "chrec_a (X0) = chrec_b (Y0)".  */
	      HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
					CEIL (-j0, j1));
	      HOST_WIDE_INT x0 = i1 * tau1 + i0;
	      HOST_WIDE_INT y0 = j1 * tau1 + j0;

	      /* (X1, Y1) is the smallest positive solution of the eq
		 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
		 first conflict occurs.  */
	      HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
	      HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
	      HOST_WIDE_INT y1 = y0 - j1 * min_multiple;

	      if (niter > 0)
		{
		  /* If the overlap occurs outside of the bounds of the
		     loop, there is no dependence.  */
		  if (x1 >= niter_a || y1 >= niter_b)
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      goto end_analyze_subs_aa;
		    }

		  /* max stmt executions can get quite large, avoid
		     overflows by using wide ints here.  */
		  widest_int tau2
		    = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
				wi::sdiv_floor (wi::sub (niter_b, j0), j1));
		  widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
		  if (wi::min_precision (last_conflict, SIGNED)
		      <= TYPE_PRECISION (integer_type_node))
		    *last_conflicts
		       = build_int_cst (integer_type_node,
					last_conflict.to_shwi ());
		  else
		    *last_conflicts = chrec_dont_know;
		}
	      else
		*last_conflicts = chrec_dont_know;

	      *overlaps_a
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, x1),
						 1,
						 build_int_cst (NULL_TREE, i1)));
	      *overlaps_b
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, y1),
						 1,
						 build_int_cst (NULL_TREE, j1)));
	    }
	  else
	    {
	      /* FIXME: For the moment, the upper bound of the
		 iteration domain for i and j is not checked.  */
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	    }
	}

      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
    }

 end_analyze_subs_aa:
  obstack_free (&scratch_obstack, NULL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " (overlaps_a = ");
      dump_conflict_function (dump_file, *overlaps_a);
      fprintf (dump_file, ")\n (overlaps_b = ");
      dump_conflict_function (dump_file, *overlaps_b);
      fprintf (dump_file, "))\n");
    }
}

/* Returns true when analyze_subscript_affine_affine can be used for
   determining the dependence relation between chrec_a and chrec_b,
   that contain symbols.  This function modifies chrec_a and chrec_b
   such that the analysis result is the same, and such that they don't
   contain symbols, and then can safely be passed to the analyzer.
4692 1.1 mrg 4693 1.1 mrg Example: The analysis of the following tuples of evolutions produce 4694 1.1 mrg the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1 4695 1.1 mrg vs. {0, +, 1}_1 4696 1.1 mrg 4697 1.1 mrg {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1) 4698 1.1 mrg {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1) 4699 1.1 mrg */ 4700 1.1 mrg 4701 1.1 mrg static bool 4702 1.1 mrg can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b) 4703 1.1 mrg { 4704 1.1 mrg tree diff, type, left_a, left_b, right_b; 4705 1.1 mrg 4706 1.1 mrg if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a)) 4707 1.1 mrg || chrec_contains_symbols (CHREC_RIGHT (*chrec_b))) 4708 1.1 mrg /* FIXME: For the moment not handled. Might be refined later. */ 4709 1.1 mrg return false; 4710 1.1 mrg 4711 1.1 mrg type = chrec_type (*chrec_a); 4712 1.1 mrg left_a = CHREC_LEFT (*chrec_a); 4713 1.1 mrg left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL); 4714 1.1 mrg diff = chrec_fold_minus (type, left_a, left_b); 4715 1.1 mrg 4716 1.1 mrg if (!evolution_function_is_constant_p (diff)) 4717 1.1 mrg return false; 4718 1.1 mrg 4719 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 4720 1.1 mrg fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n"); 4721 1.1 mrg 4722 1.1 mrg *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a), 4723 1.1 mrg diff, CHREC_RIGHT (*chrec_a)); 4724 1.1 mrg right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL); 4725 1.1 mrg *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b), 4726 1.1 mrg build_int_cst (type, 0), 4727 1.1 mrg right_b); 4728 1.1 mrg return true; 4729 1.1 mrg } 4730 1.1 mrg 4731 1.1 mrg /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and 4732 1.1 mrg *OVERLAPS_B are initialized to the functions that describe the 4733 1.1 mrg relation between the elements accessed twice by CHREC_A and 4734 1.1 mrg CHREC_B. 
For k >= 0, the following property is verified: 4735 1.1 mrg 4736 1.1 mrg CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */ 4737 1.1 mrg 4738 1.1 mrg static void 4739 1.1 mrg analyze_siv_subscript (tree chrec_a, 4740 1.1 mrg tree chrec_b, 4741 1.1 mrg conflict_function **overlaps_a, 4742 1.1 mrg conflict_function **overlaps_b, 4743 1.1 mrg tree *last_conflicts, 4744 1.1 mrg int loop_nest_num) 4745 1.1 mrg { 4746 1.1 mrg dependence_stats.num_siv++; 4747 1.1 mrg 4748 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 4749 1.1 mrg fprintf (dump_file, "(analyze_siv_subscript \n"); 4750 1.1 mrg 4751 1.1 mrg if (evolution_function_is_constant_p (chrec_a) 4752 1.1 mrg && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num)) 4753 1.1 mrg analyze_siv_subscript_cst_affine (chrec_a, chrec_b, 4754 1.1 mrg overlaps_a, overlaps_b, last_conflicts); 4755 1.1 mrg 4756 1.1 mrg else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num) 4757 1.1 mrg && evolution_function_is_constant_p (chrec_b)) 4758 1.1 mrg analyze_siv_subscript_cst_affine (chrec_b, chrec_a, 4759 1.1 mrg overlaps_b, overlaps_a, last_conflicts); 4760 1.1 mrg 4761 1.1 mrg else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num) 4762 1.1 mrg && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num)) 4763 1.1 mrg { 4764 1.1 mrg if (!chrec_contains_symbols (chrec_a) 4765 1.1 mrg && !chrec_contains_symbols (chrec_b)) 4766 1.1 mrg { 4767 1.1 mrg analyze_subscript_affine_affine (chrec_a, chrec_b, 4768 1.1 mrg overlaps_a, overlaps_b, 4769 1.1 mrg last_conflicts); 4770 1.1 mrg 4771 1.1 mrg if (CF_NOT_KNOWN_P (*overlaps_a) 4772 1.1 mrg || CF_NOT_KNOWN_P (*overlaps_b)) 4773 1.1 mrg dependence_stats.num_siv_unimplemented++; 4774 1.1 mrg else if (CF_NO_DEPENDENCE_P (*overlaps_a) 4775 1.1 mrg || CF_NO_DEPENDENCE_P (*overlaps_b)) 4776 1.1 mrg dependence_stats.num_siv_independent++; 4777 1.1 mrg else 4778 1.1 mrg dependence_stats.num_siv_dependent++; 4779 1.1 mrg } 4780 1.1 mrg else 
if (can_use_analyze_subscript_affine_affine (&chrec_a, 4781 1.1 mrg &chrec_b)) 4782 1.1 mrg { 4783 1.1 mrg analyze_subscript_affine_affine (chrec_a, chrec_b, 4784 1.1 mrg overlaps_a, overlaps_b, 4785 1.1 mrg last_conflicts); 4786 1.1 mrg 4787 1.1 mrg if (CF_NOT_KNOWN_P (*overlaps_a) 4788 1.1 mrg || CF_NOT_KNOWN_P (*overlaps_b)) 4789 1.1 mrg dependence_stats.num_siv_unimplemented++; 4790 1.1 mrg else if (CF_NO_DEPENDENCE_P (*overlaps_a) 4791 1.1 mrg || CF_NO_DEPENDENCE_P (*overlaps_b)) 4792 1.1 mrg dependence_stats.num_siv_independent++; 4793 1.1 mrg else 4794 1.1 mrg dependence_stats.num_siv_dependent++; 4795 1.1 mrg } 4796 1.1 mrg else 4797 1.1 mrg goto siv_subscript_dontknow; 4798 1.1 mrg } 4799 1.1 mrg 4800 1.1 mrg else 4801 1.1 mrg { 4802 1.1 mrg siv_subscript_dontknow:; 4803 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 4804 1.1 mrg fprintf (dump_file, " siv test failed: unimplemented"); 4805 1.1 mrg *overlaps_a = conflict_fn_not_known (); 4806 1.1 mrg *overlaps_b = conflict_fn_not_known (); 4807 1.1 mrg *last_conflicts = chrec_dont_know; 4808 1.1 mrg dependence_stats.num_siv_unimplemented++; 4809 1.1 mrg } 4810 1.1 mrg 4811 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 4812 1.1 mrg fprintf (dump_file, ")\n"); 4813 1.1 mrg } 4814 1.1 mrg 4815 1.1 mrg /* Returns false if we can prove that the greatest common divisor of the steps 4816 1.1 mrg of CHREC does not divide CST, false otherwise. 
*/

static bool
gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
{
  HOST_WIDE_INT cd = 0, val;
  tree step;

  /* If CST does not fit in a signed HOST_WIDE_INT we cannot disprove
     divisibility; conservatively answer "may divide".  */
  if (!tree_fits_shwi_p (cst))
    return true;
  val = tree_to_shwi (cst);

  /* Accumulate the GCD of all the steps of CHREC.  */
  while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
    {
      step = CHREC_RIGHT (chrec);
      if (!tree_fits_shwi_p (step))
	return true;
      cd = gcd (cd, tree_to_shwi (step));
      chrec = CHREC_LEFT (chrec);
    }

  /* CD is zero when CHREC has no polynomial part or when all its steps
     are zero; avoid the division by zero below and answer
     conservatively.  */
  if (cd == 0)
    return true;

  return val % cd == 0;
}

/* Analyze a MIV (Multiple Index Variable) subscript with respect to
   LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
   functions that describe the relation between the elements accessed
   twice by CHREC_A and CHREC_B.  For k >= 0, the following property
   is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).
*/ 4847 1.1 mrg 4848 1.1 mrg static void 4849 1.1 mrg analyze_miv_subscript (tree chrec_a, 4850 1.1 mrg tree chrec_b, 4851 1.1 mrg conflict_function **overlaps_a, 4852 1.1 mrg conflict_function **overlaps_b, 4853 1.1 mrg tree *last_conflicts, 4854 1.1 mrg class loop *loop_nest) 4855 1.1 mrg { 4856 1.1 mrg tree type, difference; 4857 1.1 mrg 4858 1.1 mrg dependence_stats.num_miv++; 4859 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 4860 1.1 mrg fprintf (dump_file, "(analyze_miv_subscript \n"); 4861 1.1 mrg 4862 1.1 mrg type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b)); 4863 1.1 mrg chrec_a = chrec_convert (type, chrec_a, NULL); 4864 1.1 mrg chrec_b = chrec_convert (type, chrec_b, NULL); 4865 1.1 mrg difference = chrec_fold_minus (type, chrec_a, chrec_b); 4866 1.1 mrg 4867 1.1 mrg if (eq_evolutions_p (chrec_a, chrec_b)) 4868 1.1 mrg { 4869 1.1 mrg /* Access functions are the same: all the elements are accessed 4870 1.1 mrg in the same order. */ 4871 1.1 mrg *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 4872 1.1 mrg *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node)); 4873 1.1 mrg *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a)); 4874 1.1 mrg dependence_stats.num_miv_dependent++; 4875 1.1 mrg } 4876 1.1 mrg 4877 1.1 mrg else if (evolution_function_is_constant_p (difference) 4878 1.1 mrg && evolution_function_is_affine_multivariate_p (chrec_a, 4879 1.1 mrg loop_nest->num) 4880 1.1 mrg && !gcd_of_steps_may_divide_p (chrec_a, difference)) 4881 1.1 mrg { 4882 1.1 mrg /* testsuite/.../ssa-chrec-33.c 4883 1.1 mrg {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2 4884 1.1 mrg 4885 1.1 mrg The difference is 1, and all the evolution steps are multiples 4886 1.1 mrg of 2, consequently there are no overlapping elements. 
*/ 4887 1.1 mrg *overlaps_a = conflict_fn_no_dependence (); 4888 1.1 mrg *overlaps_b = conflict_fn_no_dependence (); 4889 1.1 mrg *last_conflicts = integer_zero_node; 4890 1.1 mrg dependence_stats.num_miv_independent++; 4891 1.1 mrg } 4892 1.1 mrg 4893 1.1 mrg else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num) 4894 1.1 mrg && !chrec_contains_symbols (chrec_a, loop_nest) 4895 1.1 mrg && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num) 4896 1.1 mrg && !chrec_contains_symbols (chrec_b, loop_nest)) 4897 1.1 mrg { 4898 1.1 mrg /* testsuite/.../ssa-chrec-35.c 4899 1.1 mrg {0, +, 1}_2 vs. {0, +, 1}_3 4900 1.1 mrg the overlapping elements are respectively located at iterations: 4901 1.1 mrg {0, +, 1}_x and {0, +, 1}_x, 4902 1.1 mrg in other words, we have the equality: 4903 1.1 mrg {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x) 4904 1.1 mrg 4905 1.1 mrg Other examples: 4906 1.1 mrg {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) = 4907 1.1 mrg {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y) 4908 1.1 mrg 4909 1.1 mrg {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) = 4910 1.1 mrg {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) 4911 1.1 mrg */ 4912 1.1 mrg analyze_subscript_affine_affine (chrec_a, chrec_b, 4913 1.1 mrg overlaps_a, overlaps_b, last_conflicts); 4914 1.1 mrg 4915 1.1 mrg if (CF_NOT_KNOWN_P (*overlaps_a) 4916 1.1 mrg || CF_NOT_KNOWN_P (*overlaps_b)) 4917 1.1 mrg dependence_stats.num_miv_unimplemented++; 4918 1.1 mrg else if (CF_NO_DEPENDENCE_P (*overlaps_a) 4919 1.1 mrg || CF_NO_DEPENDENCE_P (*overlaps_b)) 4920 1.1 mrg dependence_stats.num_miv_independent++; 4921 1.1 mrg else 4922 1.1 mrg dependence_stats.num_miv_dependent++; 4923 1.1 mrg } 4924 1.1 mrg 4925 1.1 mrg else 4926 1.1 mrg { 4927 1.1 mrg /* When the analysis is too difficult, answer "don't know". 
*/ 4928 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 4929 1.1 mrg fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n"); 4930 1.1 mrg 4931 1.1 mrg *overlaps_a = conflict_fn_not_known (); 4932 1.1 mrg *overlaps_b = conflict_fn_not_known (); 4933 1.1 mrg *last_conflicts = chrec_dont_know; 4934 1.1 mrg dependence_stats.num_miv_unimplemented++; 4935 1.1 mrg } 4936 1.1 mrg 4937 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 4938 1.1 mrg fprintf (dump_file, ")\n"); 4939 1.1 mrg } 4940 1.1 mrg 4941 1.1 mrg /* Determines the iterations for which CHREC_A is equal to CHREC_B in 4942 1.1 mrg with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and 4943 1.1 mrg OVERLAP_ITERATIONS_B are initialized with two functions that 4944 1.1 mrg describe the iterations that contain conflicting elements. 4945 1.1 mrg 4946 1.1 mrg Remark: For an integer k >= 0, the following equality is true: 4947 1.1 mrg 4948 1.1 mrg CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)). 
4949 1.1 mrg */ 4950 1.1 mrg 4951 1.1 mrg static void 4952 1.1 mrg analyze_overlapping_iterations (tree chrec_a, 4953 1.1 mrg tree chrec_b, 4954 1.1 mrg conflict_function **overlap_iterations_a, 4955 1.1 mrg conflict_function **overlap_iterations_b, 4956 1.1 mrg tree *last_conflicts, class loop *loop_nest) 4957 1.1 mrg { 4958 1.1 mrg unsigned int lnn = loop_nest->num; 4959 1.1 mrg 4960 1.1 mrg dependence_stats.num_subscript_tests++; 4961 1.1 mrg 4962 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 4963 1.1 mrg { 4964 1.1 mrg fprintf (dump_file, "(analyze_overlapping_iterations \n"); 4965 1.1 mrg fprintf (dump_file, " (chrec_a = "); 4966 1.1 mrg print_generic_expr (dump_file, chrec_a); 4967 1.1 mrg fprintf (dump_file, ")\n (chrec_b = "); 4968 1.1 mrg print_generic_expr (dump_file, chrec_b); 4969 1.1 mrg fprintf (dump_file, ")\n"); 4970 1.1 mrg } 4971 1.1 mrg 4972 1.1 mrg if (chrec_a == NULL_TREE 4973 1.1 mrg || chrec_b == NULL_TREE 4974 1.1 mrg || chrec_contains_undetermined (chrec_a) 4975 1.1 mrg || chrec_contains_undetermined (chrec_b)) 4976 1.1 mrg { 4977 1.1 mrg dependence_stats.num_subscript_undetermined++; 4978 1.1 mrg 4979 1.1 mrg *overlap_iterations_a = conflict_fn_not_known (); 4980 1.1 mrg *overlap_iterations_b = conflict_fn_not_known (); 4981 1.1 mrg } 4982 1.1 mrg 4983 1.1 mrg /* If they are the same chrec, and are affine, they overlap 4984 1.1 mrg on every iteration. 
*/ 4985 1.1 mrg else if (eq_evolutions_p (chrec_a, chrec_b) 4986 1.1 mrg && (evolution_function_is_affine_multivariate_p (chrec_a, lnn) 4987 1.1 mrg || operand_equal_p (chrec_a, chrec_b, 0))) 4988 1.1 mrg { 4989 1.1 mrg dependence_stats.num_same_subscript_function++; 4990 1.1 mrg *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 4991 1.1 mrg *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node)); 4992 1.1 mrg *last_conflicts = chrec_dont_know; 4993 1.1 mrg } 4994 1.1 mrg 4995 1.1 mrg /* If they aren't the same, and aren't affine, we can't do anything 4996 1.1 mrg yet. */ 4997 1.1 mrg else if ((chrec_contains_symbols (chrec_a) 4998 1.1 mrg || chrec_contains_symbols (chrec_b)) 4999 1.1 mrg && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn) 5000 1.1 mrg || !evolution_function_is_affine_multivariate_p (chrec_b, lnn))) 5001 1.1 mrg { 5002 1.1 mrg dependence_stats.num_subscript_undetermined++; 5003 1.1 mrg *overlap_iterations_a = conflict_fn_not_known (); 5004 1.1 mrg *overlap_iterations_b = conflict_fn_not_known (); 5005 1.1 mrg } 5006 1.1 mrg 5007 1.1 mrg else if (ziv_subscript_p (chrec_a, chrec_b)) 5008 1.1 mrg analyze_ziv_subscript (chrec_a, chrec_b, 5009 1.1 mrg overlap_iterations_a, overlap_iterations_b, 5010 1.1 mrg last_conflicts); 5011 1.1 mrg 5012 1.1 mrg else if (siv_subscript_p (chrec_a, chrec_b)) 5013 1.1 mrg analyze_siv_subscript (chrec_a, chrec_b, 5014 1.1 mrg overlap_iterations_a, overlap_iterations_b, 5015 1.1 mrg last_conflicts, lnn); 5016 1.1 mrg 5017 1.1 mrg else 5018 1.1 mrg analyze_miv_subscript (chrec_a, chrec_b, 5019 1.1 mrg overlap_iterations_a, overlap_iterations_b, 5020 1.1 mrg last_conflicts, loop_nest); 5021 1.1 mrg 5022 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 5023 1.1 mrg { 5024 1.1 mrg fprintf (dump_file, " (overlap_iterations_a = "); 5025 1.1 mrg dump_conflict_function (dump_file, *overlap_iterations_a); 5026 1.1 mrg fprintf (dump_file, ")\n (overlap_iterations_b = "); 
5027 1.1 mrg dump_conflict_function (dump_file, *overlap_iterations_b); 5028 1.1 mrg fprintf (dump_file, "))\n"); 5029 1.1 mrg } 5030 1.1 mrg } 5031 1.1 mrg 5032 1.1 mrg /* Helper function for uniquely inserting distance vectors. */ 5033 1.1 mrg 5034 1.1 mrg static void 5035 1.1 mrg save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v) 5036 1.1 mrg { 5037 1.1 mrg for (lambda_vector v : DDR_DIST_VECTS (ddr)) 5038 1.1 mrg if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr))) 5039 1.1 mrg return; 5040 1.1 mrg 5041 1.1 mrg DDR_DIST_VECTS (ddr).safe_push (dist_v); 5042 1.1 mrg } 5043 1.1 mrg 5044 1.1 mrg /* Helper function for uniquely inserting direction vectors. */ 5045 1.1 mrg 5046 1.1 mrg static void 5047 1.1 mrg save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v) 5048 1.1 mrg { 5049 1.1 mrg for (lambda_vector v : DDR_DIR_VECTS (ddr)) 5050 1.1 mrg if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr))) 5051 1.1 mrg return; 5052 1.1 mrg 5053 1.1 mrg DDR_DIR_VECTS (ddr).safe_push (dir_v); 5054 1.1 mrg } 5055 1.1 mrg 5056 1.1 mrg /* Add a distance of 1 on all the loops outer than INDEX. If we 5057 1.1 mrg haven't yet determined a distance for this outer loop, push a new 5058 1.1 mrg distance vector composed of the previous distance, and a distance 5059 1.1 mrg of 1 for this outer loop. Example: 5060 1.1 mrg 5061 1.1 mrg | loop_1 5062 1.1 mrg | loop_2 5063 1.1 mrg | A[10] 5064 1.1 mrg | endloop_2 5065 1.1 mrg | endloop_1 5066 1.1 mrg 5067 1.1 mrg Saved vectors are of the form (dist_in_1, dist_in_2). First, we 5068 1.1 mrg save (0, 1), then we have to save (1, 0). */ 5069 1.1 mrg 5070 1.1 mrg static void 5071 1.1 mrg add_outer_distances (struct data_dependence_relation *ddr, 5072 1.1 mrg lambda_vector dist_v, int index) 5073 1.1 mrg { 5074 1.1 mrg /* For each outer loop where init_v is not set, the accesses are 5075 1.1 mrg in dependence of distance 1 in the loop. 
*/ 5076 1.1 mrg while (--index >= 0) 5077 1.1 mrg { 5078 1.1 mrg lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 5079 1.1 mrg lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr)); 5080 1.1 mrg save_v[index] = 1; 5081 1.1 mrg save_dist_v (ddr, save_v); 5082 1.1 mrg } 5083 1.1 mrg } 5084 1.1 mrg 5085 1.1 mrg /* Return false when fail to represent the data dependence as a 5086 1.1 mrg distance vector. A_INDEX is the index of the first reference 5087 1.1 mrg (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the 5088 1.1 mrg second reference. INIT_B is set to true when a component has been 5089 1.1 mrg added to the distance vector DIST_V. INDEX_CARRY is then set to 5090 1.1 mrg the index in DIST_V that carries the dependence. */ 5091 1.1 mrg 5092 1.1 mrg static bool 5093 1.1 mrg build_classic_dist_vector_1 (struct data_dependence_relation *ddr, 5094 1.1 mrg unsigned int a_index, unsigned int b_index, 5095 1.1 mrg lambda_vector dist_v, bool *init_b, 5096 1.1 mrg int *index_carry) 5097 1.1 mrg { 5098 1.1 mrg unsigned i; 5099 1.1 mrg lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 5100 1.1 mrg class loop *loop = DDR_LOOP_NEST (ddr)[0]; 5101 1.1 mrg 5102 1.1 mrg for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) 5103 1.1 mrg { 5104 1.1 mrg tree access_fn_a, access_fn_b; 5105 1.1 mrg struct subscript *subscript = DDR_SUBSCRIPT (ddr, i); 5106 1.1 mrg 5107 1.1 mrg if (chrec_contains_undetermined (SUB_DISTANCE (subscript))) 5108 1.1 mrg { 5109 1.1 mrg non_affine_dependence_relation (ddr); 5110 1.1 mrg return false; 5111 1.1 mrg } 5112 1.1 mrg 5113 1.1 mrg access_fn_a = SUB_ACCESS_FN (subscript, a_index); 5114 1.1 mrg access_fn_b = SUB_ACCESS_FN (subscript, b_index); 5115 1.1 mrg 5116 1.1 mrg if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC 5117 1.1 mrg && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC) 5118 1.1 mrg { 5119 1.1 mrg HOST_WIDE_INT dist; 5120 1.1 mrg int index; 5121 1.1 mrg int var_a = CHREC_VARIABLE (access_fn_a); 5122 1.1 mrg int 
var_b = CHREC_VARIABLE (access_fn_b); 5123 1.1 mrg 5124 1.1 mrg if (var_a != var_b 5125 1.1 mrg || chrec_contains_undetermined (SUB_DISTANCE (subscript))) 5126 1.1 mrg { 5127 1.1 mrg non_affine_dependence_relation (ddr); 5128 1.1 mrg return false; 5129 1.1 mrg } 5130 1.1 mrg 5131 1.1 mrg /* When data references are collected in a loop while data 5132 1.1 mrg dependences are analyzed in loop nest nested in the loop, we 5133 1.1 mrg would have more number of access functions than number of 5134 1.1 mrg loops. Skip access functions of loops not in the loop nest. 5135 1.1 mrg 5136 1.1 mrg See PR89725 for more information. */ 5137 1.1 mrg if (flow_loop_nested_p (get_loop (cfun, var_a), loop)) 5138 1.1 mrg continue; 5139 1.1 mrg 5140 1.1 mrg dist = int_cst_value (SUB_DISTANCE (subscript)); 5141 1.1 mrg index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr)); 5142 1.1 mrg *index_carry = MIN (index, *index_carry); 5143 1.1 mrg 5144 1.1 mrg /* This is the subscript coupling test. If we have already 5145 1.1 mrg recorded a distance for this loop (a distance coming from 5146 1.1 mrg another subscript), it should be the same. For example, 5147 1.1 mrg in the following code, there is no dependence: 5148 1.1 mrg 5149 1.1 mrg | loop i = 0, N, 1 5150 1.1 mrg | T[i+1][i] = ... 5151 1.1 mrg | ... = T[i][i] 5152 1.1 mrg | endloop 5153 1.1 mrg */ 5154 1.1 mrg if (init_v[index] != 0 && dist_v[index] != dist) 5155 1.1 mrg { 5156 1.1 mrg finalize_ddr_dependent (ddr, chrec_known); 5157 1.1 mrg return false; 5158 1.1 mrg } 5159 1.1 mrg 5160 1.1 mrg dist_v[index] = dist; 5161 1.1 mrg init_v[index] = 1; 5162 1.1 mrg *init_b = true; 5163 1.1 mrg } 5164 1.1 mrg else if (!operand_equal_p (access_fn_a, access_fn_b, 0)) 5165 1.1 mrg { 5166 1.1 mrg /* This can be for example an affine vs. constant dependence 5167 1.1 mrg (T[i] vs. T[3]) that is not an affine dependence and is 5168 1.1 mrg not representable as a distance vector. 
*/ 5169 1.1 mrg non_affine_dependence_relation (ddr); 5170 1.1 mrg return false; 5171 1.1 mrg } 5172 1.1 mrg } 5173 1.1 mrg 5174 1.1 mrg return true; 5175 1.1 mrg } 5176 1.1 mrg 5177 1.1 mrg /* Return true when the DDR contains only invariant access functions wrto. loop 5178 1.1 mrg number LNUM. */ 5179 1.1 mrg 5180 1.1 mrg static bool 5181 1.1 mrg invariant_access_functions (const struct data_dependence_relation *ddr, 5182 1.1 mrg int lnum) 5183 1.1 mrg { 5184 1.1 mrg for (subscript *sub : DDR_SUBSCRIPTS (ddr)) 5185 1.1 mrg if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum) 5186 1.1 mrg || !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum)) 5187 1.1 mrg return false; 5188 1.1 mrg 5189 1.1 mrg return true; 5190 1.1 mrg } 5191 1.1 mrg 5192 1.1 mrg /* Helper function for the case where DDR_A and DDR_B are the same 5193 1.1 mrg multivariate access function with a constant step. For an example 5194 1.1 mrg see pr34635-1.c. */ 5195 1.1 mrg 5196 1.1 mrg static void 5197 1.1 mrg add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2) 5198 1.1 mrg { 5199 1.1 mrg int x_1, x_2; 5200 1.1 mrg tree c_1 = CHREC_LEFT (c_2); 5201 1.1 mrg tree c_0 = CHREC_LEFT (c_1); 5202 1.1 mrg lambda_vector dist_v; 5203 1.1 mrg HOST_WIDE_INT v1, v2, cd; 5204 1.1 mrg 5205 1.1 mrg /* Polynomials with more than 2 variables are not handled yet. When 5206 1.1 mrg the evolution steps are parameters, it is not possible to 5207 1.1 mrg represent the dependence using classical distance vectors. 
*/ 5208 1.1 mrg if (TREE_CODE (c_0) != INTEGER_CST 5209 1.1 mrg || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST 5210 1.1 mrg || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST) 5211 1.1 mrg { 5212 1.1 mrg DDR_AFFINE_P (ddr) = false; 5213 1.1 mrg return; 5214 1.1 mrg } 5215 1.1 mrg 5216 1.1 mrg x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr)); 5217 1.1 mrg x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr)); 5218 1.1 mrg 5219 1.1 mrg /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */ 5220 1.1 mrg dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 5221 1.1 mrg v1 = int_cst_value (CHREC_RIGHT (c_1)); 5222 1.1 mrg v2 = int_cst_value (CHREC_RIGHT (c_2)); 5223 1.1 mrg cd = gcd (v1, v2); 5224 1.1 mrg v1 /= cd; 5225 1.1 mrg v2 /= cd; 5226 1.1 mrg 5227 1.1 mrg if (v2 < 0) 5228 1.1 mrg { 5229 1.1 mrg v2 = -v2; 5230 1.1 mrg v1 = -v1; 5231 1.1 mrg } 5232 1.1 mrg 5233 1.1 mrg dist_v[x_1] = v2; 5234 1.1 mrg dist_v[x_2] = -v1; 5235 1.1 mrg save_dist_v (ddr, dist_v); 5236 1.1 mrg 5237 1.1 mrg add_outer_distances (ddr, dist_v, x_1); 5238 1.1 mrg } 5239 1.1 mrg 5240 1.1 mrg /* Helper function for the case where DDR_A and DDR_B are the same 5241 1.1 mrg access functions. 
*/ 5242 1.1 mrg 5243 1.1 mrg static void 5244 1.1 mrg add_other_self_distances (struct data_dependence_relation *ddr) 5245 1.1 mrg { 5246 1.1 mrg lambda_vector dist_v; 5247 1.1 mrg unsigned i; 5248 1.1 mrg int index_carry = DDR_NB_LOOPS (ddr); 5249 1.1 mrg subscript *sub; 5250 1.1 mrg class loop *loop = DDR_LOOP_NEST (ddr)[0]; 5251 1.1 mrg 5252 1.1 mrg FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub) 5253 1.1 mrg { 5254 1.1 mrg tree access_fun = SUB_ACCESS_FN (sub, 0); 5255 1.1 mrg 5256 1.1 mrg if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC) 5257 1.1 mrg { 5258 1.1 mrg if (!evolution_function_is_univariate_p (access_fun, loop->num)) 5259 1.1 mrg { 5260 1.1 mrg if (DDR_NUM_SUBSCRIPTS (ddr) != 1) 5261 1.1 mrg { 5262 1.1 mrg DDR_ARE_DEPENDENT (ddr) = chrec_dont_know; 5263 1.1 mrg return; 5264 1.1 mrg } 5265 1.1 mrg 5266 1.1 mrg access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0); 5267 1.1 mrg 5268 1.1 mrg if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC) 5269 1.1 mrg add_multivariate_self_dist (ddr, access_fun); 5270 1.1 mrg else 5271 1.1 mrg /* The evolution step is not constant: it varies in 5272 1.1 mrg the outer loop, so this cannot be represented by a 5273 1.1 mrg distance vector. For example in pr34635.c the 5274 1.1 mrg evolution is {0, +, {0, +, 4}_1}_2. */ 5275 1.1 mrg DDR_AFFINE_P (ddr) = false; 5276 1.1 mrg 5277 1.1 mrg return; 5278 1.1 mrg } 5279 1.1 mrg 5280 1.1 mrg /* When data references are collected in a loop while data 5281 1.1 mrg dependences are analyzed in loop nest nested in the loop, we 5282 1.1 mrg would have more number of access functions than number of 5283 1.1 mrg loops. Skip access functions of loops not in the loop nest. 5284 1.1 mrg 5285 1.1 mrg See PR89725 for more information. 
*/ 5286 1.1 mrg if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)), 5287 1.1 mrg loop)) 5288 1.1 mrg continue; 5289 1.1 mrg 5290 1.1 mrg index_carry = MIN (index_carry, 5291 1.1 mrg index_in_loop_nest (CHREC_VARIABLE (access_fun), 5292 1.1 mrg DDR_LOOP_NEST (ddr))); 5293 1.1 mrg } 5294 1.1 mrg } 5295 1.1 mrg 5296 1.1 mrg dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 5297 1.1 mrg add_outer_distances (ddr, dist_v, index_carry); 5298 1.1 mrg } 5299 1.1 mrg 5300 1.1 mrg static void 5301 1.1 mrg insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr) 5302 1.1 mrg { 5303 1.1 mrg lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 5304 1.1 mrg 5305 1.1 mrg dist_v[0] = 1; 5306 1.1 mrg save_dist_v (ddr, dist_v); 5307 1.1 mrg } 5308 1.1 mrg 5309 1.1 mrg /* Adds a unit distance vector to DDR when there is a 0 overlap. This 5310 1.1 mrg is the case for example when access functions are the same and 5311 1.1 mrg equal to a constant, as in: 5312 1.1 mrg 5313 1.1 mrg | loop_1 5314 1.1 mrg | A[3] = ... 5315 1.1 mrg | ... = A[3] 5316 1.1 mrg | endloop_1 5317 1.1 mrg 5318 1.1 mrg in which case the distance vectors are (0) and (1). 
*/ 5319 1.1 mrg 5320 1.1 mrg static void 5321 1.1 mrg add_distance_for_zero_overlaps (struct data_dependence_relation *ddr) 5322 1.1 mrg { 5323 1.1 mrg unsigned i, j; 5324 1.1 mrg 5325 1.1 mrg for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) 5326 1.1 mrg { 5327 1.1 mrg subscript_p sub = DDR_SUBSCRIPT (ddr, i); 5328 1.1 mrg conflict_function *ca = SUB_CONFLICTS_IN_A (sub); 5329 1.1 mrg conflict_function *cb = SUB_CONFLICTS_IN_B (sub); 5330 1.1 mrg 5331 1.1 mrg for (j = 0; j < ca->n; j++) 5332 1.1 mrg if (affine_function_zero_p (ca->fns[j])) 5333 1.1 mrg { 5334 1.1 mrg insert_innermost_unit_dist_vector (ddr); 5335 1.1 mrg return; 5336 1.1 mrg } 5337 1.1 mrg 5338 1.1 mrg for (j = 0; j < cb->n; j++) 5339 1.1 mrg if (affine_function_zero_p (cb->fns[j])) 5340 1.1 mrg { 5341 1.1 mrg insert_innermost_unit_dist_vector (ddr); 5342 1.1 mrg return; 5343 1.1 mrg } 5344 1.1 mrg } 5345 1.1 mrg } 5346 1.1 mrg 5347 1.1 mrg /* Return true when the DDR contains two data references that have the 5348 1.1 mrg same access functions. */ 5349 1.1 mrg 5350 1.1 mrg static inline bool 5351 1.1 mrg same_access_functions (const struct data_dependence_relation *ddr) 5352 1.1 mrg { 5353 1.1 mrg for (subscript *sub : DDR_SUBSCRIPTS (ddr)) 5354 1.1 mrg if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0), 5355 1.1 mrg SUB_ACCESS_FN (sub, 1))) 5356 1.1 mrg return false; 5357 1.1 mrg 5358 1.1 mrg return true; 5359 1.1 mrg } 5360 1.1 mrg 5361 1.1 mrg /* Compute the classic per loop distance vector. DDR is the data 5362 1.1 mrg dependence relation to build a vector from. Return false when fail 5363 1.1 mrg to represent the data dependence as a distance vector. 
*/ 5364 1.1 mrg 5365 1.1 mrg static bool 5366 1.1 mrg build_classic_dist_vector (struct data_dependence_relation *ddr, 5367 1.1 mrg class loop *loop_nest) 5368 1.1 mrg { 5369 1.1 mrg bool init_b = false; 5370 1.1 mrg int index_carry = DDR_NB_LOOPS (ddr); 5371 1.1 mrg lambda_vector dist_v; 5372 1.1 mrg 5373 1.1 mrg if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE) 5374 1.1 mrg return false; 5375 1.1 mrg 5376 1.1 mrg if (same_access_functions (ddr)) 5377 1.1 mrg { 5378 1.1 mrg /* Save the 0 vector. */ 5379 1.1 mrg dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 5380 1.1 mrg save_dist_v (ddr, dist_v); 5381 1.1 mrg 5382 1.1 mrg if (invariant_access_functions (ddr, loop_nest->num)) 5383 1.1 mrg add_distance_for_zero_overlaps (ddr); 5384 1.1 mrg 5385 1.1 mrg if (DDR_NB_LOOPS (ddr) > 1) 5386 1.1 mrg add_other_self_distances (ddr); 5387 1.1 mrg 5388 1.1 mrg return true; 5389 1.1 mrg } 5390 1.1 mrg 5391 1.1 mrg dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 5392 1.1 mrg if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry)) 5393 1.1 mrg return false; 5394 1.1 mrg 5395 1.1 mrg /* Save the distance vector if we initialized one. */ 5396 1.1 mrg if (init_b) 5397 1.1 mrg { 5398 1.1 mrg /* Verify a basic constraint: classic distance vectors should 5399 1.1 mrg always be lexicographically positive. 5400 1.1 mrg 5401 1.1 mrg Data references are collected in the order of execution of 5402 1.1 mrg the program, thus for the following loop 5403 1.1 mrg 5404 1.1 mrg | for (i = 1; i < 100; i++) 5405 1.1 mrg | for (j = 1; j < 100; j++) 5406 1.1 mrg | { 5407 1.1 mrg | t = T[j+1][i-1]; // A 5408 1.1 mrg | T[j][i] = t + 2; // B 5409 1.1 mrg | } 5410 1.1 mrg 5411 1.1 mrg references are collected following the direction of the wind: 5412 1.1 mrg A then B. The data dependence tests are performed also 5413 1.1 mrg following this order, such that we're looking at the distance 5414 1.1 mrg separating the elements accessed by A from the elements later 5415 1.1 mrg accessed by B. 
But in this example, the distance returned by 5416 1.1 mrg test_dep (A, B) is lexicographically negative (-1, 1), that 5417 1.1 mrg means that the access A occurs later than B with respect to 5418 1.1 mrg the outer loop, ie. we're actually looking upwind. In this 5419 1.1 mrg case we solve test_dep (B, A) looking downwind to the 5420 1.1 mrg lexicographically positive solution, that returns the 5421 1.1 mrg distance vector (1, -1). */ 5422 1.1 mrg if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr))) 5423 1.1 mrg { 5424 1.1 mrg lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 5425 1.1 mrg if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest)) 5426 1.1 mrg return false; 5427 1.1 mrg compute_subscript_distance (ddr); 5428 1.1 mrg if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b, 5429 1.1 mrg &index_carry)) 5430 1.1 mrg return false; 5431 1.1 mrg save_dist_v (ddr, save_v); 5432 1.1 mrg DDR_REVERSED_P (ddr) = true; 5433 1.1 mrg 5434 1.1 mrg /* In this case there is a dependence forward for all the 5435 1.1 mrg outer loops: 5436 1.1 mrg 5437 1.1 mrg | for (k = 1; k < 100; k++) 5438 1.1 mrg | for (i = 1; i < 100; i++) 5439 1.1 mrg | for (j = 1; j < 100; j++) 5440 1.1 mrg | { 5441 1.1 mrg | t = T[j+1][i-1]; // A 5442 1.1 mrg | T[j][i] = t + 2; // B 5443 1.1 mrg | } 5444 1.1 mrg 5445 1.1 mrg the vectors are: 5446 1.1 mrg (0, 1, -1) 5447 1.1 mrg (1, 1, -1) 5448 1.1 mrg (1, -1, 1) 5449 1.1 mrg */ 5450 1.1 mrg if (DDR_NB_LOOPS (ddr) > 1) 5451 1.1 mrg { 5452 1.1 mrg add_outer_distances (ddr, save_v, index_carry); 5453 1.1 mrg add_outer_distances (ddr, dist_v, index_carry); 5454 1.1 mrg } 5455 1.1 mrg } 5456 1.1 mrg else 5457 1.1 mrg { 5458 1.1 mrg lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 5459 1.1 mrg lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr)); 5460 1.1 mrg 5461 1.1 mrg if (DDR_NB_LOOPS (ddr) > 1) 5462 1.1 mrg { 5463 1.1 mrg lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 5464 1.1 mrg 5465 
1.1 mrg if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest)) 5466 1.1 mrg return false; 5467 1.1 mrg compute_subscript_distance (ddr); 5468 1.1 mrg if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b, 5469 1.1 mrg &index_carry)) 5470 1.1 mrg return false; 5471 1.1 mrg 5472 1.1 mrg save_dist_v (ddr, save_v); 5473 1.1 mrg add_outer_distances (ddr, dist_v, index_carry); 5474 1.1 mrg add_outer_distances (ddr, opposite_v, index_carry); 5475 1.1 mrg } 5476 1.1 mrg else 5477 1.1 mrg save_dist_v (ddr, save_v); 5478 1.1 mrg } 5479 1.1 mrg } 5480 1.1 mrg else 5481 1.1 mrg { 5482 1.1 mrg /* There is a distance of 1 on all the outer loops: Example: 5483 1.1 mrg there is a dependence of distance 1 on loop_1 for the array A. 5484 1.1 mrg 5485 1.1 mrg | loop_1 5486 1.1 mrg | A[5] = ... 5487 1.1 mrg | endloop 5488 1.1 mrg */ 5489 1.1 mrg add_outer_distances (ddr, dist_v, 5490 1.1 mrg lambda_vector_first_nz (dist_v, 5491 1.1 mrg DDR_NB_LOOPS (ddr), 0)); 5492 1.1 mrg } 5493 1.1 mrg 5494 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 5495 1.1 mrg { 5496 1.1 mrg unsigned i; 5497 1.1 mrg 5498 1.1 mrg fprintf (dump_file, "(build_classic_dist_vector\n"); 5499 1.1 mrg for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++) 5500 1.1 mrg { 5501 1.1 mrg fprintf (dump_file, " dist_vector = ("); 5502 1.1 mrg print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i), 5503 1.1 mrg DDR_NB_LOOPS (ddr)); 5504 1.1 mrg fprintf (dump_file, " )\n"); 5505 1.1 mrg } 5506 1.1 mrg fprintf (dump_file, ")\n"); 5507 1.1 mrg } 5508 1.1 mrg 5509 1.1 mrg return true; 5510 1.1 mrg } 5511 1.1 mrg 5512 1.1 mrg /* Return the direction for a given distance. 5513 1.1 mrg FIXME: Computing dir this way is suboptimal, since dir can catch 5514 1.1 mrg cases that dist is unable to represent. 
*/ 5515 1.1 mrg 5516 1.1 mrg static inline enum data_dependence_direction 5517 1.1 mrg dir_from_dist (int dist) 5518 1.1 mrg { 5519 1.1 mrg if (dist > 0) 5520 1.1 mrg return dir_positive; 5521 1.1 mrg else if (dist < 0) 5522 1.1 mrg return dir_negative; 5523 1.1 mrg else 5524 1.1 mrg return dir_equal; 5525 1.1 mrg } 5526 1.1 mrg 5527 1.1 mrg /* Compute the classic per loop direction vector. DDR is the data 5528 1.1 mrg dependence relation to build a vector from. */ 5529 1.1 mrg 5530 1.1 mrg static void 5531 1.1 mrg build_classic_dir_vector (struct data_dependence_relation *ddr) 5532 1.1 mrg { 5533 1.1 mrg unsigned i, j; 5534 1.1 mrg lambda_vector dist_v; 5535 1.1 mrg 5536 1.1 mrg FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v) 5537 1.1 mrg { 5538 1.1 mrg lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 5539 1.1 mrg 5540 1.1 mrg for (j = 0; j < DDR_NB_LOOPS (ddr); j++) 5541 1.1 mrg dir_v[j] = dir_from_dist (dist_v[j]); 5542 1.1 mrg 5543 1.1 mrg save_dir_v (ddr, dir_v); 5544 1.1 mrg } 5545 1.1 mrg } 5546 1.1 mrg 5547 1.1 mrg /* Helper function. Returns true when there is a dependence between the 5548 1.1 mrg data references. A_INDEX is the index of the first reference (0 for 5549 1.1 mrg DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference. 
*/ 5550 1.1 mrg 5551 1.1 mrg static bool 5552 1.1 mrg subscript_dependence_tester_1 (struct data_dependence_relation *ddr, 5553 1.1 mrg unsigned int a_index, unsigned int b_index, 5554 1.1 mrg class loop *loop_nest) 5555 1.1 mrg { 5556 1.1 mrg unsigned int i; 5557 1.1 mrg tree last_conflicts; 5558 1.1 mrg struct subscript *subscript; 5559 1.1 mrg tree res = NULL_TREE; 5560 1.1 mrg 5561 1.1 mrg for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++) 5562 1.1 mrg { 5563 1.1 mrg conflict_function *overlaps_a, *overlaps_b; 5564 1.1 mrg 5565 1.1 mrg analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index), 5566 1.1 mrg SUB_ACCESS_FN (subscript, b_index), 5567 1.1 mrg &overlaps_a, &overlaps_b, 5568 1.1 mrg &last_conflicts, loop_nest); 5569 1.1 mrg 5570 1.1 mrg if (SUB_CONFLICTS_IN_A (subscript)) 5571 1.1 mrg free_conflict_function (SUB_CONFLICTS_IN_A (subscript)); 5572 1.1 mrg if (SUB_CONFLICTS_IN_B (subscript)) 5573 1.1 mrg free_conflict_function (SUB_CONFLICTS_IN_B (subscript)); 5574 1.1 mrg 5575 1.1 mrg SUB_CONFLICTS_IN_A (subscript) = overlaps_a; 5576 1.1 mrg SUB_CONFLICTS_IN_B (subscript) = overlaps_b; 5577 1.1 mrg SUB_LAST_CONFLICT (subscript) = last_conflicts; 5578 1.1 mrg 5579 1.1 mrg /* If there is any undetermined conflict function we have to 5580 1.1 mrg give a conservative answer in case we cannot prove that 5581 1.1 mrg no dependence exists when analyzing another subscript. */ 5582 1.1 mrg if (CF_NOT_KNOWN_P (overlaps_a) 5583 1.1 mrg || CF_NOT_KNOWN_P (overlaps_b)) 5584 1.1 mrg { 5585 1.1 mrg res = chrec_dont_know; 5586 1.1 mrg continue; 5587 1.1 mrg } 5588 1.1 mrg 5589 1.1 mrg /* When there is a subscript with no dependence we can stop. 
*/ 5590 1.1 mrg else if (CF_NO_DEPENDENCE_P (overlaps_a) 5591 1.1 mrg || CF_NO_DEPENDENCE_P (overlaps_b)) 5592 1.1 mrg { 5593 1.1 mrg res = chrec_known; 5594 1.1 mrg break; 5595 1.1 mrg } 5596 1.1 mrg } 5597 1.1 mrg 5598 1.1 mrg if (res == NULL_TREE) 5599 1.1 mrg return true; 5600 1.1 mrg 5601 1.1 mrg if (res == chrec_known) 5602 1.1 mrg dependence_stats.num_dependence_independent++; 5603 1.1 mrg else 5604 1.1 mrg dependence_stats.num_dependence_undetermined++; 5605 1.1 mrg finalize_ddr_dependent (ddr, res); 5606 1.1 mrg return false; 5607 1.1 mrg } 5608 1.1 mrg 5609 1.1 mrg /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */ 5610 1.1 mrg 5611 1.1 mrg static void 5612 1.1 mrg subscript_dependence_tester (struct data_dependence_relation *ddr, 5613 1.1 mrg class loop *loop_nest) 5614 1.1 mrg { 5615 1.1 mrg if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest)) 5616 1.1 mrg dependence_stats.num_dependence_dependent++; 5617 1.1 mrg 5618 1.1 mrg compute_subscript_distance (ddr); 5619 1.1 mrg if (build_classic_dist_vector (ddr, loop_nest)) 5620 1.1 mrg build_classic_dir_vector (ddr); 5621 1.1 mrg } 5622 1.1 mrg 5623 1.1 mrg /* Returns true when all the access functions of A are affine or 5624 1.1 mrg constant with respect to LOOP_NEST. */ 5625 1.1 mrg 5626 1.1 mrg static bool 5627 1.1 mrg access_functions_are_affine_or_constant_p (const struct data_reference *a, 5628 1.1 mrg const class loop *loop_nest) 5629 1.1 mrg { 5630 1.1 mrg vec<tree> fns = DR_ACCESS_FNS (a); 5631 1.1 mrg for (tree t : fns) 5632 1.1 mrg if (!evolution_function_is_invariant_p (t, loop_nest->num) 5633 1.1 mrg && !evolution_function_is_affine_multivariate_p (t, loop_nest->num)) 5634 1.1 mrg return false; 5635 1.1 mrg 5636 1.1 mrg return true; 5637 1.1 mrg } 5638 1.1 mrg 5639 1.1 mrg /* This computes the affine dependence relation between A and B with 5640 1.1 mrg respect to LOOP_NEST. 
CHREC_KNOWN is used for representing the 5641 1.1 mrg independence between two accesses, while CHREC_DONT_KNOW is used 5642 1.1 mrg for representing the unknown relation. 5643 1.1 mrg 5644 1.1 mrg Note that it is possible to stop the computation of the dependence 5645 1.1 mrg relation the first time we detect a CHREC_KNOWN element for a given 5646 1.1 mrg subscript. */ 5647 1.1 mrg 5648 1.1 mrg void 5649 1.1 mrg compute_affine_dependence (struct data_dependence_relation *ddr, 5650 1.1 mrg class loop *loop_nest) 5651 1.1 mrg { 5652 1.1 mrg struct data_reference *dra = DDR_A (ddr); 5653 1.1 mrg struct data_reference *drb = DDR_B (ddr); 5654 1.1 mrg 5655 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 5656 1.1 mrg { 5657 1.1 mrg fprintf (dump_file, "(compute_affine_dependence\n"); 5658 1.1 mrg fprintf (dump_file, " ref_a: "); 5659 1.1 mrg print_generic_expr (dump_file, DR_REF (dra)); 5660 1.1 mrg fprintf (dump_file, ", stmt_a: "); 5661 1.1 mrg print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM); 5662 1.1 mrg fprintf (dump_file, " ref_b: "); 5663 1.1 mrg print_generic_expr (dump_file, DR_REF (drb)); 5664 1.1 mrg fprintf (dump_file, ", stmt_b: "); 5665 1.1 mrg print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM); 5666 1.1 mrg } 5667 1.1 mrg 5668 1.1 mrg /* Analyze only when the dependence relation is not yet known. */ 5669 1.1 mrg if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) 5670 1.1 mrg { 5671 1.1 mrg dependence_stats.num_dependence_tests++; 5672 1.1 mrg 5673 1.1 mrg if (access_functions_are_affine_or_constant_p (dra, loop_nest) 5674 1.1 mrg && access_functions_are_affine_or_constant_p (drb, loop_nest)) 5675 1.1 mrg subscript_dependence_tester (ddr, loop_nest); 5676 1.1 mrg 5677 1.1 mrg /* As a last case, if the dependence cannot be determined, or if 5678 1.1 mrg the dependence is considered too difficult to determine, answer 5679 1.1 mrg "don't know". 
*/ 5680 1.1 mrg else 5681 1.1 mrg { 5682 1.1 mrg dependence_stats.num_dependence_undetermined++; 5683 1.1 mrg 5684 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 5685 1.1 mrg { 5686 1.1 mrg fprintf (dump_file, "Data ref a:\n"); 5687 1.1 mrg dump_data_reference (dump_file, dra); 5688 1.1 mrg fprintf (dump_file, "Data ref b:\n"); 5689 1.1 mrg dump_data_reference (dump_file, drb); 5690 1.1 mrg fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n"); 5691 1.1 mrg } 5692 1.1 mrg finalize_ddr_dependent (ddr, chrec_dont_know); 5693 1.1 mrg } 5694 1.1 mrg } 5695 1.1 mrg 5696 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 5697 1.1 mrg { 5698 1.1 mrg if (DDR_ARE_DEPENDENT (ddr) == chrec_known) 5699 1.1 mrg fprintf (dump_file, ") -> no dependence\n"); 5700 1.1 mrg else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) 5701 1.1 mrg fprintf (dump_file, ") -> dependence analysis failed\n"); 5702 1.1 mrg else 5703 1.1 mrg fprintf (dump_file, ")\n"); 5704 1.1 mrg } 5705 1.1 mrg } 5706 1.1 mrg 5707 1.1 mrg /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all 5708 1.1 mrg the data references in DATAREFS, in the LOOP_NEST. When 5709 1.1 mrg COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self 5710 1.1 mrg relations. Return true when successful, i.e. data references number 5711 1.1 mrg is small enough to be handled. 
*/ 5712 1.1 mrg 5713 1.1 mrg bool 5714 1.1 mrg compute_all_dependences (const vec<data_reference_p> &datarefs, 5715 1.1 mrg vec<ddr_p> *dependence_relations, 5716 1.1 mrg const vec<loop_p> &loop_nest, 5717 1.1 mrg bool compute_self_and_rr) 5718 1.1 mrg { 5719 1.1 mrg struct data_dependence_relation *ddr; 5720 1.1 mrg struct data_reference *a, *b; 5721 1.1 mrg unsigned int i, j; 5722 1.1 mrg 5723 1.1 mrg if ((int) datarefs.length () 5724 1.1 mrg > param_loop_max_datarefs_for_datadeps) 5725 1.1 mrg { 5726 1.1 mrg struct data_dependence_relation *ddr; 5727 1.1 mrg 5728 1.1 mrg /* Insert a single relation into dependence_relations: 5729 1.1 mrg chrec_dont_know. */ 5730 1.1 mrg ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest); 5731 1.1 mrg dependence_relations->safe_push (ddr); 5732 1.1 mrg return false; 5733 1.1 mrg } 5734 1.1 mrg 5735 1.1 mrg FOR_EACH_VEC_ELT (datarefs, i, a) 5736 1.1 mrg for (j = i + 1; datarefs.iterate (j, &b); j++) 5737 1.1 mrg if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr) 5738 1.1 mrg { 5739 1.1 mrg ddr = initialize_data_dependence_relation (a, b, loop_nest); 5740 1.1 mrg dependence_relations->safe_push (ddr); 5741 1.1 mrg if (loop_nest.exists ()) 5742 1.1 mrg compute_affine_dependence (ddr, loop_nest[0]); 5743 1.1 mrg } 5744 1.1 mrg 5745 1.1 mrg if (compute_self_and_rr) 5746 1.1 mrg FOR_EACH_VEC_ELT (datarefs, i, a) 5747 1.1 mrg { 5748 1.1 mrg ddr = initialize_data_dependence_relation (a, a, loop_nest); 5749 1.1 mrg dependence_relations->safe_push (ddr); 5750 1.1 mrg if (loop_nest.exists ()) 5751 1.1 mrg compute_affine_dependence (ddr, loop_nest[0]); 5752 1.1 mrg } 5753 1.1 mrg 5754 1.1 mrg return true; 5755 1.1 mrg } 5756 1.1 mrg 5757 1.1 mrg /* Describes a location of a memory reference. */ 5758 1.1 mrg 5759 1.1 mrg struct data_ref_loc 5760 1.1 mrg { 5761 1.1 mrg /* The memory reference. */ 5762 1.1 mrg tree ref; 5763 1.1 mrg 5764 1.1 mrg /* True if the memory reference is read. 
*/ 5765 1.1 mrg bool is_read; 5766 1.1 mrg 5767 1.1 mrg /* True if the data reference is conditional within the containing 5768 1.1 mrg statement, i.e. if it might not occur even when the statement 5769 1.1 mrg is executed and runs to completion. */ 5770 1.1 mrg bool is_conditional_in_stmt; 5771 1.1 mrg }; 5772 1.1 mrg 5773 1.1 mrg 5774 1.1 mrg /* Stores the locations of memory references in STMT to REFERENCES. Returns 5775 1.1 mrg true if STMT clobbers memory, false otherwise. */ 5776 1.1 mrg 5777 1.1 mrg static bool 5778 1.1 mrg get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references) 5779 1.1 mrg { 5780 1.1 mrg bool clobbers_memory = false; 5781 1.1 mrg data_ref_loc ref; 5782 1.1 mrg tree op0, op1; 5783 1.1 mrg enum gimple_code stmt_code = gimple_code (stmt); 5784 1.1 mrg 5785 1.1 mrg /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects. 5786 1.1 mrg As we cannot model data-references to not spelled out 5787 1.1 mrg accesses give up if they may occur. */ 5788 1.1 mrg if (stmt_code == GIMPLE_CALL 5789 1.1 mrg && !(gimple_call_flags (stmt) & ECF_CONST)) 5790 1.1 mrg { 5791 1.1 mrg /* Allow IFN_GOMP_SIMD_LANE in their own loops. 
*/ 5792 1.1 mrg if (gimple_call_internal_p (stmt)) 5793 1.1 mrg switch (gimple_call_internal_fn (stmt)) 5794 1.1 mrg { 5795 1.1 mrg case IFN_GOMP_SIMD_LANE: 5796 1.1 mrg { 5797 1.1 mrg class loop *loop = gimple_bb (stmt)->loop_father; 5798 1.1 mrg tree uid = gimple_call_arg (stmt, 0); 5799 1.1 mrg gcc_assert (TREE_CODE (uid) == SSA_NAME); 5800 1.1 mrg if (loop == NULL 5801 1.1 mrg || loop->simduid != SSA_NAME_VAR (uid)) 5802 1.1 mrg clobbers_memory = true; 5803 1.1 mrg break; 5804 1.1 mrg } 5805 1.1 mrg case IFN_MASK_LOAD: 5806 1.1 mrg case IFN_MASK_STORE: 5807 1.1 mrg break; 5808 1.1 mrg default: 5809 1.1 mrg clobbers_memory = true; 5810 1.1 mrg break; 5811 1.1 mrg } 5812 1.1 mrg else 5813 1.1 mrg clobbers_memory = true; 5814 1.1 mrg } 5815 1.1 mrg else if (stmt_code == GIMPLE_ASM 5816 1.1 mrg && (gimple_asm_volatile_p (as_a <gasm *> (stmt)) 5817 1.1 mrg || gimple_vuse (stmt))) 5818 1.1 mrg clobbers_memory = true; 5819 1.1 mrg 5820 1.1 mrg if (!gimple_vuse (stmt)) 5821 1.1 mrg return clobbers_memory; 5822 1.1 mrg 5823 1.1 mrg if (stmt_code == GIMPLE_ASSIGN) 5824 1.1 mrg { 5825 1.1 mrg tree base; 5826 1.1 mrg op0 = gimple_assign_lhs (stmt); 5827 1.1 mrg op1 = gimple_assign_rhs1 (stmt); 5828 1.1 mrg 5829 1.1 mrg if (DECL_P (op1) 5830 1.1 mrg || (REFERENCE_CLASS_P (op1) 5831 1.1 mrg && (base = get_base_address (op1)) 5832 1.1 mrg && TREE_CODE (base) != SSA_NAME 5833 1.1 mrg && !is_gimple_min_invariant (base))) 5834 1.1 mrg { 5835 1.1 mrg ref.ref = op1; 5836 1.1 mrg ref.is_read = true; 5837 1.1 mrg ref.is_conditional_in_stmt = false; 5838 1.1 mrg references->safe_push (ref); 5839 1.1 mrg } 5840 1.1 mrg } 5841 1.1 mrg else if (stmt_code == GIMPLE_CALL) 5842 1.1 mrg { 5843 1.1 mrg unsigned i, n; 5844 1.1 mrg tree ptr, type; 5845 1.1 mrg unsigned int align; 5846 1.1 mrg 5847 1.1 mrg ref.is_read = false; 5848 1.1 mrg if (gimple_call_internal_p (stmt)) 5849 1.1 mrg switch (gimple_call_internal_fn (stmt)) 5850 1.1 mrg { 5851 1.1 mrg case IFN_MASK_LOAD: 5852 1.1 mrg if 
(gimple_call_lhs (stmt) == NULL_TREE) 5853 1.1 mrg break; 5854 1.1 mrg ref.is_read = true; 5855 1.1 mrg /* FALLTHRU */ 5856 1.1 mrg case IFN_MASK_STORE: 5857 1.1 mrg ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0); 5858 1.1 mrg align = tree_to_shwi (gimple_call_arg (stmt, 1)); 5859 1.1 mrg if (ref.is_read) 5860 1.1 mrg type = TREE_TYPE (gimple_call_lhs (stmt)); 5861 1.1 mrg else 5862 1.1 mrg type = TREE_TYPE (gimple_call_arg (stmt, 3)); 5863 1.1 mrg if (TYPE_ALIGN (type) != align) 5864 1.1 mrg type = build_aligned_type (type, align); 5865 1.1 mrg ref.is_conditional_in_stmt = true; 5866 1.1 mrg ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0), 5867 1.1 mrg ptr); 5868 1.1 mrg references->safe_push (ref); 5869 1.1 mrg return false; 5870 1.1 mrg default: 5871 1.1 mrg break; 5872 1.1 mrg } 5873 1.1 mrg 5874 1.1 mrg op0 = gimple_call_lhs (stmt); 5875 1.1 mrg n = gimple_call_num_args (stmt); 5876 1.1 mrg for (i = 0; i < n; i++) 5877 1.1 mrg { 5878 1.1 mrg op1 = gimple_call_arg (stmt, i); 5879 1.1 mrg 5880 1.1 mrg if (DECL_P (op1) 5881 1.1 mrg || (REFERENCE_CLASS_P (op1) && get_base_address (op1))) 5882 1.1 mrg { 5883 1.1 mrg ref.ref = op1; 5884 1.1 mrg ref.is_read = true; 5885 1.1 mrg ref.is_conditional_in_stmt = false; 5886 1.1 mrg references->safe_push (ref); 5887 1.1 mrg } 5888 1.1 mrg } 5889 1.1 mrg } 5890 1.1 mrg else 5891 1.1 mrg return clobbers_memory; 5892 1.1 mrg 5893 1.1 mrg if (op0 5894 1.1 mrg && (DECL_P (op0) 5895 1.1 mrg || (REFERENCE_CLASS_P (op0) && get_base_address (op0)))) 5896 1.1 mrg { 5897 1.1 mrg ref.ref = op0; 5898 1.1 mrg ref.is_read = false; 5899 1.1 mrg ref.is_conditional_in_stmt = false; 5900 1.1 mrg references->safe_push (ref); 5901 1.1 mrg } 5902 1.1 mrg return clobbers_memory; 5903 1.1 mrg } 5904 1.1 mrg 5905 1.1 mrg 5906 1.1 mrg /* Returns true if the loop-nest has any data reference. 
*/ 5907 1.1 mrg 5908 1.1 mrg bool 5909 1.1 mrg loop_nest_has_data_refs (loop_p loop) 5910 1.1 mrg { 5911 1.1 mrg basic_block *bbs = get_loop_body (loop); 5912 1.1 mrg auto_vec<data_ref_loc, 3> references; 5913 1.1 mrg 5914 1.1 mrg for (unsigned i = 0; i < loop->num_nodes; i++) 5915 1.1 mrg { 5916 1.1 mrg basic_block bb = bbs[i]; 5917 1.1 mrg gimple_stmt_iterator bsi; 5918 1.1 mrg 5919 1.1 mrg for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) 5920 1.1 mrg { 5921 1.1 mrg gimple *stmt = gsi_stmt (bsi); 5922 1.1 mrg get_references_in_stmt (stmt, &references); 5923 1.1 mrg if (references.length ()) 5924 1.1 mrg { 5925 1.1 mrg free (bbs); 5926 1.1 mrg return true; 5927 1.1 mrg } 5928 1.1 mrg } 5929 1.1 mrg } 5930 1.1 mrg free (bbs); 5931 1.1 mrg return false; 5932 1.1 mrg } 5933 1.1 mrg 5934 1.1 mrg /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable 5935 1.1 mrg reference, returns false, otherwise returns true. NEST is the outermost 5936 1.1 mrg loop of the loop nest in which the references should be analyzed. */ 5937 1.1 mrg 5938 1.1 mrg opt_result 5939 1.1 mrg find_data_references_in_stmt (class loop *nest, gimple *stmt, 5940 1.1 mrg vec<data_reference_p> *datarefs) 5941 1.1 mrg { 5942 1.1 mrg auto_vec<data_ref_loc, 2> references; 5943 1.1 mrg data_reference_p dr; 5944 1.1 mrg 5945 1.1 mrg if (get_references_in_stmt (stmt, &references)) 5946 1.1 mrg return opt_result::failure_at (stmt, "statement clobbers memory: %G", 5947 1.1 mrg stmt); 5948 1.1 mrg 5949 1.1 mrg for (const data_ref_loc &ref : references) 5950 1.1 mrg { 5951 1.1 mrg dr = create_data_ref (nest ? 
loop_preheader_edge (nest) : NULL, 5952 1.1 mrg loop_containing_stmt (stmt), ref.ref, 5953 1.1 mrg stmt, ref.is_read, ref.is_conditional_in_stmt); 5954 1.1 mrg gcc_assert (dr != NULL); 5955 1.1 mrg datarefs->safe_push (dr); 5956 1.1 mrg } 5957 1.1 mrg 5958 1.1 mrg return opt_result::success (); 5959 1.1 mrg } 5960 1.1 mrg 5961 1.1 mrg /* Stores the data references in STMT to DATAREFS. If there is an 5962 1.1 mrg unanalyzable reference, returns false, otherwise returns true. 5963 1.1 mrg NEST is the outermost loop of the loop nest in which the references 5964 1.1 mrg should be instantiated, LOOP is the loop in which the references 5965 1.1 mrg should be analyzed. */ 5966 1.1 mrg 5967 1.1 mrg bool 5968 1.1 mrg graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt, 5969 1.1 mrg vec<data_reference_p> *datarefs) 5970 1.1 mrg { 5971 1.1 mrg auto_vec<data_ref_loc, 2> references; 5972 1.1 mrg bool ret = true; 5973 1.1 mrg data_reference_p dr; 5974 1.1 mrg 5975 1.1 mrg if (get_references_in_stmt (stmt, &references)) 5976 1.1 mrg return false; 5977 1.1 mrg 5978 1.1 mrg for (const data_ref_loc &ref : references) 5979 1.1 mrg { 5980 1.1 mrg dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read, 5981 1.1 mrg ref.is_conditional_in_stmt); 5982 1.1 mrg gcc_assert (dr != NULL); 5983 1.1 mrg datarefs->safe_push (dr); 5984 1.1 mrg } 5985 1.1 mrg 5986 1.1 mrg return ret; 5987 1.1 mrg } 5988 1.1 mrg 5989 1.1 mrg /* Search the data references in LOOP, and record the information into 5990 1.1 mrg DATAREFS. Returns chrec_dont_know when failing to analyze a 5991 1.1 mrg difficult case, returns NULL_TREE otherwise. 
*/ 5992 1.1 mrg 5993 1.1 mrg tree 5994 1.1 mrg find_data_references_in_bb (class loop *loop, basic_block bb, 5995 1.1 mrg vec<data_reference_p> *datarefs) 5996 1.1 mrg { 5997 1.1 mrg gimple_stmt_iterator bsi; 5998 1.1 mrg 5999 1.1 mrg for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) 6000 1.1 mrg { 6001 1.1 mrg gimple *stmt = gsi_stmt (bsi); 6002 1.1 mrg 6003 1.1 mrg if (!find_data_references_in_stmt (loop, stmt, datarefs)) 6004 1.1 mrg { 6005 1.1 mrg struct data_reference *res; 6006 1.1 mrg res = XCNEW (struct data_reference); 6007 1.1 mrg datarefs->safe_push (res); 6008 1.1 mrg 6009 1.1 mrg return chrec_dont_know; 6010 1.1 mrg } 6011 1.1 mrg } 6012 1.1 mrg 6013 1.1 mrg return NULL_TREE; 6014 1.1 mrg } 6015 1.1 mrg 6016 1.1 mrg /* Search the data references in LOOP, and record the information into 6017 1.1 mrg DATAREFS. Returns chrec_dont_know when failing to analyze a 6018 1.1 mrg difficult case, returns NULL_TREE otherwise. 6019 1.1 mrg 6020 1.1 mrg TODO: This function should be made smarter so that it can handle address 6021 1.1 mrg arithmetic as if they were array accesses, etc. */ 6022 1.1 mrg 6023 1.1 mrg tree 6024 1.1 mrg find_data_references_in_loop (class loop *loop, 6025 1.1 mrg vec<data_reference_p> *datarefs) 6026 1.1 mrg { 6027 1.1 mrg basic_block bb, *bbs; 6028 1.1 mrg unsigned int i; 6029 1.1 mrg 6030 1.1 mrg bbs = get_loop_body_in_dom_order (loop); 6031 1.1 mrg 6032 1.1 mrg for (i = 0; i < loop->num_nodes; i++) 6033 1.1 mrg { 6034 1.1 mrg bb = bbs[i]; 6035 1.1 mrg 6036 1.1 mrg if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know) 6037 1.1 mrg { 6038 1.1 mrg free (bbs); 6039 1.1 mrg return chrec_dont_know; 6040 1.1 mrg } 6041 1.1 mrg } 6042 1.1 mrg free (bbs); 6043 1.1 mrg 6044 1.1 mrg return NULL_TREE; 6045 1.1 mrg } 6046 1.1 mrg 6047 1.1 mrg /* Return the alignment in bytes that DRB is guaranteed to have at all 6048 1.1 mrg times. 
*/ 6049 1.1 mrg 6050 1.1 mrg unsigned int 6051 1.1 mrg dr_alignment (innermost_loop_behavior *drb) 6052 1.1 mrg { 6053 1.1 mrg /* Get the alignment of BASE_ADDRESS + INIT. */ 6054 1.1 mrg unsigned int alignment = drb->base_alignment; 6055 1.1 mrg unsigned int misalignment = (drb->base_misalignment 6056 1.1 mrg + TREE_INT_CST_LOW (drb->init)); 6057 1.1 mrg if (misalignment != 0) 6058 1.1 mrg alignment = MIN (alignment, misalignment & -misalignment); 6059 1.1 mrg 6060 1.1 mrg /* Cap it to the alignment of OFFSET. */ 6061 1.1 mrg if (!integer_zerop (drb->offset)) 6062 1.1 mrg alignment = MIN (alignment, drb->offset_alignment); 6063 1.1 mrg 6064 1.1 mrg /* Cap it to the alignment of STEP. */ 6065 1.1 mrg if (!integer_zerop (drb->step)) 6066 1.1 mrg alignment = MIN (alignment, drb->step_alignment); 6067 1.1 mrg 6068 1.1 mrg return alignment; 6069 1.1 mrg } 6070 1.1 mrg 6071 1.1 mrg /* If BASE is a pointer-typed SSA name, try to find the object that it 6072 1.1 mrg is based on. Return this object X on success and store the alignment 6073 1.1 mrg in bytes of BASE - &X in *ALIGNMENT_OUT. */ 6074 1.1 mrg 6075 1.1 mrg static tree 6076 1.1 mrg get_base_for_alignment_1 (tree base, unsigned int *alignment_out) 6077 1.1 mrg { 6078 1.1 mrg if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base))) 6079 1.1 mrg return NULL_TREE; 6080 1.1 mrg 6081 1.1 mrg gimple *def = SSA_NAME_DEF_STMT (base); 6082 1.1 mrg base = analyze_scalar_evolution (loop_containing_stmt (def), base); 6083 1.1 mrg 6084 1.1 mrg /* Peel chrecs and record the minimum alignment preserved by 6085 1.1 mrg all steps. 
*/ 6086 1.1 mrg unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT; 6087 1.1 mrg while (TREE_CODE (base) == POLYNOMIAL_CHREC) 6088 1.1 mrg { 6089 1.1 mrg unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base)); 6090 1.1 mrg alignment = MIN (alignment, step_alignment); 6091 1.1 mrg base = CHREC_LEFT (base); 6092 1.1 mrg } 6093 1.1 mrg 6094 1.1 mrg /* Punt if the expression is too complicated to handle. */ 6095 1.1 mrg if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base))) 6096 1.1 mrg return NULL_TREE; 6097 1.1 mrg 6098 1.1 mrg /* The only useful cases are those for which a dereference folds to something 6099 1.1 mrg other than an INDIRECT_REF. */ 6100 1.1 mrg tree ref_type = TREE_TYPE (TREE_TYPE (base)); 6101 1.1 mrg tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base); 6102 1.1 mrg if (!ref) 6103 1.1 mrg return NULL_TREE; 6104 1.1 mrg 6105 1.1 mrg /* Analyze the base to which the steps we peeled were applied. */ 6106 1.1 mrg poly_int64 bitsize, bitpos, bytepos; 6107 1.1 mrg machine_mode mode; 6108 1.1 mrg int unsignedp, reversep, volatilep; 6109 1.1 mrg tree offset; 6110 1.1 mrg base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode, 6111 1.1 mrg &unsignedp, &reversep, &volatilep); 6112 1.1 mrg if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos)) 6113 1.1 mrg return NULL_TREE; 6114 1.1 mrg 6115 1.1 mrg /* Restrict the alignment to that guaranteed by the offsets. 
*/ 6116 1.1 mrg unsigned int bytepos_alignment = known_alignment (bytepos); 6117 1.1 mrg if (bytepos_alignment != 0) 6118 1.1 mrg alignment = MIN (alignment, bytepos_alignment); 6119 1.1 mrg if (offset) 6120 1.1 mrg { 6121 1.1 mrg unsigned int offset_alignment = highest_pow2_factor (offset); 6122 1.1 mrg alignment = MIN (alignment, offset_alignment); 6123 1.1 mrg } 6124 1.1 mrg 6125 1.1 mrg *alignment_out = alignment; 6126 1.1 mrg return base; 6127 1.1 mrg } 6128 1.1 mrg 6129 1.1 mrg /* Return the object whose alignment would need to be changed in order 6130 1.1 mrg to increase the alignment of ADDR. Store the maximum achievable 6131 1.1 mrg alignment in *MAX_ALIGNMENT. */ 6132 1.1 mrg 6133 1.1 mrg tree 6134 1.1 mrg get_base_for_alignment (tree addr, unsigned int *max_alignment) 6135 1.1 mrg { 6136 1.1 mrg tree base = get_base_for_alignment_1 (addr, max_alignment); 6137 1.1 mrg if (base) 6138 1.1 mrg return base; 6139 1.1 mrg 6140 1.1 mrg if (TREE_CODE (addr) == ADDR_EXPR) 6141 1.1 mrg addr = TREE_OPERAND (addr, 0); 6142 1.1 mrg *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT; 6143 1.1 mrg return addr; 6144 1.1 mrg } 6145 1.1 mrg 6146 1.1 mrg /* Recursive helper function. */ 6147 1.1 mrg 6148 1.1 mrg static bool 6149 1.1 mrg find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest) 6150 1.1 mrg { 6151 1.1 mrg /* Inner loops of the nest should not contain siblings. Example: 6152 1.1 mrg when there are two consecutive loops, 6153 1.1 mrg 6154 1.1 mrg | loop_0 6155 1.1 mrg | loop_1 6156 1.1 mrg | A[{0, +, 1}_1] 6157 1.1 mrg | endloop_1 6158 1.1 mrg | loop_2 6159 1.1 mrg | A[{0, +, 1}_2] 6160 1.1 mrg | endloop_2 6161 1.1 mrg | endloop_0 6162 1.1 mrg 6163 1.1 mrg the dependence relation cannot be captured by the distance 6164 1.1 mrg abstraction. 
*/ 6165 1.1 mrg if (loop->next) 6166 1.1 mrg return false; 6167 1.1 mrg 6168 1.1 mrg loop_nest->safe_push (loop); 6169 1.1 mrg if (loop->inner) 6170 1.1 mrg return find_loop_nest_1 (loop->inner, loop_nest); 6171 1.1 mrg return true; 6172 1.1 mrg } 6173 1.1 mrg 6174 1.1 mrg /* Return false when the LOOP is not well nested. Otherwise return 6175 1.1 mrg true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will 6176 1.1 mrg contain the loops from the outermost to the innermost, as they will 6177 1.1 mrg appear in the classic distance vector. */ 6178 1.1 mrg 6179 1.1 mrg bool 6180 1.1 mrg find_loop_nest (class loop *loop, vec<loop_p> *loop_nest) 6181 1.1 mrg { 6182 1.1 mrg loop_nest->safe_push (loop); 6183 1.1 mrg if (loop->inner) 6184 1.1 mrg return find_loop_nest_1 (loop->inner, loop_nest); 6185 1.1 mrg return true; 6186 1.1 mrg } 6187 1.1 mrg 6188 1.1 mrg /* Returns true when the data dependences have been computed, false otherwise. 6189 1.1 mrg Given a loop nest LOOP, the following vectors are returned: 6190 1.1 mrg DATAREFS is initialized to all the array elements contained in this loop, 6191 1.1 mrg DEPENDENCE_RELATIONS contains the relations between the data references. 6192 1.1 mrg Compute read-read and self relations if 6193 1.1 mrg COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */ 6194 1.1 mrg 6195 1.1 mrg bool 6196 1.1 mrg compute_data_dependences_for_loop (class loop *loop, 6197 1.1 mrg bool compute_self_and_read_read_dependences, 6198 1.1 mrg vec<loop_p> *loop_nest, 6199 1.1 mrg vec<data_reference_p> *datarefs, 6200 1.1 mrg vec<ddr_p> *dependence_relations) 6201 1.1 mrg { 6202 1.1 mrg bool res = true; 6203 1.1 mrg 6204 1.1 mrg memset (&dependence_stats, 0, sizeof (dependence_stats)); 6205 1.1 mrg 6206 1.1 mrg /* If the loop nest is not well formed, or one of the data references 6207 1.1 mrg is not computable, give up without spending time to compute other 6208 1.1 mrg dependences. 
*/ 6209 1.1 mrg if (!loop 6210 1.1 mrg || !find_loop_nest (loop, loop_nest) 6211 1.1 mrg || find_data_references_in_loop (loop, datarefs) == chrec_dont_know 6212 1.1 mrg || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest, 6213 1.1 mrg compute_self_and_read_read_dependences)) 6214 1.1 mrg res = false; 6215 1.1 mrg 6216 1.1 mrg if (dump_file && (dump_flags & TDF_STATS)) 6217 1.1 mrg { 6218 1.1 mrg fprintf (dump_file, "Dependence tester statistics:\n"); 6219 1.1 mrg 6220 1.1 mrg fprintf (dump_file, "Number of dependence tests: %d\n", 6221 1.1 mrg dependence_stats.num_dependence_tests); 6222 1.1 mrg fprintf (dump_file, "Number of dependence tests classified dependent: %d\n", 6223 1.1 mrg dependence_stats.num_dependence_dependent); 6224 1.1 mrg fprintf (dump_file, "Number of dependence tests classified independent: %d\n", 6225 1.1 mrg dependence_stats.num_dependence_independent); 6226 1.1 mrg fprintf (dump_file, "Number of undetermined dependence tests: %d\n", 6227 1.1 mrg dependence_stats.num_dependence_undetermined); 6228 1.1 mrg 6229 1.1 mrg fprintf (dump_file, "Number of subscript tests: %d\n", 6230 1.1 mrg dependence_stats.num_subscript_tests); 6231 1.1 mrg fprintf (dump_file, "Number of undetermined subscript tests: %d\n", 6232 1.1 mrg dependence_stats.num_subscript_undetermined); 6233 1.1 mrg fprintf (dump_file, "Number of same subscript function: %d\n", 6234 1.1 mrg dependence_stats.num_same_subscript_function); 6235 1.1 mrg 6236 1.1 mrg fprintf (dump_file, "Number of ziv tests: %d\n", 6237 1.1 mrg dependence_stats.num_ziv); 6238 1.1 mrg fprintf (dump_file, "Number of ziv tests returning dependent: %d\n", 6239 1.1 mrg dependence_stats.num_ziv_dependent); 6240 1.1 mrg fprintf (dump_file, "Number of ziv tests returning independent: %d\n", 6241 1.1 mrg dependence_stats.num_ziv_independent); 6242 1.1 mrg fprintf (dump_file, "Number of ziv tests unimplemented: %d\n", 6243 1.1 mrg dependence_stats.num_ziv_unimplemented); 6244 1.1 mrg 6245 1.1 mrg 
fprintf (dump_file, "Number of siv tests: %d\n", 6246 1.1 mrg dependence_stats.num_siv); 6247 1.1 mrg fprintf (dump_file, "Number of siv tests returning dependent: %d\n", 6248 1.1 mrg dependence_stats.num_siv_dependent); 6249 1.1 mrg fprintf (dump_file, "Number of siv tests returning independent: %d\n", 6250 1.1 mrg dependence_stats.num_siv_independent); 6251 1.1 mrg fprintf (dump_file, "Number of siv tests unimplemented: %d\n", 6252 1.1 mrg dependence_stats.num_siv_unimplemented); 6253 1.1 mrg 6254 1.1 mrg fprintf (dump_file, "Number of miv tests: %d\n", 6255 1.1 mrg dependence_stats.num_miv); 6256 1.1 mrg fprintf (dump_file, "Number of miv tests returning dependent: %d\n", 6257 1.1 mrg dependence_stats.num_miv_dependent); 6258 1.1 mrg fprintf (dump_file, "Number of miv tests returning independent: %d\n", 6259 1.1 mrg dependence_stats.num_miv_independent); 6260 1.1 mrg fprintf (dump_file, "Number of miv tests unimplemented: %d\n", 6261 1.1 mrg dependence_stats.num_miv_unimplemented); 6262 1.1 mrg } 6263 1.1 mrg 6264 1.1 mrg return res; 6265 1.1 mrg } 6266 1.1 mrg 6267 1.1 mrg /* Free the memory used by a data dependence relation DDR. */ 6268 1.1 mrg 6269 1.1 mrg void 6270 1.1 mrg free_dependence_relation (struct data_dependence_relation *ddr) 6271 1.1 mrg { 6272 1.1 mrg if (ddr == NULL) 6273 1.1 mrg return; 6274 1.1 mrg 6275 1.1 mrg if (DDR_SUBSCRIPTS (ddr).exists ()) 6276 1.1 mrg free_subscripts (DDR_SUBSCRIPTS (ddr)); 6277 1.1 mrg DDR_DIST_VECTS (ddr).release (); 6278 1.1 mrg DDR_DIR_VECTS (ddr).release (); 6279 1.1 mrg 6280 1.1 mrg free (ddr); 6281 1.1 mrg } 6282 1.1 mrg 6283 1.1 mrg /* Free the memory used by the data dependence relations from 6284 1.1 mrg DEPENDENCE_RELATIONS. 
*/ 6285 1.1 mrg 6286 1.1 mrg void 6287 1.1 mrg free_dependence_relations (vec<ddr_p>& dependence_relations) 6288 1.1 mrg { 6289 1.1 mrg for (data_dependence_relation *ddr : dependence_relations) 6290 1.1 mrg if (ddr) 6291 1.1 mrg free_dependence_relation (ddr); 6292 1.1 mrg 6293 1.1 mrg dependence_relations.release (); 6294 1.1 mrg } 6295 1.1 mrg 6296 1.1 mrg /* Free the memory used by the data references from DATAREFS. */ 6297 1.1 mrg 6298 1.1 mrg void 6299 1.1 mrg free_data_refs (vec<data_reference_p>& datarefs) 6300 1.1 mrg { 6301 1.1 mrg for (data_reference *dr : datarefs) 6302 1.1 mrg free_data_ref (dr); 6303 1.1 mrg datarefs.release (); 6304 1.1 mrg } 6305 1.1 mrg 6306 1.1 mrg /* Common routine implementing both dr_direction_indicator and 6307 1.1 mrg dr_zero_step_indicator. Return USEFUL_MIN if the indicator is known 6308 1.1 mrg to be >= USEFUL_MIN and -1 if the indicator is known to be negative. 6309 1.1 mrg Return the step as the indicator otherwise. */ 6310 1.1 mrg 6311 1.1 mrg static tree 6312 1.1 mrg dr_step_indicator (struct data_reference *dr, int useful_min) 6313 1.1 mrg { 6314 1.1 mrg tree step = DR_STEP (dr); 6315 1.1 mrg if (!step) 6316 1.1 mrg return NULL_TREE; 6317 1.1 mrg STRIP_NOPS (step); 6318 1.1 mrg /* Look for cases where the step is scaled by a positive constant 6319 1.1 mrg integer, which will often be the access size. If the multiplication 6320 1.1 mrg doesn't change the sign (due to overflow effects) then we can 6321 1.1 mrg test the unscaled value instead. */ 6322 1.1 mrg if (TREE_CODE (step) == MULT_EXPR 6323 1.1 mrg && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST 6324 1.1 mrg && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0) 6325 1.1 mrg { 6326 1.1 mrg tree factor = TREE_OPERAND (step, 1); 6327 1.1 mrg step = TREE_OPERAND (step, 0); 6328 1.1 mrg 6329 1.1 mrg /* Strip widening and truncating conversions as well as nops. 
*/ 6330 1.1 mrg if (CONVERT_EXPR_P (step) 6331 1.1 mrg && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0)))) 6332 1.1 mrg step = TREE_OPERAND (step, 0); 6333 1.1 mrg tree type = TREE_TYPE (step); 6334 1.1 mrg 6335 1.1 mrg /* Get the range of step values that would not cause overflow. */ 6336 1.1 mrg widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype)) 6337 1.1 mrg / wi::to_widest (factor)); 6338 1.1 mrg widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype)) 6339 1.1 mrg / wi::to_widest (factor)); 6340 1.1 mrg 6341 1.1 mrg /* Get the range of values that the unconverted step actually has. */ 6342 1.1 mrg wide_int step_min, step_max; 6343 1.1 mrg value_range vr; 6344 1.1 mrg if (TREE_CODE (step) != SSA_NAME 6345 1.1 mrg || !get_range_query (cfun)->range_of_expr (vr, step) 6346 1.1 mrg || vr.kind () != VR_RANGE) 6347 1.1 mrg { 6348 1.1 mrg step_min = wi::to_wide (TYPE_MIN_VALUE (type)); 6349 1.1 mrg step_max = wi::to_wide (TYPE_MAX_VALUE (type)); 6350 1.1 mrg } 6351 1.1 mrg else 6352 1.1 mrg { 6353 1.1 mrg step_min = vr.lower_bound (); 6354 1.1 mrg step_max = vr.upper_bound (); 6355 1.1 mrg } 6356 1.1 mrg 6357 1.1 mrg /* Check whether the unconverted step has an acceptable range. */ 6358 1.1 mrg signop sgn = TYPE_SIGN (type); 6359 1.1 mrg if (wi::les_p (minv, widest_int::from (step_min, sgn)) 6360 1.1 mrg && wi::ges_p (maxv, widest_int::from (step_max, sgn))) 6361 1.1 mrg { 6362 1.1 mrg if (wi::ge_p (step_min, useful_min, sgn)) 6363 1.1 mrg return ssize_int (useful_min); 6364 1.1 mrg else if (wi::lt_p (step_max, 0, sgn)) 6365 1.1 mrg return ssize_int (-1); 6366 1.1 mrg else 6367 1.1 mrg return fold_convert (ssizetype, step); 6368 1.1 mrg } 6369 1.1 mrg } 6370 1.1 mrg return DR_STEP (dr); 6371 1.1 mrg } 6372 1.1 mrg 6373 1.1 mrg /* Return a value that is negative iff DR has a negative step. 
*/ 6374 1.1 mrg 6375 1.1 mrg tree 6376 1.1 mrg dr_direction_indicator (struct data_reference *dr) 6377 1.1 mrg { 6378 1.1 mrg return dr_step_indicator (dr, 0); 6379 1.1 mrg } 6380 1.1 mrg 6381 1.1 mrg /* Return a value that is zero iff DR has a zero step. */ 6382 1.1 mrg 6383 1.1 mrg tree 6384 1.1 mrg dr_zero_step_indicator (struct data_reference *dr) 6385 1.1 mrg { 6386 1.1 mrg return dr_step_indicator (dr, 1); 6387 1.1 mrg } 6388 1.1 mrg 6389 1.1 mrg /* Return true if DR is known to have a nonnegative (but possibly zero) 6390 1.1 mrg step. */ 6391 1.1 mrg 6392 1.1 mrg bool 6393 1.1 mrg dr_known_forward_stride_p (struct data_reference *dr) 6394 1.1 mrg { 6395 1.1 mrg tree indicator = dr_direction_indicator (dr); 6396 1.1 mrg tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node, 6397 1.1 mrg fold_convert (ssizetype, indicator), 6398 1.1 mrg ssize_int (0)); 6399 return neg_step_val && integer_zerop (neg_step_val); 6400 } 6401