1 /* 2 * Copyright 2011 INRIA Saclay 3 * Copyright 2012-2014 Ecole Normale Superieure 4 * Copyright 2015-2016 Sven Verdoolaege 5 * Copyright 2016 INRIA Paris 6 * Copyright 2017 Sven Verdoolaege 7 * 8 * Use of this software is governed by the MIT license 9 * 10 * Written by Sven Verdoolaege, INRIA Saclay - Ile-de-France, 11 * Parc Club Orsay Universite, ZAC des vignes, 4 rue Jacques Monod, 12 * 91893 Orsay, France 13 * and Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France 14 * and Centre de Recherche Inria de Paris, 2 rue Simone Iff - Voie DQ12, 15 * CS 42112, 75589 Paris Cedex 12, France 16 */ 17 18 #include <isl_ctx_private.h> 19 #include <isl_map_private.h> 20 #include <isl_space_private.h> 21 #include <isl_aff_private.h> 22 #include <isl/hash.h> 23 #include <isl/id.h> 24 #include <isl/constraint.h> 25 #include <isl/schedule.h> 26 #include <isl_schedule_constraints.h> 27 #include <isl/schedule_node.h> 28 #include <isl_mat_private.h> 29 #include <isl_vec_private.h> 30 #include <isl/set.h> 31 #include <isl_union_set_private.h> 32 #include <isl_seq.h> 33 #include <isl_tab.h> 34 #include <isl_dim_map.h> 35 #include <isl/map_to_basic_set.h> 36 #include <isl_sort.h> 37 #include <isl_options_private.h> 38 #include <isl_tarjan.h> 39 #include <isl_morph.h> 40 #include <isl/ilp.h> 41 #include <isl_val_private.h> 42 43 #include "isl_scheduler.h" 44 #include "isl_scheduler_clustering.h" 45 46 /* 47 * The scheduling algorithm implemented in this file was inspired by 48 * Bondhugula et al., "Automatic Transformations for Communication-Minimized 49 * Parallelization and Locality Optimization in the Polyhedral Model". 50 * 51 * For a detailed description of the variant implemented in isl, 52 * see Verdoolaege and Janssens, "Scheduling for PPCG" (2017). 53 */ 54 55 56 static isl_bool node_has_tuples(const void *entry, const void *val) 57 { 58 struct isl_sched_node *node = (struct isl_sched_node *)entry; 59 isl_space *space = (isl_space *) val; 60 61 return isl_space_has_equal_tuples(node->space, space); 62 } 63 64 int isl_sched_node_scc_exactly(struct isl_sched_node *node, int scc) 65 { 66 return node->scc == scc; 67 } 68 69 static int node_scc_at_most(struct isl_sched_node *node, int scc) 70 { 71 return node->scc <= scc; 72 } 73 74 static int node_scc_at_least(struct isl_sched_node *node, int scc) 75 { 76 return node->scc >= scc; 77 } 78 79 /* Is "edge" marked as being of type "type"? 80 */ 81 int isl_sched_edge_has_type(struct isl_sched_edge *edge, 82 enum isl_edge_type type) 83 { 84 return ISL_FL_ISSET(edge->types, 1 << type); 85 } 86 87 /* Mark "edge" as being of type "type". 88 */ 89 static void set_type(struct isl_sched_edge *edge, enum isl_edge_type type) 90 { 91 ISL_FL_SET(edge->types, 1 << type); 92 } 93 94 /* No longer mark "edge" as being of type "type"? 95 */ 96 static void clear_type(struct isl_sched_edge *edge, enum isl_edge_type type) 97 { 98 ISL_FL_CLR(edge->types, 1 << type); 99 } 100 101 /* Is "edge" marked as a validity edge? 102 */ 103 static int is_validity(struct isl_sched_edge *edge) 104 { 105 return isl_sched_edge_has_type(edge, isl_edge_validity); 106 } 107 108 /* Mark "edge" as a validity edge. 109 */ 110 static void set_validity(struct isl_sched_edge *edge) 111 { 112 set_type(edge, isl_edge_validity); 113 } 114 115 /* Is "edge" marked as a proximity edge? 116 */ 117 int isl_sched_edge_is_proximity(struct isl_sched_edge *edge) 118 { 119 return isl_sched_edge_has_type(edge, isl_edge_proximity); 120 } 121 122 /* Is "edge" marked as a local edge? 123 */ 124 static int is_local(struct isl_sched_edge *edge) 125 { 126 return isl_sched_edge_has_type(edge, isl_edge_local); 127 } 128 129 /* Mark "edge" as a local edge. 130 */ 131 static void set_local(struct isl_sched_edge *edge) 132 { 133 set_type(edge, isl_edge_local); 134 } 135 136 /* No longer mark "edge" as a local edge. 137 */ 138 static void clear_local(struct isl_sched_edge *edge) 139 { 140 clear_type(edge, isl_edge_local); 141 } 142 143 /* Is "edge" marked as a coincidence edge? 144 */ 145 static int is_coincidence(struct isl_sched_edge *edge) 146 { 147 return isl_sched_edge_has_type(edge, isl_edge_coincidence); 148 } 149 150 /* Is "edge" marked as a condition edge? 151 */ 152 int isl_sched_edge_is_condition(struct isl_sched_edge *edge) 153 { 154 return isl_sched_edge_has_type(edge, isl_edge_condition); 155 } 156 157 /* Is "edge" marked as a conditional validity edge? 158 */ 159 int isl_sched_edge_is_conditional_validity(struct isl_sched_edge *edge) 160 { 161 return isl_sched_edge_has_type(edge, isl_edge_conditional_validity); 162 } 163 164 /* Is "edge" of a type that can appear multiple times between 165 * the same pair of nodes? 166 * 167 * Condition edges and conditional validity edges may have tagged 168 * dependence relations, in which case an edge is added for each 169 * pair of tags. 170 */ 171 static int is_multi_edge_type(struct isl_sched_edge *edge) 172 { 173 return isl_sched_edge_is_condition(edge) || 174 isl_sched_edge_is_conditional_validity(edge); 175 } 176 177 /* Initialize node_table based on the list of nodes. 178 */ 179 static int graph_init_table(isl_ctx *ctx, struct isl_sched_graph *graph) 180 { 181 int i; 182 183 graph->node_table = isl_hash_table_alloc(ctx, graph->n); 184 if (!graph->node_table) 185 return -1; 186 187 for (i = 0; i < graph->n; ++i) { 188 struct isl_hash_table_entry *entry; 189 uint32_t hash; 190 191 hash = isl_space_get_tuple_hash(graph->node[i].space); 192 entry = isl_hash_table_find(ctx, graph->node_table, hash, 193 &node_has_tuples, 194 graph->node[i].space, 1); 195 if (!entry) 196 return -1; 197 entry->data = &graph->node[i]; 198 } 199 200 return 0; 201 } 202 203 /* Return a pointer to the node that lives within the given space, 204 * an invalid node if there is no such node, or NULL in case of error. 205 */ 206 struct isl_sched_node *isl_sched_graph_find_node(isl_ctx *ctx, 207 struct isl_sched_graph *graph, __isl_keep isl_space *space) 208 { 209 struct isl_hash_table_entry *entry; 210 uint32_t hash; 211 212 if (!space) 213 return NULL; 214 215 hash = isl_space_get_tuple_hash(space); 216 entry = isl_hash_table_find(ctx, graph->node_table, hash, 217 &node_has_tuples, space, 0); 218 if (!entry) 219 return NULL; 220 if (entry == isl_hash_table_entry_none) 221 return graph->node + graph->n; 222 223 return entry->data; 224 } 225 226 /* Is "node" a node in "graph"? 227 */ 228 int isl_sched_graph_is_node(struct isl_sched_graph *graph, 229 struct isl_sched_node *node) 230 { 231 return node && node >= &graph->node[0] && node < &graph->node[graph->n]; 232 } 233 234 static isl_bool edge_has_src_and_dst(const void *entry, const void *val) 235 { 236 const struct isl_sched_edge *edge = entry; 237 const struct isl_sched_edge *temp = val; 238 239 return isl_bool_ok(edge->src == temp->src && edge->dst == temp->dst); 240 } 241 242 /* Add the given edge to graph->edge_table[type]. 243 */ 244 static isl_stat graph_edge_table_add(isl_ctx *ctx, 245 struct isl_sched_graph *graph, enum isl_edge_type type, 246 struct isl_sched_edge *edge) 247 { 248 struct isl_hash_table_entry *entry; 249 uint32_t hash; 250 251 hash = isl_hash_init(); 252 hash = isl_hash_builtin(hash, edge->src); 253 hash = isl_hash_builtin(hash, edge->dst); 254 entry = isl_hash_table_find(ctx, graph->edge_table[type], hash, 255 &edge_has_src_and_dst, edge, 1); 256 if (!entry) 257 return isl_stat_error; 258 entry->data = edge; 259 260 return isl_stat_ok; 261 } 262 263 /* Add "edge" to all relevant edge tables. 264 * That is, for every type of the edge, add it to the corresponding table. 265 */ 266 static isl_stat graph_edge_tables_add(isl_ctx *ctx, 267 struct isl_sched_graph *graph, struct isl_sched_edge *edge) 268 { 269 enum isl_edge_type t; 270 271 for (t = isl_edge_first; t <= isl_edge_last; ++t) { 272 if (!isl_sched_edge_has_type(edge, t)) 273 continue; 274 if (graph_edge_table_add(ctx, graph, t, edge) < 0) 275 return isl_stat_error; 276 } 277 278 return isl_stat_ok; 279 } 280 281 /* Allocate the edge_tables based on the maximal number of edges of 282 * each type. 283 */ 284 static int graph_init_edge_tables(isl_ctx *ctx, struct isl_sched_graph *graph) 285 { 286 int i; 287 288 for (i = 0; i <= isl_edge_last; ++i) { 289 graph->edge_table[i] = isl_hash_table_alloc(ctx, 290 graph->max_edge[i]); 291 if (!graph->edge_table[i]) 292 return -1; 293 } 294 295 return 0; 296 } 297 298 /* If graph->edge_table[type] contains an edge from the given source 299 * to the given destination, then return the hash table entry of this edge. 300 * Otherwise, return NULL. 301 */ 302 static struct isl_hash_table_entry *graph_find_edge_entry( 303 struct isl_sched_graph *graph, 304 enum isl_edge_type type, 305 struct isl_sched_node *src, struct isl_sched_node *dst) 306 { 307 isl_ctx *ctx = isl_space_get_ctx(src->space); 308 uint32_t hash; 309 struct isl_sched_edge temp = { .src = src, .dst = dst }; 310 311 hash = isl_hash_init(); 312 hash = isl_hash_builtin(hash, temp.src); 313 hash = isl_hash_builtin(hash, temp.dst); 314 return isl_hash_table_find(ctx, graph->edge_table[type], hash, 315 &edge_has_src_and_dst, &temp, 0); 316 } 317 318 319 /* If graph->edge_table[type] contains an edge from the given source 320 * to the given destination, then return this edge. 321 * Return "none" if no such edge can be found. 322 * Return NULL on error. 323 */ 324 static struct isl_sched_edge *graph_find_edge(struct isl_sched_graph *graph, 325 enum isl_edge_type type, 326 struct isl_sched_node *src, struct isl_sched_node *dst, 327 struct isl_sched_edge *none) 328 { 329 struct isl_hash_table_entry *entry; 330 331 entry = graph_find_edge_entry(graph, type, src, dst); 332 if (!entry) 333 return NULL; 334 if (entry == isl_hash_table_entry_none) 335 return none; 336 337 return entry->data; 338 } 339 340 /* Check whether the dependence graph has an edge of the given type 341 * between the given two nodes. 342 */ 343 static isl_bool graph_has_edge(struct isl_sched_graph *graph, 344 enum isl_edge_type type, 345 struct isl_sched_node *src, struct isl_sched_node *dst) 346 { 347 struct isl_sched_edge dummy; 348 struct isl_sched_edge *edge; 349 isl_bool empty; 350 351 edge = graph_find_edge(graph, type, src, dst, &dummy); 352 if (!edge) 353 return isl_bool_error; 354 if (edge == &dummy) 355 return isl_bool_false; 356 357 empty = isl_map_plain_is_empty(edge->map); 358 359 return isl_bool_not(empty); 360 } 361 362 /* Look for any edge with the same src, dst and map fields as "model". 363 * 364 * Return the matching edge if one can be found. 365 * Return "model" if no matching edge is found. 366 * Return NULL on error. 367 */ 368 static struct isl_sched_edge *graph_find_matching_edge( 369 struct isl_sched_graph *graph, struct isl_sched_edge *model) 370 { 371 enum isl_edge_type i; 372 struct isl_sched_edge *edge; 373 374 for (i = isl_edge_first; i <= isl_edge_last; ++i) { 375 int is_equal; 376 377 edge = graph_find_edge(graph, i, model->src, model->dst, model); 378 if (!edge) 379 return NULL; 380 if (edge == model) 381 continue; 382 is_equal = isl_map_plain_is_equal(model->map, edge->map); 383 if (is_equal < 0) 384 return NULL; 385 if (is_equal) 386 return edge; 387 } 388 389 return model; 390 } 391 392 /* Remove the given edge from all the edge_tables that refer to it. 393 */ 394 static isl_stat graph_remove_edge(struct isl_sched_graph *graph, 395 struct isl_sched_edge *edge) 396 { 397 isl_ctx *ctx = isl_map_get_ctx(edge->map); 398 enum isl_edge_type i; 399 400 for (i = isl_edge_first; i <= isl_edge_last; ++i) { 401 struct isl_hash_table_entry *entry; 402 403 entry = graph_find_edge_entry(graph, i, edge->src, edge->dst); 404 if (!entry) 405 return isl_stat_error; 406 if (entry == isl_hash_table_entry_none) 407 continue; 408 if (entry->data != edge) 409 continue; 410 isl_hash_table_remove(ctx, graph->edge_table[i], entry); 411 } 412 413 return isl_stat_ok; 414 } 415 416 /* Check whether the dependence graph has any edge 417 * between the given two nodes. 418 */ 419 static isl_bool graph_has_any_edge(struct isl_sched_graph *graph, 420 struct isl_sched_node *src, struct isl_sched_node *dst) 421 { 422 enum isl_edge_type i; 423 isl_bool r; 424 425 for (i = isl_edge_first; i <= isl_edge_last; ++i) { 426 r = graph_has_edge(graph, i, src, dst); 427 if (r < 0 || r) 428 return r; 429 } 430 431 return r; 432 } 433 434 /* Check whether the dependence graph has a validity edge 435 * between the given two nodes. 436 * 437 * Conditional validity edges are essentially validity edges that 438 * can be ignored if the corresponding condition edges are iteration private. 439 * Here, we are only checking for the presence of validity 440 * edges, so we need to consider the conditional validity edges too. 441 * In particular, this function is used during the detection 442 * of strongly connected components and we cannot ignore 443 * conditional validity edges during this detection. 444 */ 445 isl_bool isl_sched_graph_has_validity_edge(struct isl_sched_graph *graph, 446 struct isl_sched_node *src, struct isl_sched_node *dst) 447 { 448 isl_bool r; 449 450 r = graph_has_edge(graph, isl_edge_validity, src, dst); 451 if (r < 0 || r) 452 return r; 453 454 return graph_has_edge(graph, isl_edge_conditional_validity, src, dst); 455 } 456 457 /* Perform all the required memory allocations for a schedule graph "graph" 458 * with "n_node" nodes and "n_edge" edge and initialize the corresponding 459 * fields. 460 */ 461 static isl_stat graph_alloc(isl_ctx *ctx, struct isl_sched_graph *graph, 462 int n_node, int n_edge) 463 { 464 int i; 465 466 graph->n = n_node; 467 graph->n_edge = n_edge; 468 graph->node = isl_calloc_array(ctx, struct isl_sched_node, graph->n); 469 graph->sorted = isl_calloc_array(ctx, int, graph->n); 470 graph->region = isl_alloc_array(ctx, 471 struct isl_trivial_region, graph->n); 472 graph->edge = isl_calloc_array(ctx, 473 struct isl_sched_edge, graph->n_edge); 474 475 graph->intra_hmap = isl_map_to_basic_set_alloc(ctx, 2 * n_edge); 476 graph->intra_hmap_param = isl_map_to_basic_set_alloc(ctx, 2 * n_edge); 477 graph->inter_hmap = isl_map_to_basic_set_alloc(ctx, 2 * n_edge); 478 479 if (!graph->node || !graph->region || (graph->n_edge && !graph->edge) || 480 !graph->sorted) 481 return isl_stat_error; 482 483 for(i = 0; i < graph->n; ++i) 484 graph->sorted[i] = i; 485 486 return isl_stat_ok; 487 } 488 489 /* Free the memory associated to node "node" in "graph". 490 * The "coincident" field is shared by nodes in a graph and its subgraph. 491 * It therefore only needs to be freed for the original dependence graph, 492 * i.e., one that is not the result of splitting. 493 */ 494 static void clear_node(struct isl_sched_graph *graph, 495 struct isl_sched_node *node) 496 { 497 isl_space_free(node->space); 498 isl_set_free(node->hull); 499 isl_multi_aff_free(node->compress); 500 isl_pw_multi_aff_free(node->decompress); 501 isl_mat_free(node->sched); 502 isl_map_free(node->sched_map); 503 isl_mat_free(node->indep); 504 isl_mat_free(node->vmap); 505 if (graph->root == graph) 506 free(node->coincident); 507 isl_multi_val_free(node->sizes); 508 isl_basic_set_free(node->bounds); 509 isl_vec_free(node->max); 510 } 511 512 void isl_sched_graph_free(isl_ctx *ctx, struct isl_sched_graph *graph) 513 { 514 int i; 515 516 isl_map_to_basic_set_free(graph->intra_hmap); 517 isl_map_to_basic_set_free(graph->intra_hmap_param); 518 isl_map_to_basic_set_free(graph->inter_hmap); 519 520 if (graph->node) 521 for (i = 0; i < graph->n; ++i) 522 clear_node(graph, &graph->node[i]); 523 free(graph->node); 524 free(graph->sorted); 525 if (graph->edge) 526 for (i = 0; i < graph->n_edge; ++i) { 527 isl_map_free(graph->edge[i].map); 528 isl_union_map_free(graph->edge[i].tagged_condition); 529 isl_union_map_free(graph->edge[i].tagged_validity); 530 } 531 free(graph->edge); 532 free(graph->region); 533 for (i = 0; i <= isl_edge_last; ++i) 534 isl_hash_table_free(ctx, graph->edge_table[i]); 535 isl_hash_table_free(ctx, graph->node_table); 536 isl_basic_set_free(graph->lp); 537 } 538 539 /* For each "set" on which this function is called, increment 540 * graph->n by one and update graph->maxvar. 541 */ 542 static isl_stat init_n_maxvar(__isl_take isl_set *set, void *user) 543 { 544 struct isl_sched_graph *graph = user; 545 isl_size nvar = isl_set_dim(set, isl_dim_set); 546 547 graph->n++; 548 if (nvar > graph->maxvar) 549 graph->maxvar = nvar; 550 551 isl_set_free(set); 552 553 if (nvar < 0) 554 return isl_stat_error; 555 return isl_stat_ok; 556 } 557 558 /* Compute the number of rows that should be allocated for the schedule. 559 * In particular, we need one row for each variable or one row 560 * for each basic map in the dependences. 561 * Note that it is practically impossible to exhaust both 562 * the number of dependences and the number of variables. 563 */ 564 static isl_stat compute_max_row(struct isl_sched_graph *graph, 565 __isl_keep isl_schedule_constraints *sc) 566 { 567 int n_edge; 568 isl_stat r; 569 isl_union_set *domain; 570 571 graph->n = 0; 572 graph->maxvar = 0; 573 domain = isl_schedule_constraints_get_domain(sc); 574 r = isl_union_set_foreach_set(domain, &init_n_maxvar, graph); 575 isl_union_set_free(domain); 576 if (r < 0) 577 return isl_stat_error; 578 n_edge = isl_schedule_constraints_n_basic_map(sc); 579 if (n_edge < 0) 580 return isl_stat_error; 581 graph->max_row = n_edge + graph->maxvar; 582 583 return isl_stat_ok; 584 } 585 586 /* Does "bset" have any defining equalities for its set variables? 587 */ 588 static isl_bool has_any_defining_equality(__isl_keep isl_basic_set *bset) 589 { 590 int i; 591 isl_size n; 592 593 n = isl_basic_set_dim(bset, isl_dim_set); 594 if (n < 0) 595 return isl_bool_error; 596 597 for (i = 0; i < n; ++i) { 598 isl_bool has; 599 600 has = isl_basic_set_has_defining_equality(bset, isl_dim_set, i, 601 NULL); 602 if (has < 0 || has) 603 return has; 604 } 605 606 return isl_bool_false; 607 } 608 609 /* Set the entries of node->max to the value of the schedule_max_coefficient 610 * option, if set. 611 */ 612 static isl_stat set_max_coefficient(isl_ctx *ctx, struct isl_sched_node *node) 613 { 614 int max; 615 616 max = isl_options_get_schedule_max_coefficient(ctx); 617 if (max == -1) 618 return isl_stat_ok; 619 620 node->max = isl_vec_alloc(ctx, node->nvar); 621 node->max = isl_vec_set_si(node->max, max); 622 if (!node->max) 623 return isl_stat_error; 624 625 return isl_stat_ok; 626 } 627 628 /* Set the entries of node->max to the minimum of the schedule_max_coefficient 629 * option (if set) and half of the minimum of the sizes in the other 630 * dimensions. Round up when computing the half such that 631 * if the minimum of the sizes is one, half of the size is taken to be one 632 * rather than zero. 633 * If the global minimum is unbounded (i.e., if both 634 * the schedule_max_coefficient is not set and the sizes in the other 635 * dimensions are unbounded), then store a negative value. 636 * If the schedule coefficient is close to the size of the instance set 637 * in another dimension, then the schedule may represent a loop 638 * coalescing transformation (especially if the coefficient 639 * in that other dimension is one). Forcing the coefficient to be 640 * smaller than or equal to half the minimal size should avoid this 641 * situation. 642 */ 643 static isl_stat compute_max_coefficient(isl_ctx *ctx, 644 struct isl_sched_node *node) 645 { 646 int max; 647 int i, j; 648 isl_vec *v; 649 650 max = isl_options_get_schedule_max_coefficient(ctx); 651 v = isl_vec_alloc(ctx, node->nvar); 652 if (!v) 653 return isl_stat_error; 654 655 for (i = 0; i < node->nvar; ++i) { 656 isl_int_set_si(v->el[i], max); 657 isl_int_mul_si(v->el[i], v->el[i], 2); 658 } 659 660 for (i = 0; i < node->nvar; ++i) { 661 isl_val *size; 662 663 size = isl_multi_val_get_val(node->sizes, i); 664 if (!size) 665 goto error; 666 if (!isl_val_is_int(size)) { 667 isl_val_free(size); 668 continue; 669 } 670 for (j = 0; j < node->nvar; ++j) { 671 if (j == i) 672 continue; 673 if (isl_int_is_neg(v->el[j]) || 674 isl_int_gt(v->el[j], size->n)) 675 isl_int_set(v->el[j], size->n); 676 } 677 isl_val_free(size); 678 } 679 680 for (i = 0; i < node->nvar; ++i) 681 isl_int_cdiv_q_ui(v->el[i], v->el[i], 2); 682 683 node->max = v; 684 return isl_stat_ok; 685 error: 686 isl_vec_free(v); 687 return isl_stat_error; 688 } 689 690 /* Construct an identifier for node "node", which will represent "set". 691 * The name of the identifier is either "compressed" or 692 * "compressed_<name>", with <name> the name of the space of "set". 693 * The user pointer of the identifier points to "node". 694 */ 695 static __isl_give isl_id *construct_compressed_id(__isl_keep isl_set *set, 696 struct isl_sched_node *node) 697 { 698 isl_bool has_name; 699 isl_ctx *ctx; 700 isl_id *id; 701 isl_printer *p; 702 const char *name; 703 char *id_name; 704 705 has_name = isl_set_has_tuple_name(set); 706 if (has_name < 0) 707 return NULL; 708 709 ctx = isl_set_get_ctx(set); 710 if (!has_name) 711 return isl_id_alloc(ctx, "compressed", node); 712 713 p = isl_printer_to_str(ctx); 714 name = isl_set_get_tuple_name(set); 715 p = isl_printer_print_str(p, "compressed_"); 716 p = isl_printer_print_str(p, name); 717 id_name = isl_printer_get_str(p); 718 isl_printer_free(p); 719 720 id = isl_id_alloc(ctx, id_name, node); 721 free(id_name); 722 723 return id; 724 } 725 726 /* Construct a map that isolates the variable in position "pos" in "set". 727 * 728 * That is, construct 729 * 730 * [i_0, ..., i_pos-1, i_pos+1, ...] -> [i_pos] 731 */ 732 static __isl_give isl_map *isolate(__isl_take isl_set *set, int pos) 733 { 734 isl_map *map; 735 736 map = isl_set_project_onto_map(set, isl_dim_set, pos, 1); 737 map = isl_map_project_out(map, isl_dim_in, pos, 1); 738 return map; 739 } 740 741 /* Compute and return the size of "set" in dimension "dim". 742 * The size is taken to be the difference in values for that variable 743 * for fixed values of the other variables. 744 * This assumes that "set" is convex. 745 * In particular, the variable is first isolated from the other variables 746 * in the range of a map 747 * 748 * [i_0, ..., i_dim-1, i_dim+1, ...] -> [i_dim] 749 * 750 * and then duplicated 751 * 752 * [i_0, ..., i_dim-1, i_dim+1, ...] -> [[i_dim] -> [i_dim']] 753 * 754 * The shared variables are then projected out and the maximal value 755 * of i_dim' - i_dim is computed. 756 */ 757 static __isl_give isl_val *compute_size(__isl_take isl_set *set, int dim) 758 { 759 isl_map *map; 760 isl_local_space *ls; 761 isl_aff *obj; 762 isl_val *v; 763 764 map = isolate(set, dim); 765 map = isl_map_range_product(map, isl_map_copy(map)); 766 map = isl_set_unwrap(isl_map_range(map)); 767 set = isl_map_deltas(map); 768 ls = isl_local_space_from_space(isl_set_get_space(set)); 769 obj = isl_aff_var_on_domain(ls, isl_dim_set, 0); 770 v = isl_set_max_val(set, obj); 771 isl_aff_free(obj); 772 isl_set_free(set); 773 774 return v; 775 } 776 777 /* Perform a compression on "node" where "hull" represents the constraints 778 * that were used to derive the compression, while "compress" and 779 * "decompress" map the original space to the compressed space and 780 * vice versa. 781 * 782 * If "node" was not compressed already, then simply store 783 * the compression information. 784 * Otherwise the "original" space is actually the result 785 * of a previous compression, which is then combined 786 * with the present compression. 787 * 788 * The dimensionality of the compressed domain is also adjusted. 789 * Other information, such as the sizes and the maximal coefficient values, 790 * has not been computed yet and therefore does not need to be adjusted. 791 */ 792 static isl_stat compress_node(struct isl_sched_node *node, 793 __isl_take isl_set *hull, __isl_take isl_multi_aff *compress, 794 __isl_take isl_pw_multi_aff *decompress) 795 { 796 node->nvar = isl_multi_aff_dim(compress, isl_dim_out); 797 if (!node->compressed) { 798 node->compressed = 1; 799 node->hull = hull; 800 node->compress = compress; 801 node->decompress = decompress; 802 } else { 803 hull = isl_set_preimage_multi_aff(hull, 804 isl_multi_aff_copy(node->compress)); 805 node->hull = isl_set_intersect(node->hull, hull); 806 node->compress = isl_multi_aff_pullback_multi_aff( 807 compress, node->compress); 808 node->decompress = isl_pw_multi_aff_pullback_pw_multi_aff( 809 node->decompress, decompress); 810 } 811 812 if (!node->hull || !node->compress || !node->decompress) 813 return isl_stat_error; 814 815 return isl_stat_ok; 816 } 817 818 /* Given that dimension "pos" in "set" has a fixed value 819 * in terms of the other dimensions, (further) compress "node" 820 * by projecting out this dimension. 821 * "set" may be the result of a previous compression. 822 * "uncompressed" is the original domain (without compression). 823 * 824 * The compression function simply projects out the dimension. 825 * The decompression function adds back the dimension 826 * in the right position as an expression of the other dimensions 827 * derived from "set". 828 * As in extract_node, the compressed space has an identifier 829 * that references "node" such that each compressed space is unique and 830 * such that the node can be recovered from the compressed space. 831 * 832 * The constraint removed through the compression is added to the "hull" 833 * such that only edges that relate to the original domains 834 * are taken into account. 835 * In particular, it is obtained by composing compression and decompression and 836 * taking the relation among the variables in the range. 837 */ 838 static isl_stat project_out_fixed(struct isl_sched_node *node, 839 __isl_keep isl_set *uncompressed, __isl_take isl_set *set, int pos) 840 { 841 isl_id *id; 842 isl_space *space; 843 isl_set *domain; 844 isl_map *map; 845 isl_multi_aff *compress; 846 isl_pw_multi_aff *decompress, *pma; 847 isl_multi_pw_aff *mpa; 848 isl_set *hull; 849 850 map = isolate(isl_set_copy(set), pos); 851 pma = isl_pw_multi_aff_from_map(map); 852 domain = isl_pw_multi_aff_domain(isl_pw_multi_aff_copy(pma)); 853 pma = isl_pw_multi_aff_gist(pma, domain); 854 space = isl_pw_multi_aff_get_domain_space(pma); 855 mpa = isl_multi_pw_aff_identity(isl_space_map_from_set(space)); 856 mpa = isl_multi_pw_aff_range_splice(mpa, pos, 857 isl_multi_pw_aff_from_pw_multi_aff(pma)); 858 decompress = isl_pw_multi_aff_from_multi_pw_aff(mpa); 859 space = isl_set_get_space(set); 860 compress = isl_multi_aff_project_out_map(space, isl_dim_set, pos, 1); 861 id = construct_compressed_id(uncompressed, node); 862 compress = isl_multi_aff_set_tuple_id(compress, isl_dim_out, id); 863 space = isl_space_reverse(isl_multi_aff_get_space(compress)); 864 decompress = isl_pw_multi_aff_reset_space(decompress, space); 865 pma = isl_pw_multi_aff_pullback_multi_aff( 866 isl_pw_multi_aff_copy(decompress), isl_multi_aff_copy(compress)); 867 hull = isl_map_range(isl_map_from_pw_multi_aff(pma)); 868 869 isl_set_free(set); 870 871 return compress_node(node, hull, compress, decompress); 872 } 873 874 /* Compute the size of the compressed domain in each dimension and 875 * store the results in node->sizes. 876 * "uncompressed" is the original domain (without compression). 877 * 878 * First compress the domain if needed and then compute the size 879 * in each direction. 880 * If the domain is not convex, then the sizes are computed 881 * on a convex superset in order to avoid picking up sizes 882 * that are valid for the individual disjuncts, but not for 883 * the domain as a whole. 884 * 885 * If any of the sizes turns out to be zero, then this means 886 * that this dimension has a fixed value in terms of 887 * the other dimensions. Perform an (extra) compression 888 * to remove this dimension. 889 */ 890 static isl_stat compute_sizes(struct isl_sched_node *node, 891 __isl_keep isl_set *uncompressed) 892 { 893 int j; 894 isl_size n; 895 isl_multi_val *mv; 896 isl_set *set = isl_set_copy(uncompressed); 897 898 if (node->compressed) 899 set = isl_set_preimage_pw_multi_aff(set, 900 isl_pw_multi_aff_copy(node->decompress)); 901 set = isl_set_from_basic_set(isl_set_simple_hull(set)); 902 mv = isl_multi_val_zero(isl_set_get_space(set)); 903 n = isl_set_dim(set, isl_dim_set); 904 if (n < 0) 905 mv = isl_multi_val_free(mv); 906 for (j = 0; j < n; ++j) { 907 isl_bool is_zero; 908 isl_val *v; 909 910 v = compute_size(isl_set_copy(set), j); 911 is_zero = isl_val_is_zero(v); 912 mv = isl_multi_val_set_val(mv, j, v); 913 if (is_zero >= 0 && is_zero) { 914 isl_multi_val_free(mv); 915 if (project_out_fixed(node, uncompressed, set, j) < 0) 916 return isl_stat_error; 917 return compute_sizes(node, uncompressed); 918 } 919 } 920 node->sizes = mv; 921 isl_set_free(set); 922 if (!node->sizes) 923 return isl_stat_error; 924 return isl_stat_ok; 925 } 926 927 /* Compute the size of the instance set "set" of "node", after compression, 928 * as well as bounds on the corresponding coefficients, if needed. 929 * 930 * The sizes are needed when the schedule_treat_coalescing option is set. 931 * The bounds are needed when the schedule_treat_coalescing option or 932 * the schedule_max_coefficient option is set. 933 * 934 * If the schedule_treat_coalescing option is not set, then at most 935 * the bounds need to be set and this is done in set_max_coefficient. 936 * Otherwise, compute the size of the compressed domain 937 * in each direction and store the results in node->size. 938 * Finally, set the bounds on the coefficients based on the sizes 939 * and the schedule_max_coefficient option in compute_max_coefficient. 940 */ 941 static isl_stat compute_sizes_and_max(isl_ctx *ctx, struct isl_sched_node *node, 942 __isl_take isl_set *set) 943 { 944 isl_stat r; 945 946 if (!isl_options_get_schedule_treat_coalescing(ctx)) { 947 isl_set_free(set); 948 return set_max_coefficient(ctx, node); 949 } 950 951 r = compute_sizes(node, set); 952 isl_set_free(set); 953 if (r < 0) 954 return isl_stat_error; 955 return compute_max_coefficient(ctx, node); 956 } 957 958 /* Add a new node to the graph representing the given instance set. 959 * "nvar" is the (possibly compressed) number of variables and 960 * may be smaller than then number of set variables in "set" 961 * if "compressed" is set. 962 * If "compressed" is set, then "hull" represents the constraints 963 * that were used to derive the compression, while "compress" and 964 * "decompress" map the original space to the compressed space and 965 * vice versa. 966 * If "compressed" is not set, then "hull", "compress" and "decompress" 967 * should be NULL. 968 * 969 * Compute the size of the instance set and bounds on the coefficients, 970 * if needed. 971 */ 972 static isl_stat add_node(struct isl_sched_graph *graph, 973 __isl_take isl_set *set, int nvar, int compressed, 974 __isl_take isl_set *hull, __isl_take isl_multi_aff *compress, 975 __isl_take isl_pw_multi_aff *decompress) 976 { 977 isl_size nparam; 978 isl_ctx *ctx; 979 isl_mat *sched; 980 isl_space *space; 981 int *coincident; 982 struct isl_sched_node *node; 983 984 nparam = isl_set_dim(set, isl_dim_param); 985 if (nparam < 0) 986 goto error; 987 988 ctx = isl_set_get_ctx(set); 989 if (!ctx->opt->schedule_parametric) 990 nparam = 0; 991 sched = isl_mat_alloc(ctx, 0, 1 + nparam + nvar); 992 node = &graph->node[graph->n]; 993 graph->n++; 994 space = isl_set_get_space(set); 995 node->space = space; 996 node->nvar = nvar; 997 node->nparam = nparam; 998 node->sched = sched; 999 node->sched_map = NULL; 1000 coincident = isl_calloc_array(ctx, int, graph->max_row); 1001 node->coincident = coincident; 1002 node->compressed = compressed; 1003 node->hull = hull; 1004 node->compress = compress; 1005 node->decompress = decompress; 1006 if (compute_sizes_and_max(ctx, node, set) < 0) 1007 return isl_stat_error; 1008 1009 if (!space || !sched || (graph->max_row && !coincident)) 1010 return isl_stat_error; 1011 if (compressed && (!hull || !compress || !decompress)) 1012 return isl_stat_error; 1013 1014 return isl_stat_ok; 1015 error: 1016 isl_set_free(set); 1017 isl_set_free(hull); 1018 isl_multi_aff_free(compress); 1019 isl_pw_multi_aff_free(decompress); 1020 return isl_stat_error; 1021 } 1022 1023 /* Add a new node to the graph representing the given set. 1024 * 1025 * If any of the set variables is defined by an equality, then 1026 * we perform variable compression such that we can perform 1027 * the scheduling on the compressed domain. 1028 * In this case, an identifier is used that references the new node 1029 * such that each compressed space is unique and 1030 * such that the node can be recovered from the compressed space. 1031 */ 1032 static isl_stat extract_node(__isl_take isl_set *set, void *user) 1033 { 1034 isl_size nvar; 1035 isl_bool has_equality; 1036 isl_id *id; 1037 isl_basic_set *hull; 1038 isl_set *hull_set; 1039 isl_morph *morph; 1040 isl_multi_aff *compress, *decompress_ma; 1041 isl_pw_multi_aff *decompress; 1042 struct isl_sched_graph *graph = user; 1043 1044 hull = isl_set_affine_hull(isl_set_copy(set)); 1045 hull = isl_basic_set_remove_divs(hull); 1046 nvar = isl_set_dim(set, isl_dim_set); 1047 has_equality = has_any_defining_equality(hull); 1048 1049 if (nvar < 0 || has_equality < 0) 1050 goto error; 1051 if (!has_equality) { 1052 isl_basic_set_free(hull); 1053 return add_node(graph, set, nvar, 0, NULL, NULL, NULL); 1054 } 1055 1056 id = construct_compressed_id(set, &graph->node[graph->n]); 1057 morph = isl_basic_set_variable_compression_with_id(hull, id); 1058 isl_id_free(id); 1059 nvar = isl_morph_ran_dim(morph, isl_dim_set); 1060 if (nvar < 0) 1061 set = isl_set_free(set); 1062 compress = isl_morph_get_var_multi_aff(morph); 1063 morph = isl_morph_inverse(morph); 1064 decompress_ma = isl_morph_get_var_multi_aff(morph); 1065 decompress = isl_pw_multi_aff_from_multi_aff(decompress_ma); 1066 isl_morph_free(morph); 1067 1068 hull_set = isl_set_from_basic_set(hull); 1069 return add_node(graph, set, nvar, 1, hull_set, compress, decompress); 1070 error: 1071 isl_basic_set_free(hull); 1072 isl_set_free(set); 1073 return isl_stat_error; 1074 } 1075 1076 struct isl_extract_edge_data { 1077 isl_schedule_constraints *sc; 1078 enum isl_edge_type type; 1079 struct isl_sched_graph *graph; 1080 }; 1081 1082 /* Merge edge2 into edge1, freeing the contents of edge2. 1083 * Return 0 on success and -1 on failure. 1084 * 1085 * edge1 and edge2 are assumed to have the same value for the map field. 1086 */ 1087 static int merge_edge(struct isl_sched_edge *edge1, 1088 struct isl_sched_edge *edge2) 1089 { 1090 edge1->types |= edge2->types; 1091 isl_map_free(edge2->map); 1092 1093 if (isl_sched_edge_is_condition(edge2)) { 1094 if (!edge1->tagged_condition) 1095 edge1->tagged_condition = edge2->tagged_condition; 1096 else 1097 edge1->tagged_condition = 1098 isl_union_map_union(edge1->tagged_condition, 1099 edge2->tagged_condition); 1100 } 1101 1102 if (isl_sched_edge_is_conditional_validity(edge2)) { 1103 if (!edge1->tagged_validity) 1104 edge1->tagged_validity = edge2->tagged_validity; 1105 else 1106 edge1->tagged_validity = 1107 isl_union_map_union(edge1->tagged_validity, 1108 edge2->tagged_validity); 1109 } 1110 1111 if (isl_sched_edge_is_condition(edge2) && !edge1->tagged_condition) 1112 return -1; 1113 if (isl_sched_edge_is_conditional_validity(edge2) && 1114 !edge1->tagged_validity) 1115 return -1; 1116 1117 return 0; 1118 } 1119 1120 /* Insert dummy tags in domain and range of "map". 1121 * 1122 * In particular, if "map" is of the form 1123 * 1124 * A -> B 1125 * 1126 * then return 1127 * 1128 * [A -> dummy_tag] -> [B -> dummy_tag] 1129 * 1130 * where the dummy_tags are identical and equal to any dummy tags 1131 * introduced by any other call to this function. 1132 */ 1133 static __isl_give isl_map *insert_dummy_tags(__isl_take isl_map *map) 1134 { 1135 static char dummy; 1136 isl_ctx *ctx; 1137 isl_id *id; 1138 isl_space *space; 1139 isl_set *domain, *range; 1140 1141 ctx = isl_map_get_ctx(map); 1142 1143 id = isl_id_alloc(ctx, NULL, &dummy); 1144 space = isl_space_params(isl_map_get_space(map)); 1145 space = isl_space_set_from_params(space); 1146 space = isl_space_set_tuple_id(space, isl_dim_set, id); 1147 space = isl_space_map_from_set(space); 1148 1149 domain = isl_map_wrap(map); 1150 range = isl_map_wrap(isl_map_universe(space)); 1151 map = isl_map_from_domain_and_range(domain, range); 1152 map = isl_map_zip(map); 1153 1154 return map; 1155 } 1156 1157 /* Given that at least one of "src" or "dst" is compressed, return 1158 * a map between the spaces of these nodes restricted to the affine 1159 * hull that was used in the compression. 1160 */ 1161 static __isl_give isl_map *extract_hull(struct isl_sched_node *src, 1162 struct isl_sched_node *dst) 1163 { 1164 isl_set *dom, *ran; 1165 1166 if (src->compressed) 1167 dom = isl_set_copy(src->hull); 1168 else 1169 dom = isl_set_universe(isl_space_copy(src->space)); 1170 if (dst->compressed) 1171 ran = isl_set_copy(dst->hull); 1172 else 1173 ran = isl_set_universe(isl_space_copy(dst->space)); 1174 1175 return isl_map_from_domain_and_range(dom, ran); 1176 } 1177 1178 /* Intersect the domains of the nested relations in domain and range 1179 * of "tagged" with "map". 1180 */ 1181 static __isl_give isl_map *map_intersect_domains(__isl_take isl_map *tagged, 1182 __isl_keep isl_map *map) 1183 { 1184 isl_set *set; 1185 1186 tagged = isl_map_zip(tagged); 1187 set = isl_map_wrap(isl_map_copy(map)); 1188 tagged = isl_map_intersect_domain(tagged, set); 1189 tagged = isl_map_zip(tagged); 1190 return tagged; 1191 } 1192 1193 /* Return a pointer to the node that lives in the domain space of "map", 1194 * an invalid node if there is no such node, or NULL in case of error. 1195 */ 1196 static struct isl_sched_node *find_domain_node(isl_ctx *ctx, 1197 struct isl_sched_graph *graph, __isl_keep isl_map *map) 1198 { 1199 struct isl_sched_node *node; 1200 isl_space *space; 1201 1202 space = isl_space_domain(isl_map_get_space(map)); 1203 node = isl_sched_graph_find_node(ctx, graph, space); 1204 isl_space_free(space); 1205 1206 return node; 1207 } 1208 1209 /* Return a pointer to the node that lives in the range space of "map", 1210 * an invalid node if there is no such node, or NULL in case of error. 1211 */ 1212 static struct isl_sched_node *find_range_node(isl_ctx *ctx, 1213 struct isl_sched_graph *graph, __isl_keep isl_map *map) 1214 { 1215 struct isl_sched_node *node; 1216 isl_space *space; 1217 1218 space = isl_space_range(isl_map_get_space(map)); 1219 node = isl_sched_graph_find_node(ctx, graph, space); 1220 isl_space_free(space); 1221 1222 return node; 1223 } 1224 1225 /* Refrain from adding a new edge based on "map". 1226 * Instead, just free the map. 1227 * "tagged" is either a copy of "map" with additional tags or NULL. 1228 */ 1229 static isl_stat skip_edge(__isl_take isl_map *map, __isl_take isl_map *tagged) 1230 { 1231 isl_map_free(map); 1232 isl_map_free(tagged); 1233 1234 return isl_stat_ok; 1235 } 1236 1237 /* Add a new edge to the graph based on the given map 1238 * and add it to data->graph->edge_table[data->type]. 1239 * If a dependence relation of a given type happens to be identical 1240 * to one of the dependence relations of a type that was added before, 1241 * then we don't create a new edge, but instead mark the original edge 1242 * as also representing a dependence of the current type. 1243 * 1244 * Edges of type isl_edge_condition or isl_edge_conditional_validity 1245 * may be specified as "tagged" dependence relations. That is, "map" 1246 * may contain elements (i -> a) -> (j -> b), where i -> j denotes 1247 * the dependence on iterations and a and b are tags. 1248 * edge->map is set to the relation containing the elements i -> j, 1249 * while edge->tagged_condition and edge->tagged_validity contain 1250 * the union of all the "map" relations 1251 * for which extract_edge is called that result in the same edge->map. 1252 * 1253 * Compute the gist with respect to the context. 1254 * This may remove some constraints on the parameters or 1255 * eliminate some parts of the dependence relation 1256 * that are not relevant on the context. 1257 * 1258 * If the source or the destination node is compressed, then 1259 * intersect both "map" and "tagged" with the constraints that 1260 * were used to construct the compression. 1261 * This ensures that there are no schedule constraints defined 1262 * outside of these domains, while the scheduler no longer has 1263 * any control over those outside parts. 1264 */ 1265 static isl_stat extract_edge(__isl_take isl_map *map, void *user) 1266 { 1267 isl_bool empty; 1268 isl_ctx *ctx = isl_map_get_ctx(map); 1269 struct isl_extract_edge_data *data = user; 1270 struct isl_sched_graph *graph = data->graph; 1271 struct isl_sched_node *src, *dst; 1272 struct isl_sched_edge *edge; 1273 isl_set *context; 1274 isl_map *tagged = NULL; 1275 isl_schedule_constraints *sc = data->sc; 1276 1277 if (data->type == isl_edge_condition || 1278 data->type == isl_edge_conditional_validity) { 1279 if (isl_map_can_zip(map)) { 1280 tagged = isl_map_copy(map); 1281 map = isl_set_unwrap(isl_map_domain(isl_map_zip(map))); 1282 } else { 1283 tagged = insert_dummy_tags(isl_map_copy(map)); 1284 } 1285 } 1286 1287 src = find_domain_node(ctx, graph, map); 1288 dst = find_range_node(ctx, graph, map); 1289 1290 if (!src || !dst) 1291 goto error; 1292 if (!isl_sched_graph_is_node(graph, src) || 1293 !isl_sched_graph_is_node(graph, dst)) 1294 return skip_edge(map, tagged); 1295 1296 context = isl_schedule_constraints_get_context(sc); 1297 map = isl_map_gist_params(map, context); 1298 1299 if (src->compressed || dst->compressed) { 1300 isl_map *hull; 1301 hull = extract_hull(src, dst); 1302 if (tagged) 1303 tagged = map_intersect_domains(tagged, hull); 1304 map = isl_map_intersect(map, hull); 1305 } 1306 1307 empty = isl_map_plain_is_empty(map); 1308 if (empty < 0) 1309 goto error; 1310 if (empty) 1311 return skip_edge(map, tagged); 1312 1313 graph->edge[graph->n_edge].src = src; 1314 graph->edge[graph->n_edge].dst = dst; 1315 graph->edge[graph->n_edge].map = map; 1316 graph->edge[graph->n_edge].types = 0; 1317 graph->edge[graph->n_edge].tagged_condition = NULL; 1318 graph->edge[graph->n_edge].tagged_validity = NULL; 1319 set_type(&graph->edge[graph->n_edge], data->type); 1320 if (data->type == isl_edge_condition) 1321 graph->edge[graph->n_edge].tagged_condition = 1322 isl_union_map_from_map(tagged); 1323 if (data->type == isl_edge_conditional_validity) 1324 graph->edge[graph->n_edge].tagged_validity = 1325 isl_union_map_from_map(tagged); 1326 1327 edge = graph_find_matching_edge(graph, &graph->edge[graph->n_edge]); 1328 if (!edge) { 1329 graph->n_edge++; 1330 return isl_stat_error; 1331 } 1332 if (edge == &graph->edge[graph->n_edge]) 1333 return graph_edge_table_add(ctx, graph, data->type, 1334 &graph->edge[graph->n_edge++]); 1335 1336 if (merge_edge(edge, &graph->edge[graph->n_edge]) < 0) 1337 return isl_stat_error; 1338 1339 return graph_edge_table_add(ctx, graph, data->type, edge); 1340 error: 1341 isl_map_free(map); 1342 isl_map_free(tagged); 1343 return isl_stat_error; 1344 } 1345 1346 /* Initialize the schedule graph "graph" from the schedule constraints "sc". 1347 * 1348 * The context is included in the domain before the nodes of 1349 * the graphs are extracted in order to be able to exploit 1350 * any possible additional equalities. 1351 * Note that this intersection is only performed locally here. 1352 */ 1353 isl_stat isl_sched_graph_init(struct isl_sched_graph *graph, 1354 __isl_keep isl_schedule_constraints *sc) 1355 { 1356 isl_ctx *ctx; 1357 isl_union_set *domain; 1358 isl_union_map *c; 1359 struct isl_extract_edge_data data = { sc }; 1360 enum isl_edge_type i; 1361 isl_stat r; 1362 isl_size n; 1363 1364 if (!sc) 1365 return isl_stat_error; 1366 1367 ctx = isl_schedule_constraints_get_ctx(sc); 1368 1369 domain = isl_schedule_constraints_get_domain(sc); 1370 n = isl_union_set_n_set(domain); 1371 graph->n = n; 1372 isl_union_set_free(domain); 1373 if (n < 0) 1374 return isl_stat_error; 1375 1376 n = isl_schedule_constraints_n_map(sc); 1377 if (n < 0 || graph_alloc(ctx, graph, graph->n, n) < 0) 1378 return isl_stat_error; 1379 1380 if (compute_max_row(graph, sc) < 0) 1381 return isl_stat_error; 1382 graph->root = graph; 1383 graph->n = 0; 1384 domain = isl_schedule_constraints_get_domain(sc); 1385 domain = isl_union_set_intersect_params(domain, 1386 isl_schedule_constraints_get_context(sc)); 1387 r = isl_union_set_foreach_set(domain, &extract_node, graph); 1388 isl_union_set_free(domain); 1389 if (r < 0) 1390 return isl_stat_error; 1391 if (graph_init_table(ctx, graph) < 0) 1392 return isl_stat_error; 1393 for (i = isl_edge_first; i <= isl_edge_last; ++i) { 1394 isl_size n; 1395 1396 c = isl_schedule_constraints_get(sc, i); 1397 n = isl_union_map_n_map(c); 1398 graph->max_edge[i] = n; 1399 isl_union_map_free(c); 1400 if (n < 0) 1401 return isl_stat_error; 1402 } 1403 if (graph_init_edge_tables(ctx, graph) < 0) 1404 return isl_stat_error; 1405 graph->n_edge = 0; 1406 data.graph = graph; 1407 for (i = isl_edge_first; i <= isl_edge_last; ++i) { 1408 isl_stat r; 1409 1410 data.type = i; 1411 c = isl_schedule_constraints_get(sc, i); 1412 r = isl_union_map_foreach_map(c, &extract_edge, &data); 1413 isl_union_map_free(c); 1414 if (r < 0) 1415 return isl_stat_error; 1416 } 1417 1418 return isl_stat_ok; 1419 } 1420 1421 /* Check whether there is any dependence from node[j] to node[i] 1422 * or from node[i] to node[j]. 1423 */ 1424 static isl_bool node_follows_weak(int i, int j, void *user) 1425 { 1426 isl_bool f; 1427 struct isl_sched_graph *graph = user; 1428 1429 f = graph_has_any_edge(graph, &graph->node[j], &graph->node[i]); 1430 if (f < 0 || f) 1431 return f; 1432 return graph_has_any_edge(graph, &graph->node[i], &graph->node[j]); 1433 } 1434 1435 /* Check whether there is a (conditional) validity dependence from node[j] 1436 * to node[i], forcing node[i] to follow node[j]. 1437 */ 1438 static isl_bool node_follows_strong(int i, int j, void *user) 1439 { 1440 struct isl_sched_graph *graph = user; 1441 1442 return isl_sched_graph_has_validity_edge(graph, &graph->node[j], 1443 &graph->node[i]); 1444 } 1445 1446 /* Use Tarjan's algorithm for computing the strongly connected components 1447 * in the dependence graph only considering those edges defined by "follows". 1448 */ 1449 isl_stat isl_sched_graph_detect_ccs(isl_ctx *ctx, 1450 struct isl_sched_graph *graph, 1451 isl_bool (*follows)(int i, int j, void *user)) 1452 { 1453 int i, n; 1454 struct isl_tarjan_graph *g = NULL; 1455 1456 g = isl_tarjan_graph_init(ctx, graph->n, follows, graph); 1457 if (!g) 1458 return isl_stat_error; 1459 1460 graph->scc = 0; 1461 i = 0; 1462 n = graph->n; 1463 while (n) { 1464 while (g->order[i] != -1) { 1465 graph->node[g->order[i]].scc = graph->scc; 1466 --n; 1467 ++i; 1468 } 1469 ++i; 1470 graph->scc++; 1471 } 1472 1473 isl_tarjan_graph_free(g); 1474 1475 return isl_stat_ok; 1476 } 1477 1478 /* Apply Tarjan's algorithm to detect the strongly connected components 1479 * in the dependence graph. 1480 * Only consider the (conditional) validity dependences and clear "weak". 1481 */ 1482 static isl_stat detect_sccs(isl_ctx *ctx, struct isl_sched_graph *graph) 1483 { 1484 graph->weak = 0; 1485 return isl_sched_graph_detect_ccs(ctx, graph, &node_follows_strong); 1486 } 1487 1488 /* Apply Tarjan's algorithm to detect the (weakly) connected components 1489 * in the dependence graph. 1490 * Consider all dependences and set "weak". 1491 */ 1492 static isl_stat detect_wccs(isl_ctx *ctx, struct isl_sched_graph *graph) 1493 { 1494 graph->weak = 1; 1495 return isl_sched_graph_detect_ccs(ctx, graph, &node_follows_weak); 1496 } 1497 1498 static int cmp_scc(const void *a, const void *b, void *data) 1499 { 1500 struct isl_sched_graph *graph = data; 1501 const int *i1 = a; 1502 const int *i2 = b; 1503 1504 return graph->node[*i1].scc - graph->node[*i2].scc; 1505 } 1506 1507 /* Sort the elements of graph->sorted according to the corresponding SCCs. 1508 */ 1509 static int sort_sccs(struct isl_sched_graph *graph) 1510 { 1511 return isl_sort(graph->sorted, graph->n, sizeof(int), &cmp_scc, graph); 1512 } 1513 1514 /* Return a non-parametric set in the compressed space of "node" that is 1515 * bounded by the size in each direction 1516 * 1517 * { [x] : -S_i <= x_i <= S_i } 1518 * 1519 * If S_i is infinity in direction i, then there are no constraints 1520 * in that direction. 1521 * 1522 * Cache the result in node->bounds. 1523 */ 1524 static __isl_give isl_basic_set *get_size_bounds(struct isl_sched_node *node) 1525 { 1526 isl_space *space; 1527 isl_basic_set *bounds; 1528 int i; 1529 1530 if (node->bounds) 1531 return isl_basic_set_copy(node->bounds); 1532 1533 if (node->compressed) 1534 space = isl_pw_multi_aff_get_domain_space(node->decompress); 1535 else 1536 space = isl_space_copy(node->space); 1537 space = isl_space_drop_all_params(space); 1538 bounds = isl_basic_set_universe(space); 1539 1540 for (i = 0; i < node->nvar; ++i) { 1541 isl_val *size; 1542 1543 size = isl_multi_val_get_val(node->sizes, i); 1544 if (!size) 1545 return isl_basic_set_free(bounds); 1546 if (!isl_val_is_int(size)) { 1547 isl_val_free(size); 1548 continue; 1549 } 1550 bounds = isl_basic_set_upper_bound_val(bounds, isl_dim_set, i, 1551 isl_val_copy(size)); 1552 bounds = isl_basic_set_lower_bound_val(bounds, isl_dim_set, i, 1553 isl_val_neg(size)); 1554 } 1555 1556 node->bounds = isl_basic_set_copy(bounds); 1557 return bounds; 1558 } 1559 1560 /* Compress the dependence relation "map", if needed, i.e., 1561 * when the source node "src" and/or the destination node "dst" 1562 * has been compressed. 1563 */ 1564 static __isl_give isl_map *compress(__isl_take isl_map *map, 1565 struct isl_sched_node *src, struct isl_sched_node *dst) 1566 { 1567 if (src->compressed) 1568 map = isl_map_preimage_domain_pw_multi_aff(map, 1569 isl_pw_multi_aff_copy(src->decompress)); 1570 if (dst->compressed) 1571 map = isl_map_preimage_range_pw_multi_aff(map, 1572 isl_pw_multi_aff_copy(dst->decompress)); 1573 return map; 1574 } 1575 1576 /* Drop some constraints from "delta" that could be exploited 1577 * to construct loop coalescing schedules. 1578 * In particular, drop those constraint that bound the difference 1579 * to the size of the domain. 1580 * First project out the parameters to improve the effectiveness. 1581 */ 1582 static __isl_give isl_set *drop_coalescing_constraints( 1583 __isl_take isl_set *delta, struct isl_sched_node *node) 1584 { 1585 isl_size nparam; 1586 isl_basic_set *bounds; 1587 1588 nparam = isl_set_dim(delta, isl_dim_param); 1589 if (nparam < 0) 1590 return isl_set_free(delta); 1591 1592 bounds = get_size_bounds(node); 1593 1594 delta = isl_set_project_out(delta, isl_dim_param, 0, nparam); 1595 delta = isl_set_remove_divs(delta); 1596 delta = isl_set_plain_gist_basic_set(delta, bounds); 1597 return delta; 1598 } 1599 1600 /* Given a dependence relation R from "node" to itself, 1601 * construct the set of coefficients of valid constraints for elements 1602 * in that dependence relation. 1603 * In particular, the result contains tuples of coefficients 1604 * c_0, c_n, c_x such that 1605 * 1606 * c_0 + c_n n + c_x y - c_x x >= 0 for each (x,y) in R 1607 * 1608 * or, equivalently, 1609 * 1610 * c_0 + c_n n + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R } 1611 * 1612 * We choose here to compute the dual of delta R. 1613 * Alternatively, we could have computed the dual of R, resulting 1614 * in a set of tuples c_0, c_n, c_x, c_y, and then 1615 * plugged in (c_0, c_n, c_x, -c_x). 1616 * 1617 * If "need_param" is set, then the resulting coefficients effectively 1618 * include coefficients for the parameters c_n. Otherwise, they may 1619 * have been projected out already. 1620 * Since the constraints may be different for these two cases, 1621 * they are stored in separate caches. 1622 * In particular, if no parameter coefficients are required and 1623 * the schedule_treat_coalescing option is set, then the parameters 1624 * are projected out and some constraints that could be exploited 1625 * to construct coalescing schedules are removed before the dual 1626 * is computed. 1627 * 1628 * If "node" has been compressed, then the dependence relation 1629 * is also compressed before the set of coefficients is computed. 1630 */ 1631 static __isl_give isl_basic_set *intra_coefficients( 1632 struct isl_sched_graph *graph, struct isl_sched_node *node, 1633 __isl_take isl_map *map, int need_param) 1634 { 1635 isl_ctx *ctx; 1636 isl_set *delta; 1637 isl_map *key; 1638 isl_basic_set *coef; 1639 isl_maybe_isl_basic_set m; 1640 isl_map_to_basic_set **hmap = &graph->intra_hmap; 1641 int treat; 1642 1643 if (!map) 1644 return NULL; 1645 1646 ctx = isl_map_get_ctx(map); 1647 treat = !need_param && isl_options_get_schedule_treat_coalescing(ctx); 1648 if (!treat) 1649 hmap = &graph->intra_hmap_param; 1650 m = isl_map_to_basic_set_try_get(*hmap, map); 1651 if (m.valid < 0 || m.valid) { 1652 isl_map_free(map); 1653 return m.value; 1654 } 1655 1656 key = isl_map_copy(map); 1657 map = compress(map, node, node); 1658 delta = isl_map_deltas(map); 1659 if (treat) 1660 delta = drop_coalescing_constraints(delta, node); 1661 delta = isl_set_remove_divs(delta); 1662 coef = isl_set_coefficients(delta); 1663 *hmap = isl_map_to_basic_set_set(*hmap, key, isl_basic_set_copy(coef)); 1664 1665 return coef; 1666 } 1667 1668 /* Given a dependence relation R, construct the set of coefficients 1669 * of valid constraints for elements in that dependence relation. 1670 * In particular, the result contains tuples of coefficients 1671 * c_0, c_n, c_x, c_y such that 1672 * 1673 * c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R 1674 * 1675 * If the source or destination nodes of "edge" have been compressed, 1676 * then the dependence relation is also compressed before 1677 * the set of coefficients is computed. 1678 */ 1679 static __isl_give isl_basic_set *inter_coefficients( 1680 struct isl_sched_graph *graph, struct isl_sched_edge *edge, 1681 __isl_take isl_map *map) 1682 { 1683 isl_set *set; 1684 isl_map *key; 1685 isl_basic_set *coef; 1686 isl_maybe_isl_basic_set m; 1687 1688 m = isl_map_to_basic_set_try_get(graph->inter_hmap, map); 1689 if (m.valid < 0 || m.valid) { 1690 isl_map_free(map); 1691 return m.value; 1692 } 1693 1694 key = isl_map_copy(map); 1695 map = compress(map, edge->src, edge->dst); 1696 set = isl_map_wrap(isl_map_remove_divs(map)); 1697 coef = isl_set_coefficients(set); 1698 graph->inter_hmap = isl_map_to_basic_set_set(graph->inter_hmap, key, 1699 isl_basic_set_copy(coef)); 1700 1701 return coef; 1702 } 1703 1704 /* Return the position of the coefficients of the variables in 1705 * the coefficients constraints "coef". 1706 * 1707 * The space of "coef" is of the form 1708 * 1709 * { coefficients[[cst, params] -> S] } 1710 * 1711 * Return the position of S. 1712 */ 1713 static isl_size coef_var_offset(__isl_keep isl_basic_set *coef) 1714 { 1715 isl_size offset; 1716 isl_space *space; 1717 1718 space = isl_space_unwrap(isl_basic_set_get_space(coef)); 1719 offset = isl_space_dim(space, isl_dim_in); 1720 isl_space_free(space); 1721 1722 return offset; 1723 } 1724 1725 /* Return the offset of the coefficient of the constant term of "node" 1726 * within the (I)LP. 1727 * 1728 * Within each node, the coefficients have the following order: 1729 * - positive and negative parts of c_i_x 1730 * - c_i_n (if parametric) 1731 * - c_i_0 1732 */ 1733 static int node_cst_coef_offset(struct isl_sched_node *node) 1734 { 1735 return node->start + 2 * node->nvar + node->nparam; 1736 } 1737 1738 /* Return the offset of the coefficients of the parameters of "node" 1739 * within the (I)LP. 1740 * 1741 * Within each node, the coefficients have the following order: 1742 * - positive and negative parts of c_i_x 1743 * - c_i_n (if parametric) 1744 * - c_i_0 1745 */ 1746 static int node_par_coef_offset(struct isl_sched_node *node) 1747 { 1748 return node->start + 2 * node->nvar; 1749 } 1750 1751 /* Return the offset of the coefficients of the variables of "node" 1752 * within the (I)LP. 1753 * 1754 * Within each node, the coefficients have the following order: 1755 * - positive and negative parts of c_i_x 1756 * - c_i_n (if parametric) 1757 * - c_i_0 1758 */ 1759 static int node_var_coef_offset(struct isl_sched_node *node) 1760 { 1761 return node->start; 1762 } 1763 1764 /* Return the position of the pair of variables encoding 1765 * coefficient "i" of "node". 1766 * 1767 * The order of these variable pairs is the opposite of 1768 * that of the coefficients, with 2 variables per coefficient. 1769 */ 1770 static int node_var_coef_pos(struct isl_sched_node *node, int i) 1771 { 1772 return node_var_coef_offset(node) + 2 * (node->nvar - 1 - i); 1773 } 1774 1775 /* Construct an isl_dim_map for mapping constraints on coefficients 1776 * for "node" to the corresponding positions in graph->lp. 1777 * "offset" is the offset of the coefficients for the variables 1778 * in the input constraints. 1779 * "s" is the sign of the mapping. 1780 * 1781 * The input constraints are given in terms of the coefficients 1782 * (c_0, c_x) or (c_0, c_n, c_x). 1783 * The mapping produced by this function essentially plugs in 1784 * (0, c_i_x^+ - c_i_x^-) if s = 1 and 1785 * (0, -c_i_x^+ + c_i_x^-) if s = -1 or 1786 * (0, 0, c_i_x^+ - c_i_x^-) if s = 1 and 1787 * (0, 0, -c_i_x^+ + c_i_x^-) if s = -1. 1788 * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart. 1789 * Furthermore, the order of these pairs is the opposite of that 1790 * of the corresponding coefficients. 1791 * 1792 * The caller can extend the mapping to also map the other coefficients 1793 * (and therefore not plug in 0). 1794 */ 1795 static __isl_give isl_dim_map *intra_dim_map(isl_ctx *ctx, 1796 struct isl_sched_graph *graph, struct isl_sched_node *node, 1797 int offset, int s) 1798 { 1799 int pos; 1800 isl_size total; 1801 isl_dim_map *dim_map; 1802 1803 total = isl_basic_set_dim(graph->lp, isl_dim_all); 1804 if (!node || total < 0) 1805 return NULL; 1806 1807 pos = node_var_coef_pos(node, 0); 1808 dim_map = isl_dim_map_alloc(ctx, total); 1809 isl_dim_map_range(dim_map, pos, -2, offset, 1, node->nvar, -s); 1810 isl_dim_map_range(dim_map, pos + 1, -2, offset, 1, node->nvar, s); 1811 1812 return dim_map; 1813 } 1814 1815 /* Construct an isl_dim_map for mapping constraints on coefficients 1816 * for "src" (node i) and "dst" (node j) to the corresponding positions 1817 * in graph->lp. 1818 * "offset" is the offset of the coefficients for the variables of "src" 1819 * in the input constraints. 1820 * "s" is the sign of the mapping. 1821 * 1822 * The input constraints are given in terms of the coefficients 1823 * (c_0, c_n, c_x, c_y). 1824 * The mapping produced by this function essentially plugs in 1825 * (c_j_0 - c_i_0, c_j_n - c_i_n, 1826 * -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-) if s = 1 and 1827 * (-c_j_0 + c_i_0, -c_j_n + c_i_n, 1828 * c_i_x^+ - c_i_x^-, -(c_j_x^+ - c_j_x^-)) if s = -1. 1829 * In graph->lp, the c_*^- appear before their c_*^+ counterpart. 1830 * Furthermore, the order of these pairs is the opposite of that 1831 * of the corresponding coefficients. 1832 * 1833 * The caller can further extend the mapping. 1834 */ 1835 static __isl_give isl_dim_map *inter_dim_map(isl_ctx *ctx, 1836 struct isl_sched_graph *graph, struct isl_sched_node *src, 1837 struct isl_sched_node *dst, int offset, int s) 1838 { 1839 int pos; 1840 isl_size total; 1841 isl_dim_map *dim_map; 1842 1843 total = isl_basic_set_dim(graph->lp, isl_dim_all); 1844 if (!src || !dst || total < 0) 1845 return NULL; 1846 1847 dim_map = isl_dim_map_alloc(ctx, total); 1848 1849 pos = node_cst_coef_offset(dst); 1850 isl_dim_map_range(dim_map, pos, 0, 0, 0, 1, s); 1851 pos = node_par_coef_offset(dst); 1852 isl_dim_map_range(dim_map, pos, 1, 1, 1, dst->nparam, s); 1853 pos = node_var_coef_pos(dst, 0); 1854 isl_dim_map_range(dim_map, pos, -2, offset + src->nvar, 1, 1855 dst->nvar, -s); 1856 isl_dim_map_range(dim_map, pos + 1, -2, offset + src->nvar, 1, 1857 dst->nvar, s); 1858 1859 pos = node_cst_coef_offset(src); 1860 isl_dim_map_range(dim_map, pos, 0, 0, 0, 1, -s); 1861 pos = node_par_coef_offset(src); 1862 isl_dim_map_range(dim_map, pos, 1, 1, 1, src->nparam, -s); 1863 pos = node_var_coef_pos(src, 0); 1864 isl_dim_map_range(dim_map, pos, -2, offset, 1, src->nvar, s); 1865 isl_dim_map_range(dim_map, pos + 1, -2, offset, 1, src->nvar, -s); 1866 1867 return dim_map; 1868 } 1869 1870 /* Add the constraints from "src" to "dst" using "dim_map", 1871 * after making sure there is enough room in "dst" for the extra constraints. 1872 */ 1873 static __isl_give isl_basic_set *add_constraints_dim_map( 1874 __isl_take isl_basic_set *dst, __isl_take isl_basic_set *src, 1875 __isl_take isl_dim_map *dim_map) 1876 { 1877 isl_size n_eq, n_ineq; 1878 1879 n_eq = isl_basic_set_n_equality(src); 1880 n_ineq = isl_basic_set_n_inequality(src); 1881 if (n_eq < 0 || n_ineq < 0) 1882 dst = isl_basic_set_free(dst); 1883 dst = isl_basic_set_extend_constraints(dst, n_eq, n_ineq); 1884 dst = isl_basic_set_add_constraints_dim_map(dst, src, dim_map); 1885 return dst; 1886 } 1887 1888 /* Add constraints to graph->lp that force validity for the given 1889 * dependence from a node i to itself. 1890 * That is, add constraints that enforce 1891 * 1892 * (c_i_0 + c_i_n n + c_i_x y) - (c_i_0 + c_i_n n + c_i_x x) 1893 * = c_i_x (y - x) >= 0 1894 * 1895 * for each (x,y) in R. 1896 * We obtain general constraints on coefficients (c_0, c_x) 1897 * of valid constraints for (y - x) and then plug in (0, c_i_x^+ - c_i_x^-), 1898 * where c_i_x = c_i_x^+ - c_i_x^-, with c_i_x^+ and c_i_x^- non-negative. 1899 * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart. 1900 * Note that the result of intra_coefficients may also contain 1901 * parameter coefficients c_n, in which case 0 is plugged in for them as well. 1902 */ 1903 static isl_stat add_intra_validity_constraints(struct isl_sched_graph *graph, 1904 struct isl_sched_edge *edge) 1905 { 1906 isl_size offset; 1907 isl_map *map = isl_map_copy(edge->map); 1908 isl_ctx *ctx = isl_map_get_ctx(map); 1909 isl_dim_map *dim_map; 1910 isl_basic_set *coef; 1911 struct isl_sched_node *node = edge->src; 1912 1913 coef = intra_coefficients(graph, node, map, 0); 1914 1915 offset = coef_var_offset(coef); 1916 if (offset < 0) 1917 coef = isl_basic_set_free(coef); 1918 if (!coef) 1919 return isl_stat_error; 1920 1921 dim_map = intra_dim_map(ctx, graph, node, offset, 1); 1922 graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map); 1923 1924 return isl_stat_ok; 1925 } 1926 1927 /* Add constraints to graph->lp that force validity for the given 1928 * dependence from node i to node j. 1929 * That is, add constraints that enforce 1930 * 1931 * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) >= 0 1932 * 1933 * for each (x,y) in R. 1934 * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y) 1935 * of valid constraints for R and then plug in 1936 * (c_j_0 - c_i_0, c_j_n - c_i_n, -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-), 1937 * where c_* = c_*^+ - c_*^-, with c_*^+ and c_*^- non-negative. 1938 * In graph->lp, the c_*^- appear before their c_*^+ counterpart. 1939 */ 1940 static isl_stat add_inter_validity_constraints(struct isl_sched_graph *graph, 1941 struct isl_sched_edge *edge) 1942 { 1943 isl_size offset; 1944 isl_map *map; 1945 isl_ctx *ctx; 1946 isl_dim_map *dim_map; 1947 isl_basic_set *coef; 1948 struct isl_sched_node *src = edge->src; 1949 struct isl_sched_node *dst = edge->dst; 1950 1951 if (!graph->lp) 1952 return isl_stat_error; 1953 1954 map = isl_map_copy(edge->map); 1955 ctx = isl_map_get_ctx(map); 1956 coef = inter_coefficients(graph, edge, map); 1957 1958 offset = coef_var_offset(coef); 1959 if (offset < 0) 1960 coef = isl_basic_set_free(coef); 1961 if (!coef) 1962 return isl_stat_error; 1963 1964 dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1); 1965 1966 edge->start = graph->lp->n_ineq; 1967 graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map); 1968 if (!graph->lp) 1969 return isl_stat_error; 1970 edge->end = graph->lp->n_ineq; 1971 1972 return isl_stat_ok; 1973 } 1974 1975 /* Add constraints to graph->lp that bound the dependence distance for the given 1976 * dependence from a node i to itself. 1977 * If s = 1, we add the constraint 1978 * 1979 * c_i_x (y - x) <= m_0 + m_n n 1980 * 1981 * or 1982 * 1983 * -c_i_x (y - x) + m_0 + m_n n >= 0 1984 * 1985 * for each (x,y) in R. 1986 * If s = -1, we add the constraint 1987 * 1988 * -c_i_x (y - x) <= m_0 + m_n n 1989 * 1990 * or 1991 * 1992 * c_i_x (y - x) + m_0 + m_n n >= 0 1993 * 1994 * for each (x,y) in R. 1995 * We obtain general constraints on coefficients (c_0, c_n, c_x) 1996 * of valid constraints for (y - x) and then plug in (m_0, m_n, -s * c_i_x), 1997 * with each coefficient (except m_0) represented as a pair of non-negative 1998 * coefficients. 1999 * 2000 * 2001 * If "local" is set, then we add constraints 2002 * 2003 * c_i_x (y - x) <= 0 2004 * 2005 * or 2006 * 2007 * -c_i_x (y - x) <= 0 2008 * 2009 * instead, forcing the dependence distance to be (less than or) equal to 0. 2010 * That is, we plug in (0, 0, -s * c_i_x), 2011 * intra_coefficients is not required to have c_n in its result when 2012 * "local" is set. If they are missing, then (0, -s * c_i_x) is plugged in. 2013 * Note that dependences marked local are treated as validity constraints 2014 * by add_all_validity_constraints and therefore also have 2015 * their distances bounded by 0 from below. 2016 */ 2017 static isl_stat add_intra_proximity_constraints(struct isl_sched_graph *graph, 2018 struct isl_sched_edge *edge, int s, int local) 2019 { 2020 isl_size offset; 2021 isl_size nparam; 2022 isl_map *map = isl_map_copy(edge->map); 2023 isl_ctx *ctx = isl_map_get_ctx(map); 2024 isl_dim_map *dim_map; 2025 isl_basic_set *coef; 2026 struct isl_sched_node *node = edge->src; 2027 2028 coef = intra_coefficients(graph, node, map, !local); 2029 nparam = isl_space_dim(node->space, isl_dim_param); 2030 2031 offset = coef_var_offset(coef); 2032 if (nparam < 0 || offset < 0) 2033 coef = isl_basic_set_free(coef); 2034 if (!coef) 2035 return isl_stat_error; 2036 2037 dim_map = intra_dim_map(ctx, graph, node, offset, -s); 2038 2039 if (!local) { 2040 isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1); 2041 isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1); 2042 isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1); 2043 } 2044 graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map); 2045 2046 return isl_stat_ok; 2047 } 2048 2049 /* Add constraints to graph->lp that bound the dependence distance for the given 2050 * dependence from node i to node j. 2051 * If s = 1, we add the constraint 2052 * 2053 * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) 2054 * <= m_0 + m_n n 2055 * 2056 * or 2057 * 2058 * -(c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x) + 2059 * m_0 + m_n n >= 0 2060 * 2061 * for each (x,y) in R. 2062 * If s = -1, we add the constraint 2063 * 2064 * -((c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x)) 2065 * <= m_0 + m_n n 2066 * 2067 * or 2068 * 2069 * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) + 2070 * m_0 + m_n n >= 0 2071 * 2072 * for each (x,y) in R. 2073 * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y) 2074 * of valid constraints for R and then plug in 2075 * (m_0 - s*c_j_0 + s*c_i_0, m_n - s*c_j_n + s*c_i_n, 2076 * s*c_i_x, -s*c_j_x) 2077 * with each coefficient (except m_0, c_*_0 and c_*_n) 2078 * represented as a pair of non-negative coefficients. 2079 * 2080 * 2081 * If "local" is set (and s = 1), then we add constraints 2082 * 2083 * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) <= 0 2084 * 2085 * or 2086 * 2087 * -((c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x)) >= 0 2088 * 2089 * instead, forcing the dependence distance to be (less than or) equal to 0. 2090 * That is, we plug in 2091 * (-s*c_j_0 + s*c_i_0, -s*c_j_n + s*c_i_n, s*c_i_x, -s*c_j_x). 2092 * Note that dependences marked local are treated as validity constraints 2093 * by add_all_validity_constraints and therefore also have 2094 * their distances bounded by 0 from below. 2095 */ 2096 static isl_stat add_inter_proximity_constraints(struct isl_sched_graph *graph, 2097 struct isl_sched_edge *edge, int s, int local) 2098 { 2099 isl_size offset; 2100 isl_size nparam; 2101 isl_map *map = isl_map_copy(edge->map); 2102 isl_ctx *ctx = isl_map_get_ctx(map); 2103 isl_dim_map *dim_map; 2104 isl_basic_set *coef; 2105 struct isl_sched_node *src = edge->src; 2106 struct isl_sched_node *dst = edge->dst; 2107 2108 coef = inter_coefficients(graph, edge, map); 2109 nparam = isl_space_dim(src->space, isl_dim_param); 2110 2111 offset = coef_var_offset(coef); 2112 if (nparam < 0 || offset < 0) 2113 coef = isl_basic_set_free(coef); 2114 if (!coef) 2115 return isl_stat_error; 2116 2117 dim_map = inter_dim_map(ctx, graph, src, dst, offset, -s); 2118 2119 if (!local) { 2120 isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1); 2121 isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1); 2122 isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1); 2123 } 2124 2125 graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map); 2126 2127 return isl_stat_ok; 2128 } 2129 2130 /* Should the distance over "edge" be forced to zero? 2131 * That is, is it marked as a local edge? 2132 * If "use_coincidence" is set, then coincidence edges are treated 2133 * as local edges. 2134 */ 2135 static int force_zero(struct isl_sched_edge *edge, int use_coincidence) 2136 { 2137 return is_local(edge) || (use_coincidence && is_coincidence(edge)); 2138 } 2139 2140 /* Add all validity constraints to graph->lp. 2141 * 2142 * An edge that is forced to be local needs to have its dependence 2143 * distances equal to zero. We take care of bounding them by 0 from below 2144 * here. add_all_proximity_constraints takes care of bounding them by 0 2145 * from above. 2146 * 2147 * If "use_coincidence" is set, then we treat coincidence edges as local edges. 2148 * Otherwise, we ignore them. 2149 */ 2150 static int add_all_validity_constraints(struct isl_sched_graph *graph, 2151 int use_coincidence) 2152 { 2153 int i; 2154 2155 for (i = 0; i < graph->n_edge; ++i) { 2156 struct isl_sched_edge *edge = &graph->edge[i]; 2157 int zero; 2158 2159 zero = force_zero(edge, use_coincidence); 2160 if (!is_validity(edge) && !zero) 2161 continue; 2162 if (edge->src != edge->dst) 2163 continue; 2164 if (add_intra_validity_constraints(graph, edge) < 0) 2165 return -1; 2166 } 2167 2168 for (i = 0; i < graph->n_edge; ++i) { 2169 struct isl_sched_edge *edge = &graph->edge[i]; 2170 int zero; 2171 2172 zero = force_zero(edge, use_coincidence); 2173 if (!is_validity(edge) && !zero) 2174 continue; 2175 if (edge->src == edge->dst) 2176 continue; 2177 if (add_inter_validity_constraints(graph, edge) < 0) 2178 return -1; 2179 } 2180 2181 return 0; 2182 } 2183 2184 /* Add constraints to graph->lp that bound the dependence distance 2185 * for all dependence relations. 2186 * If a given proximity dependence is identical to a validity 2187 * dependence, then the dependence distance is already bounded 2188 * from below (by zero), so we only need to bound the distance 2189 * from above. (This includes the case of "local" dependences 2190 * which are treated as validity dependence by add_all_validity_constraints.) 2191 * Otherwise, we need to bound the distance both from above and from below. 2192 * 2193 * If "use_coincidence" is set, then we treat coincidence edges as local edges. 2194 * Otherwise, we ignore them. 2195 */ 2196 static int add_all_proximity_constraints(struct isl_sched_graph *graph, 2197 int use_coincidence) 2198 { 2199 int i; 2200 2201 for (i = 0; i < graph->n_edge; ++i) { 2202 struct isl_sched_edge *edge = &graph->edge[i]; 2203 int zero; 2204 2205 zero = force_zero(edge, use_coincidence); 2206 if (!isl_sched_edge_is_proximity(edge) && !zero) 2207 continue; 2208 if (edge->src == edge->dst && 2209 add_intra_proximity_constraints(graph, edge, 1, zero) < 0) 2210 return -1; 2211 if (edge->src != edge->dst && 2212 add_inter_proximity_constraints(graph, edge, 1, zero) < 0) 2213 return -1; 2214 if (is_validity(edge) || zero) 2215 continue; 2216 if (edge->src == edge->dst && 2217 add_intra_proximity_constraints(graph, edge, -1, 0) < 0) 2218 return -1; 2219 if (edge->src != edge->dst && 2220 add_inter_proximity_constraints(graph, edge, -1, 0) < 0) 2221 return -1; 2222 } 2223 2224 return 0; 2225 } 2226 2227 /* Normalize the rows of "indep" such that all rows are lexicographically 2228 * positive and such that each row contains as many final zeros as possible, 2229 * given the choice for the previous rows. 2230 * Do this by performing elementary row operations. 2231 */ 2232 static __isl_give isl_mat *normalize_independent(__isl_take isl_mat *indep) 2233 { 2234 indep = isl_mat_reverse_gauss(indep); 2235 indep = isl_mat_lexnonneg_rows(indep); 2236 return indep; 2237 } 2238 2239 /* Extract the linear part of the current schedule for node "node". 2240 */ 2241 static __isl_give isl_mat *extract_linear_schedule(struct isl_sched_node *node) 2242 { 2243 isl_size n_row = isl_mat_rows(node->sched); 2244 2245 if (n_row < 0) 2246 return NULL; 2247 return isl_mat_sub_alloc(node->sched, 0, n_row, 2248 1 + node->nparam, node->nvar); 2249 } 2250 2251 /* Compute a basis for the rows in the linear part of the schedule 2252 * and extend this basis to a full basis. The remaining rows 2253 * can then be used to force linear independence from the rows 2254 * in the schedule. 2255 * 2256 * In particular, given the schedule rows S, we compute 2257 * 2258 * S = H Q 2259 * S U = H 2260 * 2261 * with H the Hermite normal form of S. That is, all but the 2262 * first rank columns of H are zero and so each row in S is 2263 * a linear combination of the first rank rows of Q. 2264 * The matrix Q can be used as a variable transformation 2265 * that isolates the directions of S in the first rank rows. 2266 * Transposing S U = H yields 2267 * 2268 * U^T S^T = H^T 2269 * 2270 * with all but the first rank rows of H^T zero. 2271 * The last rows of U^T are therefore linear combinations 2272 * of schedule coefficients that are all zero on schedule 2273 * coefficients that are linearly dependent on the rows of S. 2274 * At least one of these combinations is non-zero on 2275 * linearly independent schedule coefficients. 2276 * The rows are normalized to involve as few of the last 2277 * coefficients as possible and to have a positive initial value. 2278 */ 2279 isl_stat isl_sched_node_update_vmap(struct isl_sched_node *node) 2280 { 2281 isl_mat *H, *U, *Q; 2282 2283 H = extract_linear_schedule(node); 2284 2285 H = isl_mat_left_hermite(H, 0, &U, &Q); 2286 isl_mat_free(node->indep); 2287 isl_mat_free(node->vmap); 2288 node->vmap = Q; 2289 node->indep = isl_mat_transpose(U); 2290 node->rank = isl_mat_initial_non_zero_cols(H); 2291 node->indep = isl_mat_drop_rows(node->indep, 0, node->rank); 2292 node->indep = normalize_independent(node->indep); 2293 isl_mat_free(H); 2294 2295 if (!node->indep || !node->vmap || node->rank < 0) 2296 return isl_stat_error; 2297 return isl_stat_ok; 2298 } 2299 2300 /* Is "edge" marked as a validity or a conditional validity edge? 2301 */ 2302 static int is_any_validity(struct isl_sched_edge *edge) 2303 { 2304 return is_validity(edge) || 2305 isl_sched_edge_is_conditional_validity(edge); 2306 } 2307 2308 /* How many times should we count the constraints in "edge"? 2309 * 2310 * We count as follows 2311 * validity -> 1 (>= 0) 2312 * validity+proximity -> 2 (>= 0 and upper bound) 2313 * proximity -> 2 (lower and upper bound) 2314 * local(+any) -> 2 (>= 0 and <= 0) 2315 * 2316 * If an edge is only marked conditional_validity then it counts 2317 * as zero since it is only checked afterwards. 2318 * 2319 * If "use_coincidence" is set, then we treat coincidence edges as local edges. 2320 * Otherwise, we ignore them. 2321 */ 2322 static int edge_multiplicity(struct isl_sched_edge *edge, int use_coincidence) 2323 { 2324 if (isl_sched_edge_is_proximity(edge) || 2325 force_zero(edge, use_coincidence)) 2326 return 2; 2327 if (is_validity(edge)) 2328 return 1; 2329 return 0; 2330 } 2331 2332 /* How many times should the constraints in "edge" be counted 2333 * as a parametric intra-node constraint? 2334 * 2335 * Only proximity edges that are not forced zero need 2336 * coefficient constraints that include coefficients for parameters. 2337 * If the edge is also a validity edge, then only 2338 * an upper bound is introduced. Otherwise, both lower and upper bounds 2339 * are introduced. 2340 */ 2341 static int parametric_intra_edge_multiplicity(struct isl_sched_edge *edge, 2342 int use_coincidence) 2343 { 2344 if (edge->src != edge->dst) 2345 return 0; 2346 if (!isl_sched_edge_is_proximity(edge)) 2347 return 0; 2348 if (force_zero(edge, use_coincidence)) 2349 return 0; 2350 if (is_validity(edge)) 2351 return 1; 2352 else 2353 return 2; 2354 } 2355 2356 /* Add "f" times the number of equality and inequality constraints of "bset" 2357 * to "n_eq" and "n_ineq" and free "bset". 2358 */ 2359 static isl_stat update_count(__isl_take isl_basic_set *bset, 2360 int f, int *n_eq, int *n_ineq) 2361 { 2362 isl_size eq, ineq; 2363 2364 eq = isl_basic_set_n_equality(bset); 2365 ineq = isl_basic_set_n_inequality(bset); 2366 isl_basic_set_free(bset); 2367 2368 if (eq < 0 || ineq < 0) 2369 return isl_stat_error; 2370 2371 *n_eq += eq; 2372 *n_ineq += ineq; 2373 2374 return isl_stat_ok; 2375 } 2376 2377 /* Count the number of equality and inequality constraints 2378 * that will be added for the given map. 2379 * 2380 * The edges that require parameter coefficients are counted separately. 2381 * 2382 * "use_coincidence" is set if we should take into account coincidence edges. 2383 */ 2384 static isl_stat count_map_constraints(struct isl_sched_graph *graph, 2385 struct isl_sched_edge *edge, __isl_take isl_map *map, 2386 int *n_eq, int *n_ineq, int use_coincidence) 2387 { 2388 isl_map *copy; 2389 isl_basic_set *coef; 2390 int f = edge_multiplicity(edge, use_coincidence); 2391 int fp = parametric_intra_edge_multiplicity(edge, use_coincidence); 2392 2393 if (f == 0) { 2394 isl_map_free(map); 2395 return isl_stat_ok; 2396 } 2397 2398 if (edge->src != edge->dst) { 2399 coef = inter_coefficients(graph, edge, map); 2400 return update_count(coef, f, n_eq, n_ineq); 2401 } 2402 2403 if (fp > 0) { 2404 copy = isl_map_copy(map); 2405 coef = intra_coefficients(graph, edge->src, copy, 1); 2406 if (update_count(coef, fp, n_eq, n_ineq) < 0) 2407 goto error; 2408 } 2409 2410 if (f > fp) { 2411 copy = isl_map_copy(map); 2412 coef = intra_coefficients(graph, edge->src, copy, 0); 2413 if (update_count(coef, f - fp, n_eq, n_ineq) < 0) 2414 goto error; 2415 } 2416 2417 isl_map_free(map); 2418 return isl_stat_ok; 2419 error: 2420 isl_map_free(map); 2421 return isl_stat_error; 2422 } 2423 2424 /* Count the number of equality and inequality constraints 2425 * that will be added to the main lp problem. 2426 * We count as follows 2427 * validity -> 1 (>= 0) 2428 * validity+proximity -> 2 (>= 0 and upper bound) 2429 * proximity -> 2 (lower and upper bound) 2430 * local(+any) -> 2 (>= 0 and <= 0) 2431 * 2432 * If "use_coincidence" is set, then we treat coincidence edges as local edges. 2433 * Otherwise, we ignore them. 2434 */ 2435 static int count_constraints(struct isl_sched_graph *graph, 2436 int *n_eq, int *n_ineq, int use_coincidence) 2437 { 2438 int i; 2439 2440 *n_eq = *n_ineq = 0; 2441 for (i = 0; i < graph->n_edge; ++i) { 2442 struct isl_sched_edge *edge = &graph->edge[i]; 2443 isl_map *map = isl_map_copy(edge->map); 2444 2445 if (count_map_constraints(graph, edge, map, n_eq, n_ineq, 2446 use_coincidence) < 0) 2447 return -1; 2448 } 2449 2450 return 0; 2451 } 2452 2453 /* Count the number of constraints that will be added by 2454 * add_bound_constant_constraints to bound the values of the constant terms 2455 * and increment *n_eq and *n_ineq accordingly. 2456 * 2457 * In practice, add_bound_constant_constraints only adds inequalities. 2458 */ 2459 static isl_stat count_bound_constant_constraints(isl_ctx *ctx, 2460 struct isl_sched_graph *graph, int *n_eq, int *n_ineq) 2461 { 2462 if (isl_options_get_schedule_max_constant_term(ctx) == -1) 2463 return isl_stat_ok; 2464 2465 *n_ineq += graph->n; 2466 2467 return isl_stat_ok; 2468 } 2469 2470 /* Add constraints to bound the values of the constant terms in the schedule, 2471 * if requested by the user. 2472 * 2473 * The maximal value of the constant terms is defined by the option 2474 * "schedule_max_constant_term". 2475 */ 2476 static isl_stat add_bound_constant_constraints(isl_ctx *ctx, 2477 struct isl_sched_graph *graph) 2478 { 2479 int i, k; 2480 int max; 2481 isl_size total; 2482 2483 max = isl_options_get_schedule_max_constant_term(ctx); 2484 if (max == -1) 2485 return isl_stat_ok; 2486 2487 total = isl_basic_set_dim(graph->lp, isl_dim_set); 2488 if (total < 0) 2489 return isl_stat_error; 2490 2491 for (i = 0; i < graph->n; ++i) { 2492 struct isl_sched_node *node = &graph->node[i]; 2493 int pos; 2494 2495 k = isl_basic_set_alloc_inequality(graph->lp); 2496 if (k < 0) 2497 return isl_stat_error; 2498 isl_seq_clr(graph->lp->ineq[k], 1 + total); 2499 pos = node_cst_coef_offset(node); 2500 isl_int_set_si(graph->lp->ineq[k][1 + pos], -1); 2501 isl_int_set_si(graph->lp->ineq[k][0], max); 2502 } 2503 2504 return isl_stat_ok; 2505 } 2506 2507 /* Count the number of constraints that will be added by 2508 * add_bound_coefficient_constraints and increment *n_eq and *n_ineq 2509 * accordingly. 2510 * 2511 * In practice, add_bound_coefficient_constraints only adds inequalities. 2512 */ 2513 static int count_bound_coefficient_constraints(isl_ctx *ctx, 2514 struct isl_sched_graph *graph, int *n_eq, int *n_ineq) 2515 { 2516 int i; 2517 2518 if (isl_options_get_schedule_max_coefficient(ctx) == -1 && 2519 !isl_options_get_schedule_treat_coalescing(ctx)) 2520 return 0; 2521 2522 for (i = 0; i < graph->n; ++i) 2523 *n_ineq += graph->node[i].nparam + 2 * graph->node[i].nvar; 2524 2525 return 0; 2526 } 2527 2528 /* Add constraints to graph->lp that bound the values of 2529 * the parameter schedule coefficients of "node" to "max" and 2530 * the variable schedule coefficients to the corresponding entry 2531 * in node->max. 2532 * In either case, a negative value means that no bound needs to be imposed. 2533 * 2534 * For parameter coefficients, this amounts to adding a constraint 2535 * 2536 * c_n <= max 2537 * 2538 * i.e., 2539 * 2540 * -c_n + max >= 0 2541 * 2542 * The variables coefficients are, however, not represented directly. 2543 * Instead, the variable coefficients c_x are written as differences 2544 * c_x = c_x^+ - c_x^-. 2545 * That is, 2546 * 2547 * -max_i <= c_x_i <= max_i 2548 * 2549 * is encoded as 2550 * 2551 * -max_i <= c_x_i^+ - c_x_i^- <= max_i 2552 * 2553 * or 2554 * 2555 * -(c_x_i^+ - c_x_i^-) + max_i >= 0 2556 * c_x_i^+ - c_x_i^- + max_i >= 0 2557 */ 2558 static isl_stat node_add_coefficient_constraints(isl_ctx *ctx, 2559 struct isl_sched_graph *graph, struct isl_sched_node *node, int max) 2560 { 2561 int i, j, k; 2562 isl_size total; 2563 isl_vec *ineq; 2564 2565 total = isl_basic_set_dim(graph->lp, isl_dim_set); 2566 if (total < 0) 2567 return isl_stat_error; 2568 2569 for (j = 0; j < node->nparam; ++j) { 2570 int dim; 2571 2572 if (max < 0) 2573 continue; 2574 2575 k = isl_basic_set_alloc_inequality(graph->lp); 2576 if (k < 0) 2577 return isl_stat_error; 2578 dim = 1 + node_par_coef_offset(node) + j; 2579 isl_seq_clr(graph->lp->ineq[k], 1 + total); 2580 isl_int_set_si(graph->lp->ineq[k][dim], -1); 2581 isl_int_set_si(graph->lp->ineq[k][0], max); 2582 } 2583 2584 ineq = isl_vec_alloc(ctx, 1 + total); 2585 ineq = isl_vec_clr(ineq); 2586 if (!ineq) 2587 return isl_stat_error; 2588 for (i = 0; i < node->nvar; ++i) { 2589 int pos = 1 + node_var_coef_pos(node, i); 2590 2591 if (isl_int_is_neg(node->max->el[i])) 2592 continue; 2593 2594 isl_int_set_si(ineq->el[pos], 1); 2595 isl_int_set_si(ineq->el[pos + 1], -1); 2596 isl_int_set(ineq->el[0], node->max->el[i]); 2597 2598 k = isl_basic_set_alloc_inequality(graph->lp); 2599 if (k < 0) 2600 goto error; 2601 isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total); 2602 2603 isl_seq_neg(ineq->el + pos, ineq->el + pos, 2); 2604 k = isl_basic_set_alloc_inequality(graph->lp); 2605 if (k < 0) 2606 goto error; 2607 isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total); 2608 2609 isl_seq_clr(ineq->el + pos, 2); 2610 } 2611 isl_vec_free(ineq); 2612 2613 return isl_stat_ok; 2614 error: 2615 isl_vec_free(ineq); 2616 return isl_stat_error; 2617 } 2618 2619 /* Add constraints that bound the values of the variable and parameter 2620 * coefficients of the schedule. 2621 * 2622 * The maximal value of the coefficients is defined by the option 2623 * 'schedule_max_coefficient' and the entries in node->max. 2624 * These latter entries are only set if either the schedule_max_coefficient 2625 * option or the schedule_treat_coalescing option is set. 2626 */ 2627 static isl_stat add_bound_coefficient_constraints(isl_ctx *ctx, 2628 struct isl_sched_graph *graph) 2629 { 2630 int i; 2631 int max; 2632 2633 max = isl_options_get_schedule_max_coefficient(ctx); 2634 2635 if (max == -1 && !isl_options_get_schedule_treat_coalescing(ctx)) 2636 return isl_stat_ok; 2637 2638 for (i = 0; i < graph->n; ++i) { 2639 struct isl_sched_node *node = &graph->node[i]; 2640 2641 if (node_add_coefficient_constraints(ctx, graph, node, max) < 0) 2642 return isl_stat_error; 2643 } 2644 2645 return isl_stat_ok; 2646 } 2647 2648 /* Add a constraint to graph->lp that equates the value at position 2649 * "sum_pos" to the sum of the "n" values starting at "first". 2650 */ 2651 static isl_stat add_sum_constraint(struct isl_sched_graph *graph, 2652 int sum_pos, int first, int n) 2653 { 2654 int i, k; 2655 isl_size total; 2656 2657 total = isl_basic_set_dim(graph->lp, isl_dim_set); 2658 if (total < 0) 2659 return isl_stat_error; 2660 2661 k = isl_basic_set_alloc_equality(graph->lp); 2662 if (k < 0) 2663 return isl_stat_error; 2664 isl_seq_clr(graph->lp->eq[k], 1 + total); 2665 isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1); 2666 for (i = 0; i < n; ++i) 2667 isl_int_set_si(graph->lp->eq[k][1 + first + i], 1); 2668 2669 return isl_stat_ok; 2670 } 2671 2672 /* Add a constraint to graph->lp that equates the value at position 2673 * "sum_pos" to the sum of the parameter coefficients of all nodes. 2674 */ 2675 static isl_stat add_param_sum_constraint(struct isl_sched_graph *graph, 2676 int sum_pos) 2677 { 2678 int i, j, k; 2679 isl_size total; 2680 2681 total = isl_basic_set_dim(graph->lp, isl_dim_set); 2682 if (total < 0) 2683 return isl_stat_error; 2684 2685 k = isl_basic_set_alloc_equality(graph->lp); 2686 if (k < 0) 2687 return isl_stat_error; 2688 isl_seq_clr(graph->lp->eq[k], 1 + total); 2689 isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1); 2690 for (i = 0; i < graph->n; ++i) { 2691 int pos = 1 + node_par_coef_offset(&graph->node[i]); 2692 2693 for (j = 0; j < graph->node[i].nparam; ++j) 2694 isl_int_set_si(graph->lp->eq[k][pos + j], 1); 2695 } 2696 2697 return isl_stat_ok; 2698 } 2699 2700 /* Add a constraint to graph->lp that equates the value at position 2701 * "sum_pos" to the sum of the variable coefficients of all nodes. 2702 */ 2703 static isl_stat add_var_sum_constraint(struct isl_sched_graph *graph, 2704 int sum_pos) 2705 { 2706 int i, j, k; 2707 isl_size total; 2708 2709 total = isl_basic_set_dim(graph->lp, isl_dim_set); 2710 if (total < 0) 2711 return isl_stat_error; 2712 2713 k = isl_basic_set_alloc_equality(graph->lp); 2714 if (k < 0) 2715 return isl_stat_error; 2716 isl_seq_clr(graph->lp->eq[k], 1 + total); 2717 isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1); 2718 for (i = 0; i < graph->n; ++i) { 2719 struct isl_sched_node *node = &graph->node[i]; 2720 int pos = 1 + node_var_coef_offset(node); 2721 2722 for (j = 0; j < 2 * node->nvar; ++j) 2723 isl_int_set_si(graph->lp->eq[k][pos + j], 1); 2724 } 2725 2726 return isl_stat_ok; 2727 } 2728 2729 /* Construct an ILP problem for finding schedule coefficients 2730 * that result in non-negative, but small dependence distances 2731 * over all dependences. 2732 * In particular, the dependence distances over proximity edges 2733 * are bounded by m_0 + m_n n and we compute schedule coefficients 2734 * with small values (preferably zero) of m_n and m_0. 2735 * 2736 * All variables of the ILP are non-negative. The actual coefficients 2737 * may be negative, so each coefficient is represented as the difference 2738 * of two non-negative variables. The negative part always appears 2739 * immediately before the positive part. 2740 * Other than that, the variables have the following order 2741 * 2742 * - sum of positive and negative parts of m_n coefficients 2743 * - m_0 2744 * - sum of all c_n coefficients 2745 * (unconstrained when computing non-parametric schedules) 2746 * - sum of positive and negative parts of all c_x coefficients 2747 * - positive and negative parts of m_n coefficients 2748 * - for each node 2749 * - positive and negative parts of c_i_x, in opposite order 2750 * - c_i_n (if parametric) 2751 * - c_i_0 2752 * 2753 * The constraints are those from the edges plus two or three equalities 2754 * to express the sums. 2755 * 2756 * If "use_coincidence" is set, then we treat coincidence edges as local edges. 2757 * Otherwise, we ignore them. 2758 */ 2759 static isl_stat setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph, 2760 int use_coincidence) 2761 { 2762 int i; 2763 isl_size nparam; 2764 unsigned total; 2765 isl_space *space; 2766 int parametric; 2767 int param_pos; 2768 int n_eq, n_ineq; 2769 2770 parametric = ctx->opt->schedule_parametric; 2771 nparam = isl_space_dim(graph->node[0].space, isl_dim_param); 2772 if (nparam < 0) 2773 return isl_stat_error; 2774 param_pos = 4; 2775 total = param_pos + 2 * nparam; 2776 for (i = 0; i < graph->n; ++i) { 2777 struct isl_sched_node *node = &graph->node[graph->sorted[i]]; 2778 if (isl_sched_node_update_vmap(node) < 0) 2779 return isl_stat_error; 2780 node->start = total; 2781 total += 1 + node->nparam + 2 * node->nvar; 2782 } 2783 2784 if (count_constraints(graph, &n_eq, &n_ineq, use_coincidence) < 0) 2785 return isl_stat_error; 2786 if (count_bound_constant_constraints(ctx, graph, &n_eq, &n_ineq) < 0) 2787 return isl_stat_error; 2788 if (count_bound_coefficient_constraints(ctx, graph, &n_eq, &n_ineq) < 0) 2789 return isl_stat_error; 2790 2791 space = isl_space_set_alloc(ctx, 0, total); 2792 isl_basic_set_free(graph->lp); 2793 n_eq += 2 + parametric; 2794 2795 graph->lp = isl_basic_set_alloc_space(space, 0, n_eq, n_ineq); 2796 2797 if (add_sum_constraint(graph, 0, param_pos, 2 * nparam) < 0) 2798 return isl_stat_error; 2799 if (parametric && add_param_sum_constraint(graph, 2) < 0) 2800 return isl_stat_error; 2801 if (add_var_sum_constraint(graph, 3) < 0) 2802 return isl_stat_error; 2803 if (add_bound_constant_constraints(ctx, graph) < 0) 2804 return isl_stat_error; 2805 if (add_bound_coefficient_constraints(ctx, graph) < 0) 2806 return isl_stat_error; 2807 if (add_all_validity_constraints(graph, use_coincidence) < 0) 2808 return isl_stat_error; 2809 if (add_all_proximity_constraints(graph, use_coincidence) < 0) 2810 return isl_stat_error; 2811 2812 return isl_stat_ok; 2813 } 2814 2815 /* Analyze the conflicting constraint found by 2816 * isl_tab_basic_set_non_trivial_lexmin. If it corresponds to the validity 2817 * constraint of one of the edges between distinct nodes, living, moreover 2818 * in distinct SCCs, then record the source and sink SCC as this may 2819 * be a good place to cut between SCCs. 2820 */ 2821 static int check_conflict(int con, void *user) 2822 { 2823 int i; 2824 struct isl_sched_graph *graph = user; 2825 2826 if (graph->src_scc >= 0) 2827 return 0; 2828 2829 con -= graph->lp->n_eq; 2830 2831 if (con >= graph->lp->n_ineq) 2832 return 0; 2833 2834 for (i = 0; i < graph->n_edge; ++i) { 2835 if (!is_validity(&graph->edge[i])) 2836 continue; 2837 if (graph->edge[i].src == graph->edge[i].dst) 2838 continue; 2839 if (graph->edge[i].src->scc == graph->edge[i].dst->scc) 2840 continue; 2841 if (graph->edge[i].start > con) 2842 continue; 2843 if (graph->edge[i].end <= con) 2844 continue; 2845 graph->src_scc = graph->edge[i].src->scc; 2846 graph->dst_scc = graph->edge[i].dst->scc; 2847 } 2848 2849 return 0; 2850 } 2851 2852 /* Check whether the next schedule row of the given node needs to be 2853 * non-trivial. Lower-dimensional domains may have some trivial rows, 2854 * but as soon as the number of remaining required non-trivial rows 2855 * is as large as the number or remaining rows to be computed, 2856 * all remaining rows need to be non-trivial. 2857 */ 2858 static int needs_row(struct isl_sched_graph *graph, struct isl_sched_node *node) 2859 { 2860 return node->nvar - node->rank >= graph->maxvar - graph->n_row; 2861 } 2862 2863 /* Construct a non-triviality region with triviality directions 2864 * corresponding to the rows of "indep". 2865 * The rows of "indep" are expressed in terms of the schedule coefficients c_i, 2866 * while the triviality directions are expressed in terms of 2867 * pairs of non-negative variables c^+_i - c^-_i, with c^-_i appearing 2868 * before c^+_i. Furthermore, 2869 * the pairs of non-negative variables representing the coefficients 2870 * are stored in the opposite order. 2871 */ 2872 static __isl_give isl_mat *construct_trivial(__isl_keep isl_mat *indep) 2873 { 2874 isl_ctx *ctx; 2875 isl_mat *mat; 2876 int i, j; 2877 isl_size n, n_var; 2878 2879 n = isl_mat_rows(indep); 2880 n_var = isl_mat_cols(indep); 2881 if (n < 0 || n_var < 0) 2882 return NULL; 2883 2884 ctx = isl_mat_get_ctx(indep); 2885 mat = isl_mat_alloc(ctx, n, 2 * n_var); 2886 if (!mat) 2887 return NULL; 2888 for (i = 0; i < n; ++i) { 2889 for (j = 0; j < n_var; ++j) { 2890 int nj = n_var - 1 - j; 2891 isl_int_neg(mat->row[i][2 * nj], indep->row[i][j]); 2892 isl_int_set(mat->row[i][2 * nj + 1], indep->row[i][j]); 2893 } 2894 } 2895 2896 return mat; 2897 } 2898 2899 /* Solve the ILP problem constructed in setup_lp. 2900 * For each node such that all the remaining rows of its schedule 2901 * need to be non-trivial, we construct a non-triviality region. 2902 * This region imposes that the next row is independent of previous rows. 2903 * In particular, the non-triviality region enforces that at least 2904 * one of the linear combinations in the rows of node->indep is non-zero. 2905 */ 2906 static __isl_give isl_vec *solve_lp(isl_ctx *ctx, struct isl_sched_graph *graph) 2907 { 2908 int i; 2909 isl_vec *sol; 2910 isl_basic_set *lp; 2911 2912 for (i = 0; i < graph->n; ++i) { 2913 struct isl_sched_node *node = &graph->node[i]; 2914 isl_mat *trivial; 2915 2916 graph->region[i].pos = node_var_coef_offset(node); 2917 if (needs_row(graph, node)) 2918 trivial = construct_trivial(node->indep); 2919 else 2920 trivial = isl_mat_zero(ctx, 0, 0); 2921 graph->region[i].trivial = trivial; 2922 } 2923 lp = isl_basic_set_copy(graph->lp); 2924 sol = isl_tab_basic_set_non_trivial_lexmin(lp, 2, graph->n, 2925 graph->region, &check_conflict, graph); 2926 for (i = 0; i < graph->n; ++i) 2927 isl_mat_free(graph->region[i].trivial); 2928 return sol; 2929 } 2930 2931 /* Extract the coefficients for the variables of "node" from "sol". 2932 * 2933 * Each schedule coefficient c_i_x is represented as the difference 2934 * between two non-negative variables c_i_x^+ - c_i_x^-. 2935 * The c_i_x^- appear before their c_i_x^+ counterpart. 2936 * Furthermore, the order of these pairs is the opposite of that 2937 * of the corresponding coefficients. 2938 * 2939 * Return c_i_x = c_i_x^+ - c_i_x^- 2940 */ 2941 static __isl_give isl_vec *extract_var_coef(struct isl_sched_node *node, 2942 __isl_keep isl_vec *sol) 2943 { 2944 int i; 2945 int pos; 2946 isl_vec *csol; 2947 2948 if (!sol) 2949 return NULL; 2950 csol = isl_vec_alloc(isl_vec_get_ctx(sol), node->nvar); 2951 if (!csol) 2952 return NULL; 2953 2954 pos = 1 + node_var_coef_offset(node); 2955 for (i = 0; i < node->nvar; ++i) 2956 isl_int_sub(csol->el[node->nvar - 1 - i], 2957 sol->el[pos + 2 * i + 1], sol->el[pos + 2 * i]); 2958 2959 return csol; 2960 } 2961 2962 /* Update the schedules of all nodes based on the given solution 2963 * of the LP problem. 2964 * The new row is added to the current band. 2965 * All possibly negative coefficients are encoded as a difference 2966 * of two non-negative variables, so we need to perform the subtraction 2967 * here. 2968 * 2969 * If coincident is set, then the caller guarantees that the new 2970 * row satisfies the coincidence constraints. 2971 */ 2972 static int update_schedule(struct isl_sched_graph *graph, 2973 __isl_take isl_vec *sol, int coincident) 2974 { 2975 int i, j; 2976 isl_vec *csol = NULL; 2977 2978 if (!sol) 2979 goto error; 2980 if (sol->size == 0) 2981 isl_die(sol->ctx, isl_error_internal, 2982 "no solution found", goto error); 2983 if (graph->n_total_row >= graph->max_row) 2984 isl_die(sol->ctx, isl_error_internal, 2985 "too many schedule rows", goto error); 2986 2987 for (i = 0; i < graph->n; ++i) { 2988 struct isl_sched_node *node = &graph->node[i]; 2989 int pos; 2990 isl_size row = isl_mat_rows(node->sched); 2991 2992 isl_vec_free(csol); 2993 csol = extract_var_coef(node, sol); 2994 if (row < 0 || !csol) 2995 goto error; 2996 2997 isl_map_free(node->sched_map); 2998 node->sched_map = NULL; 2999 node->sched = isl_mat_add_rows(node->sched, 1); 3000 if (!node->sched) 3001 goto error; 3002 pos = node_cst_coef_offset(node); 3003 node->sched = isl_mat_set_element(node->sched, 3004 row, 0, sol->el[1 + pos]); 3005 pos = node_par_coef_offset(node); 3006 for (j = 0; j < node->nparam; ++j) 3007 node->sched = isl_mat_set_element(node->sched, 3008 row, 1 + j, sol->el[1 + pos + j]); 3009 for (j = 0; j < node->nvar; ++j) 3010 node->sched = isl_mat_set_element(node->sched, 3011 row, 1 + node->nparam + j, csol->el[j]); 3012 node->coincident[graph->n_total_row] = coincident; 3013 } 3014 isl_vec_free(sol); 3015 isl_vec_free(csol); 3016 3017 graph->n_row++; 3018 graph->n_total_row++; 3019 3020 return 0; 3021 error: 3022 isl_vec_free(sol); 3023 isl_vec_free(csol); 3024 return -1; 3025 } 3026 3027 /* Convert row "row" of node->sched into an isl_aff living in "ls" 3028 * and return this isl_aff. 3029 */ 3030 static __isl_give isl_aff *extract_schedule_row(__isl_take isl_local_space *ls, 3031 struct isl_sched_node *node, int row) 3032 { 3033 int j; 3034 isl_int v; 3035 isl_aff *aff; 3036 3037 isl_int_init(v); 3038 3039 aff = isl_aff_zero_on_domain(ls); 3040 if (isl_mat_get_element(node->sched, row, 0, &v) < 0) 3041 goto error; 3042 aff = isl_aff_set_constant(aff, v); 3043 for (j = 0; j < node->nparam; ++j) { 3044 if (isl_mat_get_element(node->sched, row, 1 + j, &v) < 0) 3045 goto error; 3046 aff = isl_aff_set_coefficient(aff, isl_dim_param, j, v); 3047 } 3048 for (j = 0; j < node->nvar; ++j) { 3049 if (isl_mat_get_element(node->sched, row, 3050 1 + node->nparam + j, &v) < 0) 3051 goto error; 3052 aff = isl_aff_set_coefficient(aff, isl_dim_in, j, v); 3053 } 3054 3055 isl_int_clear(v); 3056 3057 return aff; 3058 error: 3059 isl_int_clear(v); 3060 isl_aff_free(aff); 3061 return NULL; 3062 } 3063 3064 /* Convert the "n" rows starting at "first" of node->sched into a multi_aff 3065 * and return this multi_aff. 3066 * 3067 * The result is defined over the uncompressed node domain. 3068 */ 3069 __isl_give isl_multi_aff *isl_sched_node_extract_partial_schedule_multi_aff( 3070 struct isl_sched_node *node, int first, int n) 3071 { 3072 int i; 3073 isl_space *space; 3074 isl_local_space *ls; 3075 isl_aff *aff; 3076 isl_multi_aff *ma; 3077 isl_size nrow; 3078 3079 if (!node) 3080 return NULL; 3081 nrow = isl_mat_rows(node->sched); 3082 if (nrow < 0) 3083 return NULL; 3084 if (node->compressed) 3085 space = isl_pw_multi_aff_get_domain_space(node->decompress); 3086 else 3087 space = isl_space_copy(node->space); 3088 ls = isl_local_space_from_space(isl_space_copy(space)); 3089 space = isl_space_from_domain(space); 3090 space = isl_space_add_dims(space, isl_dim_out, n); 3091 ma = isl_multi_aff_zero(space); 3092 3093 for (i = first; i < first + n; ++i) { 3094 aff = extract_schedule_row(isl_local_space_copy(ls), node, i); 3095 ma = isl_multi_aff_set_aff(ma, i - first, aff); 3096 } 3097 3098 isl_local_space_free(ls); 3099 3100 if (node->compressed) 3101 ma = isl_multi_aff_pullback_multi_aff(ma, 3102 isl_multi_aff_copy(node->compress)); 3103 3104 return ma; 3105 } 3106 3107 /* Convert node->sched into a multi_aff and return this multi_aff. 3108 * 3109 * The result is defined over the uncompressed node domain. 3110 */ 3111 static __isl_give isl_multi_aff *node_extract_schedule_multi_aff( 3112 struct isl_sched_node *node) 3113 { 3114 isl_size nrow; 3115 3116 nrow = isl_mat_rows(node->sched); 3117 if (nrow < 0) 3118 return NULL; 3119 return isl_sched_node_extract_partial_schedule_multi_aff(node, 0, nrow); 3120 } 3121 3122 /* Convert node->sched into a map and return this map. 3123 * 3124 * The result is cached in node->sched_map, which needs to be released 3125 * whenever node->sched is updated. 3126 * It is defined over the uncompressed node domain. 3127 */ 3128 static __isl_give isl_map *node_extract_schedule(struct isl_sched_node *node) 3129 { 3130 if (!node->sched_map) { 3131 isl_multi_aff *ma; 3132 3133 ma = node_extract_schedule_multi_aff(node); 3134 node->sched_map = isl_map_from_multi_aff(ma); 3135 } 3136 3137 return isl_map_copy(node->sched_map); 3138 } 3139 3140 /* Construct a map that can be used to update a dependence relation 3141 * based on the current schedule. 3142 * That is, construct a map expressing that source and sink 3143 * are executed within the same iteration of the current schedule. 3144 * This map can then be intersected with the dependence relation. 3145 * This is not the most efficient way, but this shouldn't be a critical 3146 * operation. 3147 */ 3148 static __isl_give isl_map *specializer(struct isl_sched_node *src, 3149 struct isl_sched_node *dst) 3150 { 3151 isl_map *src_sched, *dst_sched; 3152 3153 src_sched = node_extract_schedule(src); 3154 dst_sched = node_extract_schedule(dst); 3155 return isl_map_apply_range(src_sched, isl_map_reverse(dst_sched)); 3156 } 3157 3158 /* Intersect the domains of the nested relations in domain and range 3159 * of "umap" with "map". 3160 */ 3161 static __isl_give isl_union_map *intersect_domains( 3162 __isl_take isl_union_map *umap, __isl_keep isl_map *map) 3163 { 3164 isl_union_set *uset; 3165 3166 umap = isl_union_map_zip(umap); 3167 uset = isl_union_set_from_set(isl_map_wrap(isl_map_copy(map))); 3168 umap = isl_union_map_intersect_domain(umap, uset); 3169 umap = isl_union_map_zip(umap); 3170 return umap; 3171 } 3172 3173 /* Update the dependence relation of the given edge based 3174 * on the current schedule. 3175 * If the dependence is carried completely by the current schedule, then 3176 * it is removed from the edge_tables. It is kept in the list of edges 3177 * as otherwise all edge_tables would have to be recomputed. 3178 * 3179 * If the edge is of a type that can appear multiple times 3180 * between the same pair of nodes, then it is added to 3181 * the edge table (again). This prevents the situation 3182 * where none of these edges is referenced from the edge table 3183 * because the one that was referenced turned out to be empty and 3184 * was therefore removed from the table. 3185 */ 3186 static isl_stat update_edge(isl_ctx *ctx, struct isl_sched_graph *graph, 3187 struct isl_sched_edge *edge) 3188 { 3189 int empty; 3190 isl_map *id; 3191 3192 id = specializer(edge->src, edge->dst); 3193 edge->map = isl_map_intersect(edge->map, isl_map_copy(id)); 3194 if (!edge->map) 3195 goto error; 3196 3197 if (edge->tagged_condition) { 3198 edge->tagged_condition = 3199 intersect_domains(edge->tagged_condition, id); 3200 if (!edge->tagged_condition) 3201 goto error; 3202 } 3203 if (edge->tagged_validity) { 3204 edge->tagged_validity = 3205 intersect_domains(edge->tagged_validity, id); 3206 if (!edge->tagged_validity) 3207 goto error; 3208 } 3209 3210 empty = isl_map_plain_is_empty(edge->map); 3211 if (empty < 0) 3212 goto error; 3213 if (empty) { 3214 if (graph_remove_edge(graph, edge) < 0) 3215 goto error; 3216 } else if (is_multi_edge_type(edge)) { 3217 if (graph_edge_tables_add(ctx, graph, edge) < 0) 3218 goto error; 3219 } 3220 3221 isl_map_free(id); 3222 return isl_stat_ok; 3223 error: 3224 isl_map_free(id); 3225 return isl_stat_error; 3226 } 3227 3228 /* Does the domain of "umap" intersect "uset"? 3229 */ 3230 static int domain_intersects(__isl_keep isl_union_map *umap, 3231 __isl_keep isl_union_set *uset) 3232 { 3233 int empty; 3234 3235 umap = isl_union_map_copy(umap); 3236 umap = isl_union_map_intersect_domain(umap, isl_union_set_copy(uset)); 3237 empty = isl_union_map_is_empty(umap); 3238 isl_union_map_free(umap); 3239 3240 return empty < 0 ? -1 : !empty; 3241 } 3242 3243 /* Does the range of "umap" intersect "uset"? 3244 */ 3245 static int range_intersects(__isl_keep isl_union_map *umap, 3246 __isl_keep isl_union_set *uset) 3247 { 3248 int empty; 3249 3250 umap = isl_union_map_copy(umap); 3251 umap = isl_union_map_intersect_range(umap, isl_union_set_copy(uset)); 3252 empty = isl_union_map_is_empty(umap); 3253 isl_union_map_free(umap); 3254 3255 return empty < 0 ? -1 : !empty; 3256 } 3257 3258 /* Are the condition dependences of "edge" local with respect to 3259 * the current schedule? 3260 * 3261 * That is, are domain and range of the condition dependences mapped 3262 * to the same point? 3263 * 3264 * In other words, is the condition false? 3265 */ 3266 static int is_condition_false(struct isl_sched_edge *edge) 3267 { 3268 isl_union_map *umap; 3269 isl_map *map, *sched, *test; 3270 int empty, local; 3271 3272 empty = isl_union_map_is_empty(edge->tagged_condition); 3273 if (empty < 0 || empty) 3274 return empty; 3275 3276 umap = isl_union_map_copy(edge->tagged_condition); 3277 umap = isl_union_map_zip(umap); 3278 umap = isl_union_set_unwrap(isl_union_map_domain(umap)); 3279 map = isl_map_from_union_map(umap); 3280 3281 sched = node_extract_schedule(edge->src); 3282 map = isl_map_apply_domain(map, sched); 3283 sched = node_extract_schedule(edge->dst); 3284 map = isl_map_apply_range(map, sched); 3285 3286 test = isl_map_identity(isl_map_get_space(map)); 3287 local = isl_map_is_subset(map, test); 3288 isl_map_free(map); 3289 isl_map_free(test); 3290 3291 return local; 3292 } 3293 3294 /* For each conditional validity constraint that is adjacent 3295 * to a condition with domain in condition_source or range in condition_sink, 3296 * turn it into an unconditional validity constraint. 3297 */ 3298 static int unconditionalize_adjacent_validity(struct isl_sched_graph *graph, 3299 __isl_take isl_union_set *condition_source, 3300 __isl_take isl_union_set *condition_sink) 3301 { 3302 int i; 3303 3304 condition_source = isl_union_set_coalesce(condition_source); 3305 condition_sink = isl_union_set_coalesce(condition_sink); 3306 3307 for (i = 0; i < graph->n_edge; ++i) { 3308 int adjacent; 3309 isl_union_map *validity; 3310 3311 if (!isl_sched_edge_is_conditional_validity(&graph->edge[i])) 3312 continue; 3313 if (is_validity(&graph->edge[i])) 3314 continue; 3315 3316 validity = graph->edge[i].tagged_validity; 3317 adjacent = domain_intersects(validity, condition_sink); 3318 if (adjacent >= 0 && !adjacent) 3319 adjacent = range_intersects(validity, condition_source); 3320 if (adjacent < 0) 3321 goto error; 3322 if (!adjacent) 3323 continue; 3324 3325 set_validity(&graph->edge[i]); 3326 } 3327 3328 isl_union_set_free(condition_source); 3329 isl_union_set_free(condition_sink); 3330 return 0; 3331 error: 3332 isl_union_set_free(condition_source); 3333 isl_union_set_free(condition_sink); 3334 return -1; 3335 } 3336 3337 /* Update the dependence relations of all edges based on the current schedule 3338 * and enforce conditional validity constraints that are adjacent 3339 * to satisfied condition constraints. 3340 * 3341 * First check if any of the condition constraints are satisfied 3342 * (i.e., not local to the outer schedule) and keep track of 3343 * their domain and range. 3344 * Then update all dependence relations (which removes the non-local 3345 * constraints). 3346 * Finally, if any condition constraints turned out to be satisfied, 3347 * then turn all adjacent conditional validity constraints into 3348 * unconditional validity constraints. 3349 */ 3350 static int update_edges(isl_ctx *ctx, struct isl_sched_graph *graph) 3351 { 3352 int i; 3353 int any = 0; 3354 isl_union_set *source, *sink; 3355 3356 source = isl_union_set_empty(isl_space_params_alloc(ctx, 0)); 3357 sink = isl_union_set_empty(isl_space_params_alloc(ctx, 0)); 3358 for (i = 0; i < graph->n_edge; ++i) { 3359 int local; 3360 isl_union_set *uset; 3361 isl_union_map *umap; 3362 3363 if (!isl_sched_edge_is_condition(&graph->edge[i])) 3364 continue; 3365 if (is_local(&graph->edge[i])) 3366 continue; 3367 local = is_condition_false(&graph->edge[i]); 3368 if (local < 0) 3369 goto error; 3370 if (local) 3371 continue; 3372 3373 any = 1; 3374 3375 umap = isl_union_map_copy(graph->edge[i].tagged_condition); 3376 uset = isl_union_map_domain(umap); 3377 source = isl_union_set_union(source, uset); 3378 3379 umap = isl_union_map_copy(graph->edge[i].tagged_condition); 3380 uset = isl_union_map_range(umap); 3381 sink = isl_union_set_union(sink, uset); 3382 } 3383 3384 for (i = 0; i < graph->n_edge; ++i) { 3385 if (update_edge(ctx, graph, &graph->edge[i]) < 0) 3386 goto error; 3387 } 3388 3389 if (any) 3390 return unconditionalize_adjacent_validity(graph, source, sink); 3391 3392 isl_union_set_free(source); 3393 isl_union_set_free(sink); 3394 return 0; 3395 error: 3396 isl_union_set_free(source); 3397 isl_union_set_free(sink); 3398 return -1; 3399 } 3400 3401 static void next_band(struct isl_sched_graph *graph) 3402 { 3403 graph->band_start = graph->n_total_row; 3404 } 3405 3406 /* Return the union of the universe domains of the nodes in "graph" 3407 * that satisfy "pred". 3408 */ 3409 static __isl_give isl_union_set *isl_sched_graph_domain(isl_ctx *ctx, 3410 struct isl_sched_graph *graph, 3411 int (*pred)(struct isl_sched_node *node, int data), int data) 3412 { 3413 int i; 3414 isl_set *set; 3415 isl_union_set *dom; 3416 3417 for (i = 0; i < graph->n; ++i) 3418 if (pred(&graph->node[i], data)) 3419 break; 3420 3421 if (i >= graph->n) 3422 isl_die(ctx, isl_error_internal, 3423 "empty component", return NULL); 3424 3425 set = isl_set_universe(isl_space_copy(graph->node[i].space)); 3426 dom = isl_union_set_from_set(set); 3427 3428 for (i = i + 1; i < graph->n; ++i) { 3429 if (!pred(&graph->node[i], data)) 3430 continue; 3431 set = isl_set_universe(isl_space_copy(graph->node[i].space)); 3432 dom = isl_union_set_union(dom, isl_union_set_from_set(set)); 3433 } 3434 3435 return dom; 3436 } 3437 3438 /* Return a union of universe domains corresponding to the nodes 3439 * in the SCC with index "scc". 3440 */ 3441 __isl_give isl_union_set *isl_sched_graph_extract_scc(isl_ctx *ctx, 3442 struct isl_sched_graph *graph, int scc) 3443 { 3444 return isl_sched_graph_domain(ctx, graph, 3445 &isl_sched_node_scc_exactly, scc); 3446 } 3447 3448 /* Return a list of unions of universe domains, where each element 3449 * in the list corresponds to an SCC (or WCC) indexed by node->scc. 3450 */ 3451 __isl_give isl_union_set_list *isl_sched_graph_extract_sccs(isl_ctx *ctx, 3452 struct isl_sched_graph *graph) 3453 { 3454 int i; 3455 isl_union_set_list *filters; 3456 3457 filters = isl_union_set_list_alloc(ctx, graph->scc); 3458 for (i = 0; i < graph->scc; ++i) { 3459 isl_union_set *dom; 3460 3461 dom = isl_sched_graph_extract_scc(ctx, graph, i); 3462 filters = isl_union_set_list_add(filters, dom); 3463 } 3464 3465 return filters; 3466 } 3467 3468 /* Return a list of two unions of universe domains, one for the SCCs up 3469 * to and including graph->src_scc and another for the other SCCs. 3470 */ 3471 static __isl_give isl_union_set_list *extract_split(isl_ctx *ctx, 3472 struct isl_sched_graph *graph) 3473 { 3474 isl_union_set *dom; 3475 isl_union_set_list *filters; 3476 3477 filters = isl_union_set_list_alloc(ctx, 2); 3478 dom = isl_sched_graph_domain(ctx, graph, 3479 &node_scc_at_most, graph->src_scc); 3480 filters = isl_union_set_list_add(filters, dom); 3481 dom = isl_sched_graph_domain(ctx, graph, 3482 &node_scc_at_least, graph->src_scc + 1); 3483 filters = isl_union_set_list_add(filters, dom); 3484 3485 return filters; 3486 } 3487 3488 /* Copy nodes that satisfy node_pred from the src dependence graph 3489 * to the dst dependence graph. 3490 */ 3491 static isl_stat copy_nodes(struct isl_sched_graph *dst, 3492 struct isl_sched_graph *src, 3493 int (*node_pred)(struct isl_sched_node *node, int data), int data) 3494 { 3495 int i; 3496 3497 dst->n = 0; 3498 for (i = 0; i < src->n; ++i) { 3499 int j; 3500 3501 if (!node_pred(&src->node[i], data)) 3502 continue; 3503 3504 j = dst->n; 3505 dst->node[j].space = isl_space_copy(src->node[i].space); 3506 dst->node[j].compressed = src->node[i].compressed; 3507 dst->node[j].hull = isl_set_copy(src->node[i].hull); 3508 dst->node[j].compress = 3509 isl_multi_aff_copy(src->node[i].compress); 3510 dst->node[j].decompress = 3511 isl_pw_multi_aff_copy(src->node[i].decompress); 3512 dst->node[j].nvar = src->node[i].nvar; 3513 dst->node[j].nparam = src->node[i].nparam; 3514 dst->node[j].sched = isl_mat_copy(src->node[i].sched); 3515 dst->node[j].sched_map = isl_map_copy(src->node[i].sched_map); 3516 dst->node[j].coincident = src->node[i].coincident; 3517 dst->node[j].sizes = isl_multi_val_copy(src->node[i].sizes); 3518 dst->node[j].bounds = isl_basic_set_copy(src->node[i].bounds); 3519 dst->node[j].max = isl_vec_copy(src->node[i].max); 3520 dst->n++; 3521 3522 if (!dst->node[j].space || !dst->node[j].sched) 3523 return isl_stat_error; 3524 if (dst->node[j].compressed && 3525 (!dst->node[j].hull || !dst->node[j].compress || 3526 !dst->node[j].decompress)) 3527 return isl_stat_error; 3528 } 3529 3530 return isl_stat_ok; 3531 } 3532 3533 /* Copy non-empty edges that satisfy edge_pred from the src dependence graph 3534 * to the dst dependence graph. 3535 * If the source or destination node of the edge is not in the destination 3536 * graph, then it must be a backward proximity edge and it should simply 3537 * be ignored. 3538 */ 3539 static isl_stat copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst, 3540 struct isl_sched_graph *src, 3541 int (*edge_pred)(struct isl_sched_edge *edge, int data), int data) 3542 { 3543 int i; 3544 3545 dst->n_edge = 0; 3546 for (i = 0; i < src->n_edge; ++i) { 3547 struct isl_sched_edge *edge = &src->edge[i]; 3548 isl_map *map; 3549 isl_union_map *tagged_condition; 3550 isl_union_map *tagged_validity; 3551 struct isl_sched_node *dst_src, *dst_dst; 3552 3553 if (!edge_pred(edge, data)) 3554 continue; 3555 3556 if (isl_map_plain_is_empty(edge->map)) 3557 continue; 3558 3559 dst_src = isl_sched_graph_find_node(ctx, dst, edge->src->space); 3560 dst_dst = isl_sched_graph_find_node(ctx, dst, edge->dst->space); 3561 if (!dst_src || !dst_dst) 3562 return isl_stat_error; 3563 if (!isl_sched_graph_is_node(dst, dst_src) || 3564 !isl_sched_graph_is_node(dst, dst_dst)) { 3565 if (is_validity(edge) || 3566 isl_sched_edge_is_conditional_validity(edge)) 3567 isl_die(ctx, isl_error_internal, 3568 "backward (conditional) validity edge", 3569 return isl_stat_error); 3570 continue; 3571 } 3572 3573 map = isl_map_copy(edge->map); 3574 tagged_condition = isl_union_map_copy(edge->tagged_condition); 3575 tagged_validity = isl_union_map_copy(edge->tagged_validity); 3576 3577 dst->edge[dst->n_edge].src = dst_src; 3578 dst->edge[dst->n_edge].dst = dst_dst; 3579 dst->edge[dst->n_edge].map = map; 3580 dst->edge[dst->n_edge].tagged_condition = tagged_condition; 3581 dst->edge[dst->n_edge].tagged_validity = tagged_validity; 3582 dst->edge[dst->n_edge].types = edge->types; 3583 dst->n_edge++; 3584 3585 if (edge->tagged_condition && !tagged_condition) 3586 return isl_stat_error; 3587 if (edge->tagged_validity && !tagged_validity) 3588 return isl_stat_error; 3589 3590 if (graph_edge_tables_add(ctx, dst, 3591 &dst->edge[dst->n_edge - 1]) < 0) 3592 return isl_stat_error; 3593 } 3594 3595 return isl_stat_ok; 3596 } 3597 3598 /* Compute the maximal number of variables over all nodes. 3599 * This is the maximal number of linearly independent schedule 3600 * rows that we need to compute. 3601 * Just in case we end up in a part of the dependence graph 3602 * with only lower-dimensional domains, we make sure we will 3603 * compute the required amount of extra linearly independent rows. 3604 */ 3605 isl_stat isl_sched_graph_compute_maxvar(struct isl_sched_graph *graph) 3606 { 3607 int i; 3608 3609 graph->maxvar = 0; 3610 for (i = 0; i < graph->n; ++i) { 3611 struct isl_sched_node *node = &graph->node[i]; 3612 int nvar; 3613 3614 if (isl_sched_node_update_vmap(node) < 0) 3615 return isl_stat_error; 3616 nvar = node->nvar + graph->n_row - node->rank; 3617 if (nvar > graph->maxvar) 3618 graph->maxvar = nvar; 3619 } 3620 3621 return isl_stat_ok; 3622 } 3623 3624 /* Extract the subgraph of "graph" that consists of the nodes satisfying 3625 * "node_pred" and the edges satisfying "edge_pred" and store 3626 * the result in "sub". 3627 */ 3628 isl_stat isl_sched_graph_extract_sub_graph(isl_ctx *ctx, 3629 struct isl_sched_graph *graph, 3630 int (*node_pred)(struct isl_sched_node *node, int data), 3631 int (*edge_pred)(struct isl_sched_edge *edge, int data), 3632 int data, struct isl_sched_graph *sub) 3633 { 3634 int i, n = 0, n_edge = 0; 3635 int t; 3636 3637 for (i = 0; i < graph->n; ++i) 3638 if (node_pred(&graph->node[i], data)) 3639 ++n; 3640 for (i = 0; i < graph->n_edge; ++i) 3641 if (edge_pred(&graph->edge[i], data)) 3642 ++n_edge; 3643 if (graph_alloc(ctx, sub, n, n_edge) < 0) 3644 return isl_stat_error; 3645 sub->root = graph->root; 3646 if (copy_nodes(sub, graph, node_pred, data) < 0) 3647 return isl_stat_error; 3648 if (graph_init_table(ctx, sub) < 0) 3649 return isl_stat_error; 3650 for (t = 0; t <= isl_edge_last; ++t) 3651 sub->max_edge[t] = graph->max_edge[t]; 3652 if (graph_init_edge_tables(ctx, sub) < 0) 3653 return isl_stat_error; 3654 if (copy_edges(ctx, sub, graph, edge_pred, data) < 0) 3655 return isl_stat_error; 3656 sub->n_row = graph->n_row; 3657 sub->max_row = graph->max_row; 3658 sub->n_total_row = graph->n_total_row; 3659 sub->band_start = graph->band_start; 3660 3661 return isl_stat_ok; 3662 } 3663 3664 static __isl_give isl_schedule_node *compute_schedule(isl_schedule_node *node, 3665 struct isl_sched_graph *graph); 3666 static __isl_give isl_schedule_node *compute_schedule_wcc( 3667 isl_schedule_node *node, struct isl_sched_graph *graph); 3668 3669 /* Compute a schedule for a subgraph of "graph". In particular, for 3670 * the graph composed of nodes that satisfy node_pred and edges that 3671 * that satisfy edge_pred. 3672 * If the subgraph is known to consist of a single component, then wcc should 3673 * be set and then we call compute_schedule_wcc on the constructed subgraph. 3674 * Otherwise, we call compute_schedule, which will check whether the subgraph 3675 * is connected. 3676 * 3677 * The schedule is inserted at "node" and the updated schedule node 3678 * is returned. 3679 */ 3680 static __isl_give isl_schedule_node *compute_sub_schedule( 3681 __isl_take isl_schedule_node *node, isl_ctx *ctx, 3682 struct isl_sched_graph *graph, 3683 int (*node_pred)(struct isl_sched_node *node, int data), 3684 int (*edge_pred)(struct isl_sched_edge *edge, int data), 3685 int data, int wcc) 3686 { 3687 struct isl_sched_graph split = { 0 }; 3688 3689 if (isl_sched_graph_extract_sub_graph(ctx, graph, node_pred, edge_pred, 3690 data, &split) < 0) 3691 goto error; 3692 3693 if (wcc) 3694 node = compute_schedule_wcc(node, &split); 3695 else 3696 node = compute_schedule(node, &split); 3697 3698 isl_sched_graph_free(ctx, &split); 3699 return node; 3700 error: 3701 isl_sched_graph_free(ctx, &split); 3702 return isl_schedule_node_free(node); 3703 } 3704 3705 int isl_sched_edge_scc_exactly(struct isl_sched_edge *edge, int scc) 3706 { 3707 return edge->src->scc == scc && edge->dst->scc == scc; 3708 } 3709 3710 static int edge_dst_scc_at_most(struct isl_sched_edge *edge, int scc) 3711 { 3712 return edge->dst->scc <= scc; 3713 } 3714 3715 static int edge_src_scc_at_least(struct isl_sched_edge *edge, int scc) 3716 { 3717 return edge->src->scc >= scc; 3718 } 3719 3720 /* Reset the current band by dropping all its schedule rows. 3721 */ 3722 static isl_stat reset_band(struct isl_sched_graph *graph) 3723 { 3724 int i; 3725 int drop; 3726 3727 drop = graph->n_total_row - graph->band_start; 3728 graph->n_total_row -= drop; 3729 graph->n_row -= drop; 3730 3731 for (i = 0; i < graph->n; ++i) { 3732 struct isl_sched_node *node = &graph->node[i]; 3733 3734 isl_map_free(node->sched_map); 3735 node->sched_map = NULL; 3736 3737 node->sched = isl_mat_drop_rows(node->sched, 3738 graph->band_start, drop); 3739 3740 if (!node->sched) 3741 return isl_stat_error; 3742 } 3743 3744 return isl_stat_ok; 3745 } 3746 3747 /* Split the current graph into two parts and compute a schedule for each 3748 * part individually. In particular, one part consists of all SCCs up 3749 * to and including graph->src_scc, while the other part contains the other 3750 * SCCs. The split is enforced by a sequence node inserted at position "node" 3751 * in the schedule tree. Return the updated schedule node. 3752 * If either of these two parts consists of a sequence, then it is spliced 3753 * into the sequence containing the two parts. 3754 * 3755 * The current band is reset. It would be possible to reuse 3756 * the previously computed rows as the first rows in the next 3757 * band, but recomputing them may result in better rows as we are looking 3758 * at a smaller part of the dependence graph. 3759 */ 3760 static __isl_give isl_schedule_node *compute_split_schedule( 3761 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) 3762 { 3763 isl_ctx *ctx; 3764 isl_union_set_list *filters; 3765 3766 if (!node) 3767 return NULL; 3768 3769 if (reset_band(graph) < 0) 3770 return isl_schedule_node_free(node); 3771 3772 next_band(graph); 3773 3774 ctx = isl_schedule_node_get_ctx(node); 3775 filters = extract_split(ctx, graph); 3776 node = isl_schedule_node_insert_sequence(node, filters); 3777 node = isl_schedule_node_grandchild(node, 1, 0); 3778 3779 node = compute_sub_schedule(node, ctx, graph, 3780 &node_scc_at_least, &edge_src_scc_at_least, 3781 graph->src_scc + 1, 0); 3782 node = isl_schedule_node_grandparent(node); 3783 node = isl_schedule_node_grandchild(node, 0, 0); 3784 node = compute_sub_schedule(node, ctx, graph, 3785 &node_scc_at_most, &edge_dst_scc_at_most, 3786 graph->src_scc, 0); 3787 node = isl_schedule_node_grandparent(node); 3788 3789 node = isl_schedule_node_sequence_splice_children(node); 3790 3791 return node; 3792 } 3793 3794 /* Insert a band node at position "node" in the schedule tree corresponding 3795 * to the current band in "graph". Mark the band node permutable 3796 * if "permutable" is set. 3797 * The partial schedules and the coincidence property are extracted 3798 * from the graph nodes. 3799 * Return the updated schedule node. 3800 */ 3801 static __isl_give isl_schedule_node *insert_current_band( 3802 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, 3803 int permutable) 3804 { 3805 int i; 3806 int start, end, n; 3807 isl_multi_aff *ma; 3808 isl_multi_pw_aff *mpa; 3809 isl_multi_union_pw_aff *mupa; 3810 3811 if (!node) 3812 return NULL; 3813 3814 if (graph->n < 1) 3815 isl_die(isl_schedule_node_get_ctx(node), isl_error_internal, 3816 "graph should have at least one node", 3817 return isl_schedule_node_free(node)); 3818 3819 start = graph->band_start; 3820 end = graph->n_total_row; 3821 n = end - start; 3822 3823 ma = isl_sched_node_extract_partial_schedule_multi_aff(&graph->node[0], 3824 start, n); 3825 mpa = isl_multi_pw_aff_from_multi_aff(ma); 3826 mupa = isl_multi_union_pw_aff_from_multi_pw_aff(mpa); 3827 3828 for (i = 1; i < graph->n; ++i) { 3829 isl_multi_union_pw_aff *mupa_i; 3830 3831 ma = isl_sched_node_extract_partial_schedule_multi_aff( 3832 &graph->node[i], start, n); 3833 mpa = isl_multi_pw_aff_from_multi_aff(ma); 3834 mupa_i = isl_multi_union_pw_aff_from_multi_pw_aff(mpa); 3835 mupa = isl_multi_union_pw_aff_union_add(mupa, mupa_i); 3836 } 3837 node = isl_schedule_node_insert_partial_schedule(node, mupa); 3838 3839 for (i = 0; i < n; ++i) 3840 node = isl_schedule_node_band_member_set_coincident(node, i, 3841 graph->node[0].coincident[start + i]); 3842 node = isl_schedule_node_band_set_permutable(node, permutable); 3843 3844 return node; 3845 } 3846 3847 /* Update the dependence relations based on the current schedule, 3848 * add the current band to "node" and then continue with the computation 3849 * of the next band. 3850 * Return the updated schedule node. 3851 */ 3852 static __isl_give isl_schedule_node *compute_next_band( 3853 __isl_take isl_schedule_node *node, 3854 struct isl_sched_graph *graph, int permutable) 3855 { 3856 isl_ctx *ctx; 3857 3858 if (!node) 3859 return NULL; 3860 3861 ctx = isl_schedule_node_get_ctx(node); 3862 if (update_edges(ctx, graph) < 0) 3863 return isl_schedule_node_free(node); 3864 node = insert_current_band(node, graph, permutable); 3865 next_band(graph); 3866 3867 node = isl_schedule_node_child(node, 0); 3868 node = compute_schedule(node, graph); 3869 node = isl_schedule_node_parent(node); 3870 3871 return node; 3872 } 3873 3874 /* Add the constraints "coef" derived from an edge from "node" to itself 3875 * to graph->lp in order to respect the dependences and to try and carry them. 3876 * "pos" is the sequence number of the edge that needs to be carried. 3877 * "coef" represents general constraints on coefficients (c_0, c_x) 3878 * of valid constraints for (y - x) with x and y instances of the node. 3879 * 3880 * The constraints added to graph->lp need to enforce 3881 * 3882 * (c_j_0 + c_j_x y) - (c_j_0 + c_j_x x) 3883 * = c_j_x (y - x) >= e_i 3884 * 3885 * for each (x,y) in the dependence relation of the edge. 3886 * That is, (-e_i, c_j_x) needs to be plugged in for (c_0, c_x), 3887 * taking into account that each coefficient in c_j_x is represented 3888 * as a pair of non-negative coefficients. 3889 */ 3890 static isl_stat add_intra_constraints(struct isl_sched_graph *graph, 3891 struct isl_sched_node *node, __isl_take isl_basic_set *coef, int pos) 3892 { 3893 isl_size offset; 3894 isl_ctx *ctx; 3895 isl_dim_map *dim_map; 3896 3897 offset = coef_var_offset(coef); 3898 if (offset < 0) 3899 coef = isl_basic_set_free(coef); 3900 if (!coef) 3901 return isl_stat_error; 3902 3903 ctx = isl_basic_set_get_ctx(coef); 3904 dim_map = intra_dim_map(ctx, graph, node, offset, 1); 3905 isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1); 3906 graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map); 3907 3908 return isl_stat_ok; 3909 } 3910 3911 /* Add the constraints "coef" derived from an edge from "src" to "dst" 3912 * to graph->lp in order to respect the dependences and to try and carry them. 3913 * "pos" is the sequence number of the edge that needs to be carried or 3914 * -1 if no attempt should be made to carry the dependences. 3915 * "coef" represents general constraints on coefficients (c_0, c_n, c_x, c_y) 3916 * of valid constraints for (x, y) with x and y instances of "src" and "dst". 3917 * 3918 * The constraints added to graph->lp need to enforce 3919 * 3920 * (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= e_i 3921 * 3922 * for each (x,y) in the dependence relation of the edge or 3923 * 3924 * (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= 0 3925 * 3926 * if pos is -1. 3927 * That is, 3928 * (-e_i + c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x) 3929 * or 3930 * (c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x) 3931 * needs to be plugged in for (c_0, c_n, c_x, c_y), 3932 * taking into account that each coefficient in c_j_x and c_k_x is represented 3933 * as a pair of non-negative coefficients. 3934 */ 3935 static isl_stat add_inter_constraints(struct isl_sched_graph *graph, 3936 struct isl_sched_node *src, struct isl_sched_node *dst, 3937 __isl_take isl_basic_set *coef, int pos) 3938 { 3939 isl_size offset; 3940 isl_ctx *ctx; 3941 isl_dim_map *dim_map; 3942 3943 offset = coef_var_offset(coef); 3944 if (offset < 0) 3945 coef = isl_basic_set_free(coef); 3946 if (!coef) 3947 return isl_stat_error; 3948 3949 ctx = isl_basic_set_get_ctx(coef); 3950 dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1); 3951 if (pos >= 0) 3952 isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1); 3953 graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map); 3954 3955 return isl_stat_ok; 3956 } 3957 3958 /* Data structure for keeping track of the data needed 3959 * to exploit non-trivial lineality spaces. 3960 * 3961 * "any_non_trivial" is true if there are any non-trivial lineality spaces. 3962 * If "any_non_trivial" is not true, then "equivalent" and "mask" may be NULL. 3963 * "equivalent" connects instances to other instances on the same line(s). 3964 * "mask" contains the domain spaces of "equivalent". 3965 * Any instance set not in "mask" does not have a non-trivial lineality space. 3966 */ 3967 struct isl_exploit_lineality_data { 3968 isl_bool any_non_trivial; 3969 isl_union_map *equivalent; 3970 isl_union_set *mask; 3971 }; 3972 3973 /* Data structure collecting information used during the construction 3974 * of an LP for carrying dependences. 3975 * 3976 * "intra" is a sequence of coefficient constraints for intra-node edges. 3977 * "inter" is a sequence of coefficient constraints for inter-node edges. 3978 * "lineality" contains data used to exploit non-trivial lineality spaces. 3979 */ 3980 struct isl_carry { 3981 isl_basic_set_list *intra; 3982 isl_basic_set_list *inter; 3983 struct isl_exploit_lineality_data lineality; 3984 }; 3985 3986 /* Free all the data stored in "carry". 3987 */ 3988 static void isl_carry_clear(struct isl_carry *carry) 3989 { 3990 isl_basic_set_list_free(carry->intra); 3991 isl_basic_set_list_free(carry->inter); 3992 isl_union_map_free(carry->lineality.equivalent); 3993 isl_union_set_free(carry->lineality.mask); 3994 } 3995 3996 /* Return a pointer to the node in "graph" that lives in "space". 3997 * If the requested node has been compressed, then "space" 3998 * corresponds to the compressed space. 3999 * The graph is assumed to have such a node. 4000 * Return NULL in case of error. 4001 * 4002 * First try and see if "space" is the space of an uncompressed node. 4003 * If so, return that node. 4004 * Otherwise, "space" was constructed by construct_compressed_id and 4005 * contains a user pointer pointing to the node in the tuple id. 4006 * However, this node belongs to the original dependence graph. 4007 * If "graph" is a subgraph of this original dependence graph, 4008 * then the node with the same space still needs to be looked up 4009 * in the current graph. 4010 */ 4011 static struct isl_sched_node *graph_find_compressed_node(isl_ctx *ctx, 4012 struct isl_sched_graph *graph, __isl_keep isl_space *space) 4013 { 4014 isl_id *id; 4015 struct isl_sched_node *node; 4016 4017 if (!space) 4018 return NULL; 4019 4020 node = isl_sched_graph_find_node(ctx, graph, space); 4021 if (!node) 4022 return NULL; 4023 if (isl_sched_graph_is_node(graph, node)) 4024 return node; 4025 4026 id = isl_space_get_tuple_id(space, isl_dim_set); 4027 node = isl_id_get_user(id); 4028 isl_id_free(id); 4029 4030 if (!node) 4031 return NULL; 4032 4033 if (!isl_sched_graph_is_node(graph->root, node)) 4034 isl_die(ctx, isl_error_internal, 4035 "space points to invalid node", return NULL); 4036 if (graph != graph->root) 4037 node = isl_sched_graph_find_node(ctx, graph, node->space); 4038 if (!isl_sched_graph_is_node(graph, node)) 4039 isl_die(ctx, isl_error_internal, 4040 "unable to find node", return NULL); 4041 4042 return node; 4043 } 4044 4045 /* Internal data structure for add_all_constraints. 4046 * 4047 * "graph" is the schedule constraint graph for which an LP problem 4048 * is being constructed. 4049 * "carry_inter" indicates whether inter-node edges should be carried. 4050 * "pos" is the position of the next edge that needs to be carried. 4051 */ 4052 struct isl_add_all_constraints_data { 4053 isl_ctx *ctx; 4054 struct isl_sched_graph *graph; 4055 int carry_inter; 4056 int pos; 4057 }; 4058 4059 /* Add the constraints "coef" derived from an edge from a node to itself 4060 * to data->graph->lp in order to respect the dependences and 4061 * to try and carry them. 4062 * 4063 * The space of "coef" is of the form 4064 * 4065 * coefficients[[c_cst] -> S[c_x]] 4066 * 4067 * with S[c_x] the (compressed) space of the node. 4068 * Extract the node from the space and call add_intra_constraints. 4069 */ 4070 static isl_stat lp_add_intra(__isl_take isl_basic_set *coef, void *user) 4071 { 4072 struct isl_add_all_constraints_data *data = user; 4073 isl_space *space; 4074 struct isl_sched_node *node; 4075 4076 space = isl_basic_set_get_space(coef); 4077 space = isl_space_range(isl_space_unwrap(space)); 4078 node = graph_find_compressed_node(data->ctx, data->graph, space); 4079 isl_space_free(space); 4080 return add_intra_constraints(data->graph, node, coef, data->pos++); 4081 } 4082 4083 /* Add the constraints "coef" derived from an edge from a node j 4084 * to a node k to data->graph->lp in order to respect the dependences and 4085 * to try and carry them (provided data->carry_inter is set). 4086 * 4087 * The space of "coef" is of the form 4088 * 4089 * coefficients[[c_cst, c_n] -> [S_j[c_x] -> S_k[c_y]]] 4090 * 4091 * with S_j[c_x] and S_k[c_y] the (compressed) spaces of the nodes. 4092 * Extract the nodes from the space and call add_inter_constraints. 4093 */ 4094 static isl_stat lp_add_inter(__isl_take isl_basic_set *coef, void *user) 4095 { 4096 struct isl_add_all_constraints_data *data = user; 4097 isl_space *space, *dom; 4098 struct isl_sched_node *src, *dst; 4099 int pos; 4100 4101 space = isl_basic_set_get_space(coef); 4102 space = isl_space_unwrap(isl_space_range(isl_space_unwrap(space))); 4103 dom = isl_space_domain(isl_space_copy(space)); 4104 src = graph_find_compressed_node(data->ctx, data->graph, dom); 4105 isl_space_free(dom); 4106 space = isl_space_range(space); 4107 dst = graph_find_compressed_node(data->ctx, data->graph, space); 4108 isl_space_free(space); 4109 4110 pos = data->carry_inter ? data->pos++ : -1; 4111 return add_inter_constraints(data->graph, src, dst, coef, pos); 4112 } 4113 4114 /* Add constraints to graph->lp that force all (conditional) validity 4115 * dependences to be respected and attempt to carry them. 4116 * "intra" is the sequence of coefficient constraints for intra-node edges. 4117 * "inter" is the sequence of coefficient constraints for inter-node edges. 4118 * "carry_inter" indicates whether inter-node edges should be carried or 4119 * only respected. 4120 */ 4121 static isl_stat add_all_constraints(isl_ctx *ctx, struct isl_sched_graph *graph, 4122 __isl_keep isl_basic_set_list *intra, 4123 __isl_keep isl_basic_set_list *inter, int carry_inter) 4124 { 4125 struct isl_add_all_constraints_data data = { ctx, graph, carry_inter }; 4126 4127 data.pos = 0; 4128 if (isl_basic_set_list_foreach(intra, &lp_add_intra, &data) < 0) 4129 return isl_stat_error; 4130 if (isl_basic_set_list_foreach(inter, &lp_add_inter, &data) < 0) 4131 return isl_stat_error; 4132 return isl_stat_ok; 4133 } 4134 4135 /* Internal data structure for count_all_constraints 4136 * for keeping track of the number of equality and inequality constraints. 4137 */ 4138 struct isl_sched_count { 4139 int n_eq; 4140 int n_ineq; 4141 }; 4142 4143 /* Add the number of equality and inequality constraints of "bset" 4144 * to data->n_eq and data->n_ineq. 4145 */ 4146 static isl_stat bset_update_count(__isl_take isl_basic_set *bset, void *user) 4147 { 4148 struct isl_sched_count *data = user; 4149 4150 return update_count(bset, 1, &data->n_eq, &data->n_ineq); 4151 } 4152 4153 /* Count the number of equality and inequality constraints 4154 * that will be added to the carry_lp problem. 4155 * We count each edge exactly once. 4156 * "intra" is the sequence of coefficient constraints for intra-node edges. 4157 * "inter" is the sequence of coefficient constraints for inter-node edges. 4158 */ 4159 static isl_stat count_all_constraints(__isl_keep isl_basic_set_list *intra, 4160 __isl_keep isl_basic_set_list *inter, int *n_eq, int *n_ineq) 4161 { 4162 struct isl_sched_count data; 4163 4164 data.n_eq = data.n_ineq = 0; 4165 if (isl_basic_set_list_foreach(inter, &bset_update_count, &data) < 0) 4166 return isl_stat_error; 4167 if (isl_basic_set_list_foreach(intra, &bset_update_count, &data) < 0) 4168 return isl_stat_error; 4169 4170 *n_eq = data.n_eq; 4171 *n_ineq = data.n_ineq; 4172 4173 return isl_stat_ok; 4174 } 4175 4176 /* Construct an LP problem for finding schedule coefficients 4177 * such that the schedule carries as many validity dependences as possible. 4178 * In particular, for each dependence i, we bound the dependence distance 4179 * from below by e_i, with 0 <= e_i <= 1 and then maximize the sum 4180 * of all e_i's. Dependences with e_i = 0 in the solution are simply 4181 * respected, while those with e_i > 0 (in practice e_i = 1) are carried. 4182 * "intra" is the sequence of coefficient constraints for intra-node edges. 4183 * "inter" is the sequence of coefficient constraints for inter-node edges. 4184 * "n_edge" is the total number of edges. 4185 * "carry_inter" indicates whether inter-node edges should be carried or 4186 * only respected. That is, if "carry_inter" is not set, then 4187 * no e_i variables are introduced for the inter-node edges. 4188 * 4189 * All variables of the LP are non-negative. The actual coefficients 4190 * may be negative, so each coefficient is represented as the difference 4191 * of two non-negative variables. The negative part always appears 4192 * immediately before the positive part. 4193 * Other than that, the variables have the following order 4194 * 4195 * - sum of (1 - e_i) over all edges 4196 * - sum of all c_n coefficients 4197 * (unconstrained when computing non-parametric schedules) 4198 * - sum of positive and negative parts of all c_x coefficients 4199 * - for each edge 4200 * - e_i 4201 * - for each node 4202 * - positive and negative parts of c_i_x, in opposite order 4203 * - c_i_n (if parametric) 4204 * - c_i_0 4205 * 4206 * The constraints are those from the (validity) edges plus three equalities 4207 * to express the sums and n_edge inequalities to express e_i <= 1. 4208 */ 4209 static isl_stat setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph, 4210 int n_edge, __isl_keep isl_basic_set_list *intra, 4211 __isl_keep isl_basic_set_list *inter, int carry_inter) 4212 { 4213 int i; 4214 int k; 4215 isl_space *space; 4216 unsigned total; 4217 int n_eq, n_ineq; 4218 4219 total = 3 + n_edge; 4220 for (i = 0; i < graph->n; ++i) { 4221 struct isl_sched_node *node = &graph->node[graph->sorted[i]]; 4222 node->start = total; 4223 total += 1 + node->nparam + 2 * node->nvar; 4224 } 4225 4226 if (count_all_constraints(intra, inter, &n_eq, &n_ineq) < 0) 4227 return isl_stat_error; 4228 4229 space = isl_space_set_alloc(ctx, 0, total); 4230 isl_basic_set_free(graph->lp); 4231 n_eq += 3; 4232 n_ineq += n_edge; 4233 graph->lp = isl_basic_set_alloc_space(space, 0, n_eq, n_ineq); 4234 graph->lp = isl_basic_set_set_rational(graph->lp); 4235 4236 k = isl_basic_set_alloc_equality(graph->lp); 4237 if (k < 0) 4238 return isl_stat_error; 4239 isl_seq_clr(graph->lp->eq[k], 1 + total); 4240 isl_int_set_si(graph->lp->eq[k][0], -n_edge); 4241 isl_int_set_si(graph->lp->eq[k][1], 1); 4242 for (i = 0; i < n_edge; ++i) 4243 isl_int_set_si(graph->lp->eq[k][4 + i], 1); 4244 4245 if (add_param_sum_constraint(graph, 1) < 0) 4246 return isl_stat_error; 4247 if (add_var_sum_constraint(graph, 2) < 0) 4248 return isl_stat_error; 4249 4250 for (i = 0; i < n_edge; ++i) { 4251 k = isl_basic_set_alloc_inequality(graph->lp); 4252 if (k < 0) 4253 return isl_stat_error; 4254 isl_seq_clr(graph->lp->ineq[k], 1 + total); 4255 isl_int_set_si(graph->lp->ineq[k][4 + i], -1); 4256 isl_int_set_si(graph->lp->ineq[k][0], 1); 4257 } 4258 4259 if (add_all_constraints(ctx, graph, intra, inter, carry_inter) < 0) 4260 return isl_stat_error; 4261 4262 return isl_stat_ok; 4263 } 4264 4265 static __isl_give isl_schedule_node *compute_component_schedule( 4266 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, 4267 int wcc); 4268 4269 /* If the schedule_split_scaled option is set and if the linear 4270 * parts of the scheduling rows for all nodes in the graphs have 4271 * a non-trivial common divisor, then remove this 4272 * common divisor from the linear part. 4273 * Otherwise, insert a band node directly and continue with 4274 * the construction of the schedule. 4275 * 4276 * If a non-trivial common divisor is found, then 4277 * the linear part is reduced and the remainder is ignored. 4278 * The pieces of the graph that are assigned different remainders 4279 * form (groups of) strongly connected components within 4280 * the scaled down band. If needed, they can therefore 4281 * be ordered along this remainder in a sequence node. 4282 * However, this ordering is not enforced here in order to allow 4283 * the scheduler to combine some of the strongly connected components. 4284 */ 4285 static __isl_give isl_schedule_node *split_scaled( 4286 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) 4287 { 4288 int i; 4289 int row; 4290 isl_ctx *ctx; 4291 isl_int gcd, gcd_i; 4292 isl_size n_row; 4293 4294 if (!node) 4295 return NULL; 4296 4297 ctx = isl_schedule_node_get_ctx(node); 4298 if (!ctx->opt->schedule_split_scaled) 4299 return compute_next_band(node, graph, 0); 4300 if (graph->n <= 1) 4301 return compute_next_band(node, graph, 0); 4302 n_row = isl_mat_rows(graph->node[0].sched); 4303 if (n_row < 0) 4304 return isl_schedule_node_free(node); 4305 4306 isl_int_init(gcd); 4307 isl_int_init(gcd_i); 4308 4309 isl_int_set_si(gcd, 0); 4310 4311 row = n_row - 1; 4312 4313 for (i = 0; i < graph->n; ++i) { 4314 struct isl_sched_node *node = &graph->node[i]; 4315 isl_size cols = isl_mat_cols(node->sched); 4316 4317 if (cols < 0) 4318 break; 4319 isl_seq_gcd(node->sched->row[row] + 1, cols - 1, &gcd_i); 4320 isl_int_gcd(gcd, gcd, gcd_i); 4321 } 4322 4323 isl_int_clear(gcd_i); 4324 if (i < graph->n) 4325 goto error; 4326 4327 if (isl_int_cmp_si(gcd, 1) <= 0) { 4328 isl_int_clear(gcd); 4329 return compute_next_band(node, graph, 0); 4330 } 4331 4332 for (i = 0; i < graph->n; ++i) { 4333 struct isl_sched_node *node = &graph->node[i]; 4334 4335 isl_int_fdiv_q(node->sched->row[row][0], 4336 node->sched->row[row][0], gcd); 4337 isl_int_mul(node->sched->row[row][0], 4338 node->sched->row[row][0], gcd); 4339 node->sched = isl_mat_scale_down_row(node->sched, row, gcd); 4340 if (!node->sched) 4341 goto error; 4342 } 4343 4344 isl_int_clear(gcd); 4345 4346 return compute_next_band(node, graph, 0); 4347 error: 4348 isl_int_clear(gcd); 4349 return isl_schedule_node_free(node); 4350 } 4351 4352 /* Is the schedule row "sol" trivial on node "node"? 4353 * That is, is the solution zero on the dimensions linearly independent of 4354 * the previously found solutions? 4355 * Return 1 if the solution is trivial, 0 if it is not and -1 on error. 4356 * 4357 * Each coefficient is represented as the difference between 4358 * two non-negative values in "sol". 4359 * We construct the schedule row s and check if it is linearly 4360 * independent of previously computed schedule rows 4361 * by computing T s, with T the linear combinations that are zero 4362 * on linearly dependent schedule rows. 4363 * If the result consists of all zeros, then the solution is trivial. 4364 */ 4365 static int is_trivial(struct isl_sched_node *node, __isl_keep isl_vec *sol) 4366 { 4367 int trivial; 4368 isl_vec *node_sol; 4369 4370 if (!sol) 4371 return -1; 4372 if (node->nvar == node->rank) 4373 return 0; 4374 4375 node_sol = extract_var_coef(node, sol); 4376 node_sol = isl_mat_vec_product(isl_mat_copy(node->indep), node_sol); 4377 if (!node_sol) 4378 return -1; 4379 4380 trivial = isl_seq_first_non_zero(node_sol->el, 4381 node->nvar - node->rank) == -1; 4382 4383 isl_vec_free(node_sol); 4384 4385 return trivial; 4386 } 4387 4388 /* Is the schedule row "sol" trivial on any node where it should 4389 * not be trivial? 4390 * Return 1 if any solution is trivial, 0 if they are not and -1 on error. 4391 */ 4392 static int is_any_trivial(struct isl_sched_graph *graph, 4393 __isl_keep isl_vec *sol) 4394 { 4395 int i; 4396 4397 for (i = 0; i < graph->n; ++i) { 4398 struct isl_sched_node *node = &graph->node[i]; 4399 int trivial; 4400 4401 if (!needs_row(graph, node)) 4402 continue; 4403 trivial = is_trivial(node, sol); 4404 if (trivial < 0 || trivial) 4405 return trivial; 4406 } 4407 4408 return 0; 4409 } 4410 4411 /* Does the schedule represented by "sol" perform loop coalescing on "node"? 4412 * If so, return the position of the coalesced dimension. 4413 * Otherwise, return node->nvar or -1 on error. 4414 * 4415 * In particular, look for pairs of coefficients c_i and c_j such that 4416 * |c_j/c_i| > ceil(size_i/2), i.e., |c_j| > |c_i * ceil(size_i/2)|. 4417 * If any such pair is found, then return i. 4418 * If size_i is infinity, then no check on c_i needs to be performed. 4419 */ 4420 static int find_node_coalescing(struct isl_sched_node *node, 4421 __isl_keep isl_vec *sol) 4422 { 4423 int i, j; 4424 isl_int max; 4425 isl_vec *csol; 4426 4427 if (node->nvar <= 1) 4428 return node->nvar; 4429 4430 csol = extract_var_coef(node, sol); 4431 if (!csol) 4432 return -1; 4433 isl_int_init(max); 4434 for (i = 0; i < node->nvar; ++i) { 4435 isl_val *v; 4436 4437 if (isl_int_is_zero(csol->el[i])) 4438 continue; 4439 v = isl_multi_val_get_val(node->sizes, i); 4440 if (!v) 4441 goto error; 4442 if (!isl_val_is_int(v)) { 4443 isl_val_free(v); 4444 continue; 4445 } 4446 v = isl_val_div_ui(v, 2); 4447 v = isl_val_ceil(v); 4448 if (!v) 4449 goto error; 4450 isl_int_mul(max, v->n, csol->el[i]); 4451 isl_val_free(v); 4452 4453 for (j = 0; j < node->nvar; ++j) { 4454 if (j == i) 4455 continue; 4456 if (isl_int_abs_gt(csol->el[j], max)) 4457 break; 4458 } 4459 if (j < node->nvar) 4460 break; 4461 } 4462 4463 isl_int_clear(max); 4464 isl_vec_free(csol); 4465 return i; 4466 error: 4467 isl_int_clear(max); 4468 isl_vec_free(csol); 4469 return -1; 4470 } 4471 4472 /* Force the schedule coefficient at position "pos" of "node" to be zero 4473 * in "tl". 4474 * The coefficient is encoded as the difference between two non-negative 4475 * variables. Force these two variables to have the same value. 4476 */ 4477 static __isl_give isl_tab_lexmin *zero_out_node_coef( 4478 __isl_take isl_tab_lexmin *tl, struct isl_sched_node *node, int pos) 4479 { 4480 int dim; 4481 isl_ctx *ctx; 4482 isl_vec *eq; 4483 4484 ctx = isl_space_get_ctx(node->space); 4485 dim = isl_tab_lexmin_dim(tl); 4486 if (dim < 0) 4487 return isl_tab_lexmin_free(tl); 4488 eq = isl_vec_alloc(ctx, 1 + dim); 4489 eq = isl_vec_clr(eq); 4490 if (!eq) 4491 return isl_tab_lexmin_free(tl); 4492 4493 pos = 1 + node_var_coef_pos(node, pos); 4494 isl_int_set_si(eq->el[pos], 1); 4495 isl_int_set_si(eq->el[pos + 1], -1); 4496 tl = isl_tab_lexmin_add_eq(tl, eq->el); 4497 isl_vec_free(eq); 4498 4499 return tl; 4500 } 4501 4502 /* Return the lexicographically smallest rational point in the basic set 4503 * from which "tl" was constructed, double checking that this input set 4504 * was not empty. 4505 */ 4506 static __isl_give isl_vec *non_empty_solution(__isl_keep isl_tab_lexmin *tl) 4507 { 4508 isl_vec *sol; 4509 4510 sol = isl_tab_lexmin_get_solution(tl); 4511 if (!sol) 4512 return NULL; 4513 if (sol->size == 0) 4514 isl_die(isl_vec_get_ctx(sol), isl_error_internal, 4515 "error in schedule construction", 4516 return isl_vec_free(sol)); 4517 return sol; 4518 } 4519 4520 /* Does the solution "sol" of the LP problem constructed by setup_carry_lp 4521 * carry any of the "n_edge" groups of dependences? 4522 * The value in the first position is the sum of (1 - e_i) over all "n_edge" 4523 * edges, with 0 <= e_i <= 1 equal to 1 when the dependences represented 4524 * by the edge are carried by the solution. 4525 * If the sum of the (1 - e_i) is smaller than "n_edge" then at least 4526 * one of those is carried. 4527 * 4528 * Note that despite the fact that the problem is solved using a rational 4529 * solver, the solution is guaranteed to be integral. 4530 * Specifically, the dependence distance lower bounds e_i (and therefore 4531 * also their sum) are integers. See Lemma 5 of [1]. 4532 * 4533 * Any potential denominator of the sum is cleared by this function. 4534 * The denominator is not relevant for any of the other elements 4535 * in the solution. 4536 * 4537 * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling 4538 * Problem, Part II: Multi-Dimensional Time. 4539 * In Intl. Journal of Parallel Programming, 1992. 4540 */ 4541 static int carries_dependences(__isl_keep isl_vec *sol, int n_edge) 4542 { 4543 isl_int_divexact(sol->el[1], sol->el[1], sol->el[0]); 4544 isl_int_set_si(sol->el[0], 1); 4545 return isl_int_cmp_si(sol->el[1], n_edge) < 0; 4546 } 4547 4548 /* Return the lexicographically smallest rational point in "lp", 4549 * assuming that all variables are non-negative and performing some 4550 * additional sanity checks. 4551 * If "want_integral" is set, then compute the lexicographically smallest 4552 * integer point instead. 4553 * In particular, "lp" should not be empty by construction. 4554 * Double check that this is the case. 4555 * If dependences are not carried for any of the "n_edge" edges, 4556 * then return an empty vector. 4557 * 4558 * If the schedule_treat_coalescing option is set and 4559 * if the computed schedule performs loop coalescing on a given node, 4560 * i.e., if it is of the form 4561 * 4562 * c_i i + c_j j + ... 4563 * 4564 * with |c_j/c_i| >= size_i, then force the coefficient c_i to be zero 4565 * to cut out this solution. Repeat this process until no more loop 4566 * coalescing occurs or until no more dependences can be carried. 4567 * In the latter case, revert to the previously computed solution. 4568 * 4569 * If the caller requests an integral solution and if coalescing should 4570 * be treated, then perform the coalescing treatment first as 4571 * an integral solution computed before coalescing treatment 4572 * would carry the same number of edges and would therefore probably 4573 * also be coalescing. 4574 * 4575 * To allow the coalescing treatment to be performed first, 4576 * the initial solution is allowed to be rational and it is only 4577 * cut out (if needed) in the next iteration, if no coalescing measures 4578 * were taken. 4579 */ 4580 static __isl_give isl_vec *non_neg_lexmin(struct isl_sched_graph *graph, 4581 __isl_take isl_basic_set *lp, int n_edge, int want_integral) 4582 { 4583 int i, pos, cut; 4584 isl_ctx *ctx; 4585 isl_tab_lexmin *tl; 4586 isl_vec *sol = NULL, *prev; 4587 int treat_coalescing; 4588 int try_again; 4589 4590 if (!lp) 4591 return NULL; 4592 ctx = isl_basic_set_get_ctx(lp); 4593 treat_coalescing = isl_options_get_schedule_treat_coalescing(ctx); 4594 tl = isl_tab_lexmin_from_basic_set(lp); 4595 4596 cut = 0; 4597 do { 4598 int integral; 4599 4600 try_again = 0; 4601 if (cut) 4602 tl = isl_tab_lexmin_cut_to_integer(tl); 4603 prev = sol; 4604 sol = non_empty_solution(tl); 4605 if (!sol) 4606 goto error; 4607 4608 integral = isl_int_is_one(sol->el[0]); 4609 if (!carries_dependences(sol, n_edge)) { 4610 if (!prev) 4611 prev = isl_vec_alloc(ctx, 0); 4612 isl_vec_free(sol); 4613 sol = prev; 4614 break; 4615 } 4616 prev = isl_vec_free(prev); 4617 cut = want_integral && !integral; 4618 if (cut) 4619 try_again = 1; 4620 if (!treat_coalescing) 4621 continue; 4622 for (i = 0; i < graph->n; ++i) { 4623 struct isl_sched_node *node = &graph->node[i]; 4624 4625 pos = find_node_coalescing(node, sol); 4626 if (pos < 0) 4627 goto error; 4628 if (pos < node->nvar) 4629 break; 4630 } 4631 if (i < graph->n) { 4632 try_again = 1; 4633 tl = zero_out_node_coef(tl, &graph->node[i], pos); 4634 cut = 0; 4635 } 4636 } while (try_again); 4637 4638 isl_tab_lexmin_free(tl); 4639 4640 return sol; 4641 error: 4642 isl_tab_lexmin_free(tl); 4643 isl_vec_free(prev); 4644 isl_vec_free(sol); 4645 return NULL; 4646 } 4647 4648 /* If "edge" is an edge from a node to itself, then add the corresponding 4649 * dependence relation to "umap". 4650 * If "node" has been compressed, then the dependence relation 4651 * is also compressed first. 4652 */ 4653 static __isl_give isl_union_map *add_intra(__isl_take isl_union_map *umap, 4654 struct isl_sched_edge *edge) 4655 { 4656 isl_map *map; 4657 struct isl_sched_node *node = edge->src; 4658 4659 if (edge->src != edge->dst) 4660 return umap; 4661 4662 map = isl_map_copy(edge->map); 4663 map = compress(map, node, node); 4664 umap = isl_union_map_add_map(umap, map); 4665 return umap; 4666 } 4667 4668 /* If "edge" is an edge from a node to another node, then add the corresponding 4669 * dependence relation to "umap". 4670 * If the source or destination nodes of "edge" have been compressed, 4671 * then the dependence relation is also compressed first. 4672 */ 4673 static __isl_give isl_union_map *add_inter(__isl_take isl_union_map *umap, 4674 struct isl_sched_edge *edge) 4675 { 4676 isl_map *map; 4677 4678 if (edge->src == edge->dst) 4679 return umap; 4680 4681 map = isl_map_copy(edge->map); 4682 map = compress(map, edge->src, edge->dst); 4683 umap = isl_union_map_add_map(umap, map); 4684 return umap; 4685 } 4686 4687 /* Internal data structure used by union_drop_coalescing_constraints 4688 * to collect bounds on all relevant statements. 4689 * 4690 * "graph" is the schedule constraint graph for which an LP problem 4691 * is being constructed. 4692 * "bounds" collects the bounds. 4693 */ 4694 struct isl_collect_bounds_data { 4695 isl_ctx *ctx; 4696 struct isl_sched_graph *graph; 4697 isl_union_set *bounds; 4698 }; 4699 4700 /* Add the size bounds for the node with instance deltas in "set" 4701 * to data->bounds. 4702 */ 4703 static isl_stat collect_bounds(__isl_take isl_set *set, void *user) 4704 { 4705 struct isl_collect_bounds_data *data = user; 4706 struct isl_sched_node *node; 4707 isl_space *space; 4708 isl_set *bounds; 4709 4710 space = isl_set_get_space(set); 4711 isl_set_free(set); 4712 4713 node = graph_find_compressed_node(data->ctx, data->graph, space); 4714 isl_space_free(space); 4715 4716 bounds = isl_set_from_basic_set(get_size_bounds(node)); 4717 data->bounds = isl_union_set_add_set(data->bounds, bounds); 4718 4719 return isl_stat_ok; 4720 } 4721 4722 /* Drop some constraints from "delta" that could be exploited 4723 * to construct loop coalescing schedules. 4724 * In particular, drop those constraint that bound the difference 4725 * to the size of the domain. 4726 * Do this for each set/node in "delta" separately. 4727 * The parameters are assumed to have been projected out by the caller. 4728 */ 4729 static __isl_give isl_union_set *union_drop_coalescing_constraints(isl_ctx *ctx, 4730 struct isl_sched_graph *graph, __isl_take isl_union_set *delta) 4731 { 4732 struct isl_collect_bounds_data data = { ctx, graph }; 4733 4734 data.bounds = isl_union_set_empty(isl_space_params_alloc(ctx, 0)); 4735 if (isl_union_set_foreach_set(delta, &collect_bounds, &data) < 0) 4736 data.bounds = isl_union_set_free(data.bounds); 4737 delta = isl_union_set_plain_gist(delta, data.bounds); 4738 4739 return delta; 4740 } 4741 4742 /* Given a non-trivial lineality space "lineality", add the corresponding 4743 * universe set to data->mask and add a map from elements to 4744 * other elements along the lines in "lineality" to data->equivalent. 4745 * If this is the first time this function gets called 4746 * (data->any_non_trivial is still false), then set data->any_non_trivial and 4747 * initialize data->mask and data->equivalent. 4748 * 4749 * In particular, if the lineality space is defined by equality constraints 4750 * 4751 * E x = 0 4752 * 4753 * then construct an affine mapping 4754 * 4755 * f : x -> E x 4756 * 4757 * and compute the equivalence relation of having the same image under f: 4758 * 4759 * { x -> x' : E x = E x' } 4760 */ 4761 static isl_stat add_non_trivial_lineality(__isl_take isl_basic_set *lineality, 4762 struct isl_exploit_lineality_data *data) 4763 { 4764 isl_mat *eq; 4765 isl_space *space; 4766 isl_set *univ; 4767 isl_multi_aff *ma; 4768 isl_multi_pw_aff *mpa; 4769 isl_map *map; 4770 isl_size n; 4771 4772 if (isl_basic_set_check_no_locals(lineality) < 0) 4773 goto error; 4774 4775 space = isl_basic_set_get_space(lineality); 4776 if (!data->any_non_trivial) { 4777 data->equivalent = isl_union_map_empty(isl_space_copy(space)); 4778 data->mask = isl_union_set_empty(isl_space_copy(space)); 4779 } 4780 data->any_non_trivial = isl_bool_true; 4781 4782 univ = isl_set_universe(isl_space_copy(space)); 4783 data->mask = isl_union_set_add_set(data->mask, univ); 4784 4785 eq = isl_basic_set_extract_equalities(lineality); 4786 n = isl_mat_rows(eq); 4787 if (n < 0) 4788 space = isl_space_free(space); 4789 eq = isl_mat_insert_zero_rows(eq, 0, 1); 4790 eq = isl_mat_set_element_si(eq, 0, 0, 1); 4791 space = isl_space_from_domain(space); 4792 space = isl_space_add_dims(space, isl_dim_out, n); 4793 ma = isl_multi_aff_from_aff_mat(space, eq); 4794 mpa = isl_multi_pw_aff_from_multi_aff(ma); 4795 map = isl_multi_pw_aff_eq_map(mpa, isl_multi_pw_aff_copy(mpa)); 4796 data->equivalent = isl_union_map_add_map(data->equivalent, map); 4797 4798 isl_basic_set_free(lineality); 4799 return isl_stat_ok; 4800 error: 4801 isl_basic_set_free(lineality); 4802 return isl_stat_error; 4803 } 4804 4805 /* Check if the lineality space "set" is non-trivial (i.e., is not just 4806 * the origin or, in other words, satisfies a number of equality constraints 4807 * that is smaller than the dimension of the set). 4808 * If so, extend data->mask and data->equivalent accordingly. 4809 * 4810 * The input should not have any local variables already, but 4811 * isl_set_remove_divs is called to make sure it does not. 4812 */ 4813 static isl_stat add_lineality(__isl_take isl_set *set, void *user) 4814 { 4815 struct isl_exploit_lineality_data *data = user; 4816 isl_basic_set *hull; 4817 isl_size dim; 4818 isl_size n_eq; 4819 4820 set = isl_set_remove_divs(set); 4821 hull = isl_set_unshifted_simple_hull(set); 4822 dim = isl_basic_set_dim(hull, isl_dim_set); 4823 n_eq = isl_basic_set_n_equality(hull); 4824 if (dim < 0 || n_eq < 0) 4825 goto error; 4826 if (dim != n_eq) 4827 return add_non_trivial_lineality(hull, data); 4828 isl_basic_set_free(hull); 4829 return isl_stat_ok; 4830 error: 4831 isl_basic_set_free(hull); 4832 return isl_stat_error; 4833 } 4834 4835 /* Check if the difference set on intra-node schedule constraints "intra" 4836 * has any non-trivial lineality space. 4837 * If so, then extend the difference set to a difference set 4838 * on equivalent elements. That is, if "intra" is 4839 * 4840 * { y - x : (x,y) \in V } 4841 * 4842 * and elements are equivalent if they have the same image under f, 4843 * then return 4844 * 4845 * { y' - x' : (x,y) \in V and f(x) = f(x') and f(y) = f(y') } 4846 * 4847 * or, since f is linear, 4848 * 4849 * { y' - x' : (x,y) \in V and f(y - x) = f(y' - x') } 4850 * 4851 * The results of the search for non-trivial lineality spaces is stored 4852 * in "data". 4853 */ 4854 static __isl_give isl_union_set *exploit_intra_lineality( 4855 __isl_take isl_union_set *intra, 4856 struct isl_exploit_lineality_data *data) 4857 { 4858 isl_union_set *lineality; 4859 isl_union_set *uset; 4860 4861 data->any_non_trivial = isl_bool_false; 4862 lineality = isl_union_set_copy(intra); 4863 lineality = isl_union_set_combined_lineality_space(lineality); 4864 if (isl_union_set_foreach_set(lineality, &add_lineality, data) < 0) 4865 data->any_non_trivial = isl_bool_error; 4866 isl_union_set_free(lineality); 4867 4868 if (data->any_non_trivial < 0) 4869 return isl_union_set_free(intra); 4870 if (!data->any_non_trivial) 4871 return intra; 4872 4873 uset = isl_union_set_copy(intra); 4874 intra = isl_union_set_subtract(intra, isl_union_set_copy(data->mask)); 4875 uset = isl_union_set_apply(uset, isl_union_map_copy(data->equivalent)); 4876 intra = isl_union_set_union(intra, uset); 4877 4878 intra = isl_union_set_remove_divs(intra); 4879 4880 return intra; 4881 } 4882 4883 /* If the difference set on intra-node schedule constraints was found to have 4884 * any non-trivial lineality space by exploit_intra_lineality, 4885 * as recorded in "data", then extend the inter-node 4886 * schedule constraints "inter" to schedule constraints on equivalent elements. 4887 * That is, if "inter" is V and 4888 * elements are equivalent if they have the same image under f, then return 4889 * 4890 * { (x', y') : (x,y) \in V and f(x) = f(x') and f(y) = f(y') } 4891 */ 4892 static __isl_give isl_union_map *exploit_inter_lineality( 4893 __isl_take isl_union_map *inter, 4894 struct isl_exploit_lineality_data *data) 4895 { 4896 isl_union_map *umap; 4897 4898 if (data->any_non_trivial < 0) 4899 return isl_union_map_free(inter); 4900 if (!data->any_non_trivial) 4901 return inter; 4902 4903 umap = isl_union_map_copy(inter); 4904 inter = isl_union_map_subtract_range(inter, 4905 isl_union_set_copy(data->mask)); 4906 umap = isl_union_map_apply_range(umap, 4907 isl_union_map_copy(data->equivalent)); 4908 inter = isl_union_map_union(inter, umap); 4909 umap = isl_union_map_copy(inter); 4910 inter = isl_union_map_subtract_domain(inter, 4911 isl_union_set_copy(data->mask)); 4912 umap = isl_union_map_apply_range(isl_union_map_copy(data->equivalent), 4913 umap); 4914 inter = isl_union_map_union(inter, umap); 4915 4916 inter = isl_union_map_remove_divs(inter); 4917 4918 return inter; 4919 } 4920 4921 /* For each (conditional) validity edge in "graph", 4922 * add the corresponding dependence relation using "add" 4923 * to a collection of dependence relations and return the result. 4924 * If "coincidence" is set, then coincidence edges are considered as well. 4925 */ 4926 static __isl_give isl_union_map *collect_validity(struct isl_sched_graph *graph, 4927 __isl_give isl_union_map *(*add)(__isl_take isl_union_map *umap, 4928 struct isl_sched_edge *edge), int coincidence) 4929 { 4930 int i; 4931 isl_space *space; 4932 isl_union_map *umap; 4933 4934 space = isl_space_copy(graph->node[0].space); 4935 umap = isl_union_map_empty(space); 4936 4937 for (i = 0; i < graph->n_edge; ++i) { 4938 struct isl_sched_edge *edge = &graph->edge[i]; 4939 4940 if (!is_any_validity(edge) && 4941 (!coincidence || !is_coincidence(edge))) 4942 continue; 4943 4944 umap = add(umap, edge); 4945 } 4946 4947 return umap; 4948 } 4949 4950 /* For each dependence relation on a (conditional) validity edge 4951 * from a node to itself, 4952 * construct the set of coefficients of valid constraints for elements 4953 * in that dependence relation and collect the results. 4954 * If "coincidence" is set, then coincidence edges are considered as well. 4955 * 4956 * In particular, for each dependence relation R, constraints 4957 * on coefficients (c_0, c_x) are constructed such that 4958 * 4959 * c_0 + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R } 4960 * 4961 * If the schedule_treat_coalescing option is set, then some constraints 4962 * that could be exploited to construct coalescing schedules 4963 * are removed before the dual is computed, but after the parameters 4964 * have been projected out. 4965 * The entire computation is essentially the same as that performed 4966 * by intra_coefficients, except that it operates on multiple 4967 * edges together and that the parameters are always projected out. 4968 * 4969 * Additionally, exploit any non-trivial lineality space 4970 * in the difference set after removing coalescing constraints and 4971 * store the results of the non-trivial lineality space detection in "data". 4972 * The procedure is currently run unconditionally, but it is unlikely 4973 * to find any non-trivial lineality spaces if no coalescing constraints 4974 * have been removed. 4975 * 4976 * Note that if a dependence relation is a union of basic maps, 4977 * then each basic map needs to be treated individually as it may only 4978 * be possible to carry the dependences expressed by some of those 4979 * basic maps and not all of them. 4980 * The collected validity constraints are therefore not coalesced and 4981 * it is assumed that they are not coalesced automatically. 4982 * Duplicate basic maps can be removed, however. 4983 * In particular, if the same basic map appears as a disjunct 4984 * in multiple edges, then it only needs to be carried once. 4985 */ 4986 static __isl_give isl_basic_set_list *collect_intra_validity(isl_ctx *ctx, 4987 struct isl_sched_graph *graph, int coincidence, 4988 struct isl_exploit_lineality_data *data) 4989 { 4990 isl_union_map *intra; 4991 isl_union_set *delta; 4992 isl_basic_set_list *list; 4993 4994 intra = collect_validity(graph, &add_intra, coincidence); 4995 delta = isl_union_map_deltas(intra); 4996 delta = isl_union_set_project_out_all_params(delta); 4997 delta = isl_union_set_remove_divs(delta); 4998 if (isl_options_get_schedule_treat_coalescing(ctx)) 4999 delta = union_drop_coalescing_constraints(ctx, graph, delta); 5000 delta = exploit_intra_lineality(delta, data); 5001 list = isl_union_set_get_basic_set_list(delta); 5002 isl_union_set_free(delta); 5003 5004 return isl_basic_set_list_coefficients(list); 5005 } 5006 5007 /* For each dependence relation on a (conditional) validity edge 5008 * from a node to some other node, 5009 * construct the set of coefficients of valid constraints for elements 5010 * in that dependence relation and collect the results. 5011 * If "coincidence" is set, then coincidence edges are considered as well. 5012 * 5013 * In particular, for each dependence relation R, constraints 5014 * on coefficients (c_0, c_n, c_x, c_y) are constructed such that 5015 * 5016 * c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R 5017 * 5018 * This computation is essentially the same as that performed 5019 * by inter_coefficients, except that it operates on multiple 5020 * edges together. 5021 * 5022 * Additionally, exploit any non-trivial lineality space 5023 * that may have been discovered by collect_intra_validity 5024 * (as stored in "data"). 5025 * 5026 * Note that if a dependence relation is a union of basic maps, 5027 * then each basic map needs to be treated individually as it may only 5028 * be possible to carry the dependences expressed by some of those 5029 * basic maps and not all of them. 5030 * The collected validity constraints are therefore not coalesced and 5031 * it is assumed that they are not coalesced automatically. 5032 * Duplicate basic maps can be removed, however. 5033 * In particular, if the same basic map appears as a disjunct 5034 * in multiple edges, then it only needs to be carried once. 5035 */ 5036 static __isl_give isl_basic_set_list *collect_inter_validity( 5037 struct isl_sched_graph *graph, int coincidence, 5038 struct isl_exploit_lineality_data *data) 5039 { 5040 isl_union_map *inter; 5041 isl_union_set *wrap; 5042 isl_basic_set_list *list; 5043 5044 inter = collect_validity(graph, &add_inter, coincidence); 5045 inter = exploit_inter_lineality(inter, data); 5046 inter = isl_union_map_remove_divs(inter); 5047 wrap = isl_union_map_wrap(inter); 5048 list = isl_union_set_get_basic_set_list(wrap); 5049 isl_union_set_free(wrap); 5050 return isl_basic_set_list_coefficients(list); 5051 } 5052 5053 /* Construct an LP problem for finding schedule coefficients 5054 * such that the schedule carries as many of the "n_edge" groups of 5055 * dependences as possible based on the corresponding coefficient 5056 * constraints and return the lexicographically smallest non-trivial solution. 5057 * "intra" is the sequence of coefficient constraints for intra-node edges. 5058 * "inter" is the sequence of coefficient constraints for inter-node edges. 5059 * If "want_integral" is set, then compute an integral solution 5060 * for the coefficients rather than using the numerators 5061 * of a rational solution. 5062 * "carry_inter" indicates whether inter-node edges should be carried or 5063 * only respected. 5064 * 5065 * If none of the "n_edge" groups can be carried 5066 * then return an empty vector. 5067 */ 5068 static __isl_give isl_vec *compute_carrying_sol_coef(isl_ctx *ctx, 5069 struct isl_sched_graph *graph, int n_edge, 5070 __isl_keep isl_basic_set_list *intra, 5071 __isl_keep isl_basic_set_list *inter, int want_integral, 5072 int carry_inter) 5073 { 5074 isl_basic_set *lp; 5075 5076 if (setup_carry_lp(ctx, graph, n_edge, intra, inter, carry_inter) < 0) 5077 return NULL; 5078 5079 lp = isl_basic_set_copy(graph->lp); 5080 return non_neg_lexmin(graph, lp, n_edge, want_integral); 5081 } 5082 5083 /* Construct an LP problem for finding schedule coefficients 5084 * such that the schedule carries as many of the validity dependences 5085 * as possible and 5086 * return the lexicographically smallest non-trivial solution. 5087 * If "fallback" is set, then the carrying is performed as a fallback 5088 * for the Pluto-like scheduler. 5089 * If "coincidence" is set, then try and carry coincidence edges as well. 5090 * 5091 * The variable "n_edge" stores the number of groups that should be carried. 5092 * If none of the "n_edge" groups can be carried 5093 * then return an empty vector. 5094 * If, moreover, "n_edge" is zero, then the LP problem does not even 5095 * need to be constructed. 5096 * 5097 * If a fallback solution is being computed, then compute an integral solution 5098 * for the coefficients rather than using the numerators 5099 * of a rational solution. 5100 * 5101 * If a fallback solution is being computed, if there are any intra-node 5102 * dependences, and if requested by the user, then first try 5103 * to only carry those intra-node dependences. 5104 * If this fails to carry any dependences, then try again 5105 * with the inter-node dependences included. 5106 */ 5107 static __isl_give isl_vec *compute_carrying_sol(isl_ctx *ctx, 5108 struct isl_sched_graph *graph, int fallback, int coincidence) 5109 { 5110 isl_size n_intra, n_inter; 5111 int n_edge; 5112 struct isl_carry carry = { 0 }; 5113 isl_vec *sol; 5114 5115 carry.intra = collect_intra_validity(ctx, graph, coincidence, 5116 &carry.lineality); 5117 carry.inter = collect_inter_validity(graph, coincidence, 5118 &carry.lineality); 5119 n_intra = isl_basic_set_list_n_basic_set(carry.intra); 5120 n_inter = isl_basic_set_list_n_basic_set(carry.inter); 5121 if (n_intra < 0 || n_inter < 0) 5122 goto error; 5123 5124 if (fallback && n_intra > 0 && 5125 isl_options_get_schedule_carry_self_first(ctx)) { 5126 sol = compute_carrying_sol_coef(ctx, graph, n_intra, 5127 carry.intra, carry.inter, fallback, 0); 5128 if (!sol || sol->size != 0 || n_inter == 0) { 5129 isl_carry_clear(&carry); 5130 return sol; 5131 } 5132 isl_vec_free(sol); 5133 } 5134 5135 n_edge = n_intra + n_inter; 5136 if (n_edge == 0) { 5137 isl_carry_clear(&carry); 5138 return isl_vec_alloc(ctx, 0); 5139 } 5140 5141 sol = compute_carrying_sol_coef(ctx, graph, n_edge, 5142 carry.intra, carry.inter, fallback, 1); 5143 isl_carry_clear(&carry); 5144 return sol; 5145 error: 5146 isl_carry_clear(&carry); 5147 return NULL; 5148 } 5149 5150 /* Construct a schedule row for each node such that as many validity dependences 5151 * as possible are carried and then continue with the next band. 5152 * If "fallback" is set, then the carrying is performed as a fallback 5153 * for the Pluto-like scheduler. 5154 * If "coincidence" is set, then try and carry coincidence edges as well. 5155 * 5156 * If there are no validity dependences, then no dependence can be carried and 5157 * the procedure is guaranteed to fail. If there is more than one component, 5158 * then try computing a schedule on each component separately 5159 * to prevent or at least postpone this failure. 5160 * 5161 * If a schedule row is computed, then check that dependences are carried 5162 * for at least one of the edges. 5163 * 5164 * If the computed schedule row turns out to be trivial on one or 5165 * more nodes where it should not be trivial, then we throw it away 5166 * and try again on each component separately. 5167 * 5168 * If there is only one component, then we accept the schedule row anyway, 5169 * but we do not consider it as a complete row and therefore do not 5170 * increment graph->n_row. Note that the ranks of the nodes that 5171 * do get a non-trivial schedule part will get updated regardless and 5172 * graph->maxvar is computed based on these ranks. The test for 5173 * whether more schedule rows are required in compute_schedule_wcc 5174 * is therefore not affected. 5175 * 5176 * Insert a band corresponding to the schedule row at position "node" 5177 * of the schedule tree and continue with the construction of the schedule. 5178 * This insertion and the continued construction is performed by split_scaled 5179 * after optionally checking for non-trivial common divisors. 5180 */ 5181 static __isl_give isl_schedule_node *carry(__isl_take isl_schedule_node *node, 5182 struct isl_sched_graph *graph, int fallback, int coincidence) 5183 { 5184 int trivial; 5185 isl_ctx *ctx; 5186 isl_vec *sol; 5187 5188 if (!node) 5189 return NULL; 5190 5191 ctx = isl_schedule_node_get_ctx(node); 5192 sol = compute_carrying_sol(ctx, graph, fallback, coincidence); 5193 if (!sol) 5194 return isl_schedule_node_free(node); 5195 if (sol->size == 0) { 5196 isl_vec_free(sol); 5197 if (graph->scc > 1) 5198 return compute_component_schedule(node, graph, 1); 5199 isl_die(ctx, isl_error_unknown, "unable to carry dependences", 5200 return isl_schedule_node_free(node)); 5201 } 5202 5203 trivial = is_any_trivial(graph, sol); 5204 if (trivial < 0) { 5205 sol = isl_vec_free(sol); 5206 } else if (trivial && graph->scc > 1) { 5207 isl_vec_free(sol); 5208 return compute_component_schedule(node, graph, 1); 5209 } 5210 5211 if (update_schedule(graph, sol, 0) < 0) 5212 return isl_schedule_node_free(node); 5213 if (trivial) 5214 graph->n_row--; 5215 5216 return split_scaled(node, graph); 5217 } 5218 5219 /* Construct a schedule row for each node such that as many validity dependences 5220 * as possible are carried and then continue with the next band. 5221 * Do so as a fallback for the Pluto-like scheduler. 5222 * If "coincidence" is set, then try and carry coincidence edges as well. 5223 */ 5224 static __isl_give isl_schedule_node *carry_fallback( 5225 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, 5226 int coincidence) 5227 { 5228 return carry(node, graph, 1, coincidence); 5229 } 5230 5231 /* Construct a schedule row for each node such that as many validity dependences 5232 * as possible are carried and then continue with the next band. 5233 * Do so for the case where the Feautrier scheduler was selected 5234 * by the user. 5235 */ 5236 static __isl_give isl_schedule_node *carry_feautrier( 5237 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) 5238 { 5239 return carry(node, graph, 0, 0); 5240 } 5241 5242 /* Construct a schedule row for each node such that as many validity dependences 5243 * as possible are carried and then continue with the next band. 5244 * Do so as a fallback for the Pluto-like scheduler. 5245 */ 5246 static __isl_give isl_schedule_node *carry_dependences( 5247 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) 5248 { 5249 return carry_fallback(node, graph, 0); 5250 } 5251 5252 /* Construct a schedule row for each node such that as many validity or 5253 * coincidence dependences as possible are carried and 5254 * then continue with the next band. 5255 * Do so as a fallback for the Pluto-like scheduler. 5256 */ 5257 static __isl_give isl_schedule_node *carry_coincidence( 5258 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) 5259 { 5260 return carry_fallback(node, graph, 1); 5261 } 5262 5263 /* Topologically sort statements mapped to the same schedule iteration 5264 * and add insert a sequence node in front of "node" 5265 * corresponding to this order. 5266 * If "initialized" is set, then it may be assumed that 5267 * isl_sched_graph_compute_maxvar 5268 * has been called on the current band. Otherwise, call 5269 * isl_sched_graph_compute_maxvar if and before carry_dependences gets called. 5270 * 5271 * If it turns out to be impossible to sort the statements apart, 5272 * because different dependences impose different orderings 5273 * on the statements, then we extend the schedule such that 5274 * it carries at least one more dependence. 5275 */ 5276 static __isl_give isl_schedule_node *sort_statements( 5277 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, 5278 int initialized) 5279 { 5280 isl_ctx *ctx; 5281 isl_union_set_list *filters; 5282 5283 if (!node) 5284 return NULL; 5285 5286 ctx = isl_schedule_node_get_ctx(node); 5287 if (graph->n < 1) 5288 isl_die(ctx, isl_error_internal, 5289 "graph should have at least one node", 5290 return isl_schedule_node_free(node)); 5291 5292 if (graph->n == 1) 5293 return node; 5294 5295 if (update_edges(ctx, graph) < 0) 5296 return isl_schedule_node_free(node); 5297 5298 if (graph->n_edge == 0) 5299 return node; 5300 5301 if (detect_sccs(ctx, graph) < 0) 5302 return isl_schedule_node_free(node); 5303 5304 next_band(graph); 5305 if (graph->scc < graph->n) { 5306 if (!initialized && isl_sched_graph_compute_maxvar(graph) < 0) 5307 return isl_schedule_node_free(node); 5308 return carry_dependences(node, graph); 5309 } 5310 5311 filters = isl_sched_graph_extract_sccs(ctx, graph); 5312 node = isl_schedule_node_insert_sequence(node, filters); 5313 5314 return node; 5315 } 5316 5317 /* Are there any (non-empty) (conditional) validity edges in the graph? 5318 */ 5319 static int has_validity_edges(struct isl_sched_graph *graph) 5320 { 5321 int i; 5322 5323 for (i = 0; i < graph->n_edge; ++i) { 5324 int empty; 5325 5326 empty = isl_map_plain_is_empty(graph->edge[i].map); 5327 if (empty < 0) 5328 return -1; 5329 if (empty) 5330 continue; 5331 if (is_any_validity(&graph->edge[i])) 5332 return 1; 5333 } 5334 5335 return 0; 5336 } 5337 5338 /* Should we apply a Feautrier step? 5339 * That is, did the user request the Feautrier algorithm and are 5340 * there any validity dependences (left)? 5341 */ 5342 static int need_feautrier_step(isl_ctx *ctx, struct isl_sched_graph *graph) 5343 { 5344 if (ctx->opt->schedule_algorithm != ISL_SCHEDULE_ALGORITHM_FEAUTRIER) 5345 return 0; 5346 5347 return has_validity_edges(graph); 5348 } 5349 5350 /* Compute a schedule for a connected dependence graph using Feautrier's 5351 * multi-dimensional scheduling algorithm and return the updated schedule node. 5352 * 5353 * The original algorithm is described in [1]. 5354 * The main idea is to minimize the number of scheduling dimensions, by 5355 * trying to satisfy as many dependences as possible per scheduling dimension. 5356 * 5357 * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling 5358 * Problem, Part II: Multi-Dimensional Time. 5359 * In Intl. Journal of Parallel Programming, 1992. 5360 */ 5361 static __isl_give isl_schedule_node *compute_schedule_wcc_feautrier( 5362 isl_schedule_node *node, struct isl_sched_graph *graph) 5363 { 5364 return carry_feautrier(node, graph); 5365 } 5366 5367 /* Turn off the "local" bit on all (condition) edges. 5368 */ 5369 static void clear_local_edges(struct isl_sched_graph *graph) 5370 { 5371 int i; 5372 5373 for (i = 0; i < graph->n_edge; ++i) 5374 if (isl_sched_edge_is_condition(&graph->edge[i])) 5375 clear_local(&graph->edge[i]); 5376 } 5377 5378 /* Does "graph" have both condition and conditional validity edges? 5379 */ 5380 static int need_condition_check(struct isl_sched_graph *graph) 5381 { 5382 int i; 5383 int any_condition = 0; 5384 int any_conditional_validity = 0; 5385 5386 for (i = 0; i < graph->n_edge; ++i) { 5387 if (isl_sched_edge_is_condition(&graph->edge[i])) 5388 any_condition = 1; 5389 if (isl_sched_edge_is_conditional_validity(&graph->edge[i])) 5390 any_conditional_validity = 1; 5391 } 5392 5393 return any_condition && any_conditional_validity; 5394 } 5395 5396 /* Does "graph" contain any coincidence edge? 5397 */ 5398 static int has_any_coincidence(struct isl_sched_graph *graph) 5399 { 5400 int i; 5401 5402 for (i = 0; i < graph->n_edge; ++i) 5403 if (is_coincidence(&graph->edge[i])) 5404 return 1; 5405 5406 return 0; 5407 } 5408 5409 /* Extract the final schedule row as a map with the iteration domain 5410 * of "node" as domain. 5411 */ 5412 static __isl_give isl_map *final_row(struct isl_sched_node *node) 5413 { 5414 isl_multi_aff *ma; 5415 isl_size n_row; 5416 5417 n_row = isl_mat_rows(node->sched); 5418 if (n_row < 0) 5419 return NULL; 5420 ma = isl_sched_node_extract_partial_schedule_multi_aff(node, 5421 n_row - 1, 1); 5422 return isl_map_from_multi_aff(ma); 5423 } 5424 5425 /* Is the conditional validity dependence in the edge with index "edge_index" 5426 * violated by the latest (i.e., final) row of the schedule? 5427 * That is, is i scheduled after j 5428 * for any conditional validity dependence i -> j? 5429 */ 5430 static int is_violated(struct isl_sched_graph *graph, int edge_index) 5431 { 5432 isl_map *src_sched, *dst_sched, *map; 5433 struct isl_sched_edge *edge = &graph->edge[edge_index]; 5434 int empty; 5435 5436 src_sched = final_row(edge->src); 5437 dst_sched = final_row(edge->dst); 5438 map = isl_map_copy(edge->map); 5439 map = isl_map_apply_domain(map, src_sched); 5440 map = isl_map_apply_range(map, dst_sched); 5441 map = isl_map_order_gt(map, isl_dim_in, 0, isl_dim_out, 0); 5442 empty = isl_map_is_empty(map); 5443 isl_map_free(map); 5444 5445 if (empty < 0) 5446 return -1; 5447 5448 return !empty; 5449 } 5450 5451 /* Does "graph" have any satisfied condition edges that 5452 * are adjacent to the conditional validity constraint with 5453 * domain "conditional_source" and range "conditional_sink"? 5454 * 5455 * A satisfied condition is one that is not local. 5456 * If a condition was forced to be local already (i.e., marked as local) 5457 * then there is no need to check if it is in fact local. 5458 * 5459 * Additionally, mark all adjacent condition edges found as local. 5460 */ 5461 static int has_adjacent_true_conditions(struct isl_sched_graph *graph, 5462 __isl_keep isl_union_set *conditional_source, 5463 __isl_keep isl_union_set *conditional_sink) 5464 { 5465 int i; 5466 int any = 0; 5467 5468 for (i = 0; i < graph->n_edge; ++i) { 5469 int adjacent, local; 5470 isl_union_map *condition; 5471 5472 if (!isl_sched_edge_is_condition(&graph->edge[i])) 5473 continue; 5474 if (is_local(&graph->edge[i])) 5475 continue; 5476 5477 condition = graph->edge[i].tagged_condition; 5478 adjacent = domain_intersects(condition, conditional_sink); 5479 if (adjacent >= 0 && !adjacent) 5480 adjacent = range_intersects(condition, 5481 conditional_source); 5482 if (adjacent < 0) 5483 return -1; 5484 if (!adjacent) 5485 continue; 5486 5487 set_local(&graph->edge[i]); 5488 5489 local = is_condition_false(&graph->edge[i]); 5490 if (local < 0) 5491 return -1; 5492 if (!local) 5493 any = 1; 5494 } 5495 5496 return any; 5497 } 5498 5499 /* Are there any violated conditional validity dependences with 5500 * adjacent condition dependences that are not local with respect 5501 * to the current schedule? 5502 * That is, is the conditional validity constraint violated? 5503 * 5504 * Additionally, mark all those adjacent condition dependences as local. 5505 * We also mark those adjacent condition dependences that were not marked 5506 * as local before, but just happened to be local already. This ensures 5507 * that they remain local if the schedule is recomputed. 5508 * 5509 * We first collect domain and range of all violated conditional validity 5510 * dependences and then check if there are any adjacent non-local 5511 * condition dependences. 5512 */ 5513 static int has_violated_conditional_constraint(isl_ctx *ctx, 5514 struct isl_sched_graph *graph) 5515 { 5516 int i; 5517 int any = 0; 5518 isl_union_set *source, *sink; 5519 5520 source = isl_union_set_empty(isl_space_params_alloc(ctx, 0)); 5521 sink = isl_union_set_empty(isl_space_params_alloc(ctx, 0)); 5522 for (i = 0; i < graph->n_edge; ++i) { 5523 isl_union_set *uset; 5524 isl_union_map *umap; 5525 int violated; 5526 5527 if (!isl_sched_edge_is_conditional_validity(&graph->edge[i])) 5528 continue; 5529 5530 violated = is_violated(graph, i); 5531 if (violated < 0) 5532 goto error; 5533 if (!violated) 5534 continue; 5535 5536 any = 1; 5537 5538 umap = isl_union_map_copy(graph->edge[i].tagged_validity); 5539 uset = isl_union_map_domain(umap); 5540 source = isl_union_set_union(source, uset); 5541 source = isl_union_set_coalesce(source); 5542 5543 umap = isl_union_map_copy(graph->edge[i].tagged_validity); 5544 uset = isl_union_map_range(umap); 5545 sink = isl_union_set_union(sink, uset); 5546 sink = isl_union_set_coalesce(sink); 5547 } 5548 5549 if (any) 5550 any = has_adjacent_true_conditions(graph, source, sink); 5551 5552 isl_union_set_free(source); 5553 isl_union_set_free(sink); 5554 return any; 5555 error: 5556 isl_union_set_free(source); 5557 isl_union_set_free(sink); 5558 return -1; 5559 } 5560 5561 /* Examine the current band (the rows between graph->band_start and 5562 * graph->n_total_row), deciding whether to drop it or add it to "node" 5563 * and then continue with the computation of the next band, if any. 5564 * If "initialized" is set, then it may be assumed that 5565 * isl_sched_graph_compute_maxvar 5566 * has been called on the current band. Otherwise, call 5567 * isl_sched_graph_compute_maxvar if and before carry_dependences gets called. 5568 * 5569 * The caller keeps looking for a new row as long as 5570 * graph->n_row < graph->maxvar. If the latest attempt to find 5571 * such a row failed (i.e., we still have graph->n_row < graph->maxvar), 5572 * then we either 5573 * - split between SCCs and start over (assuming we found an interesting 5574 * pair of SCCs between which to split) 5575 * - continue with the next band (assuming the current band has at least 5576 * one row) 5577 * - if there is more than one SCC left, then split along all SCCs 5578 * - if outer coincidence needs to be enforced, then try to carry as many 5579 * validity or coincidence dependences as possible and 5580 * continue with the next band 5581 * - try to carry as many validity dependences as possible and 5582 * continue with the next band 5583 * In each case, we first insert a band node in the schedule tree 5584 * if any rows have been computed. 5585 * 5586 * If the caller managed to complete the schedule and the current band 5587 * is empty, then finish off by topologically 5588 * sorting the statements based on the remaining dependences. 5589 * If, on the other hand, the current band has at least one row, 5590 * then continue with the next band. Note that this next band 5591 * will necessarily be empty, but the graph may still be split up 5592 * into weakly connected components before arriving back here. 5593 */ 5594 __isl_give isl_schedule_node *isl_schedule_node_compute_finish_band( 5595 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, 5596 int initialized) 5597 { 5598 int empty; 5599 5600 if (!node) 5601 return NULL; 5602 5603 empty = graph->n_total_row == graph->band_start; 5604 if (graph->n_row < graph->maxvar) { 5605 isl_ctx *ctx; 5606 5607 ctx = isl_schedule_node_get_ctx(node); 5608 if (!ctx->opt->schedule_maximize_band_depth && !empty) 5609 return compute_next_band(node, graph, 1); 5610 if (graph->src_scc >= 0) 5611 return compute_split_schedule(node, graph); 5612 if (!empty) 5613 return compute_next_band(node, graph, 1); 5614 if (graph->scc > 1) 5615 return compute_component_schedule(node, graph, 1); 5616 if (!initialized && isl_sched_graph_compute_maxvar(graph) < 0) 5617 return isl_schedule_node_free(node); 5618 if (isl_options_get_schedule_outer_coincidence(ctx)) 5619 return carry_coincidence(node, graph); 5620 return carry_dependences(node, graph); 5621 } 5622 5623 if (!empty) 5624 return compute_next_band(node, graph, 1); 5625 return sort_statements(node, graph, initialized); 5626 } 5627 5628 /* Construct a band of schedule rows for a connected dependence graph. 5629 * The caller is responsible for determining the strongly connected 5630 * components and calling isl_sched_graph_compute_maxvar first. 5631 * 5632 * We try to find a sequence of as many schedule rows as possible that result 5633 * in non-negative dependence distances (independent of the previous rows 5634 * in the sequence, i.e., such that the sequence is tilable), with as 5635 * many of the initial rows as possible satisfying the coincidence constraints. 5636 * The computation stops if we can't find any more rows or if we have found 5637 * all the rows we wanted to find. 5638 * 5639 * If ctx->opt->schedule_outer_coincidence is set, then we force the 5640 * outermost dimension to satisfy the coincidence constraints. If this 5641 * turns out to be impossible, we fall back on the general scheme above 5642 * and try to carry as many dependences as possible. 5643 * 5644 * If "graph" contains both condition and conditional validity dependences, 5645 * then we need to check that that the conditional schedule constraint 5646 * is satisfied, i.e., there are no violated conditional validity dependences 5647 * that are adjacent to any non-local condition dependences. 5648 * If there are, then we mark all those adjacent condition dependences 5649 * as local and recompute the current band. Those dependences that 5650 * are marked local will then be forced to be local. 5651 * The initial computation is performed with no dependences marked as local. 5652 * If we are lucky, then there will be no violated conditional validity 5653 * dependences adjacent to any non-local condition dependences. 5654 * Otherwise, we mark some additional condition dependences as local and 5655 * recompute. We continue this process until there are no violations left or 5656 * until we are no longer able to compute a schedule. 5657 * Since there are only a finite number of dependences, 5658 * there will only be a finite number of iterations. 5659 */ 5660 isl_stat isl_schedule_node_compute_wcc_band(isl_ctx *ctx, 5661 struct isl_sched_graph *graph) 5662 { 5663 int has_coincidence; 5664 int use_coincidence; 5665 int force_coincidence = 0; 5666 int check_conditional; 5667 5668 if (sort_sccs(graph) < 0) 5669 return isl_stat_error; 5670 5671 clear_local_edges(graph); 5672 check_conditional = need_condition_check(graph); 5673 has_coincidence = has_any_coincidence(graph); 5674 5675 if (ctx->opt->schedule_outer_coincidence) 5676 force_coincidence = 1; 5677 5678 use_coincidence = has_coincidence; 5679 while (graph->n_row < graph->maxvar) { 5680 isl_vec *sol; 5681 int violated; 5682 int coincident; 5683 5684 graph->src_scc = -1; 5685 graph->dst_scc = -1; 5686 5687 if (setup_lp(ctx, graph, use_coincidence) < 0) 5688 return isl_stat_error; 5689 sol = solve_lp(ctx, graph); 5690 if (!sol) 5691 return isl_stat_error; 5692 if (sol->size == 0) { 5693 int empty = graph->n_total_row == graph->band_start; 5694 5695 isl_vec_free(sol); 5696 if (use_coincidence && (!force_coincidence || !empty)) { 5697 use_coincidence = 0; 5698 continue; 5699 } 5700 return isl_stat_ok; 5701 } 5702 coincident = !has_coincidence || use_coincidence; 5703 if (update_schedule(graph, sol, coincident) < 0) 5704 return isl_stat_error; 5705 5706 if (!check_conditional) 5707 continue; 5708 violated = has_violated_conditional_constraint(ctx, graph); 5709 if (violated < 0) 5710 return isl_stat_error; 5711 if (!violated) 5712 continue; 5713 if (reset_band(graph) < 0) 5714 return isl_stat_error; 5715 use_coincidence = has_coincidence; 5716 } 5717 5718 return isl_stat_ok; 5719 } 5720 5721 /* Compute a schedule for a connected dependence graph by considering 5722 * the graph as a whole and return the updated schedule node. 5723 * 5724 * The actual schedule rows of the current band are computed by 5725 * isl_schedule_node_compute_wcc_band. isl_schedule_node_compute_finish_band 5726 * takes care of integrating the band into "node" and continuing 5727 * the computation. 5728 */ 5729 static __isl_give isl_schedule_node *compute_schedule_wcc_whole( 5730 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) 5731 { 5732 isl_ctx *ctx; 5733 5734 if (!node) 5735 return NULL; 5736 5737 ctx = isl_schedule_node_get_ctx(node); 5738 if (isl_schedule_node_compute_wcc_band(ctx, graph) < 0) 5739 return isl_schedule_node_free(node); 5740 5741 return isl_schedule_node_compute_finish_band(node, graph, 1); 5742 } 5743 5744 /* Compute a schedule for a connected dependence graph and return 5745 * the updated schedule node. 5746 * 5747 * If Feautrier's algorithm is selected, we first recursively try to satisfy 5748 * as many validity dependences as possible. When all validity dependences 5749 * are satisfied we extend the schedule to a full-dimensional schedule. 5750 * 5751 * Call compute_schedule_wcc_whole or isl_schedule_node_compute_wcc_clustering 5752 * depending on whether the user has selected the option to try and 5753 * compute a schedule for the entire (weakly connected) component first. 5754 * If there is only a single strongly connected component (SCC), then 5755 * there is no point in trying to combine SCCs 5756 * in isl_schedule_node_compute_wcc_clustering, so compute_schedule_wcc_whole 5757 * is called instead. 5758 */ 5759 static __isl_give isl_schedule_node *compute_schedule_wcc( 5760 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) 5761 { 5762 isl_ctx *ctx; 5763 5764 if (!node) 5765 return NULL; 5766 5767 ctx = isl_schedule_node_get_ctx(node); 5768 if (detect_sccs(ctx, graph) < 0) 5769 return isl_schedule_node_free(node); 5770 5771 if (isl_sched_graph_compute_maxvar(graph) < 0) 5772 return isl_schedule_node_free(node); 5773 5774 if (need_feautrier_step(ctx, graph)) 5775 return compute_schedule_wcc_feautrier(node, graph); 5776 5777 if (graph->scc <= 1 || isl_options_get_schedule_whole_component(ctx)) 5778 return compute_schedule_wcc_whole(node, graph); 5779 else 5780 return isl_schedule_node_compute_wcc_clustering(node, graph); 5781 } 5782 5783 /* Compute a schedule for each group of nodes identified by node->scc 5784 * separately and then combine them in a sequence node (or as set node 5785 * if graph->weak is set) inserted at position "node" of the schedule tree. 5786 * Return the updated schedule node. 5787 * 5788 * If "wcc" is set then each of the groups belongs to a single 5789 * weakly connected component in the dependence graph so that 5790 * there is no need for compute_sub_schedule to look for weakly 5791 * connected components. 5792 * 5793 * If a set node would be introduced and if the number of components 5794 * is equal to the number of nodes, then check if the schedule 5795 * is already complete. If so, a redundant set node would be introduced 5796 * (without any further descendants) stating that the statements 5797 * can be executed in arbitrary order, which is also expressed 5798 * by the absence of any node. Refrain from inserting any nodes 5799 * in this case and simply return. 5800 */ 5801 static __isl_give isl_schedule_node *compute_component_schedule( 5802 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, 5803 int wcc) 5804 { 5805 int component; 5806 isl_ctx *ctx; 5807 isl_union_set_list *filters; 5808 5809 if (!node) 5810 return NULL; 5811 5812 if (graph->weak && graph->scc == graph->n) { 5813 if (isl_sched_graph_compute_maxvar(graph) < 0) 5814 return isl_schedule_node_free(node); 5815 if (graph->n_row >= graph->maxvar) 5816 return node; 5817 } 5818 5819 ctx = isl_schedule_node_get_ctx(node); 5820 filters = isl_sched_graph_extract_sccs(ctx, graph); 5821 if (graph->weak) 5822 node = isl_schedule_node_insert_set(node, filters); 5823 else 5824 node = isl_schedule_node_insert_sequence(node, filters); 5825 5826 for (component = 0; component < graph->scc; ++component) { 5827 node = isl_schedule_node_grandchild(node, component, 0); 5828 node = compute_sub_schedule(node, ctx, graph, 5829 &isl_sched_node_scc_exactly, 5830 &isl_sched_edge_scc_exactly, 5831 component, wcc); 5832 node = isl_schedule_node_grandparent(node); 5833 } 5834 5835 return node; 5836 } 5837 5838 /* Compute a schedule for the given dependence graph and insert it at "node". 5839 * Return the updated schedule node. 5840 * 5841 * We first check if the graph is connected (through validity and conditional 5842 * validity dependences) and, if not, compute a schedule 5843 * for each component separately. 5844 * If the schedule_serialize_sccs option is set, then we check for strongly 5845 * connected components instead and compute a separate schedule for 5846 * each such strongly connected component. 5847 */ 5848 static __isl_give isl_schedule_node *compute_schedule(isl_schedule_node *node, 5849 struct isl_sched_graph *graph) 5850 { 5851 isl_ctx *ctx; 5852 5853 if (!node) 5854 return NULL; 5855 5856 ctx = isl_schedule_node_get_ctx(node); 5857 if (isl_options_get_schedule_serialize_sccs(ctx)) { 5858 if (detect_sccs(ctx, graph) < 0) 5859 return isl_schedule_node_free(node); 5860 } else { 5861 if (detect_wccs(ctx, graph) < 0) 5862 return isl_schedule_node_free(node); 5863 } 5864 5865 if (graph->scc > 1) 5866 return compute_component_schedule(node, graph, 1); 5867 5868 return compute_schedule_wcc(node, graph); 5869 } 5870 5871 /* Compute a schedule on sc->domain that respects the given schedule 5872 * constraints. 5873 * 5874 * In particular, the schedule respects all the validity dependences. 5875 * If the default isl scheduling algorithm is used, it tries to minimize 5876 * the dependence distances over the proximity dependences. 5877 * If Feautrier's scheduling algorithm is used, the proximity dependence 5878 * distances are only minimized during the extension to a full-dimensional 5879 * schedule. 5880 * 5881 * If there are any condition and conditional validity dependences, 5882 * then the conditional validity dependences may be violated inside 5883 * a tilable band, provided they have no adjacent non-local 5884 * condition dependences. 5885 */ 5886 __isl_give isl_schedule *isl_schedule_constraints_compute_schedule( 5887 __isl_take isl_schedule_constraints *sc) 5888 { 5889 isl_ctx *ctx = isl_schedule_constraints_get_ctx(sc); 5890 struct isl_sched_graph graph = { 0 }; 5891 isl_schedule *sched; 5892 isl_schedule_node *node; 5893 isl_union_set *domain; 5894 isl_size n; 5895 5896 sc = isl_schedule_constraints_align_params(sc); 5897 5898 domain = isl_schedule_constraints_get_domain(sc); 5899 n = isl_union_set_n_set(domain); 5900 if (n == 0) { 5901 isl_schedule_constraints_free(sc); 5902 return isl_schedule_from_domain(domain); 5903 } 5904 5905 if (n < 0 || isl_sched_graph_init(&graph, sc) < 0) 5906 domain = isl_union_set_free(domain); 5907 5908 node = isl_schedule_node_from_domain(domain); 5909 node = isl_schedule_node_child(node, 0); 5910 if (graph.n > 0) 5911 node = compute_schedule(node, &graph); 5912 sched = isl_schedule_node_get_schedule(node); 5913 isl_schedule_node_free(node); 5914 5915 isl_sched_graph_free(ctx, &graph); 5916 isl_schedule_constraints_free(sc); 5917 5918 return sched; 5919 } 5920 5921 /* Compute a schedule for the given union of domains that respects 5922 * all the validity dependences and minimizes 5923 * the dependence distances over the proximity dependences. 5924 * 5925 * This function is kept for backward compatibility. 5926 */ 5927 __isl_give isl_schedule *isl_union_set_compute_schedule( 5928 __isl_take isl_union_set *domain, 5929 __isl_take isl_union_map *validity, 5930 __isl_take isl_union_map *proximity) 5931 { 5932 isl_schedule_constraints *sc; 5933 5934 sc = isl_schedule_constraints_on_domain(domain); 5935 sc = isl_schedule_constraints_set_validity(sc, validity); 5936 sc = isl_schedule_constraints_set_proximity(sc, proximity); 5937 5938 return isl_schedule_constraints_compute_schedule(sc); 5939 } 5940