1 1.1 mrg /* 2 1.1 mrg * Copyright 2015 Sven Verdoolaege 3 1.1 mrg * 4 1.1 mrg * Use of this software is governed by the MIT license 5 1.1 mrg * 6 1.1 mrg * Written by Sven Verdoolaege 7 1.1 mrg */ 8 1.1 mrg 9 1.1 mrg #include "isl_map_private.h" 10 1.1 mrg 11 1.1 mrg #include <isl/id.h> 12 1.1 mrg #include <isl/schedule_node.h> 13 1.1 mrg #include <isl/union_set.h> 14 1.1 mrg 15 1.1 mrg #include "isl_mat_private.h" 16 1.1 mrg #include "isl_scheduler_clustering.h" 17 1.1 mrg #include "isl_scheduler_scc.h" 18 1.1 mrg #include "isl_seq.h" 19 1.1 mrg #include "isl_tarjan.h" 20 1.1 mrg 21 1.1 mrg /* Initialize the clustering data structure "c" from "graph". 22 1.1 mrg * 23 1.1 mrg * In particular, allocate memory, extract the SCCs from "graph" 24 1.1 mrg * into c->scc, initialize scc_cluster and construct 25 1.1 mrg * a band of schedule rows for each SCC. 26 1.1 mrg * Within each SCC, there is only one SCC by definition. 27 1.1 mrg * Each SCC initially belongs to a cluster containing only that SCC. 28 1.1 mrg */ 29 1.1 mrg static isl_stat clustering_init(isl_ctx *ctx, struct isl_clustering *c, 30 1.1 mrg struct isl_sched_graph *graph) 31 1.1 mrg { 32 1.1 mrg int i; 33 1.1 mrg 34 1.1 mrg c->n = graph->scc; 35 1.1 mrg c->scc = isl_calloc_array(ctx, struct isl_sched_graph, c->n); 36 1.1 mrg c->cluster = isl_calloc_array(ctx, struct isl_sched_graph, c->n); 37 1.1 mrg c->scc_cluster = isl_calloc_array(ctx, int, c->n); 38 1.1 mrg c->scc_node = isl_calloc_array(ctx, int, c->n); 39 1.1 mrg c->scc_in_merge = isl_calloc_array(ctx, int, c->n); 40 1.1 mrg if (!c->scc || !c->cluster || 41 1.1 mrg !c->scc_cluster || !c->scc_node || !c->scc_in_merge) 42 1.1 mrg return isl_stat_error; 43 1.1 mrg 44 1.1 mrg for (i = 0; i < c->n; ++i) { 45 1.1 mrg if (isl_sched_graph_extract_sub_graph(ctx, graph, 46 1.1 mrg &isl_sched_node_scc_exactly, 47 1.1 mrg &isl_sched_edge_scc_exactly, 48 1.1 mrg i, &c->scc[i]) < 0) 49 1.1 mrg return isl_stat_error; 50 1.1 mrg c->scc[i].scc = 1; 51 1.1 mrg if (isl_sched_graph_compute_maxvar(&c->scc[i]) < 0) 52 1.1 mrg return isl_stat_error; 53 1.1 mrg if (isl_schedule_node_compute_wcc_band(ctx, &c->scc[i]) < 0) 54 1.1 mrg return isl_stat_error; 55 1.1 mrg c->scc_cluster[i] = i; 56 1.1 mrg } 57 1.1 mrg 58 1.1 mrg return isl_stat_ok; 59 1.1 mrg } 60 1.1 mrg 61 1.1 mrg /* Free all memory allocated for "c". 62 1.1 mrg */ 63 1.1 mrg static void clustering_free(isl_ctx *ctx, struct isl_clustering *c) 64 1.1 mrg { 65 1.1 mrg int i; 66 1.1 mrg 67 1.1 mrg if (c->scc) 68 1.1 mrg for (i = 0; i < c->n; ++i) 69 1.1 mrg isl_sched_graph_free(ctx, &c->scc[i]); 70 1.1 mrg free(c->scc); 71 1.1 mrg if (c->cluster) 72 1.1 mrg for (i = 0; i < c->n; ++i) 73 1.1 mrg isl_sched_graph_free(ctx, &c->cluster[i]); 74 1.1 mrg free(c->cluster); 75 1.1 mrg free(c->scc_cluster); 76 1.1 mrg free(c->scc_node); 77 1.1 mrg free(c->scc_in_merge); 78 1.1 mrg } 79 1.1 mrg 80 1.1 mrg /* Should we refrain from merging the cluster in "graph" with 81 1.1 mrg * any other cluster? 82 1.1 mrg * In particular, is its current schedule band empty and incomplete. 83 1.1 mrg */ 84 1.1 mrg static int bad_cluster(struct isl_sched_graph *graph) 85 1.1 mrg { 86 1.1 mrg return graph->n_row < graph->maxvar && 87 1.1 mrg graph->n_total_row == graph->band_start; 88 1.1 mrg } 89 1.1 mrg 90 1.1 mrg /* Is "edge" a proximity edge with a non-empty dependence relation? 91 1.1 mrg */ 92 1.1 mrg static isl_bool is_non_empty_proximity(struct isl_sched_edge *edge) 93 1.1 mrg { 94 1.1 mrg if (!isl_sched_edge_is_proximity(edge)) 95 1.1 mrg return isl_bool_false; 96 1.1 mrg return isl_bool_not(isl_map_plain_is_empty(edge->map)); 97 1.1 mrg } 98 1.1 mrg 99 1.1 mrg /* Return the index of an edge in "graph" that can be used to merge 100 1.1 mrg * two clusters in "c". 101 1.1 mrg * Return graph->n_edge if no such edge can be found. 102 1.1 mrg * Return -1 on error. 103 1.1 mrg * 104 1.1 mrg * In particular, return a proximity edge between two clusters 105 1.1 mrg * that is not marked "no_merge" and such that neither of the 106 1.1 mrg * two clusters has an incomplete, empty band. 107 1.1 mrg * 108 1.1 mrg * If there are multiple such edges, then try and find the most 109 1.1 mrg * appropriate edge to use for merging. In particular, pick the edge 110 1.1 mrg * with the greatest weight. If there are multiple of those, 111 1.1 mrg * then pick one with the shortest distance between 112 1.1 mrg * the two cluster representatives. 113 1.1 mrg */ 114 1.1 mrg static int find_proximity(struct isl_sched_graph *graph, 115 1.1 mrg struct isl_clustering *c) 116 1.1 mrg { 117 1.1 mrg int i, best = graph->n_edge, best_dist, best_weight; 118 1.1 mrg 119 1.1 mrg for (i = 0; i < graph->n_edge; ++i) { 120 1.1 mrg struct isl_sched_edge *edge = &graph->edge[i]; 121 1.1 mrg int dist, weight; 122 1.1 mrg isl_bool prox; 123 1.1 mrg 124 1.1 mrg prox = is_non_empty_proximity(edge); 125 1.1 mrg if (prox < 0) 126 1.1 mrg return -1; 127 1.1 mrg if (!prox) 128 1.1 mrg continue; 129 1.1 mrg if (edge->no_merge) 130 1.1 mrg continue; 131 1.1 mrg if (bad_cluster(&c->scc[edge->src->scc]) || 132 1.1 mrg bad_cluster(&c->scc[edge->dst->scc])) 133 1.1 mrg continue; 134 1.1 mrg dist = c->scc_cluster[edge->dst->scc] - 135 1.1 mrg c->scc_cluster[edge->src->scc]; 136 1.1 mrg if (dist == 0) 137 1.1 mrg continue; 138 1.1 mrg weight = edge->weight; 139 1.1 mrg if (best < graph->n_edge) { 140 1.1 mrg if (best_weight > weight) 141 1.1 mrg continue; 142 1.1 mrg if (best_weight == weight && best_dist <= dist) 143 1.1 mrg continue; 144 1.1 mrg } 145 1.1 mrg best = i; 146 1.1 mrg best_dist = dist; 147 1.1 mrg best_weight = weight; 148 1.1 mrg } 149 1.1 mrg 150 1.1 mrg return best; 151 1.1 mrg } 152 1.1 mrg 153 1.1 mrg /* Internal data structure used in mark_merge_sccs. 154 1.1 mrg * 155 1.1 mrg * "graph" is the dependence graph in which a strongly connected 156 1.1 mrg * component is constructed. 157 1.1 mrg * "scc_cluster" maps each SCC index to the cluster to which it belongs. 158 1.1 mrg * "src" and "dst" are the indices of the nodes that are being merged. 159 1.1 mrg */ 160 1.1 mrg struct isl_mark_merge_sccs_data { 161 1.1 mrg struct isl_sched_graph *graph; 162 1.1 mrg int *scc_cluster; 163 1.1 mrg int src; 164 1.1 mrg int dst; 165 1.1 mrg }; 166 1.1 mrg 167 1.1 mrg /* Check whether the cluster containing node "i" depends on the cluster 168 1.1 mrg * containing node "j". If "i" and "j" belong to the same cluster, 169 1.1 mrg * then they are taken to depend on each other to ensure that 170 1.1 mrg * the resulting strongly connected component consists of complete 171 1.1 mrg * clusters. Furthermore, if "i" and "j" are the two nodes that 172 1.1 mrg * are being merged, then they are taken to depend on each other as well. 173 1.1 mrg * Otherwise, check if there is a (conditional) validity dependence 174 1.1 mrg * from node[j] to node[i], forcing node[i] to follow node[j]. 175 1.1 mrg */ 176 1.1 mrg static isl_bool cluster_follows(int i, int j, void *user) 177 1.1 mrg { 178 1.1 mrg struct isl_mark_merge_sccs_data *data = user; 179 1.1 mrg struct isl_sched_graph *graph = data->graph; 180 1.1 mrg int *scc_cluster = data->scc_cluster; 181 1.1 mrg 182 1.1 mrg if (data->src == i && data->dst == j) 183 1.1 mrg return isl_bool_true; 184 1.1 mrg if (data->src == j && data->dst == i) 185 1.1 mrg return isl_bool_true; 186 1.1 mrg if (scc_cluster[graph->node[i].scc] == scc_cluster[graph->node[j].scc]) 187 1.1 mrg return isl_bool_true; 188 1.1 mrg 189 1.1 mrg return isl_sched_graph_has_validity_edge(graph, &graph->node[j], 190 1.1 mrg &graph->node[i]); 191 1.1 mrg } 192 1.1 mrg 193 1.1 mrg /* Mark all SCCs that belong to either of the two clusters in "c" 194 1.1 mrg * connected by the edge in "graph" with index "edge", or to any 195 1.1 mrg * of the intermediate clusters. 196 1.1 mrg * The marking is recorded in c->scc_in_merge. 197 1.1 mrg * 198 1.1 mrg * The given edge has been selected for merging two clusters, 199 1.1 mrg * meaning that there is at least a proximity edge between the two nodes. 200 1.1 mrg * However, there may also be (indirect) validity dependences 201 1.1 mrg * between the two nodes. When merging the two clusters, all clusters 202 1.1 mrg * containing one or more of the intermediate nodes along the 203 1.1 mrg * indirect validity dependences need to be merged in as well. 204 1.1 mrg * 205 1.1 mrg * First collect all such nodes by computing the strongly connected 206 1.1 mrg * component (SCC) containing the two nodes connected by the edge, where 207 1.1 mrg * the two nodes are considered to depend on each other to make 208 1.1 mrg * sure they end up in the same SCC. Similarly, each node is considered 209 1.1 mrg * to depend on every other node in the same cluster to ensure 210 1.1 mrg * that the SCC consists of complete clusters. 211 1.1 mrg * 212 1.1 mrg * Then the original SCCs that contain any of these nodes are marked 213 1.1 mrg * in c->scc_in_merge. 214 1.1 mrg */ 215 1.1 mrg static isl_stat mark_merge_sccs(isl_ctx *ctx, struct isl_sched_graph *graph, 216 1.1 mrg int edge, struct isl_clustering *c) 217 1.1 mrg { 218 1.1 mrg struct isl_mark_merge_sccs_data data; 219 1.1 mrg struct isl_tarjan_graph *g; 220 1.1 mrg int i; 221 1.1 mrg 222 1.1 mrg for (i = 0; i < c->n; ++i) 223 1.1 mrg c->scc_in_merge[i] = 0; 224 1.1 mrg 225 1.1 mrg data.graph = graph; 226 1.1 mrg data.scc_cluster = c->scc_cluster; 227 1.1 mrg data.src = graph->edge[edge].src - graph->node; 228 1.1 mrg data.dst = graph->edge[edge].dst - graph->node; 229 1.1 mrg 230 1.1 mrg g = isl_tarjan_graph_component(ctx, graph->n, data.dst, 231 1.1 mrg &cluster_follows, &data); 232 1.1 mrg if (!g) 233 1.1 mrg goto error; 234 1.1 mrg 235 1.1 mrg i = g->op; 236 1.1 mrg if (i < 3) 237 1.1 mrg isl_die(ctx, isl_error_internal, 238 1.1 mrg "expecting at least two nodes in component", 239 1.1 mrg goto error); 240 1.1 mrg if (g->order[--i] != -1) 241 1.1 mrg isl_die(ctx, isl_error_internal, 242 1.1 mrg "expecting end of component marker", goto error); 243 1.1 mrg 244 1.1 mrg for (--i; i >= 0 && g->order[i] != -1; --i) { 245 1.1 mrg int scc = graph->node[g->order[i]].scc; 246 1.1 mrg c->scc_in_merge[scc] = 1; 247 1.1 mrg } 248 1.1 mrg 249 1.1 mrg isl_tarjan_graph_free(g); 250 1.1 mrg return isl_stat_ok; 251 1.1 mrg error: 252 1.1 mrg isl_tarjan_graph_free(g); 253 1.1 mrg return isl_stat_error; 254 1.1 mrg } 255 1.1 mrg 256 1.1 mrg /* Construct the identifier "cluster_i". 257 1.1 mrg */ 258 1.1 mrg static __isl_give isl_id *cluster_id(isl_ctx *ctx, int i) 259 1.1 mrg { 260 1.1 mrg char name[40]; 261 1.1 mrg 262 1.1 mrg snprintf(name, sizeof(name), "cluster_%d", i); 263 1.1 mrg return isl_id_alloc(ctx, name, NULL); 264 1.1 mrg } 265 1.1 mrg 266 1.1 mrg /* Construct the space of the cluster with index "i" containing 267 1.1 mrg * the strongly connected component "scc". 268 1.1 mrg * 269 1.1 mrg * In particular, construct a space called cluster_i with dimension equal 270 1.1 mrg * to the number of schedule rows in the current band of "scc". 271 1.1 mrg */ 272 1.1 mrg static __isl_give isl_space *cluster_space(struct isl_sched_graph *scc, int i) 273 1.1 mrg { 274 1.1 mrg int nvar; 275 1.1 mrg isl_space *space; 276 1.1 mrg isl_id *id; 277 1.1 mrg 278 1.1 mrg nvar = scc->n_total_row - scc->band_start; 279 1.1 mrg space = isl_space_copy(scc->node[0].space); 280 1.1 mrg space = isl_space_params(space); 281 1.1 mrg space = isl_space_set_from_params(space); 282 1.1 mrg space = isl_space_add_dims(space, isl_dim_set, nvar); 283 1.1 mrg id = cluster_id(isl_space_get_ctx(space), i); 284 1.1 mrg space = isl_space_set_tuple_id(space, isl_dim_set, id); 285 1.1 mrg 286 1.1 mrg return space; 287 1.1 mrg } 288 1.1 mrg 289 1.1 mrg /* Collect the domain of the graph for merging clusters. 290 1.1 mrg * 291 1.1 mrg * In particular, for each cluster with first SCC "i", construct 292 1.1 mrg * a set in the space called cluster_i with dimension equal 293 1.1 mrg * to the number of schedule rows in the current band of the cluster. 294 1.1 mrg */ 295 1.1 mrg static __isl_give isl_union_set *collect_domain(isl_ctx *ctx, 296 1.1 mrg struct isl_sched_graph *graph, struct isl_clustering *c) 297 1.1 mrg { 298 1.1 mrg int i; 299 1.1 mrg isl_space *space; 300 1.1 mrg isl_union_set *domain; 301 1.1 mrg 302 1.1 mrg space = isl_space_params_alloc(ctx, 0); 303 1.1 mrg domain = isl_union_set_empty(space); 304 1.1 mrg 305 1.1 mrg for (i = 0; i < graph->scc; ++i) { 306 1.1 mrg isl_space *space; 307 1.1 mrg 308 1.1 mrg if (!c->scc_in_merge[i]) 309 1.1 mrg continue; 310 1.1 mrg if (c->scc_cluster[i] != i) 311 1.1 mrg continue; 312 1.1 mrg space = cluster_space(&c->scc[i], i); 313 1.1 mrg domain = isl_union_set_add_set(domain, isl_set_universe(space)); 314 1.1 mrg } 315 1.1 mrg 316 1.1 mrg return domain; 317 1.1 mrg } 318 1.1 mrg 319 1.1 mrg /* Construct a map from the original instances to the corresponding 320 1.1 mrg * cluster instance in the current bands of the clusters in "c". 321 1.1 mrg */ 322 1.1 mrg static __isl_give isl_union_map *collect_cluster_map(isl_ctx *ctx, 323 1.1 mrg struct isl_sched_graph *graph, struct isl_clustering *c) 324 1.1 mrg { 325 1.1 mrg int i, j; 326 1.1 mrg isl_space *space; 327 1.1 mrg isl_union_map *cluster_map; 328 1.1 mrg 329 1.1 mrg space = isl_space_params_alloc(ctx, 0); 330 1.1 mrg cluster_map = isl_union_map_empty(space); 331 1.1 mrg for (i = 0; i < graph->scc; ++i) { 332 1.1 mrg int start, n; 333 1.1 mrg isl_id *id; 334 1.1 mrg 335 1.1 mrg if (!c->scc_in_merge[i]) 336 1.1 mrg continue; 337 1.1 mrg 338 1.1 mrg id = cluster_id(ctx, c->scc_cluster[i]); 339 1.1 mrg start = c->scc[i].band_start; 340 1.1 mrg n = c->scc[i].n_total_row - start; 341 1.1 mrg for (j = 0; j < c->scc[i].n; ++j) { 342 1.1 mrg isl_multi_aff *ma; 343 1.1 mrg isl_map *map; 344 1.1 mrg struct isl_sched_node *node = &c->scc[i].node[j]; 345 1.1 mrg 346 1.1 mrg ma = isl_sched_node_extract_partial_schedule_multi_aff( 347 1.1 mrg node, start, n); 348 1.1 mrg ma = isl_multi_aff_set_tuple_id(ma, isl_dim_out, 349 1.1 mrg isl_id_copy(id)); 350 1.1 mrg map = isl_map_from_multi_aff(ma); 351 1.1 mrg cluster_map = isl_union_map_add_map(cluster_map, map); 352 1.1 mrg } 353 1.1 mrg isl_id_free(id); 354 1.1 mrg } 355 1.1 mrg 356 1.1 mrg return cluster_map; 357 1.1 mrg } 358 1.1 mrg 359 1.1 mrg /* Add "umap" to the schedule constraints "sc" of all types of "edge" 360 1.1 mrg * that are not isl_edge_condition or isl_edge_conditional_validity. 361 1.1 mrg */ 362 1.1 mrg static __isl_give isl_schedule_constraints *add_non_conditional_constraints( 363 1.1 mrg struct isl_sched_edge *edge, __isl_keep isl_union_map *umap, 364 1.1 mrg __isl_take isl_schedule_constraints *sc) 365 1.1 mrg { 366 1.1 mrg enum isl_edge_type t; 367 1.1 mrg 368 1.1 mrg if (!sc) 369 1.1 mrg return NULL; 370 1.1 mrg 371 1.1 mrg for (t = isl_edge_first; t <= isl_edge_last; ++t) { 372 1.1 mrg if (t == isl_edge_condition || 373 1.1 mrg t == isl_edge_conditional_validity) 374 1.1 mrg continue; 375 1.1 mrg if (!isl_sched_edge_has_type(edge, t)) 376 1.1 mrg continue; 377 1.1 mrg sc = isl_schedule_constraints_add(sc, t, 378 1.1 mrg isl_union_map_copy(umap)); 379 1.1 mrg } 380 1.1 mrg 381 1.1 mrg return sc; 382 1.1 mrg } 383 1.1 mrg 384 1.1 mrg /* Add schedule constraints of types isl_edge_condition and 385 1.1 mrg * isl_edge_conditional_validity to "sc" by applying "umap" to 386 1.1 mrg * the domains of the wrapped relations in domain and range 387 1.1 mrg * of the corresponding tagged constraints of "edge". 388 1.1 mrg */ 389 1.1 mrg static __isl_give isl_schedule_constraints *add_conditional_constraints( 390 1.1 mrg struct isl_sched_edge *edge, __isl_keep isl_union_map *umap, 391 1.1 mrg __isl_take isl_schedule_constraints *sc) 392 1.1 mrg { 393 1.1 mrg enum isl_edge_type t; 394 1.1 mrg isl_union_map *tagged; 395 1.1 mrg 396 1.1 mrg for (t = isl_edge_condition; t <= isl_edge_conditional_validity; ++t) { 397 1.1 mrg if (!isl_sched_edge_has_type(edge, t)) 398 1.1 mrg continue; 399 1.1 mrg if (t == isl_edge_condition) 400 1.1 mrg tagged = isl_union_map_copy(edge->tagged_condition); 401 1.1 mrg else 402 1.1 mrg tagged = isl_union_map_copy(edge->tagged_validity); 403 1.1 mrg tagged = isl_union_map_zip(tagged); 404 1.1 mrg tagged = isl_union_map_apply_domain(tagged, 405 1.1 mrg isl_union_map_copy(umap)); 406 1.1 mrg tagged = isl_union_map_zip(tagged); 407 1.1 mrg sc = isl_schedule_constraints_add(sc, t, tagged); 408 1.1 mrg if (!sc) 409 1.1 mrg return NULL; 410 1.1 mrg } 411 1.1 mrg 412 1.1 mrg return sc; 413 1.1 mrg } 414 1.1 mrg 415 1.1 mrg /* Given a mapping "cluster_map" from the original instances to 416 1.1 mrg * the cluster instances, add schedule constraints on the clusters 417 1.1 mrg * to "sc" corresponding to the original constraints represented by "edge". 418 1.1 mrg * 419 1.1 mrg * For non-tagged dependence constraints, the cluster constraints 420 1.1 mrg * are obtained by applying "cluster_map" to the edge->map. 421 1.1 mrg * 422 1.1 mrg * For tagged dependence constraints, "cluster_map" needs to be applied 423 1.1 mrg * to the domains of the wrapped relations in domain and range 424 1.1 mrg * of the tagged dependence constraints. Pick out the mappings 425 1.1 mrg * from these domains from "cluster_map" and construct their product. 426 1.1 mrg * This mapping can then be applied to the pair of domains. 427 1.1 mrg */ 428 1.1 mrg static __isl_give isl_schedule_constraints *collect_edge_constraints( 429 1.1 mrg struct isl_sched_edge *edge, __isl_keep isl_union_map *cluster_map, 430 1.1 mrg __isl_take isl_schedule_constraints *sc) 431 1.1 mrg { 432 1.1 mrg isl_union_map *umap; 433 1.1 mrg isl_space *space; 434 1.1 mrg isl_union_set *uset; 435 1.1 mrg isl_union_map *umap1, *umap2; 436 1.1 mrg 437 1.1 mrg if (!sc) 438 1.1 mrg return NULL; 439 1.1 mrg 440 1.1 mrg umap = isl_union_map_from_map(isl_map_copy(edge->map)); 441 1.1 mrg umap = isl_union_map_apply_domain(umap, 442 1.1 mrg isl_union_map_copy(cluster_map)); 443 1.1 mrg umap = isl_union_map_apply_range(umap, 444 1.1 mrg isl_union_map_copy(cluster_map)); 445 1.1 mrg sc = add_non_conditional_constraints(edge, umap, sc); 446 1.1 mrg isl_union_map_free(umap); 447 1.1 mrg 448 1.1 mrg if (!sc || 449 1.1 mrg (!isl_sched_edge_is_condition(edge) && 450 1.1 mrg !isl_sched_edge_is_conditional_validity(edge))) 451 1.1 mrg return sc; 452 1.1 mrg 453 1.1 mrg space = isl_space_domain(isl_map_get_space(edge->map)); 454 1.1 mrg uset = isl_union_set_from_set(isl_set_universe(space)); 455 1.1 mrg umap1 = isl_union_map_copy(cluster_map); 456 1.1 mrg umap1 = isl_union_map_intersect_domain(umap1, uset); 457 1.1 mrg space = isl_space_range(isl_map_get_space(edge->map)); 458 1.1 mrg uset = isl_union_set_from_set(isl_set_universe(space)); 459 1.1 mrg umap2 = isl_union_map_copy(cluster_map); 460 1.1 mrg umap2 = isl_union_map_intersect_domain(umap2, uset); 461 1.1 mrg umap = isl_union_map_product(umap1, umap2); 462 1.1 mrg 463 1.1 mrg sc = add_conditional_constraints(edge, umap, sc); 464 1.1 mrg 465 1.1 mrg isl_union_map_free(umap); 466 1.1 mrg return sc; 467 1.1 mrg } 468 1.1 mrg 469 1.1 mrg /* Given a mapping "cluster_map" from the original instances to 470 1.1 mrg * the cluster instances, add schedule constraints on the clusters 471 1.1 mrg * to "sc" corresponding to all edges in "graph" between nodes that 472 1.1 mrg * belong to SCCs that are marked for merging in "scc_in_merge". 473 1.1 mrg */ 474 1.1 mrg static __isl_give isl_schedule_constraints *collect_constraints( 475 1.1 mrg struct isl_sched_graph *graph, int *scc_in_merge, 476 1.1 mrg __isl_keep isl_union_map *cluster_map, 477 1.1 mrg __isl_take isl_schedule_constraints *sc) 478 1.1 mrg { 479 1.1 mrg int i; 480 1.1 mrg 481 1.1 mrg for (i = 0; i < graph->n_edge; ++i) { 482 1.1 mrg struct isl_sched_edge *edge = &graph->edge[i]; 483 1.1 mrg 484 1.1 mrg if (!scc_in_merge[edge->src->scc]) 485 1.1 mrg continue; 486 1.1 mrg if (!scc_in_merge[edge->dst->scc]) 487 1.1 mrg continue; 488 1.1 mrg sc = collect_edge_constraints(edge, cluster_map, sc); 489 1.1 mrg } 490 1.1 mrg 491 1.1 mrg return sc; 492 1.1 mrg } 493 1.1 mrg 494 1.1 mrg /* Construct a dependence graph for scheduling clusters with respect 495 1.1 mrg * to each other and store the result in "merge_graph". 496 1.1 mrg * In particular, the nodes of the graph correspond to the schedule 497 1.1 mrg * dimensions of the current bands of those clusters that have been 498 1.1 mrg * marked for merging in "c". 499 1.1 mrg * 500 1.1 mrg * First construct an isl_schedule_constraints object for this domain 501 1.1 mrg * by transforming the edges in "graph" to the domain. 502 1.1 mrg * Then initialize a dependence graph for scheduling from these 503 1.1 mrg * constraints. 504 1.1 mrg */ 505 1.1 mrg static isl_stat init_merge_graph(isl_ctx *ctx, struct isl_sched_graph *graph, 506 1.1 mrg struct isl_clustering *c, struct isl_sched_graph *merge_graph) 507 1.1 mrg { 508 1.1 mrg isl_union_set *domain; 509 1.1 mrg isl_union_map *cluster_map; 510 1.1 mrg isl_schedule_constraints *sc; 511 1.1 mrg isl_stat r; 512 1.1 mrg 513 1.1 mrg domain = collect_domain(ctx, graph, c); 514 1.1 mrg sc = isl_schedule_constraints_on_domain(domain); 515 1.1 mrg if (!sc) 516 1.1 mrg return isl_stat_error; 517 1.1 mrg cluster_map = collect_cluster_map(ctx, graph, c); 518 1.1 mrg sc = collect_constraints(graph, c->scc_in_merge, cluster_map, sc); 519 1.1 mrg isl_union_map_free(cluster_map); 520 1.1 mrg 521 1.1 mrg r = isl_sched_graph_init(merge_graph, sc); 522 1.1 mrg 523 1.1 mrg isl_schedule_constraints_free(sc); 524 1.1 mrg 525 1.1 mrg return r; 526 1.1 mrg } 527 1.1 mrg 528 1.1 mrg /* Compute the maximal number of remaining schedule rows that still need 529 1.1 mrg * to be computed for the nodes that belong to clusters with the maximal 530 1.1 mrg * dimension for the current band (i.e., the band that is to be merged). 531 1.1 mrg * Only clusters that are about to be merged are considered. 532 1.1 mrg * "maxvar" is the maximal dimension for the current band. 533 1.1 mrg * "c" contains information about the clusters. 534 1.1 mrg * 535 1.1 mrg * Return the maximal number of remaining schedule rows or 536 1.1 mrg * isl_size_error on error. 537 1.1 mrg */ 538 1.1 mrg static isl_size compute_maxvar_max_slack(int maxvar, struct isl_clustering *c) 539 1.1 mrg { 540 1.1 mrg int i, j; 541 1.1 mrg int max_slack; 542 1.1 mrg 543 1.1 mrg max_slack = 0; 544 1.1 mrg for (i = 0; i < c->n; ++i) { 545 1.1 mrg int nvar; 546 1.1 mrg struct isl_sched_graph *scc; 547 1.1 mrg 548 1.1 mrg if (!c->scc_in_merge[i]) 549 1.1 mrg continue; 550 1.1 mrg scc = &c->scc[i]; 551 1.1 mrg nvar = scc->n_total_row - scc->band_start; 552 1.1 mrg if (nvar != maxvar) 553 1.1 mrg continue; 554 1.1 mrg for (j = 0; j < scc->n; ++j) { 555 1.1 mrg struct isl_sched_node *node = &scc->node[j]; 556 1.1 mrg int slack; 557 1.1 mrg 558 1.1 mrg if (isl_sched_node_update_vmap(node) < 0) 559 1.1 mrg return isl_size_error; 560 1.1 mrg slack = node->nvar - node->rank; 561 1.1 mrg if (slack > max_slack) 562 1.1 mrg max_slack = slack; 563 1.1 mrg } 564 1.1 mrg } 565 1.1 mrg 566 1.1 mrg return max_slack; 567 1.1 mrg } 568 1.1 mrg 569 1.1 mrg /* If there are any clusters where the dimension of the current band 570 1.1 mrg * (i.e., the band that is to be merged) is smaller than "maxvar" and 571 1.1 mrg * if there are any nodes in such a cluster where the number 572 1.1 mrg * of remaining schedule rows that still need to be computed 573 1.1 mrg * is greater than "max_slack", then return the smallest current band 574 1.1 mrg * dimension of all these clusters. Otherwise return the original value 575 1.1 mrg * of "maxvar". Return isl_size_error in case of any error. 576 1.1 mrg * Only clusters that are about to be merged are considered. 577 1.1 mrg * "c" contains information about the clusters. 578 1.1 mrg */ 579 1.1 mrg static isl_size limit_maxvar_to_slack(int maxvar, int max_slack, 580 1.1 mrg struct isl_clustering *c) 581 1.1 mrg { 582 1.1 mrg int i, j; 583 1.1 mrg 584 1.1 mrg for (i = 0; i < c->n; ++i) { 585 1.1 mrg int nvar; 586 1.1 mrg struct isl_sched_graph *scc; 587 1.1 mrg 588 1.1 mrg if (!c->scc_in_merge[i]) 589 1.1 mrg continue; 590 1.1 mrg scc = &c->scc[i]; 591 1.1 mrg nvar = scc->n_total_row - scc->band_start; 592 1.1 mrg if (nvar >= maxvar) 593 1.1 mrg continue; 594 1.1 mrg for (j = 0; j < scc->n; ++j) { 595 1.1 mrg struct isl_sched_node *node = &scc->node[j]; 596 1.1 mrg int slack; 597 1.1 mrg 598 1.1 mrg if (isl_sched_node_update_vmap(node) < 0) 599 1.1 mrg return isl_size_error; 600 1.1 mrg slack = node->nvar - node->rank; 601 1.1 mrg if (slack > max_slack) { 602 1.1 mrg maxvar = nvar; 603 1.1 mrg break; 604 1.1 mrg } 605 1.1 mrg } 606 1.1 mrg } 607 1.1 mrg 608 1.1 mrg return maxvar; 609 1.1 mrg } 610 1.1 mrg 611 1.1 mrg /* Adjust merge_graph->maxvar based on the number of remaining schedule rows 612 1.1 mrg * that still need to be computed. In particular, if there is a node 613 1.1 mrg * in a cluster where the dimension of the current band is smaller 614 1.1 mrg * than merge_graph->maxvar, but the number of remaining schedule rows 615 1.1 mrg * is greater than that of any node in a cluster with the maximal 616 1.1 mrg * dimension for the current band (i.e., merge_graph->maxvar), 617 1.1 mrg * then adjust merge_graph->maxvar to the (smallest) current band dimension 618 1.1 mrg * of those clusters. Without this adjustment, the total number of 619 1.1 mrg * schedule dimensions would be increased, resulting in a skewed view 620 1.1 mrg * of the number of coincident dimensions. 621 1.1 mrg * "c" contains information about the clusters. 622 1.1 mrg * 623 1.1 mrg * If the maximize_band_depth option is set and merge_graph->maxvar is reduced, 624 1.1 mrg * then there is no point in attempting any merge since it will be rejected 625 1.1 mrg * anyway. Set merge_graph->maxvar to zero in such cases. 626 1.1 mrg */ 627 1.1 mrg static isl_stat adjust_maxvar_to_slack(isl_ctx *ctx, 628 1.1 mrg struct isl_sched_graph *merge_graph, struct isl_clustering *c) 629 1.1 mrg { 630 1.1 mrg isl_size max_slack, maxvar; 631 1.1 mrg 632 1.1 mrg max_slack = compute_maxvar_max_slack(merge_graph->maxvar, c); 633 1.1 mrg if (max_slack < 0) 634 1.1 mrg return isl_stat_error; 635 1.1 mrg maxvar = limit_maxvar_to_slack(merge_graph->maxvar, max_slack, c); 636 1.1 mrg if (maxvar < 0) 637 1.1 mrg return isl_stat_error; 638 1.1 mrg 639 1.1 mrg if (maxvar < merge_graph->maxvar) { 640 1.1 mrg if (isl_options_get_schedule_maximize_band_depth(ctx)) 641 1.1 mrg merge_graph->maxvar = 0; 642 1.1 mrg else 643 1.1 mrg merge_graph->maxvar = maxvar; 644 1.1 mrg } 645 1.1 mrg 646 1.1 mrg return isl_stat_ok; 647 1.1 mrg } 648 1.1 mrg 649 1.1 mrg /* Return the number of coincident dimensions in the current band of "graph", 650 1.1 mrg * where the nodes of "graph" are assumed to be scheduled by a single band. 651 1.1 mrg */ 652 1.1 mrg static int get_n_coincident(struct isl_sched_graph *graph) 653 1.1 mrg { 654 1.1 mrg int i; 655 1.1 mrg 656 1.1 mrg for (i = graph->band_start; i < graph->n_total_row; ++i) 657 1.1 mrg if (!graph->node[0].coincident[i]) 658 1.1 mrg break; 659 1.1 mrg 660 1.1 mrg return i - graph->band_start; 661 1.1 mrg } 662 1.1 mrg 663 1.1 mrg /* Should the clusters be merged based on the cluster schedule 664 1.1 mrg * in the current (and only) band of "merge_graph", given that 665 1.1 mrg * coincidence should be maximized? 666 1.1 mrg * 667 1.1 mrg * If the number of coincident schedule dimensions in the merged band 668 1.1 mrg * would be less than the maximal number of coincident schedule dimensions 669 1.1 mrg * in any of the merged clusters, then the clusters should not be merged. 670 1.1 mrg */ 671 1.1 mrg static isl_bool ok_to_merge_coincident(struct isl_clustering *c, 672 1.1 mrg struct isl_sched_graph *merge_graph) 673 1.1 mrg { 674 1.1 mrg int i; 675 1.1 mrg int n_coincident; 676 1.1 mrg int max_coincident; 677 1.1 mrg 678 1.1 mrg max_coincident = 0; 679 1.1 mrg for (i = 0; i < c->n; ++i) { 680 1.1 mrg if (!c->scc_in_merge[i]) 681 1.1 mrg continue; 682 1.1 mrg n_coincident = get_n_coincident(&c->scc[i]); 683 1.1 mrg if (n_coincident > max_coincident) 684 1.1 mrg max_coincident = n_coincident; 685 1.1 mrg } 686 1.1 mrg 687 1.1 mrg n_coincident = get_n_coincident(merge_graph); 688 1.1 mrg 689 1.1 mrg return isl_bool_ok(n_coincident >= max_coincident); 690 1.1 mrg } 691 1.1 mrg 692 1.1 mrg /* Return the transformation on "node" expressed by the current (and only) 693 1.1 mrg * band of "merge_graph" applied to the clusters in "c". 694 1.1 mrg * 695 1.1 mrg * First find the representation of "node" in its SCC in "c" and 696 1.1 mrg * extract the transformation expressed by the current band. 697 1.1 mrg * Then extract the transformation applied by "merge_graph" 698 1.1 mrg * to the cluster to which this SCC belongs. 699 1.1 mrg * Combine the two to obtain the complete transformation on the node. 700 1.1 mrg * 701 1.1 mrg * Note that the range of the first transformation is an anonymous space, 702 1.1 mrg * while the domain of the second is named "cluster_X". The range 703 1.1 mrg * of the former therefore needs to be adjusted before the two 704 1.1 mrg * can be combined. 705 1.1 mrg */ 706 1.1 mrg static __isl_give isl_map *extract_node_transformation(isl_ctx *ctx, 707 1.1 mrg struct isl_sched_node *node, struct isl_clustering *c, 708 1.1 mrg struct isl_sched_graph *merge_graph) 709 1.1 mrg { 710 1.1 mrg struct isl_sched_node *scc_node, *cluster_node; 711 1.1 mrg int start, n; 712 1.1 mrg isl_id *id; 713 1.1 mrg isl_space *space; 714 1.1 mrg isl_multi_aff *ma, *ma2; 715 1.1 mrg 716 1.1 mrg scc_node = isl_sched_graph_find_node(ctx, &c->scc[node->scc], 717 1.1 mrg node->space); 718 1.1 mrg if (scc_node && !isl_sched_graph_is_node(&c->scc[node->scc], scc_node)) 719 1.1 mrg isl_die(ctx, isl_error_internal, "unable to find node", 720 1.1 mrg return NULL); 721 1.1 mrg start = c->scc[node->scc].band_start; 722 1.1 mrg n = c->scc[node->scc].n_total_row - start; 723 1.1 mrg ma = isl_sched_node_extract_partial_schedule_multi_aff(scc_node, 724 1.1 mrg start, n); 725 1.1 mrg space = cluster_space(&c->scc[node->scc], c->scc_cluster[node->scc]); 726 1.1 mrg cluster_node = isl_sched_graph_find_node(ctx, merge_graph, space); 727 1.1 mrg if (cluster_node && !isl_sched_graph_is_node(merge_graph, cluster_node)) 728 1.1 mrg isl_die(ctx, isl_error_internal, "unable to find cluster", 729 1.1 mrg space = isl_space_free(space)); 730 1.1 mrg id = isl_space_get_tuple_id(space, isl_dim_set); 731 1.1 mrg ma = isl_multi_aff_set_tuple_id(ma, isl_dim_out, id); 732 1.1 mrg isl_space_free(space); 733 1.1 mrg n = merge_graph->n_total_row; 734 1.1 mrg ma2 = isl_sched_node_extract_partial_schedule_multi_aff(cluster_node, 735 1.1 mrg 0, n); 736 1.1 mrg ma = isl_multi_aff_pullback_multi_aff(ma2, ma); 737 1.1 mrg 738 1.1 mrg return isl_map_from_multi_aff(ma); 739 1.1 mrg } 740 1.1 mrg 741 1.1 mrg /* Give a set of distances "set", are they bounded by a small constant 742 1.1 mrg * in direction "pos"? 743 1.1 mrg * In practice, check if they are bounded by 2 by checking that there 744 1.1 mrg * are no elements with a value greater than or equal to 3 or 745 1.1 mrg * smaller than or equal to -3. 746 1.1 mrg */ 747 1.1 mrg static isl_bool distance_is_bounded(__isl_keep isl_set *set, int pos) 748 1.1 mrg { 749 1.1 mrg isl_bool bounded; 750 1.1 mrg isl_set *test; 751 1.1 mrg 752 1.1 mrg if (!set) 753 1.1 mrg return isl_bool_error; 754 1.1 mrg 755 1.1 mrg test = isl_set_copy(set); 756 1.1 mrg test = isl_set_lower_bound_si(test, isl_dim_set, pos, 3); 757 1.1 mrg bounded = isl_set_is_empty(test); 758 1.1 mrg isl_set_free(test); 759 1.1 mrg 760 1.1 mrg if (bounded < 0 || !bounded) 761 1.1 mrg return bounded; 762 1.1 mrg 763 1.1 mrg test = isl_set_copy(set); 764 1.1 mrg test = isl_set_upper_bound_si(test, isl_dim_set, pos, -3); 765 1.1 mrg bounded = isl_set_is_empty(test); 766 1.1 mrg isl_set_free(test); 767 1.1 mrg 768 1.1 mrg return bounded; 769 1.1 mrg } 770 1.1 mrg 771 1.1 mrg /* Does the set "set" have a fixed (but possible parametric) value 772 1.1 mrg * at dimension "pos"? 773 1.1 mrg */ 774 1.1 mrg static isl_bool has_single_value(__isl_keep isl_set *set, int pos) 775 1.1 mrg { 776 1.1 mrg isl_size n; 777 1.1 mrg isl_bool single; 778 1.1 mrg 779 1.1 mrg n = isl_set_dim(set, isl_dim_set); 780 1.1 mrg if (n < 0) 781 1.1 mrg return isl_bool_error; 782 1.1 mrg set = isl_set_copy(set); 783 1.1 mrg set = isl_set_project_out(set, isl_dim_set, pos + 1, n - (pos + 1)); 784 1.1 mrg set = isl_set_project_out(set, isl_dim_set, 0, pos); 785 1.1 mrg single = isl_set_is_singleton(set); 786 1.1 mrg isl_set_free(set); 787 1.1 mrg 788 1.1 mrg return single; 789 1.1 mrg } 790 1.1 mrg 791 1.1 mrg /* Does "map" have a fixed (but possible parametric) value 792 1.1 mrg * at dimension "pos" of either its domain or its range? 793 1.1 mrg */ 794 1.1 mrg static isl_bool has_singular_src_or_dst(__isl_keep isl_map *map, int pos) 795 1.1 mrg { 796 1.1 mrg isl_set *set; 797 1.1 mrg isl_bool single; 798 1.1 mrg 799 1.1 mrg set = isl_map_domain(isl_map_copy(map)); 800 1.1 mrg single = has_single_value(set, pos); 801 1.1 mrg isl_set_free(set); 802 1.1 mrg 803 1.1 mrg if (single < 0 || single) 804 1.1 mrg return single; 805 1.1 mrg 806 1.1 mrg set = isl_map_range(isl_map_copy(map)); 807 1.1 mrg single = has_single_value(set, pos); 808 1.1 mrg isl_set_free(set); 809 1.1 mrg 810 1.1 mrg return single; 811 1.1 mrg } 812 1.1 mrg 813 1.1 mrg /* Does the edge "edge" from "graph" have bounded dependence distances 814 1.1 mrg * in the merged graph "merge_graph" of a selection of clusters in "c"? 815 1.1 mrg * 816 1.1 mrg * Extract the complete transformations of the source and destination 817 1.1 mrg * nodes of the edge, apply them to the edge constraints and 818 1.1 mrg * compute the differences. Finally, check if these differences are bounded 819 1.1 mrg * in each direction. 820 1.1 mrg * 821 1.1 mrg * If the dimension of the band is greater than the number of 822 1.1 mrg * dimensions that can be expected to be optimized by the edge 823 1.1 mrg * (based on its weight), then also allow the differences to be unbounded 824 1.1 mrg * in the remaining dimensions, but only if either the source or 825 1.1 mrg * the destination has a fixed value in that direction. 826 1.1 mrg * This allows a statement that produces values that are used by 827 1.1 mrg * several instances of another statement to be merged with that 828 1.1 mrg * other statement. 829 1.1 mrg * However, merging such clusters will introduce an inherently 830 1.1 mrg * large proximity distance inside the merged cluster, meaning 831 1.1 mrg * that proximity distances will no longer be optimized in 832 1.1 mrg * subsequent merges. These merges are therefore only allowed 833 1.1 mrg * after all other possible merges have been tried. 834 1.1 mrg * The first time such a merge is encountered, the weight of the edge 835 1.1 mrg * is replaced by a negative weight. The second time (i.e., after 836 1.1 mrg * all merges over edges with a non-negative weight have been tried), 837 1.1 mrg * the merge is allowed. 838 1.1 mrg */ 839 1.1 mrg static isl_bool has_bounded_distances(isl_ctx *ctx, struct isl_sched_edge *edge, 840 1.1 mrg struct isl_sched_graph *graph, struct isl_clustering *c, 841 1.1 mrg struct isl_sched_graph *merge_graph) 842 1.1 mrg { 843 1.1 mrg int i, n_slack; 844 1.1 mrg isl_size n; 845 1.1 mrg isl_bool bounded; 846 1.1 mrg isl_map *map, *t; 847 1.1 mrg isl_set *dist; 848 1.1 mrg 849 1.1 mrg map = isl_map_copy(edge->map); 850 1.1 mrg t = extract_node_transformation(ctx, edge->src, c, merge_graph); 851 1.1 mrg map = isl_map_apply_domain(map, t); 852 1.1 mrg t = extract_node_transformation(ctx, edge->dst, c, merge_graph); 853 1.1 mrg map = isl_map_apply_range(map, t); 854 1.1 mrg dist = isl_map_deltas(isl_map_copy(map)); 855 1.1 mrg 856 1.1 mrg bounded = isl_bool_true; 857 1.1 mrg n = isl_set_dim(dist, isl_dim_set); 858 1.1 mrg if (n < 0) 859 1.1 mrg goto error; 860 1.1 mrg n_slack = n - edge->weight; 861 1.1 mrg if (edge->weight < 0) 862 1.1 mrg n_slack -= graph->max_weight + 1; 863 1.1 mrg for (i = 0; i < n; ++i) { 864 1.1 mrg isl_bool bounded_i, singular_i; 865 1.1 mrg 866 1.1 mrg bounded_i = distance_is_bounded(dist, i); 867 1.1 mrg if (bounded_i < 0) 868 1.1 mrg goto error; 869 1.1 mrg if (bounded_i) 870 1.1 mrg continue; 871 1.1 mrg if (edge->weight >= 0) 872 1.1 mrg bounded = isl_bool_false; 873 1.1 mrg n_slack--; 874 1.1 mrg if (n_slack < 0) 875 1.1 mrg break; 876 1.1 mrg singular_i = has_singular_src_or_dst(map, i); 877 1.1 mrg if (singular_i < 0) 878 1.1 mrg goto error; 879 1.1 mrg if (singular_i) 880 1.1 mrg continue; 881 1.1 mrg bounded = isl_bool_false; 882 1.1 mrg break; 883 1.1 mrg } 884 1.1 mrg if (!bounded && i >= n && edge->weight >= 0) 885 1.1 mrg edge->weight -= graph->max_weight + 1; 886 1.1 mrg isl_map_free(map); 887 1.1 mrg isl_set_free(dist); 888 1.1 mrg 889 1.1 mrg return bounded; 890 1.1 mrg error: 891 1.1 mrg isl_map_free(map); 892 1.1 mrg isl_set_free(dist); 893 1.1 mrg return isl_bool_error; 894 1.1 mrg } 895 1.1 mrg 896 1.1 mrg /* Should the clusters be merged based on the cluster schedule 897 1.1 mrg * in the current (and only) band of "merge_graph"? 898 1.1 mrg * "graph" is the original dependence graph, while "c" records 899 1.1 mrg * which SCCs are involved in the latest merge. 900 1.1 mrg * 901 1.1 mrg * In particular, is there at least one proximity constraint 902 1.1 mrg * that is optimized by the merge? 903 1.1 mrg * 904 1.1 mrg * A proximity constraint is considered to be optimized 905 1.1 mrg * if the dependence distances are small. 906 1.1 mrg */ 907 1.1 mrg static isl_bool ok_to_merge_proximity(isl_ctx *ctx, 908 1.1 mrg struct isl_sched_graph *graph, struct isl_clustering *c, 909 1.1 mrg struct isl_sched_graph *merge_graph) 910 1.1 mrg { 911 1.1 mrg int i; 912 1.1 mrg 913 1.1 mrg for (i = 0; i < graph->n_edge; ++i) { 914 1.1 mrg struct isl_sched_edge *edge = &graph->edge[i]; 915 1.1 mrg isl_bool bounded; 916 1.1 mrg 917 1.1 mrg if (!isl_sched_edge_is_proximity(edge)) 918 1.1 mrg continue; 919 1.1 mrg if (!c->scc_in_merge[edge->src->scc]) 920 1.1 mrg continue; 921 1.1 mrg if (!c->scc_in_merge[edge->dst->scc]) 922 1.1 mrg continue; 923 1.1 mrg if (c->scc_cluster[edge->dst->scc] == 924 1.1 mrg c->scc_cluster[edge->src->scc]) 925 1.1 mrg continue; 926 1.1 mrg bounded = has_bounded_distances(ctx, edge, graph, c, 927 1.1 mrg merge_graph); 928 1.1 mrg if (bounded < 0 || bounded) 929 1.1 mrg return bounded; 930 1.1 mrg } 931 1.1 mrg 932 1.1 mrg return isl_bool_false; 933 1.1 mrg } 934 1.1 mrg 935 1.1 mrg /* Should the clusters be merged based on the cluster schedule 936 1.1 mrg * in the current (and only) band of "merge_graph"? 937 1.1 mrg * "graph" is the original dependence graph, while "c" records 938 1.1 mrg * which SCCs are involved in the latest merge. 939 1.1 mrg * 940 1.1 mrg * If the current band is empty, then the clusters should not be merged. 941 1.1 mrg * 942 1.1 mrg * If the band depth should be maximized and the merge schedule 943 1.1 mrg * is incomplete (meaning that the dimension of some of the schedule 944 1.1 mrg * bands in the original schedule will be reduced), then the clusters 945 1.1 mrg * should not be merged. 946 1.1 mrg * 947 1.1 mrg * If the schedule_maximize_coincidence option is set, then check that 948 1.1 mrg * the number of coincident schedule dimensions is not reduced. 949 1.1 mrg * 950 1.1 mrg * Finally, only allow the merge if at least one proximity 951 1.1 mrg * constraint is optimized. 952 1.1 mrg */ 953 1.1 mrg static isl_bool ok_to_merge(isl_ctx *ctx, struct isl_sched_graph *graph, 954 1.1 mrg struct isl_clustering *c, struct isl_sched_graph *merge_graph) 955 1.1 mrg { 956 1.1 mrg if (merge_graph->n_total_row == merge_graph->band_start) 957 1.1 mrg return isl_bool_false; 958 1.1 mrg 959 1.1 mrg if (isl_options_get_schedule_maximize_band_depth(ctx) && 960 1.1 mrg merge_graph->n_total_row < merge_graph->maxvar) 961 1.1 mrg return isl_bool_false; 962 1.1 mrg 963 1.1 mrg if (isl_options_get_schedule_maximize_coincidence(ctx)) { 964 1.1 mrg isl_bool ok; 965 1.1 mrg 966 1.1 mrg ok = ok_to_merge_coincident(c, merge_graph); 967 1.1 mrg if (ok < 0 || !ok) 968 1.1 mrg return ok; 969 1.1 mrg } 970 1.1 mrg 971 1.1 mrg return ok_to_merge_proximity(ctx, graph, c, merge_graph); 972 1.1 mrg } 973 1.1 mrg 974 1.1 mrg /* Apply the schedule in "t_node" to the "n" rows starting at "first" 975 1.1 mrg * of the schedule in "node" and return the result. 976 1.1 mrg * 977 1.1 mrg * That is, essentially compute 978 1.1 mrg * 979 1.1 mrg * T * N(first:first+n-1) 980 1.1 mrg * 981 1.1 mrg * taking into account the constant term and the parameter coefficients 982 1.1 mrg * in "t_node". 983 1.1 mrg */ 984 1.1 mrg static __isl_give isl_mat *node_transformation(isl_ctx *ctx, 985 1.1 mrg struct isl_sched_node *t_node, struct isl_sched_node *node, 986 1.1 mrg int first, int n) 987 1.1 mrg { 988 1.1 mrg int i, j; 989 1.1 mrg isl_mat *t; 990 1.1 mrg isl_size n_row, n_col; 991 1.1 mrg int n_param, n_var; 992 1.1 mrg 993 1.1 mrg n_param = node->nparam; 994 1.1 mrg n_var = node->nvar; 995 1.1 mrg n_row = isl_mat_rows(t_node->sched); 996 1.1 mrg n_col = isl_mat_cols(node->sched); 997 1.1 mrg if (n_row < 0 || n_col < 0) 998 1.1 mrg return NULL; 999 1.1 mrg t = isl_mat_alloc(ctx, n_row, n_col); 1000 1.1 mrg if (!t) 1001 1.1 mrg return NULL; 1002 1.1 mrg for (i = 0; i < n_row; ++i) { 1003 1.1 mrg isl_seq_cpy(t->row[i], t_node->sched->row[i], 1 + n_param); 1004 1.1 mrg isl_seq_clr(t->row[i] + 1 + n_param, n_var); 1005 1.1 mrg for (j = 0; j < n; ++j) 1006 1.1 mrg isl_seq_addmul(t->row[i], 1007 1.1 mrg t_node->sched->row[i][1 + n_param + j], 1008 1.1 mrg node->sched->row[first + j], 1009 1.1 mrg 1 + n_param + n_var); 1010 1.1 mrg } 1011 1.1 mrg return t; 1012 1.1 mrg } 1013 1.1 mrg 1014 1.1 mrg /* Apply the cluster schedule in "t_node" to the current band 1015 1.1 mrg * schedule of the nodes in "graph". 1016 1.1 mrg * 1017 1.1 mrg * In particular, replace the rows starting at band_start 1018 1.1 mrg * by the result of applying the cluster schedule in "t_node" 1019 1.1 mrg * to the original rows. 1020 1.1 mrg * 1021 1.1 mrg * The coincidence of the schedule is determined by the coincidence 1022 1.1 mrg * of the cluster schedule. 1023 1.1 mrg */ 1024 1.1 mrg static isl_stat transform(isl_ctx *ctx, struct isl_sched_graph *graph, 1025 1.1 mrg struct isl_sched_node *t_node) 1026 1.1 mrg { 1027 1.1 mrg int i, j; 1028 1.1 mrg isl_size n_new; 1029 1.1 mrg int start, n; 1030 1.1 mrg 1031 1.1 mrg start = graph->band_start; 1032 1.1 mrg n = graph->n_total_row - start; 1033 1.1 mrg 1034 1.1 mrg n_new = isl_mat_rows(t_node->sched); 1035 1.1 mrg if (n_new < 0) 1036 1.1 mrg return isl_stat_error; 1037 1.1 mrg for (i = 0; i < graph->n; ++i) { 1038 1.1 mrg struct isl_sched_node *node = &graph->node[i]; 1039 1.1 mrg isl_mat *t; 1040 1.1 mrg 1041 1.1 mrg t = node_transformation(ctx, t_node, node, start, n); 1042 1.1 mrg node->sched = isl_mat_drop_rows(node->sched, start, n); 1043 1.1 mrg node->sched = isl_mat_concat(node->sched, t); 1044 1.1 mrg node->sched_map = isl_map_free(node->sched_map); 1045 1.1 mrg if (!node->sched) 1046 1.1 mrg return isl_stat_error; 1047 1.1 mrg for (j = 0; j < n_new; ++j) 1048 1.1 mrg node->coincident[start + j] = t_node->coincident[j]; 1049 1.1 mrg } 1050 1.1 mrg graph->n_total_row -= n; 1051 1.1 mrg graph->n_row -= n; 1052 1.1 mrg graph->n_total_row += n_new; 1053 1.1 mrg graph->n_row += n_new; 1054 1.1 mrg 1055 1.1 mrg return isl_stat_ok; 1056 1.1 mrg } 1057 1.1 mrg 1058 1.1 mrg /* Merge the clusters marked for merging in "c" into a single 1059 1.1 mrg * cluster using the cluster schedule in the current band of "merge_graph". 1060 1.1 mrg * The representative SCC for the new cluster is the SCC with 1061 1.1 mrg * the smallest index. 1062 1.1 mrg * 1063 1.1 mrg * The current band schedule of each SCC in the new cluster is obtained 1064 1.1 mrg * by applying the schedule of the corresponding original cluster 1065 1.1 mrg * to the original band schedule. 1066 1.1 mrg * All SCCs in the new cluster have the same number of schedule rows. 1067 1.1 mrg */ 1068 1.1 mrg static isl_stat merge(isl_ctx *ctx, struct isl_clustering *c, 1069 1.1 mrg struct isl_sched_graph *merge_graph) 1070 1.1 mrg { 1071 1.1 mrg int i; 1072 1.1 mrg int cluster = -1; 1073 1.1 mrg isl_space *space; 1074 1.1 mrg 1075 1.1 mrg for (i = 0; i < c->n; ++i) { 1076 1.1 mrg struct isl_sched_node *node; 1077 1.1 mrg 1078 1.1 mrg if (!c->scc_in_merge[i]) 1079 1.1 mrg continue; 1080 1.1 mrg if (cluster < 0) 1081 1.1 mrg cluster = i; 1082 1.1 mrg space = cluster_space(&c->scc[i], c->scc_cluster[i]); 1083 1.1 mrg node = isl_sched_graph_find_node(ctx, merge_graph, space); 1084 1.1 mrg isl_space_free(space); 1085 1.1 mrg if (!node) 1086 1.1 mrg return isl_stat_error; 1087 1.1 mrg if (!isl_sched_graph_is_node(merge_graph, node)) 1088 1.1 mrg isl_die(ctx, isl_error_internal, 1089 1.1 mrg "unable to find cluster", 1090 1.1 mrg return isl_stat_error); 1091 1.1 mrg if (transform(ctx, &c->scc[i], node) < 0) 1092 1.1 mrg return isl_stat_error; 1093 1.1 mrg c->scc_cluster[i] = cluster; 1094 1.1 mrg } 1095 1.1 mrg 1096 1.1 mrg return isl_stat_ok; 1097 1.1 mrg } 1098 1.1 mrg 1099 1.1 mrg /* Try and merge the clusters of SCCs marked in c->scc_in_merge 1100 1.1 mrg * by scheduling the current cluster bands with respect to each other. 1101 1.1 mrg * 1102 1.1 mrg * Construct a dependence graph with a space for each cluster and 1103 1.1 mrg * with the coordinates of each space corresponding to the schedule 1104 1.1 mrg * dimensions of the current band of that cluster. 1105 1.1 mrg * Construct a cluster schedule in this cluster dependence graph and 1106 1.1 mrg * apply it to the current cluster bands if it is applicable 1107 1.1 mrg * according to ok_to_merge. 1108 1.1 mrg * 1109 1.1 mrg * If the number of remaining schedule dimensions in a cluster 1110 1.1 mrg * with a non-maximal current schedule dimension is greater than 1111 1.1 mrg * the number of remaining schedule dimensions in clusters 1112 1.1 mrg * with a maximal current schedule dimension, then restrict 1113 1.1 mrg * the number of rows to be computed in the cluster schedule 1114 1.1 mrg * to the minimal such non-maximal current schedule dimension. 1115 1.1 mrg * Do this by adjusting merge_graph.maxvar. 1116 1.1 mrg * 1117 1.1 mrg * Return isl_bool_true if the clusters have effectively been merged 1118 1.1 mrg * into a single cluster. 1119 1.1 mrg * 1120 1.1 mrg * Note that since the standard scheduling algorithm minimizes the maximal 1121 1.1 mrg * distance over proximity constraints, the proximity constraints between 1122 1.1 mrg * the merged clusters may not be optimized any further than what is 1123 1.1 mrg * sufficient to bring the distances within the limits of the internal 1124 1.1 mrg * proximity constraints inside the individual clusters. 1125 1.1 mrg * It may therefore make sense to perform an additional translation step 1126 1.1 mrg * to bring the clusters closer to each other, while maintaining 1127 1.1 mrg * the linear part of the merging schedule found using the standard 1128 1.1 mrg * scheduling algorithm. 1129 1.1 mrg */ 1130 1.1 mrg static isl_bool try_merge(isl_ctx *ctx, struct isl_sched_graph *graph, 1131 1.1 mrg struct isl_clustering *c) 1132 1.1 mrg { 1133 1.1 mrg struct isl_sched_graph merge_graph = { 0 }; 1134 1.1 mrg isl_bool merged; 1135 1.1 mrg 1136 1.1 mrg if (init_merge_graph(ctx, graph, c, &merge_graph) < 0) 1137 1.1 mrg goto error; 1138 1.1 mrg 1139 1.1 mrg if (isl_sched_graph_compute_maxvar(&merge_graph) < 0) 1140 1.1 mrg goto error; 1141 1.1 mrg if (adjust_maxvar_to_slack(ctx, &merge_graph,c) < 0) 1142 1.1 mrg goto error; 1143 1.1 mrg if (isl_schedule_node_compute_wcc_band(ctx, &merge_graph) < 0) 1144 1.1 mrg goto error; 1145 1.1 mrg merged = ok_to_merge(ctx, graph, c, &merge_graph); 1146 1.1 mrg if (merged && merge(ctx, c, &merge_graph) < 0) 1147 1.1 mrg goto error; 1148 1.1 mrg 1149 1.1 mrg isl_sched_graph_free(ctx, &merge_graph); 1150 1.1 mrg return merged; 1151 1.1 mrg error: 1152 1.1 mrg isl_sched_graph_free(ctx, &merge_graph); 1153 1.1 mrg return isl_bool_error; 1154 1.1 mrg } 1155 1.1 mrg 1156 1.1 mrg /* Is there any edge marked "no_merge" between two SCCs that are 1157 1.1 mrg * about to be merged (i.e., that are set in "scc_in_merge")? 1158 1.1 mrg * "merge_edge" is the proximity edge along which the clusters of SCCs 1159 1.1 mrg * are going to be merged. 1160 1.1 mrg * 1161 1.1 mrg * If there is any edge between two SCCs with a negative weight, 1162 1.1 mrg * while the weight of "merge_edge" is non-negative, then this 1163 1.1 mrg * means that the edge was postponed. "merge_edge" should then 1164 1.1 mrg * also be postponed since merging along the edge with negative weight should 1165 1.1 mrg * be postponed until all edges with non-negative weight have been tried. 1166 1.1 mrg * Replace the weight of "merge_edge" by a negative weight as well and 1167 1.1 mrg * tell the caller not to attempt a merge. 1168 1.1 mrg */ 1169 1.1 mrg static int any_no_merge(struct isl_sched_graph *graph, int *scc_in_merge, 1170 1.1 mrg struct isl_sched_edge *merge_edge) 1171 1.1 mrg { 1172 1.1 mrg int i; 1173 1.1 mrg 1174 1.1 mrg for (i = 0; i < graph->n_edge; ++i) { 1175 1.1 mrg struct isl_sched_edge *edge = &graph->edge[i]; 1176 1.1 mrg 1177 1.1 mrg if (!scc_in_merge[edge->src->scc]) 1178 1.1 mrg continue; 1179 1.1 mrg if (!scc_in_merge[edge->dst->scc]) 1180 1.1 mrg continue; 1181 1.1 mrg if (edge->no_merge) 1182 1.1 mrg return 1; 1183 1.1 mrg if (merge_edge->weight >= 0 && edge->weight < 0) { 1184 1.1 mrg merge_edge->weight -= graph->max_weight + 1; 1185 1.1 mrg return 1; 1186 1.1 mrg } 1187 1.1 mrg } 1188 1.1 mrg 1189 1.1 mrg return 0; 1190 1.1 mrg } 1191 1.1 mrg 1192 1.1 mrg /* Merge the two clusters in "c" connected by the edge in "graph" 1193 1.1 mrg * with index "edge" into a single cluster. 1194 1.1 mrg * If it turns out to be impossible to merge these two clusters, 1195 1.1 mrg * then mark the edge as "no_merge" such that it will not be 1196 1.1 mrg * considered again. 1197 1.1 mrg * 1198 1.1 mrg * First mark all SCCs that need to be merged. This includes the SCCs 1199 1.1 mrg * in the two clusters, but it may also include the SCCs 1200 1.1 mrg * of intermediate clusters. 1201 1.1 mrg * If there is already a no_merge edge between any pair of such SCCs, 1202 1.1 mrg * then simply mark the current edge as no_merge as well. 1203 1.1 mrg * Likewise, if any of those edges was postponed by has_bounded_distances, 1204 1.1 mrg * then postpone the current edge as well. 1205 1.1 mrg * Otherwise, try and merge the clusters and mark "edge" as "no_merge" 1206 1.1 mrg * if the clusters did not end up getting merged, unless the non-merge 1207 1.1 mrg * is due to the fact that the edge was postponed. This postponement 1208 1.1 mrg * can be recognized by a change in weight (from non-negative to negative). 1209 1.1 mrg */ 1210 1.1 mrg static isl_stat merge_clusters_along_edge(isl_ctx *ctx, 1211 1.1 mrg struct isl_sched_graph *graph, int edge, struct isl_clustering *c) 1212 1.1 mrg { 1213 1.1 mrg isl_bool merged; 1214 1.1 mrg int edge_weight = graph->edge[edge].weight; 1215 1.1 mrg 1216 1.1 mrg if (mark_merge_sccs(ctx, graph, edge, c) < 0) 1217 1.1 mrg return isl_stat_error; 1218 1.1 mrg 1219 1.1 mrg if (any_no_merge(graph, c->scc_in_merge, &graph->edge[edge])) 1220 1.1 mrg merged = isl_bool_false; 1221 1.1 mrg else 1222 1.1 mrg merged = try_merge(ctx, graph, c); 1223 1.1 mrg if (merged < 0) 1224 1.1 mrg return isl_stat_error; 1225 1.1 mrg if (!merged && edge_weight == graph->edge[edge].weight) 1226 1.1 mrg graph->edge[edge].no_merge = 1; 1227 1.1 mrg 1228 1.1 mrg return isl_stat_ok; 1229 1.1 mrg } 1230 1.1 mrg 1231 1.1 mrg /* Does "node" belong to the cluster identified by "cluster"? 1232 1.1 mrg */ 1233 1.1 mrg static int node_cluster_exactly(struct isl_sched_node *node, int cluster) 1234 1.1 mrg { 1235 1.1 mrg return node->cluster == cluster; 1236 1.1 mrg } 1237 1.1 mrg 1238 1.1 mrg /* Does "edge" connect two nodes belonging to the cluster 1239 1.1 mrg * identified by "cluster"? 1240 1.1 mrg */ 1241 1.1 mrg static int edge_cluster_exactly(struct isl_sched_edge *edge, int cluster) 1242 1.1 mrg { 1243 1.1 mrg return edge->src->cluster == cluster && edge->dst->cluster == cluster; 1244 1.1 mrg } 1245 1.1 mrg 1246 1.1 mrg /* Swap the schedule of "node1" and "node2". 1247 1.1 mrg * Both nodes have been derived from the same node in a common parent graph. 1248 1.1 mrg * Since the "coincident" field is shared with that node 1249 1.1 mrg * in the parent graph, there is no need to also swap this field. 1250 1.1 mrg */ 1251 1.1 mrg static void swap_sched(struct isl_sched_node *node1, 1252 1.1 mrg struct isl_sched_node *node2) 1253 1.1 mrg { 1254 1.1 mrg isl_mat *sched; 1255 1.1 mrg isl_map *sched_map; 1256 1.1 mrg 1257 1.1 mrg sched = node1->sched; 1258 1.1 mrg node1->sched = node2->sched; 1259 1.1 mrg node2->sched = sched; 1260 1.1 mrg 1261 1.1 mrg sched_map = node1->sched_map; 1262 1.1 mrg node1->sched_map = node2->sched_map; 1263 1.1 mrg node2->sched_map = sched_map; 1264 1.1 mrg } 1265 1.1 mrg 1266 1.1 mrg /* Copy the current band schedule from the SCCs that form the cluster 1267 1.1 mrg * with index "pos" to the actual cluster at position "pos". 1268 1.1 mrg * By construction, the index of the first SCC that belongs to the cluster 1269 1.1 mrg * is also "pos". 1270 1.1 mrg * 1271 1.1 mrg * The order of the nodes inside both the SCCs and the cluster 1272 1.1 mrg * is assumed to be same as the order in the original "graph". 1273 1.1 mrg * 1274 1.1 mrg * Since the SCC graphs will no longer be used after this function, 1275 1.1 mrg * the schedules are actually swapped rather than copied. 1276 1.1 mrg */ 1277 1.1 mrg static isl_stat copy_partial(struct isl_sched_graph *graph, 1278 1.1 mrg struct isl_clustering *c, int pos) 1279 1.1 mrg { 1280 1.1 mrg int i, j; 1281 1.1 mrg 1282 1.1 mrg c->cluster[pos].n_total_row = c->scc[pos].n_total_row; 1283 1.1 mrg c->cluster[pos].n_row = c->scc[pos].n_row; 1284 1.1 mrg c->cluster[pos].maxvar = c->scc[pos].maxvar; 1285 1.1 mrg j = 0; 1286 1.1 mrg for (i = 0; i < graph->n; ++i) { 1287 1.1 mrg int k; 1288 1.1 mrg int s; 1289 1.1 mrg 1290 1.1 mrg if (graph->node[i].cluster != pos) 1291 1.1 mrg continue; 1292 1.1 mrg s = graph->node[i].scc; 1293 1.1 mrg k = c->scc_node[s]++; 1294 1.1 mrg swap_sched(&c->cluster[pos].node[j], &c->scc[s].node[k]); 1295 1.1 mrg if (c->scc[s].maxvar > c->cluster[pos].maxvar) 1296 1.1 mrg c->cluster[pos].maxvar = c->scc[s].maxvar; 1297 1.1 mrg ++j; 1298 1.1 mrg } 1299 1.1 mrg 1300 1.1 mrg return isl_stat_ok; 1301 1.1 mrg } 1302 1.1 mrg 1303 1.1 mrg /* Is there a (conditional) validity dependence from node[j] to node[i], 1304 1.1 mrg * forcing node[i] to follow node[j] or do the nodes belong to the same 1305 1.1 mrg * cluster? 1306 1.1 mrg */ 1307 1.1 mrg static isl_bool node_follows_strong_or_same_cluster(int i, int j, void *user) 1308 1.1 mrg { 1309 1.1 mrg struct isl_sched_graph *graph = user; 1310 1.1 mrg 1311 1.1 mrg if (graph->node[i].cluster == graph->node[j].cluster) 1312 1.1 mrg return isl_bool_true; 1313 1.1 mrg return isl_sched_graph_has_validity_edge(graph, &graph->node[j], 1314 1.1 mrg &graph->node[i]); 1315 1.1 mrg } 1316 1.1 mrg 1317 1.1 mrg /* Extract the merged clusters of SCCs in "graph", sort them, and 1318 1.1 mrg * store them in c->clusters. Update c->scc_cluster accordingly. 1319 1.1 mrg * 1320 1.1 mrg * First keep track of the cluster containing the SCC to which a node 1321 1.1 mrg * belongs in the node itself. 1322 1.1 mrg * Then extract the clusters into c->clusters, copying the current 1323 1.1 mrg * band schedule from the SCCs that belong to the cluster. 1324 1.1 mrg * Do this only once per cluster. 1325 1.1 mrg * 1326 1.1 mrg * Finally, topologically sort the clusters and update c->scc_cluster 1327 1.1 mrg * to match the new scc numbering. While the SCCs were originally 1328 1.1 mrg * sorted already, some SCCs that depend on some other SCCs may 1329 1.1 mrg * have been merged with SCCs that appear before these other SCCs. 1330 1.1 mrg * A reordering may therefore be required. 1331 1.1 mrg */ 1332 1.1 mrg static isl_stat extract_clusters(isl_ctx *ctx, struct isl_sched_graph *graph, 1333 1.1 mrg struct isl_clustering *c) 1334 1.1 mrg { 1335 1.1 mrg int i; 1336 1.1 mrg 1337 1.1 mrg for (i = 0; i < graph->n; ++i) 1338 1.1 mrg graph->node[i].cluster = c->scc_cluster[graph->node[i].scc]; 1339 1.1 mrg 1340 1.1 mrg for (i = 0; i < graph->scc; ++i) { 1341 1.1 mrg if (c->scc_cluster[i] != i) 1342 1.1 mrg continue; 1343 1.1 mrg if (isl_sched_graph_extract_sub_graph(ctx, graph, 1344 1.1 mrg &node_cluster_exactly, 1345 1.1 mrg &edge_cluster_exactly, i, &c->cluster[i]) < 0) 1346 1.1 mrg return isl_stat_error; 1347 1.1 mrg c->cluster[i].src_scc = -1; 1348 1.1 mrg c->cluster[i].dst_scc = -1; 1349 1.1 mrg if (copy_partial(graph, c, i) < 0) 1350 1.1 mrg return isl_stat_error; 1351 1.1 mrg } 1352 1.1 mrg 1353 1.1 mrg if (isl_sched_graph_detect_ccs(ctx, graph, 1354 1.1 mrg &node_follows_strong_or_same_cluster) < 0) 1355 1.1 mrg return isl_stat_error; 1356 1.1 mrg for (i = 0; i < graph->n; ++i) 1357 1.1 mrg c->scc_cluster[graph->node[i].scc] = graph->node[i].cluster; 1358 1.1 mrg 1359 1.1 mrg return isl_stat_ok; 1360 1.1 mrg } 1361 1.1 mrg 1362 1.1 mrg /* Compute weights on the proximity edges of "graph" that can 1363 1.1 mrg * be used by find_proximity to find the most appropriate 1364 1.1 mrg * proximity edge to use to merge two clusters in "c". 1365 1.1 mrg * The weights are also used by has_bounded_distances to determine 1366 1.1 mrg * whether the merge should be allowed. 1367 1.1 mrg * Store the maximum of the computed weights in graph->max_weight. 1368 1.1 mrg * 1369 1.1 mrg * The computed weight is a measure for the number of remaining schedule 1370 1.1 mrg * dimensions that can still be completely aligned. 1371 1.1 mrg * In particular, compute the number of equalities between 1372 1.1 mrg * input dimensions and output dimensions in the proximity constraints. 1373 1.1 mrg * The directions that are already handled by outer schedule bands 1374 1.1 mrg * are projected out prior to determining this number. 1375 1.1 mrg * 1376 1.1 mrg * Edges that will never be considered by find_proximity are ignored. 1377 1.1 mrg */ 1378 1.1 mrg static isl_stat compute_weights(struct isl_sched_graph *graph, 1379 1.1 mrg struct isl_clustering *c) 1380 1.1 mrg { 1381 1.1 mrg int i; 1382 1.1 mrg 1383 1.1 mrg graph->max_weight = 0; 1384 1.1 mrg 1385 1.1 mrg for (i = 0; i < graph->n_edge; ++i) { 1386 1.1 mrg struct isl_sched_edge *edge = &graph->edge[i]; 1387 1.1 mrg struct isl_sched_node *src = edge->src; 1388 1.1 mrg struct isl_sched_node *dst = edge->dst; 1389 1.1 mrg isl_basic_map *hull; 1390 1.1 mrg isl_bool prox; 1391 1.1 mrg isl_size n_in, n_out, n; 1392 1.1 mrg 1393 1.1 mrg prox = is_non_empty_proximity(edge); 1394 1.1 mrg if (prox < 0) 1395 1.1 mrg return isl_stat_error; 1396 1.1 mrg if (!prox) 1397 1.1 mrg continue; 1398 1.1 mrg if (bad_cluster(&c->scc[edge->src->scc]) || 1399 1.1 mrg bad_cluster(&c->scc[edge->dst->scc])) 1400 1.1 mrg continue; 1401 1.1 mrg if (c->scc_cluster[edge->dst->scc] == 1402 1.1 mrg c->scc_cluster[edge->src->scc]) 1403 1.1 mrg continue; 1404 1.1 mrg 1405 1.1 mrg hull = isl_map_affine_hull(isl_map_copy(edge->map)); 1406 1.1 mrg hull = isl_basic_map_transform_dims(hull, isl_dim_in, 0, 1407 1.1 mrg isl_mat_copy(src->vmap)); 1408 1.1 mrg hull = isl_basic_map_transform_dims(hull, isl_dim_out, 0, 1409 1.1 mrg isl_mat_copy(dst->vmap)); 1410 1.1 mrg hull = isl_basic_map_project_out(hull, 1411 1.1 mrg isl_dim_in, 0, src->rank); 1412 1.1 mrg hull = isl_basic_map_project_out(hull, 1413 1.1 mrg isl_dim_out, 0, dst->rank); 1414 1.1 mrg hull = isl_basic_map_remove_divs(hull); 1415 1.1 mrg n_in = isl_basic_map_dim(hull, isl_dim_in); 1416 1.1 mrg n_out = isl_basic_map_dim(hull, isl_dim_out); 1417 1.1 mrg if (n_in < 0 || n_out < 0) 1418 1.1 mrg hull = isl_basic_map_free(hull); 1419 1.1 mrg hull = isl_basic_map_drop_constraints_not_involving_dims(hull, 1420 1.1 mrg isl_dim_in, 0, n_in); 1421 1.1 mrg hull = isl_basic_map_drop_constraints_not_involving_dims(hull, 1422 1.1 mrg isl_dim_out, 0, n_out); 1423 1.1 mrg n = isl_basic_map_n_equality(hull); 1424 1.1 mrg isl_basic_map_free(hull); 1425 1.1 mrg if (n < 0) 1426 1.1 mrg return isl_stat_error; 1427 1.1 mrg edge->weight = n; 1428 1.1 mrg 1429 1.1 mrg if (edge->weight > graph->max_weight) 1430 1.1 mrg graph->max_weight = edge->weight; 1431 1.1 mrg } 1432 1.1 mrg 1433 1.1 mrg return isl_stat_ok; 1434 1.1 mrg } 1435 1.1 mrg 1436 1.1 mrg /* Call isl_schedule_node_compute_finish_band on each of the clusters in "c" and 1437 1.1 mrg * update "node" to arrange for them to be executed in an order 1438 1.1 mrg * possibly involving set nodes that generalizes the topological order 1439 1.1 mrg * determined by the scc fields of the nodes in "graph". 1440 1.1 mrg * 1441 1.1 mrg * Note that at this stage, there are graph->scc clusters and 1442 1.1 mrg * their positions in c->cluster are determined by the values 1443 1.1 mrg * of c->scc_cluster. 1444 1.1 mrg * 1445 1.1 mrg * Construct an isl_scc_graph and perform the decomposition 1446 1.1 mrg * using this graph. 1447 1.1 mrg */ 1448 1.1 mrg static __isl_give isl_schedule_node *finish_bands_decompose( 1449 1.1 mrg __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, 1450 1.1 mrg struct isl_clustering *c) 1451 1.1 mrg { 1452 1.1 mrg isl_ctx *ctx; 1453 1.1 mrg struct isl_scc_graph *scc_graph; 1454 1.1 mrg 1455 1.1 mrg ctx = isl_schedule_node_get_ctx(node); 1456 1.1 mrg 1457 1.1 mrg scc_graph = isl_scc_graph_from_sched_graph(ctx, graph, c); 1458 1.1 mrg node = isl_scc_graph_decompose(scc_graph, node); 1459 1.1 mrg isl_scc_graph_free(scc_graph); 1460 1.1 mrg 1461 1.1 mrg return node; 1462 1.1 mrg } 1463 1.1 mrg 1464 1.1 mrg /* Call isl_schedule_node_compute_finish_band on each of the clusters in "c" 1465 1.1 mrg * in their topological order. This order is determined by the scc 1466 1.1 mrg * fields of the nodes in "graph". 1467 1.1 mrg * Combine the results in a sequence expressing the topological order. 1468 1.1 mrg * 1469 1.1 mrg * If there is only one cluster left, then there is no need to introduce 1470 1.1 mrg * a sequence node. Also, in this case, the cluster necessarily contains 1471 1.1 mrg * the SCC at position 0 in the original graph and is therefore also 1472 1.1 mrg * stored in the first cluster of "c". 1473 1.1 mrg * 1474 1.1 mrg * If there are more than two clusters left, then some subsets of the clusters 1475 1.1 mrg * may still be independent of each other. These could then still 1476 1.1 mrg * be reordered with respect to each other. Call finish_bands_decompose 1477 1.1 mrg * to try and construct an ordering involving set and sequence nodes 1478 1.1 mrg * that generalizes the topological order. 1479 1.1 mrg * Note that at the outermost level there can be no independent components 1480 1.1 mrg * because isl_schedule_node_compute_wcc_clustering is called 1481 1.1 mrg * on a (weakly) connected component. 1482 1.1 mrg */ 1483 1.1 mrg static __isl_give isl_schedule_node *finish_bands_clustering( 1484 1.1 mrg __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, 1485 1.1 mrg struct isl_clustering *c) 1486 1.1 mrg { 1487 1.1 mrg int i; 1488 1.1 mrg isl_ctx *ctx; 1489 1.1 mrg isl_union_set_list *filters; 1490 1.1 mrg 1491 1.1 mrg if (graph->scc == 1) 1492 1.1 mrg return isl_schedule_node_compute_finish_band(node, 1493 1.1 mrg &c->cluster[0], 0); 1494 1.1 mrg if (graph->scc > 2) 1495 1.1 mrg return finish_bands_decompose(node, graph, c); 1496 1.1 mrg 1497 1.1 mrg ctx = isl_schedule_node_get_ctx(node); 1498 1.1 mrg 1499 1.1 mrg filters = isl_sched_graph_extract_sccs(ctx, graph); 1500 1.1 mrg node = isl_schedule_node_insert_sequence(node, filters); 1501 1.1 mrg 1502 1.1 mrg for (i = 0; i < graph->scc; ++i) { 1503 1.1 mrg int j = c->scc_cluster[i]; 1504 1.1 mrg node = isl_schedule_node_grandchild(node, i, 0); 1505 1.1 mrg node = isl_schedule_node_compute_finish_band(node, 1506 1.1 mrg &c->cluster[j], 0); 1507 1.1 mrg node = isl_schedule_node_grandparent(node); 1508 1.1 mrg } 1509 1.1 mrg 1510 1.1 mrg return node; 1511 1.1 mrg } 1512 1.1 mrg 1513 1.1 mrg /* Compute a schedule for a connected dependence graph by first considering 1514 1.1 mrg * each strongly connected component (SCC) in the graph separately and then 1515 1.1 mrg * incrementally combining them into clusters. 1516 1.1 mrg * Return the updated schedule node. 1517 1.1 mrg * 1518 1.1 mrg * Initially, each cluster consists of a single SCC, each with its 1519 1.1 mrg * own band schedule. The algorithm then tries to merge pairs 1520 1.1 mrg * of clusters along a proximity edge until no more suitable 1521 1.1 mrg * proximity edges can be found. During this merging, the schedule 1522 1.1 mrg * is maintained in the individual SCCs. 1523 1.1 mrg * After the merging is completed, the full resulting clusters 1524 1.1 mrg * are extracted and in finish_bands_clustering, 1525 1.1 mrg * isl_schedule_node_compute_finish_band is called on each of them to integrate 1526 1.1 mrg * the band into "node" and to continue the computation. 1527 1.1 mrg * 1528 1.1 mrg * compute_weights initializes the weights that are used by find_proximity. 1529 1.1 mrg */ 1530 1.1 mrg __isl_give isl_schedule_node *isl_schedule_node_compute_wcc_clustering( 1531 1.1 mrg __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) 1532 1.1 mrg { 1533 1.1 mrg isl_ctx *ctx; 1534 1.1 mrg struct isl_clustering c; 1535 1.1 mrg int i; 1536 1.1 mrg 1537 1.1 mrg ctx = isl_schedule_node_get_ctx(node); 1538 1.1 mrg 1539 1.1 mrg if (clustering_init(ctx, &c, graph) < 0) 1540 1.1 mrg goto error; 1541 1.1 mrg 1542 1.1 mrg if (compute_weights(graph, &c) < 0) 1543 1.1 mrg goto error; 1544 1.1 mrg 1545 1.1 mrg for (;;) { 1546 1.1 mrg i = find_proximity(graph, &c); 1547 1.1 mrg if (i < 0) 1548 1.1 mrg goto error; 1549 1.1 mrg if (i >= graph->n_edge) 1550 1.1 mrg break; 1551 1.1 mrg if (merge_clusters_along_edge(ctx, graph, i, &c) < 0) 1552 1.1 mrg goto error; 1553 1.1 mrg } 1554 1.1 mrg 1555 1.1 mrg if (extract_clusters(ctx, graph, &c) < 0) 1556 1.1 mrg goto error; 1557 1.1 mrg 1558 1.1 mrg node = finish_bands_clustering(node, graph, &c); 1559 1.1 mrg 1560 1.1 mrg clustering_free(ctx, &c); 1561 1.1 mrg return node; 1562 1.1 mrg error: 1563 1.1 mrg clustering_free(ctx, &c); 1564 1.1 mrg return isl_schedule_node_free(node); 1565 1.1 mrg } 1566