Home | History | Annotate | Line # | Download | only in dist
      1 /*
      2  * Copyright 2015      Sven Verdoolaege
      3  *
      4  * Use of this software is governed by the MIT license
      5  *
      6  * Written by Sven Verdoolaege
      7  */
      8 
      9 #include "isl_map_private.h"
     10 
     11 #include <isl/id.h>
     12 #include <isl/schedule_node.h>
     13 #include <isl/union_set.h>
     14 
     15 #include "isl_mat_private.h"
     16 #include "isl_scheduler_clustering.h"
     17 #include "isl_scheduler_scc.h"
     18 #include "isl_seq.h"
     19 #include "isl_tarjan.h"
     20 
     21 /* Initialize the clustering data structure "c" from "graph".
     22  *
     23  * In particular, allocate memory, extract the SCCs from "graph"
     24  * into c->scc, initialize scc_cluster and construct
     25  * a band of schedule rows for each SCC.
     26  * Within each SCC, there is only one SCC by definition.
     27  * Each SCC initially belongs to a cluster containing only that SCC.
     28  */
     29 static isl_stat clustering_init(isl_ctx *ctx, struct isl_clustering *c,
     30 	struct isl_sched_graph *graph)
     31 {
     32 	int i;
     33 
     34 	c->n = graph->scc;
     35 	c->scc = isl_calloc_array(ctx, struct isl_sched_graph, c->n);
     36 	c->cluster = isl_calloc_array(ctx, struct isl_sched_graph, c->n);
     37 	c->scc_cluster = isl_calloc_array(ctx, int, c->n);
     38 	c->scc_node = isl_calloc_array(ctx, int, c->n);
     39 	c->scc_in_merge = isl_calloc_array(ctx, int, c->n);
     40 	if (!c->scc || !c->cluster ||
     41 	    !c->scc_cluster || !c->scc_node || !c->scc_in_merge)
     42 		return isl_stat_error;
     43 
     44 	for (i = 0; i < c->n; ++i) {
     45 		if (isl_sched_graph_extract_sub_graph(ctx, graph,
     46 					&isl_sched_node_scc_exactly,
     47 					&isl_sched_edge_scc_exactly,
     48 					i, &c->scc[i]) < 0)
     49 			return isl_stat_error;
     50 		c->scc[i].scc = 1;
     51 		if (isl_sched_graph_compute_maxvar(&c->scc[i]) < 0)
     52 			return isl_stat_error;
     53 		if (isl_schedule_node_compute_wcc_band(ctx, &c->scc[i]) < 0)
     54 			return isl_stat_error;
     55 		c->scc_cluster[i] = i;
     56 	}
     57 
     58 	return isl_stat_ok;
     59 }
     60 
     61 /* Free all memory allocated for "c".
     62  */
     63 static void clustering_free(isl_ctx *ctx, struct isl_clustering *c)
     64 {
     65 	int i;
     66 
     67 	if (c->scc)
     68 		for (i = 0; i < c->n; ++i)
     69 			isl_sched_graph_free(ctx, &c->scc[i]);
     70 	free(c->scc);
     71 	if (c->cluster)
     72 		for (i = 0; i < c->n; ++i)
     73 			isl_sched_graph_free(ctx, &c->cluster[i]);
     74 	free(c->cluster);
     75 	free(c->scc_cluster);
     76 	free(c->scc_node);
     77 	free(c->scc_in_merge);
     78 }
     79 
     80 /* Should we refrain from merging the cluster in "graph" with
     81  * any other cluster?
     82  * In particular, is its current schedule band empty and incomplete.
     83  */
     84 static int bad_cluster(struct isl_sched_graph *graph)
     85 {
     86 	return graph->n_row < graph->maxvar &&
     87 		graph->n_total_row == graph->band_start;
     88 }
     89 
     90 /* Is "edge" a proximity edge with a non-empty dependence relation?
     91  */
     92 static isl_bool is_non_empty_proximity(struct isl_sched_edge *edge)
     93 {
     94 	if (!isl_sched_edge_is_proximity(edge))
     95 		return isl_bool_false;
     96 	return isl_bool_not(isl_map_plain_is_empty(edge->map));
     97 }
     98 
     99 /* Return the index of an edge in "graph" that can be used to merge
    100  * two clusters in "c".
    101  * Return graph->n_edge if no such edge can be found.
    102  * Return -1 on error.
    103  *
    104  * In particular, return a proximity edge between two clusters
    105  * that is not marked "no_merge" and such that neither of the
    106  * two clusters has an incomplete, empty band.
    107  *
    108  * If there are multiple such edges, then try and find the most
    109  * appropriate edge to use for merging.  In particular, pick the edge
    110  * with the greatest weight.  If there are multiple of those,
    111  * then pick one with the shortest distance between
    112  * the two cluster representatives.
    113  */
    114 static int find_proximity(struct isl_sched_graph *graph,
    115 	struct isl_clustering *c)
    116 {
    117 	int i, best = graph->n_edge, best_dist, best_weight;
    118 
    119 	for (i = 0; i < graph->n_edge; ++i) {
    120 		struct isl_sched_edge *edge = &graph->edge[i];
    121 		int dist, weight;
    122 		isl_bool prox;
    123 
    124 		prox = is_non_empty_proximity(edge);
    125 		if (prox < 0)
    126 			return -1;
    127 		if (!prox)
    128 			continue;
    129 		if (edge->no_merge)
    130 			continue;
    131 		if (bad_cluster(&c->scc[edge->src->scc]) ||
    132 		    bad_cluster(&c->scc[edge->dst->scc]))
    133 			continue;
    134 		dist = c->scc_cluster[edge->dst->scc] -
    135 			c->scc_cluster[edge->src->scc];
    136 		if (dist == 0)
    137 			continue;
    138 		weight = edge->weight;
    139 		if (best < graph->n_edge) {
    140 			if (best_weight > weight)
    141 				continue;
    142 			if (best_weight == weight && best_dist <= dist)
    143 				continue;
    144 		}
    145 		best = i;
    146 		best_dist = dist;
    147 		best_weight = weight;
    148 	}
    149 
    150 	return best;
    151 }
    152 
    153 /* Internal data structure used in mark_merge_sccs.
    154  *
    155  * "graph" is the dependence graph in which a strongly connected
    156  * component is constructed.
    157  * "scc_cluster" maps each SCC index to the cluster to which it belongs.
    158  * "src" and "dst" are the indices of the nodes that are being merged.
    159  */
    160 struct isl_mark_merge_sccs_data {
    161 	struct isl_sched_graph *graph;
    162 	int *scc_cluster;
    163 	int src;
    164 	int dst;
    165 };
    166 
    167 /* Check whether the cluster containing node "i" depends on the cluster
    168  * containing node "j".  If "i" and "j" belong to the same cluster,
    169  * then they are taken to depend on each other to ensure that
    170  * the resulting strongly connected component consists of complete
    171  * clusters.  Furthermore, if "i" and "j" are the two nodes that
    172  * are being merged, then they are taken to depend on each other as well.
    173  * Otherwise, check if there is a (conditional) validity dependence
    174  * from node[j] to node[i], forcing node[i] to follow node[j].
    175  */
    176 static isl_bool cluster_follows(int i, int j, void *user)
    177 {
    178 	struct isl_mark_merge_sccs_data *data = user;
    179 	struct isl_sched_graph *graph = data->graph;
    180 	int *scc_cluster = data->scc_cluster;
    181 
    182 	if (data->src == i && data->dst == j)
    183 		return isl_bool_true;
    184 	if (data->src == j && data->dst == i)
    185 		return isl_bool_true;
    186 	if (scc_cluster[graph->node[i].scc] == scc_cluster[graph->node[j].scc])
    187 		return isl_bool_true;
    188 
    189 	return isl_sched_graph_has_validity_edge(graph, &graph->node[j],
    190 							&graph->node[i]);
    191 }
    192 
    193 /* Mark all SCCs that belong to either of the two clusters in "c"
    194  * connected by the edge in "graph" with index "edge", or to any
    195  * of the intermediate clusters.
    196  * The marking is recorded in c->scc_in_merge.
    197  *
    198  * The given edge has been selected for merging two clusters,
    199  * meaning that there is at least a proximity edge between the two nodes.
    200  * However, there may also be (indirect) validity dependences
    201  * between the two nodes.  When merging the two clusters, all clusters
    202  * containing one or more of the intermediate nodes along the
    203  * indirect validity dependences need to be merged in as well.
    204  *
    205  * First collect all such nodes by computing the strongly connected
    206  * component (SCC) containing the two nodes connected by the edge, where
    207  * the two nodes are considered to depend on each other to make
    208  * sure they end up in the same SCC.  Similarly, each node is considered
    209  * to depend on every other node in the same cluster to ensure
    210  * that the SCC consists of complete clusters.
    211  *
    212  * Then the original SCCs that contain any of these nodes are marked
    213  * in c->scc_in_merge.
    214  */
    215 static isl_stat mark_merge_sccs(isl_ctx *ctx, struct isl_sched_graph *graph,
    216 	int edge, struct isl_clustering *c)
    217 {
    218 	struct isl_mark_merge_sccs_data data;
    219 	struct isl_tarjan_graph *g;
    220 	int i;
    221 
    222 	for (i = 0; i < c->n; ++i)
    223 		c->scc_in_merge[i] = 0;
    224 
    225 	data.graph = graph;
    226 	data.scc_cluster = c->scc_cluster;
    227 	data.src = graph->edge[edge].src - graph->node;
    228 	data.dst = graph->edge[edge].dst - graph->node;
    229 
    230 	g = isl_tarjan_graph_component(ctx, graph->n, data.dst,
    231 					&cluster_follows, &data);
    232 	if (!g)
    233 		goto error;
    234 
    235 	i = g->op;
    236 	if (i < 3)
    237 		isl_die(ctx, isl_error_internal,
    238 			"expecting at least two nodes in component",
    239 			goto error);
    240 	if (g->order[--i] != -1)
    241 		isl_die(ctx, isl_error_internal,
    242 			"expecting end of component marker", goto error);
    243 
    244 	for (--i; i >= 0 && g->order[i] != -1; --i) {
    245 		int scc = graph->node[g->order[i]].scc;
    246 		c->scc_in_merge[scc] = 1;
    247 	}
    248 
    249 	isl_tarjan_graph_free(g);
    250 	return isl_stat_ok;
    251 error:
    252 	isl_tarjan_graph_free(g);
    253 	return isl_stat_error;
    254 }
    255 
    256 /* Construct the identifier "cluster_i".
    257  */
    258 static __isl_give isl_id *cluster_id(isl_ctx *ctx, int i)
    259 {
    260 	char name[40];
    261 
    262 	snprintf(name, sizeof(name), "cluster_%d", i);
    263 	return isl_id_alloc(ctx, name, NULL);
    264 }
    265 
    266 /* Construct the space of the cluster with index "i" containing
    267  * the strongly connected component "scc".
    268  *
    269  * In particular, construct a space called cluster_i with dimension equal
    270  * to the number of schedule rows in the current band of "scc".
    271  */
    272 static __isl_give isl_space *cluster_space(struct isl_sched_graph *scc, int i)
    273 {
    274 	int nvar;
    275 	isl_space *space;
    276 	isl_id *id;
    277 
    278 	nvar = scc->n_total_row - scc->band_start;
    279 	space = isl_space_copy(scc->node[0].space);
    280 	space = isl_space_params(space);
    281 	space = isl_space_set_from_params(space);
    282 	space = isl_space_add_dims(space, isl_dim_set, nvar);
    283 	id = cluster_id(isl_space_get_ctx(space), i);
    284 	space = isl_space_set_tuple_id(space, isl_dim_set, id);
    285 
    286 	return space;
    287 }
    288 
    289 /* Collect the domain of the graph for merging clusters.
    290  *
    291  * In particular, for each cluster with first SCC "i", construct
    292  * a set in the space called cluster_i with dimension equal
    293  * to the number of schedule rows in the current band of the cluster.
    294  */
    295 static __isl_give isl_union_set *collect_domain(isl_ctx *ctx,
    296 	struct isl_sched_graph *graph, struct isl_clustering *c)
    297 {
    298 	int i;
    299 	isl_space *space;
    300 	isl_union_set *domain;
    301 
    302 	space = isl_space_params_alloc(ctx, 0);
    303 	domain = isl_union_set_empty(space);
    304 
    305 	for (i = 0; i < graph->scc; ++i) {
    306 		isl_space *space;
    307 
    308 		if (!c->scc_in_merge[i])
    309 			continue;
    310 		if (c->scc_cluster[i] != i)
    311 			continue;
    312 		space = cluster_space(&c->scc[i], i);
    313 		domain = isl_union_set_add_set(domain, isl_set_universe(space));
    314 	}
    315 
    316 	return domain;
    317 }
    318 
    319 /* Construct a map from the original instances to the corresponding
    320  * cluster instance in the current bands of the clusters in "c".
    321  */
    322 static __isl_give isl_union_map *collect_cluster_map(isl_ctx *ctx,
    323 	struct isl_sched_graph *graph, struct isl_clustering *c)
    324 {
    325 	int i, j;
    326 	isl_space *space;
    327 	isl_union_map *cluster_map;
    328 
    329 	space = isl_space_params_alloc(ctx, 0);
    330 	cluster_map = isl_union_map_empty(space);
    331 	for (i = 0; i < graph->scc; ++i) {
    332 		int start, n;
    333 		isl_id *id;
    334 
    335 		if (!c->scc_in_merge[i])
    336 			continue;
    337 
    338 		id = cluster_id(ctx, c->scc_cluster[i]);
    339 		start = c->scc[i].band_start;
    340 		n = c->scc[i].n_total_row - start;
    341 		for (j = 0; j < c->scc[i].n; ++j) {
    342 			isl_multi_aff *ma;
    343 			isl_map *map;
    344 			struct isl_sched_node *node = &c->scc[i].node[j];
    345 
    346 			ma = isl_sched_node_extract_partial_schedule_multi_aff(
    347 								node, start, n);
    348 			ma = isl_multi_aff_set_tuple_id(ma, isl_dim_out,
    349 							    isl_id_copy(id));
    350 			map = isl_map_from_multi_aff(ma);
    351 			cluster_map = isl_union_map_add_map(cluster_map, map);
    352 		}
    353 		isl_id_free(id);
    354 	}
    355 
    356 	return cluster_map;
    357 }
    358 
    359 /* Add "umap" to the schedule constraints "sc" of all types of "edge"
    360  * that are not isl_edge_condition or isl_edge_conditional_validity.
    361  */
    362 static __isl_give isl_schedule_constraints *add_non_conditional_constraints(
    363 	struct isl_sched_edge *edge, __isl_keep isl_union_map *umap,
    364 	__isl_take isl_schedule_constraints *sc)
    365 {
    366 	enum isl_edge_type t;
    367 
    368 	if (!sc)
    369 		return NULL;
    370 
    371 	for (t = isl_edge_first; t <= isl_edge_last; ++t) {
    372 		if (t == isl_edge_condition ||
    373 		    t == isl_edge_conditional_validity)
    374 			continue;
    375 		if (!isl_sched_edge_has_type(edge, t))
    376 			continue;
    377 		sc = isl_schedule_constraints_add(sc, t,
    378 						    isl_union_map_copy(umap));
    379 	}
    380 
    381 	return sc;
    382 }
    383 
    384 /* Add schedule constraints of types isl_edge_condition and
    385  * isl_edge_conditional_validity to "sc" by applying "umap" to
    386  * the domains of the wrapped relations in domain and range
    387  * of the corresponding tagged constraints of "edge".
    388  */
    389 static __isl_give isl_schedule_constraints *add_conditional_constraints(
    390 	struct isl_sched_edge *edge, __isl_keep isl_union_map *umap,
    391 	__isl_take isl_schedule_constraints *sc)
    392 {
    393 	enum isl_edge_type t;
    394 	isl_union_map *tagged;
    395 
    396 	for (t = isl_edge_condition; t <= isl_edge_conditional_validity; ++t) {
    397 		if (!isl_sched_edge_has_type(edge, t))
    398 			continue;
    399 		if (t == isl_edge_condition)
    400 			tagged = isl_union_map_copy(edge->tagged_condition);
    401 		else
    402 			tagged = isl_union_map_copy(edge->tagged_validity);
    403 		tagged = isl_union_map_zip(tagged);
    404 		tagged = isl_union_map_apply_domain(tagged,
    405 					isl_union_map_copy(umap));
    406 		tagged = isl_union_map_zip(tagged);
    407 		sc = isl_schedule_constraints_add(sc, t, tagged);
    408 		if (!sc)
    409 			return NULL;
    410 	}
    411 
    412 	return sc;
    413 }
    414 
    415 /* Given a mapping "cluster_map" from the original instances to
    416  * the cluster instances, add schedule constraints on the clusters
    417  * to "sc" corresponding to the original constraints represented by "edge".
    418  *
    419  * For non-tagged dependence constraints, the cluster constraints
    420  * are obtained by applying "cluster_map" to the edge->map.
    421  *
    422  * For tagged dependence constraints, "cluster_map" needs to be applied
    423  * to the domains of the wrapped relations in domain and range
    424  * of the tagged dependence constraints.  Pick out the mappings
    425  * from these domains from "cluster_map" and construct their product.
    426  * This mapping can then be applied to the pair of domains.
    427  */
    428 static __isl_give isl_schedule_constraints *collect_edge_constraints(
    429 	struct isl_sched_edge *edge, __isl_keep isl_union_map *cluster_map,
    430 	__isl_take isl_schedule_constraints *sc)
    431 {
    432 	isl_union_map *umap;
    433 	isl_space *space;
    434 	isl_union_set *uset;
    435 	isl_union_map *umap1, *umap2;
    436 
    437 	if (!sc)
    438 		return NULL;
    439 
    440 	umap = isl_union_map_from_map(isl_map_copy(edge->map));
    441 	umap = isl_union_map_apply_domain(umap,
    442 				isl_union_map_copy(cluster_map));
    443 	umap = isl_union_map_apply_range(umap,
    444 				isl_union_map_copy(cluster_map));
    445 	sc = add_non_conditional_constraints(edge, umap, sc);
    446 	isl_union_map_free(umap);
    447 
    448 	if (!sc ||
    449 	    (!isl_sched_edge_is_condition(edge) &&
    450 	     !isl_sched_edge_is_conditional_validity(edge)))
    451 		return sc;
    452 
    453 	space = isl_space_domain(isl_map_get_space(edge->map));
    454 	uset = isl_union_set_from_set(isl_set_universe(space));
    455 	umap1 = isl_union_map_copy(cluster_map);
    456 	umap1 = isl_union_map_intersect_domain(umap1, uset);
    457 	space = isl_space_range(isl_map_get_space(edge->map));
    458 	uset = isl_union_set_from_set(isl_set_universe(space));
    459 	umap2 = isl_union_map_copy(cluster_map);
    460 	umap2 = isl_union_map_intersect_domain(umap2, uset);
    461 	umap = isl_union_map_product(umap1, umap2);
    462 
    463 	sc = add_conditional_constraints(edge, umap, sc);
    464 
    465 	isl_union_map_free(umap);
    466 	return sc;
    467 }
    468 
    469 /* Given a mapping "cluster_map" from the original instances to
    470  * the cluster instances, add schedule constraints on the clusters
    471  * to "sc" corresponding to all edges in "graph" between nodes that
    472  * belong to SCCs that are marked for merging in "scc_in_merge".
    473  */
    474 static __isl_give isl_schedule_constraints *collect_constraints(
    475 	struct isl_sched_graph *graph, int *scc_in_merge,
    476 	__isl_keep isl_union_map *cluster_map,
    477 	__isl_take isl_schedule_constraints *sc)
    478 {
    479 	int i;
    480 
    481 	for (i = 0; i < graph->n_edge; ++i) {
    482 		struct isl_sched_edge *edge = &graph->edge[i];
    483 
    484 		if (!scc_in_merge[edge->src->scc])
    485 			continue;
    486 		if (!scc_in_merge[edge->dst->scc])
    487 			continue;
    488 		sc = collect_edge_constraints(edge, cluster_map, sc);
    489 	}
    490 
    491 	return sc;
    492 }
    493 
    494 /* Construct a dependence graph for scheduling clusters with respect
    495  * to each other and store the result in "merge_graph".
    496  * In particular, the nodes of the graph correspond to the schedule
    497  * dimensions of the current bands of those clusters that have been
    498  * marked for merging in "c".
    499  *
    500  * First construct an isl_schedule_constraints object for this domain
    501  * by transforming the edges in "graph" to the domain.
    502  * Then initialize a dependence graph for scheduling from these
    503  * constraints.
    504  */
    505 static isl_stat init_merge_graph(isl_ctx *ctx, struct isl_sched_graph *graph,
    506 	struct isl_clustering *c, struct isl_sched_graph *merge_graph)
    507 {
    508 	isl_union_set *domain;
    509 	isl_union_map *cluster_map;
    510 	isl_schedule_constraints *sc;
    511 	isl_stat r;
    512 
    513 	domain = collect_domain(ctx, graph, c);
    514 	sc = isl_schedule_constraints_on_domain(domain);
    515 	if (!sc)
    516 		return isl_stat_error;
    517 	cluster_map = collect_cluster_map(ctx, graph, c);
    518 	sc = collect_constraints(graph, c->scc_in_merge, cluster_map, sc);
    519 	isl_union_map_free(cluster_map);
    520 
    521 	r = isl_sched_graph_init(merge_graph, sc);
    522 
    523 	isl_schedule_constraints_free(sc);
    524 
    525 	return r;
    526 }
    527 
    528 /* Compute the maximal number of remaining schedule rows that still need
    529  * to be computed for the nodes that belong to clusters with the maximal
    530  * dimension for the current band (i.e., the band that is to be merged).
    531  * Only clusters that are about to be merged are considered.
    532  * "maxvar" is the maximal dimension for the current band.
    533  * "c" contains information about the clusters.
    534  *
    535  * Return the maximal number of remaining schedule rows or
    536  * isl_size_error on error.
    537  */
    538 static isl_size compute_maxvar_max_slack(int maxvar, struct isl_clustering *c)
    539 {
    540 	int i, j;
    541 	int max_slack;
    542 
    543 	max_slack = 0;
    544 	for (i = 0; i < c->n; ++i) {
    545 		int nvar;
    546 		struct isl_sched_graph *scc;
    547 
    548 		if (!c->scc_in_merge[i])
    549 			continue;
    550 		scc = &c->scc[i];
    551 		nvar = scc->n_total_row - scc->band_start;
    552 		if (nvar != maxvar)
    553 			continue;
    554 		for (j = 0; j < scc->n; ++j) {
    555 			struct isl_sched_node *node = &scc->node[j];
    556 			int slack;
    557 
    558 			if (isl_sched_node_update_vmap(node) < 0)
    559 				return isl_size_error;
    560 			slack = node->nvar - node->rank;
    561 			if (slack > max_slack)
    562 				max_slack = slack;
    563 		}
    564 	}
    565 
    566 	return max_slack;
    567 }
    568 
    569 /* If there are any clusters where the dimension of the current band
    570  * (i.e., the band that is to be merged) is smaller than "maxvar" and
    571  * if there are any nodes in such a cluster where the number
    572  * of remaining schedule rows that still need to be computed
    573  * is greater than "max_slack", then return the smallest current band
    574  * dimension of all these clusters.  Otherwise return the original value
    575  * of "maxvar".  Return isl_size_error in case of any error.
    576  * Only clusters that are about to be merged are considered.
    577  * "c" contains information about the clusters.
    578  */
    579 static isl_size limit_maxvar_to_slack(int maxvar, int max_slack,
    580 	struct isl_clustering *c)
    581 {
    582 	int i, j;
    583 
    584 	for (i = 0; i < c->n; ++i) {
    585 		int nvar;
    586 		struct isl_sched_graph *scc;
    587 
    588 		if (!c->scc_in_merge[i])
    589 			continue;
    590 		scc = &c->scc[i];
    591 		nvar = scc->n_total_row - scc->band_start;
    592 		if (nvar >= maxvar)
    593 			continue;
    594 		for (j = 0; j < scc->n; ++j) {
    595 			struct isl_sched_node *node = &scc->node[j];
    596 			int slack;
    597 
    598 			if (isl_sched_node_update_vmap(node) < 0)
    599 				return isl_size_error;
    600 			slack = node->nvar - node->rank;
    601 			if (slack > max_slack) {
    602 				maxvar = nvar;
    603 				break;
    604 			}
    605 		}
    606 	}
    607 
    608 	return maxvar;
    609 }
    610 
    611 /* Adjust merge_graph->maxvar based on the number of remaining schedule rows
    612  * that still need to be computed.  In particular, if there is a node
    613  * in a cluster where the dimension of the current band is smaller
    614  * than merge_graph->maxvar, but the number of remaining schedule rows
    615  * is greater than that of any node in a cluster with the maximal
    616  * dimension for the current band (i.e., merge_graph->maxvar),
    617  * then adjust merge_graph->maxvar to the (smallest) current band dimension
    618  * of those clusters.  Without this adjustment, the total number of
    619  * schedule dimensions would be increased, resulting in a skewed view
    620  * of the number of coincident dimensions.
    621  * "c" contains information about the clusters.
    622  *
    623  * If the maximize_band_depth option is set and merge_graph->maxvar is reduced,
    624  * then there is no point in attempting any merge since it will be rejected
    625  * anyway.  Set merge_graph->maxvar to zero in such cases.
    626  */
    627 static isl_stat adjust_maxvar_to_slack(isl_ctx *ctx,
    628 	struct isl_sched_graph *merge_graph, struct isl_clustering *c)
    629 {
    630 	isl_size max_slack, maxvar;
    631 
    632 	max_slack = compute_maxvar_max_slack(merge_graph->maxvar, c);
    633 	if (max_slack < 0)
    634 		return isl_stat_error;
    635 	maxvar = limit_maxvar_to_slack(merge_graph->maxvar, max_slack, c);
    636 	if (maxvar < 0)
    637 		return isl_stat_error;
    638 
    639 	if (maxvar < merge_graph->maxvar) {
    640 		if (isl_options_get_schedule_maximize_band_depth(ctx))
    641 			merge_graph->maxvar = 0;
    642 		else
    643 			merge_graph->maxvar = maxvar;
    644 	}
    645 
    646 	return isl_stat_ok;
    647 }
    648 
    649 /* Return the number of coincident dimensions in the current band of "graph",
    650  * where the nodes of "graph" are assumed to be scheduled by a single band.
    651  */
    652 static int get_n_coincident(struct isl_sched_graph *graph)
    653 {
    654 	int i;
    655 
    656 	for (i = graph->band_start; i < graph->n_total_row; ++i)
    657 		if (!graph->node[0].coincident[i])
    658 			break;
    659 
    660 	return i - graph->band_start;
    661 }
    662 
    663 /* Should the clusters be merged based on the cluster schedule
    664  * in the current (and only) band of "merge_graph", given that
    665  * coincidence should be maximized?
    666  *
    667  * If the number of coincident schedule dimensions in the merged band
    668  * would be less than the maximal number of coincident schedule dimensions
    669  * in any of the merged clusters, then the clusters should not be merged.
    670  */
    671 static isl_bool ok_to_merge_coincident(struct isl_clustering *c,
    672 	struct isl_sched_graph *merge_graph)
    673 {
    674 	int i;
    675 	int n_coincident;
    676 	int max_coincident;
    677 
    678 	max_coincident = 0;
    679 	for (i = 0; i < c->n; ++i) {
    680 		if (!c->scc_in_merge[i])
    681 			continue;
    682 		n_coincident = get_n_coincident(&c->scc[i]);
    683 		if (n_coincident > max_coincident)
    684 			max_coincident = n_coincident;
    685 	}
    686 
    687 	n_coincident = get_n_coincident(merge_graph);
    688 
    689 	return isl_bool_ok(n_coincident >= max_coincident);
    690 }
    691 
    692 /* Return the transformation on "node" expressed by the current (and only)
    693  * band of "merge_graph" applied to the clusters in "c".
    694  *
    695  * First find the representation of "node" in its SCC in "c" and
    696  * extract the transformation expressed by the current band.
    697  * Then extract the transformation applied by "merge_graph"
    698  * to the cluster to which this SCC belongs.
    699  * Combine the two to obtain the complete transformation on the node.
    700  *
    701  * Note that the range of the first transformation is an anonymous space,
    702  * while the domain of the second is named "cluster_X".  The range
    703  * of the former therefore needs to be adjusted before the two
    704  * can be combined.
    705  */
    706 static __isl_give isl_map *extract_node_transformation(isl_ctx *ctx,
    707 	struct isl_sched_node *node, struct isl_clustering *c,
    708 	struct isl_sched_graph *merge_graph)
    709 {
    710 	struct isl_sched_node *scc_node, *cluster_node;
    711 	int start, n;
    712 	isl_id *id;
    713 	isl_space *space;
    714 	isl_multi_aff *ma, *ma2;
    715 
    716 	scc_node = isl_sched_graph_find_node(ctx, &c->scc[node->scc],
    717 						node->space);
    718 	if (scc_node && !isl_sched_graph_is_node(&c->scc[node->scc], scc_node))
    719 		isl_die(ctx, isl_error_internal, "unable to find node",
    720 			return NULL);
    721 	start = c->scc[node->scc].band_start;
    722 	n = c->scc[node->scc].n_total_row - start;
    723 	ma = isl_sched_node_extract_partial_schedule_multi_aff(scc_node,
    724 								start, n);
    725 	space = cluster_space(&c->scc[node->scc], c->scc_cluster[node->scc]);
    726 	cluster_node = isl_sched_graph_find_node(ctx, merge_graph, space);
    727 	if (cluster_node && !isl_sched_graph_is_node(merge_graph, cluster_node))
    728 		isl_die(ctx, isl_error_internal, "unable to find cluster",
    729 			space = isl_space_free(space));
    730 	id = isl_space_get_tuple_id(space, isl_dim_set);
    731 	ma = isl_multi_aff_set_tuple_id(ma, isl_dim_out, id);
    732 	isl_space_free(space);
    733 	n = merge_graph->n_total_row;
    734 	ma2 = isl_sched_node_extract_partial_schedule_multi_aff(cluster_node,
    735 								0, n);
    736 	ma = isl_multi_aff_pullback_multi_aff(ma2, ma);
    737 
    738 	return isl_map_from_multi_aff(ma);
    739 }
    740 
    741 /* Give a set of distances "set", are they bounded by a small constant
    742  * in direction "pos"?
    743  * In practice, check if they are bounded by 2 by checking that there
    744  * are no elements with a value greater than or equal to 3 or
    745  * smaller than or equal to -3.
    746  */
    747 static isl_bool distance_is_bounded(__isl_keep isl_set *set, int pos)
    748 {
    749 	isl_bool bounded;
    750 	isl_set *test;
    751 
    752 	if (!set)
    753 		return isl_bool_error;
    754 
    755 	test = isl_set_copy(set);
    756 	test = isl_set_lower_bound_si(test, isl_dim_set, pos, 3);
    757 	bounded = isl_set_is_empty(test);
    758 	isl_set_free(test);
    759 
    760 	if (bounded < 0 || !bounded)
    761 		return bounded;
    762 
    763 	test = isl_set_copy(set);
    764 	test = isl_set_upper_bound_si(test, isl_dim_set, pos, -3);
    765 	bounded = isl_set_is_empty(test);
    766 	isl_set_free(test);
    767 
    768 	return bounded;
    769 }
    770 
    771 /* Does the set "set" have a fixed (but possible parametric) value
    772  * at dimension "pos"?
    773  */
    774 static isl_bool has_single_value(__isl_keep isl_set *set, int pos)
    775 {
    776 	isl_size n;
    777 	isl_bool single;
    778 
    779 	n = isl_set_dim(set, isl_dim_set);
    780 	if (n < 0)
    781 		return isl_bool_error;
    782 	set = isl_set_copy(set);
    783 	set = isl_set_project_out(set, isl_dim_set, pos + 1, n - (pos + 1));
    784 	set = isl_set_project_out(set, isl_dim_set, 0, pos);
    785 	single = isl_set_is_singleton(set);
    786 	isl_set_free(set);
    787 
    788 	return single;
    789 }
    790 
    791 /* Does "map" have a fixed (but possible parametric) value
    792  * at dimension "pos" of either its domain or its range?
    793  */
    794 static isl_bool has_singular_src_or_dst(__isl_keep isl_map *map, int pos)
    795 {
    796 	isl_set *set;
    797 	isl_bool single;
    798 
    799 	set = isl_map_domain(isl_map_copy(map));
    800 	single = has_single_value(set, pos);
    801 	isl_set_free(set);
    802 
    803 	if (single < 0 || single)
    804 		return single;
    805 
    806 	set = isl_map_range(isl_map_copy(map));
    807 	single = has_single_value(set, pos);
    808 	isl_set_free(set);
    809 
    810 	return single;
    811 }
    812 
    813 /* Does the edge "edge" from "graph" have bounded dependence distances
    814  * in the merged graph "merge_graph" of a selection of clusters in "c"?
    815  *
    816  * Extract the complete transformations of the source and destination
    817  * nodes of the edge, apply them to the edge constraints and
    818  * compute the differences.  Finally, check if these differences are bounded
    819  * in each direction.
    820  *
    821  * If the dimension of the band is greater than the number of
    822  * dimensions that can be expected to be optimized by the edge
    823  * (based on its weight), then also allow the differences to be unbounded
    824  * in the remaining dimensions, but only if either the source or
    825  * the destination has a fixed value in that direction.
    826  * This allows a statement that produces values that are used by
    827  * several instances of another statement to be merged with that
    828  * other statement.
    829  * However, merging such clusters will introduce an inherently
    830  * large proximity distance inside the merged cluster, meaning
    831  * that proximity distances will no longer be optimized in
    832  * subsequent merges.  These merges are therefore only allowed
    833  * after all other possible merges have been tried.
    834  * The first time such a merge is encountered, the weight of the edge
    835  * is replaced by a negative weight.  The second time (i.e., after
    836  * all merges over edges with a non-negative weight have been tried),
    837  * the merge is allowed.
    838  */
    839 static isl_bool has_bounded_distances(isl_ctx *ctx, struct isl_sched_edge *edge,
    840 	struct isl_sched_graph *graph, struct isl_clustering *c,
    841 	struct isl_sched_graph *merge_graph)
    842 {
    843 	int i, n_slack;
    844 	isl_size n;
    845 	isl_bool bounded;
    846 	isl_map *map, *t;
    847 	isl_set *dist;
    848 
    849 	map = isl_map_copy(edge->map);
    850 	t = extract_node_transformation(ctx, edge->src, c, merge_graph);
    851 	map = isl_map_apply_domain(map, t);
    852 	t = extract_node_transformation(ctx, edge->dst, c, merge_graph);
    853 	map = isl_map_apply_range(map, t);
    854 	dist = isl_map_deltas(isl_map_copy(map));
    855 
    856 	bounded = isl_bool_true;
    857 	n = isl_set_dim(dist, isl_dim_set);
    858 	if (n < 0)
    859 		goto error;
    860 	n_slack = n - edge->weight;
    861 	if (edge->weight < 0)
    862 		n_slack -= graph->max_weight + 1;
    863 	for (i = 0; i < n; ++i) {
    864 		isl_bool bounded_i, singular_i;
    865 
    866 		bounded_i = distance_is_bounded(dist, i);
    867 		if (bounded_i < 0)
    868 			goto error;
    869 		if (bounded_i)
    870 			continue;
    871 		if (edge->weight >= 0)
    872 			bounded = isl_bool_false;
    873 		n_slack--;
    874 		if (n_slack < 0)
    875 			break;
    876 		singular_i = has_singular_src_or_dst(map, i);
    877 		if (singular_i < 0)
    878 			goto error;
    879 		if (singular_i)
    880 			continue;
    881 		bounded = isl_bool_false;
    882 		break;
    883 	}
    884 	if (!bounded && i >= n && edge->weight >= 0)
    885 		edge->weight -= graph->max_weight + 1;
    886 	isl_map_free(map);
    887 	isl_set_free(dist);
    888 
    889 	return bounded;
    890 error:
    891 	isl_map_free(map);
    892 	isl_set_free(dist);
    893 	return isl_bool_error;
    894 }
    895 
    896 /* Should the clusters be merged based on the cluster schedule
    897  * in the current (and only) band of "merge_graph"?
    898  * "graph" is the original dependence graph, while "c" records
    899  * which SCCs are involved in the latest merge.
    900  *
    901  * In particular, is there at least one proximity constraint
    902  * that is optimized by the merge?
    903  *
    904  * A proximity constraint is considered to be optimized
    905  * if the dependence distances are small.
    906  */
    907 static isl_bool ok_to_merge_proximity(isl_ctx *ctx,
    908 	struct isl_sched_graph *graph, struct isl_clustering *c,
    909 	struct isl_sched_graph *merge_graph)
    910 {
    911 	int i;
    912 
    913 	for (i = 0; i < graph->n_edge; ++i) {
    914 		struct isl_sched_edge *edge = &graph->edge[i];
    915 		isl_bool bounded;
    916 
    917 		if (!isl_sched_edge_is_proximity(edge))
    918 			continue;
    919 		if (!c->scc_in_merge[edge->src->scc])
    920 			continue;
    921 		if (!c->scc_in_merge[edge->dst->scc])
    922 			continue;
    923 		if (c->scc_cluster[edge->dst->scc] ==
    924 		    c->scc_cluster[edge->src->scc])
    925 			continue;
    926 		bounded = has_bounded_distances(ctx, edge, graph, c,
    927 						merge_graph);
    928 		if (bounded < 0 || bounded)
    929 			return bounded;
    930 	}
    931 
    932 	return isl_bool_false;
    933 }
    934 
    935 /* Should the clusters be merged based on the cluster schedule
    936  * in the current (and only) band of "merge_graph"?
    937  * "graph" is the original dependence graph, while "c" records
    938  * which SCCs are involved in the latest merge.
    939  *
    940  * If the current band is empty, then the clusters should not be merged.
    941  *
    942  * If the band depth should be maximized and the merge schedule
    943  * is incomplete (meaning that the dimension of some of the schedule
    944  * bands in the original schedule will be reduced), then the clusters
    945  * should not be merged.
    946  *
    947  * If the schedule_maximize_coincidence option is set, then check that
    948  * the number of coincident schedule dimensions is not reduced.
    949  *
    950  * Finally, only allow the merge if at least one proximity
    951  * constraint is optimized.
    952  */
    953 static isl_bool ok_to_merge(isl_ctx *ctx, struct isl_sched_graph *graph,
    954 	struct isl_clustering *c, struct isl_sched_graph *merge_graph)
    955 {
    956 	if (merge_graph->n_total_row == merge_graph->band_start)
    957 		return isl_bool_false;
    958 
    959 	if (isl_options_get_schedule_maximize_band_depth(ctx) &&
    960 	    merge_graph->n_total_row < merge_graph->maxvar)
    961 		return isl_bool_false;
    962 
    963 	if (isl_options_get_schedule_maximize_coincidence(ctx)) {
    964 		isl_bool ok;
    965 
    966 		ok = ok_to_merge_coincident(c, merge_graph);
    967 		if (ok < 0 || !ok)
    968 			return ok;
    969 	}
    970 
    971 	return ok_to_merge_proximity(ctx, graph, c, merge_graph);
    972 }
    973 
    974 /* Apply the schedule in "t_node" to the "n" rows starting at "first"
    975  * of the schedule in "node" and return the result.
    976  *
    977  * That is, essentially compute
    978  *
    979  *	T * N(first:first+n-1)
    980  *
    981  * taking into account the constant term and the parameter coefficients
    982  * in "t_node".
    983  */
    984 static __isl_give isl_mat *node_transformation(isl_ctx *ctx,
    985 	struct isl_sched_node *t_node, struct isl_sched_node *node,
    986 	int first, int n)
    987 {
    988 	int i, j;
    989 	isl_mat *t;
    990 	isl_size n_row, n_col;
    991 	int n_param, n_var;
    992 
    993 	n_param = node->nparam;
    994 	n_var = node->nvar;
    995 	n_row = isl_mat_rows(t_node->sched);
    996 	n_col = isl_mat_cols(node->sched);
    997 	if (n_row < 0 || n_col < 0)
    998 		return NULL;
    999 	t = isl_mat_alloc(ctx, n_row, n_col);
   1000 	if (!t)
   1001 		return NULL;
   1002 	for (i = 0; i < n_row; ++i) {
   1003 		isl_seq_cpy(t->row[i], t_node->sched->row[i], 1 + n_param);
   1004 		isl_seq_clr(t->row[i] + 1 + n_param, n_var);
   1005 		for (j = 0; j < n; ++j)
   1006 			isl_seq_addmul(t->row[i],
   1007 					t_node->sched->row[i][1 + n_param + j],
   1008 					node->sched->row[first + j],
   1009 					1 + n_param + n_var);
   1010 	}
   1011 	return t;
   1012 }
   1013 
   1014 /* Apply the cluster schedule in "t_node" to the current band
   1015  * schedule of the nodes in "graph".
   1016  *
   1017  * In particular, replace the rows starting at band_start
   1018  * by the result of applying the cluster schedule in "t_node"
   1019  * to the original rows.
   1020  *
   1021  * The coincidence of the schedule is determined by the coincidence
   1022  * of the cluster schedule.
   1023  */
   1024 static isl_stat transform(isl_ctx *ctx, struct isl_sched_graph *graph,
   1025 	struct isl_sched_node *t_node)
   1026 {
   1027 	int i, j;
   1028 	isl_size n_new;
   1029 	int start, n;
   1030 
   1031 	start = graph->band_start;
   1032 	n = graph->n_total_row - start;
   1033 
   1034 	n_new = isl_mat_rows(t_node->sched);
   1035 	if (n_new < 0)
   1036 		return isl_stat_error;
   1037 	for (i = 0; i < graph->n; ++i) {
   1038 		struct isl_sched_node *node = &graph->node[i];
   1039 		isl_mat *t;
   1040 
   1041 		t = node_transformation(ctx, t_node, node, start, n);
   1042 		node->sched = isl_mat_drop_rows(node->sched, start, n);
   1043 		node->sched = isl_mat_concat(node->sched, t);
   1044 		node->sched_map = isl_map_free(node->sched_map);
   1045 		if (!node->sched)
   1046 			return isl_stat_error;
   1047 		for (j = 0; j < n_new; ++j)
   1048 			node->coincident[start + j] = t_node->coincident[j];
   1049 	}
   1050 	graph->n_total_row -= n;
   1051 	graph->n_row -= n;
   1052 	graph->n_total_row += n_new;
   1053 	graph->n_row += n_new;
   1054 
   1055 	return isl_stat_ok;
   1056 }
   1057 
   1058 /* Merge the clusters marked for merging in "c" into a single
   1059  * cluster using the cluster schedule in the current band of "merge_graph".
   1060  * The representative SCC for the new cluster is the SCC with
   1061  * the smallest index.
   1062  *
   1063  * The current band schedule of each SCC in the new cluster is obtained
   1064  * by applying the schedule of the corresponding original cluster
   1065  * to the original band schedule.
   1066  * All SCCs in the new cluster have the same number of schedule rows.
   1067  */
   1068 static isl_stat merge(isl_ctx *ctx, struct isl_clustering *c,
   1069 	struct isl_sched_graph *merge_graph)
   1070 {
   1071 	int i;
   1072 	int cluster = -1;
   1073 	isl_space *space;
   1074 
   1075 	for (i = 0; i < c->n; ++i) {
   1076 		struct isl_sched_node *node;
   1077 
   1078 		if (!c->scc_in_merge[i])
   1079 			continue;
   1080 		if (cluster < 0)
   1081 			cluster = i;
   1082 		space = cluster_space(&c->scc[i], c->scc_cluster[i]);
   1083 		node = isl_sched_graph_find_node(ctx, merge_graph, space);
   1084 		isl_space_free(space);
   1085 		if (!node)
   1086 			return isl_stat_error;
   1087 		if (!isl_sched_graph_is_node(merge_graph, node))
   1088 			isl_die(ctx, isl_error_internal,
   1089 				"unable to find cluster",
   1090 				return isl_stat_error);
   1091 		if (transform(ctx, &c->scc[i], node) < 0)
   1092 			return isl_stat_error;
   1093 		c->scc_cluster[i] = cluster;
   1094 	}
   1095 
   1096 	return isl_stat_ok;
   1097 }
   1098 
   1099 /* Try and merge the clusters of SCCs marked in c->scc_in_merge
   1100  * by scheduling the current cluster bands with respect to each other.
   1101  *
   1102  * Construct a dependence graph with a space for each cluster and
   1103  * with the coordinates of each space corresponding to the schedule
   1104  * dimensions of the current band of that cluster.
   1105  * Construct a cluster schedule in this cluster dependence graph and
   1106  * apply it to the current cluster bands if it is applicable
   1107  * according to ok_to_merge.
   1108  *
   1109  * If the number of remaining schedule dimensions in a cluster
   1110  * with a non-maximal current schedule dimension is greater than
   1111  * the number of remaining schedule dimensions in clusters
   1112  * with a maximal current schedule dimension, then restrict
   1113  * the number of rows to be computed in the cluster schedule
   1114  * to the minimal such non-maximal current schedule dimension.
   1115  * Do this by adjusting merge_graph.maxvar.
   1116  *
   1117  * Return isl_bool_true if the clusters have effectively been merged
   1118  * into a single cluster.
   1119  *
   1120  * Note that since the standard scheduling algorithm minimizes the maximal
   1121  * distance over proximity constraints, the proximity constraints between
   1122  * the merged clusters may not be optimized any further than what is
   1123  * sufficient to bring the distances within the limits of the internal
   1124  * proximity constraints inside the individual clusters.
   1125  * It may therefore make sense to perform an additional translation step
   1126  * to bring the clusters closer to each other, while maintaining
   1127  * the linear part of the merging schedule found using the standard
   1128  * scheduling algorithm.
   1129  */
   1130 static isl_bool try_merge(isl_ctx *ctx, struct isl_sched_graph *graph,
   1131 	struct isl_clustering *c)
   1132 {
   1133 	struct isl_sched_graph merge_graph = { 0 };
   1134 	isl_bool merged;
   1135 
   1136 	if (init_merge_graph(ctx, graph, c, &merge_graph) < 0)
   1137 		goto error;
   1138 
   1139 	if (isl_sched_graph_compute_maxvar(&merge_graph) < 0)
   1140 		goto error;
   1141 	if (adjust_maxvar_to_slack(ctx, &merge_graph,c) < 0)
   1142 		goto error;
   1143 	if (isl_schedule_node_compute_wcc_band(ctx, &merge_graph) < 0)
   1144 		goto error;
   1145 	merged = ok_to_merge(ctx, graph, c, &merge_graph);
   1146 	if (merged && merge(ctx, c, &merge_graph) < 0)
   1147 		goto error;
   1148 
   1149 	isl_sched_graph_free(ctx, &merge_graph);
   1150 	return merged;
   1151 error:
   1152 	isl_sched_graph_free(ctx, &merge_graph);
   1153 	return isl_bool_error;
   1154 }
   1155 
   1156 /* Is there any edge marked "no_merge" between two SCCs that are
   1157  * about to be merged (i.e., that are set in "scc_in_merge")?
   1158  * "merge_edge" is the proximity edge along which the clusters of SCCs
   1159  * are going to be merged.
   1160  *
   1161  * If there is any edge between two SCCs with a negative weight,
   1162  * while the weight of "merge_edge" is non-negative, then this
   1163  * means that the edge was postponed.  "merge_edge" should then
   1164  * also be postponed since merging along the edge with negative weight should
   1165  * be postponed until all edges with non-negative weight have been tried.
   1166  * Replace the weight of "merge_edge" by a negative weight as well and
   1167  * tell the caller not to attempt a merge.
   1168  */
   1169 static int any_no_merge(struct isl_sched_graph *graph, int *scc_in_merge,
   1170 	struct isl_sched_edge *merge_edge)
   1171 {
   1172 	int i;
   1173 
   1174 	for (i = 0; i < graph->n_edge; ++i) {
   1175 		struct isl_sched_edge *edge = &graph->edge[i];
   1176 
   1177 		if (!scc_in_merge[edge->src->scc])
   1178 			continue;
   1179 		if (!scc_in_merge[edge->dst->scc])
   1180 			continue;
   1181 		if (edge->no_merge)
   1182 			return 1;
   1183 		if (merge_edge->weight >= 0 && edge->weight < 0) {
   1184 			merge_edge->weight -= graph->max_weight + 1;
   1185 			return 1;
   1186 		}
   1187 	}
   1188 
   1189 	return 0;
   1190 }
   1191 
   1192 /* Merge the two clusters in "c" connected by the edge in "graph"
   1193  * with index "edge" into a single cluster.
   1194  * If it turns out to be impossible to merge these two clusters,
   1195  * then mark the edge as "no_merge" such that it will not be
   1196  * considered again.
   1197  *
   1198  * First mark all SCCs that need to be merged.  This includes the SCCs
   1199  * in the two clusters, but it may also include the SCCs
   1200  * of intermediate clusters.
   1201  * If there is already a no_merge edge between any pair of such SCCs,
   1202  * then simply mark the current edge as no_merge as well.
   1203  * Likewise, if any of those edges was postponed by has_bounded_distances,
   1204  * then postpone the current edge as well.
   1205  * Otherwise, try and merge the clusters and mark "edge" as "no_merge"
   1206  * if the clusters did not end up getting merged, unless the non-merge
   1207  * is due to the fact that the edge was postponed.  This postponement
   1208  * can be recognized by a change in weight (from non-negative to negative).
   1209  */
   1210 static isl_stat merge_clusters_along_edge(isl_ctx *ctx,
   1211 	struct isl_sched_graph *graph, int edge, struct isl_clustering *c)
   1212 {
   1213 	isl_bool merged;
   1214 	int edge_weight = graph->edge[edge].weight;
   1215 
   1216 	if (mark_merge_sccs(ctx, graph, edge, c) < 0)
   1217 		return isl_stat_error;
   1218 
   1219 	if (any_no_merge(graph, c->scc_in_merge, &graph->edge[edge]))
   1220 		merged = isl_bool_false;
   1221 	else
   1222 		merged = try_merge(ctx, graph, c);
   1223 	if (merged < 0)
   1224 		return isl_stat_error;
   1225 	if (!merged && edge_weight == graph->edge[edge].weight)
   1226 		graph->edge[edge].no_merge = 1;
   1227 
   1228 	return isl_stat_ok;
   1229 }
   1230 
   1231 /* Does "node" belong to the cluster identified by "cluster"?
   1232  */
   1233 static int node_cluster_exactly(struct isl_sched_node *node, int cluster)
   1234 {
   1235 	return node->cluster == cluster;
   1236 }
   1237 
   1238 /* Does "edge" connect two nodes belonging to the cluster
   1239  * identified by "cluster"?
   1240  */
   1241 static int edge_cluster_exactly(struct isl_sched_edge *edge, int cluster)
   1242 {
   1243 	return edge->src->cluster == cluster && edge->dst->cluster == cluster;
   1244 }
   1245 
   1246 /* Swap the schedule of "node1" and "node2".
   1247  * Both nodes have been derived from the same node in a common parent graph.
   1248  * Since the "coincident" field is shared with that node
   1249  * in the parent graph, there is no need to also swap this field.
   1250  */
   1251 static void swap_sched(struct isl_sched_node *node1,
   1252 	struct isl_sched_node *node2)
   1253 {
   1254 	isl_mat *sched;
   1255 	isl_map *sched_map;
   1256 
   1257 	sched = node1->sched;
   1258 	node1->sched = node2->sched;
   1259 	node2->sched = sched;
   1260 
   1261 	sched_map = node1->sched_map;
   1262 	node1->sched_map = node2->sched_map;
   1263 	node2->sched_map = sched_map;
   1264 }
   1265 
   1266 /* Copy the current band schedule from the SCCs that form the cluster
   1267  * with index "pos" to the actual cluster at position "pos".
   1268  * By construction, the index of the first SCC that belongs to the cluster
   1269  * is also "pos".
   1270  *
   1271  * The order of the nodes inside both the SCCs and the cluster
   1272  * is assumed to be same as the order in the original "graph".
   1273  *
   1274  * Since the SCC graphs will no longer be used after this function,
   1275  * the schedules are actually swapped rather than copied.
   1276  */
   1277 static isl_stat copy_partial(struct isl_sched_graph *graph,
   1278 	struct isl_clustering *c, int pos)
   1279 {
   1280 	int i, j;
   1281 
   1282 	c->cluster[pos].n_total_row = c->scc[pos].n_total_row;
   1283 	c->cluster[pos].n_row = c->scc[pos].n_row;
   1284 	c->cluster[pos].maxvar = c->scc[pos].maxvar;
   1285 	j = 0;
   1286 	for (i = 0; i < graph->n; ++i) {
   1287 		int k;
   1288 		int s;
   1289 
   1290 		if (graph->node[i].cluster != pos)
   1291 			continue;
   1292 		s = graph->node[i].scc;
   1293 		k = c->scc_node[s]++;
   1294 		swap_sched(&c->cluster[pos].node[j], &c->scc[s].node[k]);
   1295 		if (c->scc[s].maxvar > c->cluster[pos].maxvar)
   1296 			c->cluster[pos].maxvar = c->scc[s].maxvar;
   1297 		++j;
   1298 	}
   1299 
   1300 	return isl_stat_ok;
   1301 }
   1302 
   1303 /* Is there a (conditional) validity dependence from node[j] to node[i],
   1304  * forcing node[i] to follow node[j] or do the nodes belong to the same
   1305  * cluster?
   1306  */
   1307 static isl_bool node_follows_strong_or_same_cluster(int i, int j, void *user)
   1308 {
   1309 	struct isl_sched_graph *graph = user;
   1310 
   1311 	if (graph->node[i].cluster == graph->node[j].cluster)
   1312 		return isl_bool_true;
   1313 	return isl_sched_graph_has_validity_edge(graph, &graph->node[j],
   1314 							&graph->node[i]);
   1315 }
   1316 
   1317 /* Extract the merged clusters of SCCs in "graph", sort them, and
   1318  * store them in c->clusters.  Update c->scc_cluster accordingly.
   1319  *
   1320  * First keep track of the cluster containing the SCC to which a node
   1321  * belongs in the node itself.
   1322  * Then extract the clusters into c->clusters, copying the current
   1323  * band schedule from the SCCs that belong to the cluster.
   1324  * Do this only once per cluster.
   1325  *
   1326  * Finally, topologically sort the clusters and update c->scc_cluster
   1327  * to match the new scc numbering.  While the SCCs were originally
   1328  * sorted already, some SCCs that depend on some other SCCs may
   1329  * have been merged with SCCs that appear before these other SCCs.
   1330  * A reordering may therefore be required.
   1331  */
   1332 static isl_stat extract_clusters(isl_ctx *ctx, struct isl_sched_graph *graph,
   1333 	struct isl_clustering *c)
   1334 {
   1335 	int i;
   1336 
   1337 	for (i = 0; i < graph->n; ++i)
   1338 		graph->node[i].cluster = c->scc_cluster[graph->node[i].scc];
   1339 
   1340 	for (i = 0; i < graph->scc; ++i) {
   1341 		if (c->scc_cluster[i] != i)
   1342 			continue;
   1343 		if (isl_sched_graph_extract_sub_graph(ctx, graph,
   1344 				&node_cluster_exactly,
   1345 				&edge_cluster_exactly, i, &c->cluster[i]) < 0)
   1346 			return isl_stat_error;
   1347 		c->cluster[i].src_scc = -1;
   1348 		c->cluster[i].dst_scc = -1;
   1349 		if (copy_partial(graph, c, i) < 0)
   1350 			return isl_stat_error;
   1351 	}
   1352 
   1353 	if (isl_sched_graph_detect_ccs(ctx, graph,
   1354 				&node_follows_strong_or_same_cluster) < 0)
   1355 		return isl_stat_error;
   1356 	for (i = 0; i < graph->n; ++i)
   1357 		c->scc_cluster[graph->node[i].scc] = graph->node[i].cluster;
   1358 
   1359 	return isl_stat_ok;
   1360 }
   1361 
   1362 /* Compute weights on the proximity edges of "graph" that can
   1363  * be used by find_proximity to find the most appropriate
   1364  * proximity edge to use to merge two clusters in "c".
   1365  * The weights are also used by has_bounded_distances to determine
   1366  * whether the merge should be allowed.
   1367  * Store the maximum of the computed weights in graph->max_weight.
   1368  *
   1369  * The computed weight is a measure for the number of remaining schedule
   1370  * dimensions that can still be completely aligned.
   1371  * In particular, compute the number of equalities between
   1372  * input dimensions and output dimensions in the proximity constraints.
   1373  * The directions that are already handled by outer schedule bands
   1374  * are projected out prior to determining this number.
   1375  *
   1376  * Edges that will never be considered by find_proximity are ignored.
   1377  */
   1378 static isl_stat compute_weights(struct isl_sched_graph *graph,
   1379 	struct isl_clustering *c)
   1380 {
   1381 	int i;
   1382 
   1383 	graph->max_weight = 0;
   1384 
   1385 	for (i = 0; i < graph->n_edge; ++i) {
   1386 		struct isl_sched_edge *edge = &graph->edge[i];
   1387 		struct isl_sched_node *src = edge->src;
   1388 		struct isl_sched_node *dst = edge->dst;
   1389 		isl_basic_map *hull;
   1390 		isl_bool prox;
   1391 		isl_size n_in, n_out, n;
   1392 
   1393 		prox = is_non_empty_proximity(edge);
   1394 		if (prox < 0)
   1395 			return isl_stat_error;
   1396 		if (!prox)
   1397 			continue;
   1398 		if (bad_cluster(&c->scc[edge->src->scc]) ||
   1399 		    bad_cluster(&c->scc[edge->dst->scc]))
   1400 			continue;
   1401 		if (c->scc_cluster[edge->dst->scc] ==
   1402 		    c->scc_cluster[edge->src->scc])
   1403 			continue;
   1404 
   1405 		hull = isl_map_affine_hull(isl_map_copy(edge->map));
   1406 		hull = isl_basic_map_transform_dims(hull, isl_dim_in, 0,
   1407 						    isl_mat_copy(src->vmap));
   1408 		hull = isl_basic_map_transform_dims(hull, isl_dim_out, 0,
   1409 						    isl_mat_copy(dst->vmap));
   1410 		hull = isl_basic_map_project_out(hull,
   1411 						isl_dim_in, 0, src->rank);
   1412 		hull = isl_basic_map_project_out(hull,
   1413 						isl_dim_out, 0, dst->rank);
   1414 		hull = isl_basic_map_remove_divs(hull);
   1415 		n_in = isl_basic_map_dim(hull, isl_dim_in);
   1416 		n_out = isl_basic_map_dim(hull, isl_dim_out);
   1417 		if (n_in < 0 || n_out < 0)
   1418 			hull = isl_basic_map_free(hull);
   1419 		hull = isl_basic_map_drop_constraints_not_involving_dims(hull,
   1420 							isl_dim_in, 0, n_in);
   1421 		hull = isl_basic_map_drop_constraints_not_involving_dims(hull,
   1422 							isl_dim_out, 0, n_out);
   1423 		n = isl_basic_map_n_equality(hull);
   1424 		isl_basic_map_free(hull);
   1425 		if (n < 0)
   1426 			return isl_stat_error;
   1427 		edge->weight = n;
   1428 
   1429 		if (edge->weight > graph->max_weight)
   1430 			graph->max_weight = edge->weight;
   1431 	}
   1432 
   1433 	return isl_stat_ok;
   1434 }
   1435 
   1436 /* Call isl_schedule_node_compute_finish_band on each of the clusters in "c" and
   1437  * update "node" to arrange for them to be executed in an order
   1438  * possibly involving set nodes that generalizes the topological order
   1439  * determined by the scc fields of the nodes in "graph".
   1440  *
   1441  * Note that at this stage, there are graph->scc clusters and
   1442  * their positions in c->cluster are determined by the values
   1443  * of c->scc_cluster.
   1444  *
   1445  * Construct an isl_scc_graph and perform the decomposition
   1446  * using this graph.
   1447  */
   1448 static __isl_give isl_schedule_node *finish_bands_decompose(
   1449 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
   1450 	struct isl_clustering *c)
   1451 {
   1452 	isl_ctx *ctx;
   1453 	struct isl_scc_graph *scc_graph;
   1454 
   1455 	ctx = isl_schedule_node_get_ctx(node);
   1456 
   1457 	scc_graph = isl_scc_graph_from_sched_graph(ctx, graph, c);
   1458 	node = isl_scc_graph_decompose(scc_graph, node);
   1459 	isl_scc_graph_free(scc_graph);
   1460 
   1461 	return node;
   1462 }
   1463 
   1464 /* Call isl_schedule_node_compute_finish_band on each of the clusters in "c"
   1465  * in their topological order.  This order is determined by the scc
   1466  * fields of the nodes in "graph".
   1467  * Combine the results in a sequence expressing the topological order.
   1468  *
   1469  * If there is only one cluster left, then there is no need to introduce
   1470  * a sequence node.  Also, in this case, the cluster necessarily contains
   1471  * the SCC at position 0 in the original graph and is therefore also
   1472  * stored in the first cluster of "c".
   1473  *
   1474  * If there are more than two clusters left, then some subsets of the clusters
   1475  * may still be independent of each other.  These could then still
   1476  * be reordered with respect to each other.  Call finish_bands_decompose
   1477  * to try and construct an ordering involving set and sequence nodes
   1478  * that generalizes the topological order.
   1479  * Note that at the outermost level there can be no independent components
   1480  * because isl_schedule_node_compute_wcc_clustering is called
   1481  * on a (weakly) connected component.
   1482  */
   1483 static __isl_give isl_schedule_node *finish_bands_clustering(
   1484 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
   1485 	struct isl_clustering *c)
   1486 {
   1487 	int i;
   1488 	isl_ctx *ctx;
   1489 	isl_union_set_list *filters;
   1490 
   1491 	if (graph->scc == 1)
   1492 		return isl_schedule_node_compute_finish_band(node,
   1493 							&c->cluster[0], 0);
   1494 	if (graph->scc > 2)
   1495 		return finish_bands_decompose(node, graph, c);
   1496 
   1497 	ctx = isl_schedule_node_get_ctx(node);
   1498 
   1499 	filters = isl_sched_graph_extract_sccs(ctx, graph);
   1500 	node = isl_schedule_node_insert_sequence(node, filters);
   1501 
   1502 	for (i = 0; i < graph->scc; ++i) {
   1503 		int j = c->scc_cluster[i];
   1504 		node = isl_schedule_node_grandchild(node, i, 0);
   1505 		node = isl_schedule_node_compute_finish_band(node,
   1506 							&c->cluster[j], 0);
   1507 		node = isl_schedule_node_grandparent(node);
   1508 	}
   1509 
   1510 	return node;
   1511 }
   1512 
   1513 /* Compute a schedule for a connected dependence graph by first considering
   1514  * each strongly connected component (SCC) in the graph separately and then
   1515  * incrementally combining them into clusters.
   1516  * Return the updated schedule node.
   1517  *
   1518  * Initially, each cluster consists of a single SCC, each with its
   1519  * own band schedule.  The algorithm then tries to merge pairs
   1520  * of clusters along a proximity edge until no more suitable
   1521  * proximity edges can be found.  During this merging, the schedule
   1522  * is maintained in the individual SCCs.
   1523  * After the merging is completed, the full resulting clusters
   1524  * are extracted and in finish_bands_clustering,
   1525  * isl_schedule_node_compute_finish_band is called on each of them to integrate
   1526  * the band into "node" and to continue the computation.
   1527  *
   1528  * compute_weights initializes the weights that are used by find_proximity.
   1529  */
   1530 __isl_give isl_schedule_node *isl_schedule_node_compute_wcc_clustering(
   1531 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
   1532 {
   1533 	isl_ctx *ctx;
   1534 	struct isl_clustering c;
   1535 	int i;
   1536 
   1537 	ctx = isl_schedule_node_get_ctx(node);
   1538 
   1539 	if (clustering_init(ctx, &c, graph) < 0)
   1540 		goto error;
   1541 
   1542 	if (compute_weights(graph, &c) < 0)
   1543 		goto error;
   1544 
   1545 	for (;;) {
   1546 		i = find_proximity(graph, &c);
   1547 		if (i < 0)
   1548 			goto error;
   1549 		if (i >= graph->n_edge)
   1550 			break;
   1551 		if (merge_clusters_along_edge(ctx, graph, i, &c) < 0)
   1552 			goto error;
   1553 	}
   1554 
   1555 	if (extract_clusters(ctx, graph, &c) < 0)
   1556 		goto error;
   1557 
   1558 	node = finish_bands_clustering(node, graph, &c);
   1559 
   1560 	clustering_free(ctx, &c);
   1561 	return node;
   1562 error:
   1563 	clustering_free(ctx, &c);
   1564 	return isl_schedule_node_free(node);
   1565 }
   1566