Home | History | Annotate | Line # | Download | only in dist
      1  1.1  mrg /*
      2  1.1  mrg  * Copyright 2015      Sven Verdoolaege
      3  1.1  mrg  *
      4  1.1  mrg  * Use of this software is governed by the MIT license
      5  1.1  mrg  *
      6  1.1  mrg  * Written by Sven Verdoolaege
      7  1.1  mrg  */
      8  1.1  mrg 
      9  1.1  mrg #include "isl_map_private.h"
     10  1.1  mrg 
     11  1.1  mrg #include <isl/id.h>
     12  1.1  mrg #include <isl/schedule_node.h>
     13  1.1  mrg #include <isl/union_set.h>
     14  1.1  mrg 
     15  1.1  mrg #include "isl_mat_private.h"
     16  1.1  mrg #include "isl_scheduler_clustering.h"
     17  1.1  mrg #include "isl_scheduler_scc.h"
     18  1.1  mrg #include "isl_seq.h"
     19  1.1  mrg #include "isl_tarjan.h"
     20  1.1  mrg 
     21  1.1  mrg /* Initialize the clustering data structure "c" from "graph".
     22  1.1  mrg  *
     23  1.1  mrg  * In particular, allocate memory, extract the SCCs from "graph"
     24  1.1  mrg  * into c->scc, initialize scc_cluster and construct
     25  1.1  mrg  * a band of schedule rows for each SCC.
     26  1.1  mrg  * Within each SCC, there is only one SCC by definition.
     27  1.1  mrg  * Each SCC initially belongs to a cluster containing only that SCC.
     28  1.1  mrg  */
     29  1.1  mrg static isl_stat clustering_init(isl_ctx *ctx, struct isl_clustering *c,
     30  1.1  mrg 	struct isl_sched_graph *graph)
     31  1.1  mrg {
     32  1.1  mrg 	int i;
     33  1.1  mrg 
     34  1.1  mrg 	c->n = graph->scc;
     35  1.1  mrg 	c->scc = isl_calloc_array(ctx, struct isl_sched_graph, c->n);
     36  1.1  mrg 	c->cluster = isl_calloc_array(ctx, struct isl_sched_graph, c->n);
     37  1.1  mrg 	c->scc_cluster = isl_calloc_array(ctx, int, c->n);
     38  1.1  mrg 	c->scc_node = isl_calloc_array(ctx, int, c->n);
     39  1.1  mrg 	c->scc_in_merge = isl_calloc_array(ctx, int, c->n);
     40  1.1  mrg 	if (!c->scc || !c->cluster ||
     41  1.1  mrg 	    !c->scc_cluster || !c->scc_node || !c->scc_in_merge)
     42  1.1  mrg 		return isl_stat_error;
     43  1.1  mrg 
     44  1.1  mrg 	for (i = 0; i < c->n; ++i) {
     45  1.1  mrg 		if (isl_sched_graph_extract_sub_graph(ctx, graph,
     46  1.1  mrg 					&isl_sched_node_scc_exactly,
     47  1.1  mrg 					&isl_sched_edge_scc_exactly,
     48  1.1  mrg 					i, &c->scc[i]) < 0)
     49  1.1  mrg 			return isl_stat_error;
     50  1.1  mrg 		c->scc[i].scc = 1;
     51  1.1  mrg 		if (isl_sched_graph_compute_maxvar(&c->scc[i]) < 0)
     52  1.1  mrg 			return isl_stat_error;
     53  1.1  mrg 		if (isl_schedule_node_compute_wcc_band(ctx, &c->scc[i]) < 0)
     54  1.1  mrg 			return isl_stat_error;
     55  1.1  mrg 		c->scc_cluster[i] = i;
     56  1.1  mrg 	}
     57  1.1  mrg 
     58  1.1  mrg 	return isl_stat_ok;
     59  1.1  mrg }
     60  1.1  mrg 
     61  1.1  mrg /* Free all memory allocated for "c".
     62  1.1  mrg  */
     63  1.1  mrg static void clustering_free(isl_ctx *ctx, struct isl_clustering *c)
     64  1.1  mrg {
     65  1.1  mrg 	int i;
     66  1.1  mrg 
     67  1.1  mrg 	if (c->scc)
     68  1.1  mrg 		for (i = 0; i < c->n; ++i)
     69  1.1  mrg 			isl_sched_graph_free(ctx, &c->scc[i]);
     70  1.1  mrg 	free(c->scc);
     71  1.1  mrg 	if (c->cluster)
     72  1.1  mrg 		for (i = 0; i < c->n; ++i)
     73  1.1  mrg 			isl_sched_graph_free(ctx, &c->cluster[i]);
     74  1.1  mrg 	free(c->cluster);
     75  1.1  mrg 	free(c->scc_cluster);
     76  1.1  mrg 	free(c->scc_node);
     77  1.1  mrg 	free(c->scc_in_merge);
     78  1.1  mrg }
     79  1.1  mrg 
     80  1.1  mrg /* Should we refrain from merging the cluster in "graph" with
     81  1.1  mrg  * any other cluster?
     82  1.1  mrg  * In particular, is its current schedule band empty and incomplete.
     83  1.1  mrg  */
     84  1.1  mrg static int bad_cluster(struct isl_sched_graph *graph)
     85  1.1  mrg {
     86  1.1  mrg 	return graph->n_row < graph->maxvar &&
     87  1.1  mrg 		graph->n_total_row == graph->band_start;
     88  1.1  mrg }
     89  1.1  mrg 
     90  1.1  mrg /* Is "edge" a proximity edge with a non-empty dependence relation?
     91  1.1  mrg  */
     92  1.1  mrg static isl_bool is_non_empty_proximity(struct isl_sched_edge *edge)
     93  1.1  mrg {
     94  1.1  mrg 	if (!isl_sched_edge_is_proximity(edge))
     95  1.1  mrg 		return isl_bool_false;
     96  1.1  mrg 	return isl_bool_not(isl_map_plain_is_empty(edge->map));
     97  1.1  mrg }
     98  1.1  mrg 
     99  1.1  mrg /* Return the index of an edge in "graph" that can be used to merge
    100  1.1  mrg  * two clusters in "c".
    101  1.1  mrg  * Return graph->n_edge if no such edge can be found.
    102  1.1  mrg  * Return -1 on error.
    103  1.1  mrg  *
    104  1.1  mrg  * In particular, return a proximity edge between two clusters
    105  1.1  mrg  * that is not marked "no_merge" and such that neither of the
    106  1.1  mrg  * two clusters has an incomplete, empty band.
    107  1.1  mrg  *
    108  1.1  mrg  * If there are multiple such edges, then try and find the most
    109  1.1  mrg  * appropriate edge to use for merging.  In particular, pick the edge
    110  1.1  mrg  * with the greatest weight.  If there are multiple of those,
    111  1.1  mrg  * then pick one with the shortest distance between
    112  1.1  mrg  * the two cluster representatives.
    113  1.1  mrg  */
    114  1.1  mrg static int find_proximity(struct isl_sched_graph *graph,
    115  1.1  mrg 	struct isl_clustering *c)
    116  1.1  mrg {
    117  1.1  mrg 	int i, best = graph->n_edge, best_dist, best_weight;
    118  1.1  mrg 
    119  1.1  mrg 	for (i = 0; i < graph->n_edge; ++i) {
    120  1.1  mrg 		struct isl_sched_edge *edge = &graph->edge[i];
    121  1.1  mrg 		int dist, weight;
    122  1.1  mrg 		isl_bool prox;
    123  1.1  mrg 
    124  1.1  mrg 		prox = is_non_empty_proximity(edge);
    125  1.1  mrg 		if (prox < 0)
    126  1.1  mrg 			return -1;
    127  1.1  mrg 		if (!prox)
    128  1.1  mrg 			continue;
    129  1.1  mrg 		if (edge->no_merge)
    130  1.1  mrg 			continue;
    131  1.1  mrg 		if (bad_cluster(&c->scc[edge->src->scc]) ||
    132  1.1  mrg 		    bad_cluster(&c->scc[edge->dst->scc]))
    133  1.1  mrg 			continue;
    134  1.1  mrg 		dist = c->scc_cluster[edge->dst->scc] -
    135  1.1  mrg 			c->scc_cluster[edge->src->scc];
    136  1.1  mrg 		if (dist == 0)
    137  1.1  mrg 			continue;
    138  1.1  mrg 		weight = edge->weight;
    139  1.1  mrg 		if (best < graph->n_edge) {
    140  1.1  mrg 			if (best_weight > weight)
    141  1.1  mrg 				continue;
    142  1.1  mrg 			if (best_weight == weight && best_dist <= dist)
    143  1.1  mrg 				continue;
    144  1.1  mrg 		}
    145  1.1  mrg 		best = i;
    146  1.1  mrg 		best_dist = dist;
    147  1.1  mrg 		best_weight = weight;
    148  1.1  mrg 	}
    149  1.1  mrg 
    150  1.1  mrg 	return best;
    151  1.1  mrg }
    152  1.1  mrg 
    153  1.1  mrg /* Internal data structure used in mark_merge_sccs.
    154  1.1  mrg  *
    155  1.1  mrg  * "graph" is the dependence graph in which a strongly connected
    156  1.1  mrg  * component is constructed.
    157  1.1  mrg  * "scc_cluster" maps each SCC index to the cluster to which it belongs.
    158  1.1  mrg  * "src" and "dst" are the indices of the nodes that are being merged.
    159  1.1  mrg  */
    160  1.1  mrg struct isl_mark_merge_sccs_data {
    161  1.1  mrg 	struct isl_sched_graph *graph;
    162  1.1  mrg 	int *scc_cluster;
    163  1.1  mrg 	int src;
    164  1.1  mrg 	int dst;
    165  1.1  mrg };
    166  1.1  mrg 
    167  1.1  mrg /* Check whether the cluster containing node "i" depends on the cluster
    168  1.1  mrg  * containing node "j".  If "i" and "j" belong to the same cluster,
    169  1.1  mrg  * then they are taken to depend on each other to ensure that
    170  1.1  mrg  * the resulting strongly connected component consists of complete
    171  1.1  mrg  * clusters.  Furthermore, if "i" and "j" are the two nodes that
    172  1.1  mrg  * are being merged, then they are taken to depend on each other as well.
    173  1.1  mrg  * Otherwise, check if there is a (conditional) validity dependence
    174  1.1  mrg  * from node[j] to node[i], forcing node[i] to follow node[j].
    175  1.1  mrg  */
    176  1.1  mrg static isl_bool cluster_follows(int i, int j, void *user)
    177  1.1  mrg {
    178  1.1  mrg 	struct isl_mark_merge_sccs_data *data = user;
    179  1.1  mrg 	struct isl_sched_graph *graph = data->graph;
    180  1.1  mrg 	int *scc_cluster = data->scc_cluster;
    181  1.1  mrg 
    182  1.1  mrg 	if (data->src == i && data->dst == j)
    183  1.1  mrg 		return isl_bool_true;
    184  1.1  mrg 	if (data->src == j && data->dst == i)
    185  1.1  mrg 		return isl_bool_true;
    186  1.1  mrg 	if (scc_cluster[graph->node[i].scc] == scc_cluster[graph->node[j].scc])
    187  1.1  mrg 		return isl_bool_true;
    188  1.1  mrg 
    189  1.1  mrg 	return isl_sched_graph_has_validity_edge(graph, &graph->node[j],
    190  1.1  mrg 							&graph->node[i]);
    191  1.1  mrg }
    192  1.1  mrg 
    193  1.1  mrg /* Mark all SCCs that belong to either of the two clusters in "c"
    194  1.1  mrg  * connected by the edge in "graph" with index "edge", or to any
    195  1.1  mrg  * of the intermediate clusters.
    196  1.1  mrg  * The marking is recorded in c->scc_in_merge.
    197  1.1  mrg  *
    198  1.1  mrg  * The given edge has been selected for merging two clusters,
    199  1.1  mrg  * meaning that there is at least a proximity edge between the two nodes.
    200  1.1  mrg  * However, there may also be (indirect) validity dependences
    201  1.1  mrg  * between the two nodes.  When merging the two clusters, all clusters
    202  1.1  mrg  * containing one or more of the intermediate nodes along the
    203  1.1  mrg  * indirect validity dependences need to be merged in as well.
    204  1.1  mrg  *
    205  1.1  mrg  * First collect all such nodes by computing the strongly connected
    206  1.1  mrg  * component (SCC) containing the two nodes connected by the edge, where
    207  1.1  mrg  * the two nodes are considered to depend on each other to make
    208  1.1  mrg  * sure they end up in the same SCC.  Similarly, each node is considered
    209  1.1  mrg  * to depend on every other node in the same cluster to ensure
    210  1.1  mrg  * that the SCC consists of complete clusters.
    211  1.1  mrg  *
    212  1.1  mrg  * Then the original SCCs that contain any of these nodes are marked
    213  1.1  mrg  * in c->scc_in_merge.
    214  1.1  mrg  */
    215  1.1  mrg static isl_stat mark_merge_sccs(isl_ctx *ctx, struct isl_sched_graph *graph,
    216  1.1  mrg 	int edge, struct isl_clustering *c)
    217  1.1  mrg {
    218  1.1  mrg 	struct isl_mark_merge_sccs_data data;
    219  1.1  mrg 	struct isl_tarjan_graph *g;
    220  1.1  mrg 	int i;
    221  1.1  mrg 
    222  1.1  mrg 	for (i = 0; i < c->n; ++i)
    223  1.1  mrg 		c->scc_in_merge[i] = 0;
    224  1.1  mrg 
    225  1.1  mrg 	data.graph = graph;
    226  1.1  mrg 	data.scc_cluster = c->scc_cluster;
    227  1.1  mrg 	data.src = graph->edge[edge].src - graph->node;
    228  1.1  mrg 	data.dst = graph->edge[edge].dst - graph->node;
    229  1.1  mrg 
    230  1.1  mrg 	g = isl_tarjan_graph_component(ctx, graph->n, data.dst,
    231  1.1  mrg 					&cluster_follows, &data);
    232  1.1  mrg 	if (!g)
    233  1.1  mrg 		goto error;
    234  1.1  mrg 
    235  1.1  mrg 	i = g->op;
    236  1.1  mrg 	if (i < 3)
    237  1.1  mrg 		isl_die(ctx, isl_error_internal,
    238  1.1  mrg 			"expecting at least two nodes in component",
    239  1.1  mrg 			goto error);
    240  1.1  mrg 	if (g->order[--i] != -1)
    241  1.1  mrg 		isl_die(ctx, isl_error_internal,
    242  1.1  mrg 			"expecting end of component marker", goto error);
    243  1.1  mrg 
    244  1.1  mrg 	for (--i; i >= 0 && g->order[i] != -1; --i) {
    245  1.1  mrg 		int scc = graph->node[g->order[i]].scc;
    246  1.1  mrg 		c->scc_in_merge[scc] = 1;
    247  1.1  mrg 	}
    248  1.1  mrg 
    249  1.1  mrg 	isl_tarjan_graph_free(g);
    250  1.1  mrg 	return isl_stat_ok;
    251  1.1  mrg error:
    252  1.1  mrg 	isl_tarjan_graph_free(g);
    253  1.1  mrg 	return isl_stat_error;
    254  1.1  mrg }
    255  1.1  mrg 
    256  1.1  mrg /* Construct the identifier "cluster_i".
    257  1.1  mrg  */
    258  1.1  mrg static __isl_give isl_id *cluster_id(isl_ctx *ctx, int i)
    259  1.1  mrg {
    260  1.1  mrg 	char name[40];
    261  1.1  mrg 
    262  1.1  mrg 	snprintf(name, sizeof(name), "cluster_%d", i);
    263  1.1  mrg 	return isl_id_alloc(ctx, name, NULL);
    264  1.1  mrg }
    265  1.1  mrg 
    266  1.1  mrg /* Construct the space of the cluster with index "i" containing
    267  1.1  mrg  * the strongly connected component "scc".
    268  1.1  mrg  *
    269  1.1  mrg  * In particular, construct a space called cluster_i with dimension equal
    270  1.1  mrg  * to the number of schedule rows in the current band of "scc".
    271  1.1  mrg  */
    272  1.1  mrg static __isl_give isl_space *cluster_space(struct isl_sched_graph *scc, int i)
    273  1.1  mrg {
    274  1.1  mrg 	int nvar;
    275  1.1  mrg 	isl_space *space;
    276  1.1  mrg 	isl_id *id;
    277  1.1  mrg 
    278  1.1  mrg 	nvar = scc->n_total_row - scc->band_start;
    279  1.1  mrg 	space = isl_space_copy(scc->node[0].space);
    280  1.1  mrg 	space = isl_space_params(space);
    281  1.1  mrg 	space = isl_space_set_from_params(space);
    282  1.1  mrg 	space = isl_space_add_dims(space, isl_dim_set, nvar);
    283  1.1  mrg 	id = cluster_id(isl_space_get_ctx(space), i);
    284  1.1  mrg 	space = isl_space_set_tuple_id(space, isl_dim_set, id);
    285  1.1  mrg 
    286  1.1  mrg 	return space;
    287  1.1  mrg }
    288  1.1  mrg 
    289  1.1  mrg /* Collect the domain of the graph for merging clusters.
    290  1.1  mrg  *
    291  1.1  mrg  * In particular, for each cluster with first SCC "i", construct
    292  1.1  mrg  * a set in the space called cluster_i with dimension equal
    293  1.1  mrg  * to the number of schedule rows in the current band of the cluster.
    294  1.1  mrg  */
    295  1.1  mrg static __isl_give isl_union_set *collect_domain(isl_ctx *ctx,
    296  1.1  mrg 	struct isl_sched_graph *graph, struct isl_clustering *c)
    297  1.1  mrg {
    298  1.1  mrg 	int i;
    299  1.1  mrg 	isl_space *space;
    300  1.1  mrg 	isl_union_set *domain;
    301  1.1  mrg 
    302  1.1  mrg 	space = isl_space_params_alloc(ctx, 0);
    303  1.1  mrg 	domain = isl_union_set_empty(space);
    304  1.1  mrg 
    305  1.1  mrg 	for (i = 0; i < graph->scc; ++i) {
    306  1.1  mrg 		isl_space *space;
    307  1.1  mrg 
    308  1.1  mrg 		if (!c->scc_in_merge[i])
    309  1.1  mrg 			continue;
    310  1.1  mrg 		if (c->scc_cluster[i] != i)
    311  1.1  mrg 			continue;
    312  1.1  mrg 		space = cluster_space(&c->scc[i], i);
    313  1.1  mrg 		domain = isl_union_set_add_set(domain, isl_set_universe(space));
    314  1.1  mrg 	}
    315  1.1  mrg 
    316  1.1  mrg 	return domain;
    317  1.1  mrg }
    318  1.1  mrg 
    319  1.1  mrg /* Construct a map from the original instances to the corresponding
    320  1.1  mrg  * cluster instance in the current bands of the clusters in "c".
    321  1.1  mrg  */
    322  1.1  mrg static __isl_give isl_union_map *collect_cluster_map(isl_ctx *ctx,
    323  1.1  mrg 	struct isl_sched_graph *graph, struct isl_clustering *c)
    324  1.1  mrg {
    325  1.1  mrg 	int i, j;
    326  1.1  mrg 	isl_space *space;
    327  1.1  mrg 	isl_union_map *cluster_map;
    328  1.1  mrg 
    329  1.1  mrg 	space = isl_space_params_alloc(ctx, 0);
    330  1.1  mrg 	cluster_map = isl_union_map_empty(space);
    331  1.1  mrg 	for (i = 0; i < graph->scc; ++i) {
    332  1.1  mrg 		int start, n;
    333  1.1  mrg 		isl_id *id;
    334  1.1  mrg 
    335  1.1  mrg 		if (!c->scc_in_merge[i])
    336  1.1  mrg 			continue;
    337  1.1  mrg 
    338  1.1  mrg 		id = cluster_id(ctx, c->scc_cluster[i]);
    339  1.1  mrg 		start = c->scc[i].band_start;
    340  1.1  mrg 		n = c->scc[i].n_total_row - start;
    341  1.1  mrg 		for (j = 0; j < c->scc[i].n; ++j) {
    342  1.1  mrg 			isl_multi_aff *ma;
    343  1.1  mrg 			isl_map *map;
    344  1.1  mrg 			struct isl_sched_node *node = &c->scc[i].node[j];
    345  1.1  mrg 
    346  1.1  mrg 			ma = isl_sched_node_extract_partial_schedule_multi_aff(
    347  1.1  mrg 								node, start, n);
    348  1.1  mrg 			ma = isl_multi_aff_set_tuple_id(ma, isl_dim_out,
    349  1.1  mrg 							    isl_id_copy(id));
    350  1.1  mrg 			map = isl_map_from_multi_aff(ma);
    351  1.1  mrg 			cluster_map = isl_union_map_add_map(cluster_map, map);
    352  1.1  mrg 		}
    353  1.1  mrg 		isl_id_free(id);
    354  1.1  mrg 	}
    355  1.1  mrg 
    356  1.1  mrg 	return cluster_map;
    357  1.1  mrg }
    358  1.1  mrg 
    359  1.1  mrg /* Add "umap" to the schedule constraints "sc" of all types of "edge"
    360  1.1  mrg  * that are not isl_edge_condition or isl_edge_conditional_validity.
    361  1.1  mrg  */
    362  1.1  mrg static __isl_give isl_schedule_constraints *add_non_conditional_constraints(
    363  1.1  mrg 	struct isl_sched_edge *edge, __isl_keep isl_union_map *umap,
    364  1.1  mrg 	__isl_take isl_schedule_constraints *sc)
    365  1.1  mrg {
    366  1.1  mrg 	enum isl_edge_type t;
    367  1.1  mrg 
    368  1.1  mrg 	if (!sc)
    369  1.1  mrg 		return NULL;
    370  1.1  mrg 
    371  1.1  mrg 	for (t = isl_edge_first; t <= isl_edge_last; ++t) {
    372  1.1  mrg 		if (t == isl_edge_condition ||
    373  1.1  mrg 		    t == isl_edge_conditional_validity)
    374  1.1  mrg 			continue;
    375  1.1  mrg 		if (!isl_sched_edge_has_type(edge, t))
    376  1.1  mrg 			continue;
    377  1.1  mrg 		sc = isl_schedule_constraints_add(sc, t,
    378  1.1  mrg 						    isl_union_map_copy(umap));
    379  1.1  mrg 	}
    380  1.1  mrg 
    381  1.1  mrg 	return sc;
    382  1.1  mrg }
    383  1.1  mrg 
    384  1.1  mrg /* Add schedule constraints of types isl_edge_condition and
    385  1.1  mrg  * isl_edge_conditional_validity to "sc" by applying "umap" to
    386  1.1  mrg  * the domains of the wrapped relations in domain and range
    387  1.1  mrg  * of the corresponding tagged constraints of "edge".
    388  1.1  mrg  */
    389  1.1  mrg static __isl_give isl_schedule_constraints *add_conditional_constraints(
    390  1.1  mrg 	struct isl_sched_edge *edge, __isl_keep isl_union_map *umap,
    391  1.1  mrg 	__isl_take isl_schedule_constraints *sc)
    392  1.1  mrg {
    393  1.1  mrg 	enum isl_edge_type t;
    394  1.1  mrg 	isl_union_map *tagged;
    395  1.1  mrg 
    396  1.1  mrg 	for (t = isl_edge_condition; t <= isl_edge_conditional_validity; ++t) {
    397  1.1  mrg 		if (!isl_sched_edge_has_type(edge, t))
    398  1.1  mrg 			continue;
    399  1.1  mrg 		if (t == isl_edge_condition)
    400  1.1  mrg 			tagged = isl_union_map_copy(edge->tagged_condition);
    401  1.1  mrg 		else
    402  1.1  mrg 			tagged = isl_union_map_copy(edge->tagged_validity);
    403  1.1  mrg 		tagged = isl_union_map_zip(tagged);
    404  1.1  mrg 		tagged = isl_union_map_apply_domain(tagged,
    405  1.1  mrg 					isl_union_map_copy(umap));
    406  1.1  mrg 		tagged = isl_union_map_zip(tagged);
    407  1.1  mrg 		sc = isl_schedule_constraints_add(sc, t, tagged);
    408  1.1  mrg 		if (!sc)
    409  1.1  mrg 			return NULL;
    410  1.1  mrg 	}
    411  1.1  mrg 
    412  1.1  mrg 	return sc;
    413  1.1  mrg }
    414  1.1  mrg 
    415  1.1  mrg /* Given a mapping "cluster_map" from the original instances to
    416  1.1  mrg  * the cluster instances, add schedule constraints on the clusters
    417  1.1  mrg  * to "sc" corresponding to the original constraints represented by "edge".
    418  1.1  mrg  *
    419  1.1  mrg  * For non-tagged dependence constraints, the cluster constraints
    420  1.1  mrg  * are obtained by applying "cluster_map" to the edge->map.
    421  1.1  mrg  *
    422  1.1  mrg  * For tagged dependence constraints, "cluster_map" needs to be applied
    423  1.1  mrg  * to the domains of the wrapped relations in domain and range
    424  1.1  mrg  * of the tagged dependence constraints.  Pick out the mappings
    425  1.1  mrg  * from these domains from "cluster_map" and construct their product.
    426  1.1  mrg  * This mapping can then be applied to the pair of domains.
    427  1.1  mrg  */
    428  1.1  mrg static __isl_give isl_schedule_constraints *collect_edge_constraints(
    429  1.1  mrg 	struct isl_sched_edge *edge, __isl_keep isl_union_map *cluster_map,
    430  1.1  mrg 	__isl_take isl_schedule_constraints *sc)
    431  1.1  mrg {
    432  1.1  mrg 	isl_union_map *umap;
    433  1.1  mrg 	isl_space *space;
    434  1.1  mrg 	isl_union_set *uset;
    435  1.1  mrg 	isl_union_map *umap1, *umap2;
    436  1.1  mrg 
    437  1.1  mrg 	if (!sc)
    438  1.1  mrg 		return NULL;
    439  1.1  mrg 
    440  1.1  mrg 	umap = isl_union_map_from_map(isl_map_copy(edge->map));
    441  1.1  mrg 	umap = isl_union_map_apply_domain(umap,
    442  1.1  mrg 				isl_union_map_copy(cluster_map));
    443  1.1  mrg 	umap = isl_union_map_apply_range(umap,
    444  1.1  mrg 				isl_union_map_copy(cluster_map));
    445  1.1  mrg 	sc = add_non_conditional_constraints(edge, umap, sc);
    446  1.1  mrg 	isl_union_map_free(umap);
    447  1.1  mrg 
    448  1.1  mrg 	if (!sc ||
    449  1.1  mrg 	    (!isl_sched_edge_is_condition(edge) &&
    450  1.1  mrg 	     !isl_sched_edge_is_conditional_validity(edge)))
    451  1.1  mrg 		return sc;
    452  1.1  mrg 
    453  1.1  mrg 	space = isl_space_domain(isl_map_get_space(edge->map));
    454  1.1  mrg 	uset = isl_union_set_from_set(isl_set_universe(space));
    455  1.1  mrg 	umap1 = isl_union_map_copy(cluster_map);
    456  1.1  mrg 	umap1 = isl_union_map_intersect_domain(umap1, uset);
    457  1.1  mrg 	space = isl_space_range(isl_map_get_space(edge->map));
    458  1.1  mrg 	uset = isl_union_set_from_set(isl_set_universe(space));
    459  1.1  mrg 	umap2 = isl_union_map_copy(cluster_map);
    460  1.1  mrg 	umap2 = isl_union_map_intersect_domain(umap2, uset);
    461  1.1  mrg 	umap = isl_union_map_product(umap1, umap2);
    462  1.1  mrg 
    463  1.1  mrg 	sc = add_conditional_constraints(edge, umap, sc);
    464  1.1  mrg 
    465  1.1  mrg 	isl_union_map_free(umap);
    466  1.1  mrg 	return sc;
    467  1.1  mrg }
    468  1.1  mrg 
    469  1.1  mrg /* Given a mapping "cluster_map" from the original instances to
    470  1.1  mrg  * the cluster instances, add schedule constraints on the clusters
    471  1.1  mrg  * to "sc" corresponding to all edges in "graph" between nodes that
    472  1.1  mrg  * belong to SCCs that are marked for merging in "scc_in_merge".
    473  1.1  mrg  */
    474  1.1  mrg static __isl_give isl_schedule_constraints *collect_constraints(
    475  1.1  mrg 	struct isl_sched_graph *graph, int *scc_in_merge,
    476  1.1  mrg 	__isl_keep isl_union_map *cluster_map,
    477  1.1  mrg 	__isl_take isl_schedule_constraints *sc)
    478  1.1  mrg {
    479  1.1  mrg 	int i;
    480  1.1  mrg 
    481  1.1  mrg 	for (i = 0; i < graph->n_edge; ++i) {
    482  1.1  mrg 		struct isl_sched_edge *edge = &graph->edge[i];
    483  1.1  mrg 
    484  1.1  mrg 		if (!scc_in_merge[edge->src->scc])
    485  1.1  mrg 			continue;
    486  1.1  mrg 		if (!scc_in_merge[edge->dst->scc])
    487  1.1  mrg 			continue;
    488  1.1  mrg 		sc = collect_edge_constraints(edge, cluster_map, sc);
    489  1.1  mrg 	}
    490  1.1  mrg 
    491  1.1  mrg 	return sc;
    492  1.1  mrg }
    493  1.1  mrg 
    494  1.1  mrg /* Construct a dependence graph for scheduling clusters with respect
    495  1.1  mrg  * to each other and store the result in "merge_graph".
    496  1.1  mrg  * In particular, the nodes of the graph correspond to the schedule
    497  1.1  mrg  * dimensions of the current bands of those clusters that have been
    498  1.1  mrg  * marked for merging in "c".
    499  1.1  mrg  *
    500  1.1  mrg  * First construct an isl_schedule_constraints object for this domain
    501  1.1  mrg  * by transforming the edges in "graph" to the domain.
    502  1.1  mrg  * Then initialize a dependence graph for scheduling from these
    503  1.1  mrg  * constraints.
    504  1.1  mrg  */
    505  1.1  mrg static isl_stat init_merge_graph(isl_ctx *ctx, struct isl_sched_graph *graph,
    506  1.1  mrg 	struct isl_clustering *c, struct isl_sched_graph *merge_graph)
    507  1.1  mrg {
    508  1.1  mrg 	isl_union_set *domain;
    509  1.1  mrg 	isl_union_map *cluster_map;
    510  1.1  mrg 	isl_schedule_constraints *sc;
    511  1.1  mrg 	isl_stat r;
    512  1.1  mrg 
    513  1.1  mrg 	domain = collect_domain(ctx, graph, c);
    514  1.1  mrg 	sc = isl_schedule_constraints_on_domain(domain);
    515  1.1  mrg 	if (!sc)
    516  1.1  mrg 		return isl_stat_error;
    517  1.1  mrg 	cluster_map = collect_cluster_map(ctx, graph, c);
    518  1.1  mrg 	sc = collect_constraints(graph, c->scc_in_merge, cluster_map, sc);
    519  1.1  mrg 	isl_union_map_free(cluster_map);
    520  1.1  mrg 
    521  1.1  mrg 	r = isl_sched_graph_init(merge_graph, sc);
    522  1.1  mrg 
    523  1.1  mrg 	isl_schedule_constraints_free(sc);
    524  1.1  mrg 
    525  1.1  mrg 	return r;
    526  1.1  mrg }
    527  1.1  mrg 
    528  1.1  mrg /* Compute the maximal number of remaining schedule rows that still need
    529  1.1  mrg  * to be computed for the nodes that belong to clusters with the maximal
    530  1.1  mrg  * dimension for the current band (i.e., the band that is to be merged).
    531  1.1  mrg  * Only clusters that are about to be merged are considered.
    532  1.1  mrg  * "maxvar" is the maximal dimension for the current band.
    533  1.1  mrg  * "c" contains information about the clusters.
    534  1.1  mrg  *
    535  1.1  mrg  * Return the maximal number of remaining schedule rows or
    536  1.1  mrg  * isl_size_error on error.
    537  1.1  mrg  */
    538  1.1  mrg static isl_size compute_maxvar_max_slack(int maxvar, struct isl_clustering *c)
    539  1.1  mrg {
    540  1.1  mrg 	int i, j;
    541  1.1  mrg 	int max_slack;
    542  1.1  mrg 
    543  1.1  mrg 	max_slack = 0;
    544  1.1  mrg 	for (i = 0; i < c->n; ++i) {
    545  1.1  mrg 		int nvar;
    546  1.1  mrg 		struct isl_sched_graph *scc;
    547  1.1  mrg 
    548  1.1  mrg 		if (!c->scc_in_merge[i])
    549  1.1  mrg 			continue;
    550  1.1  mrg 		scc = &c->scc[i];
    551  1.1  mrg 		nvar = scc->n_total_row - scc->band_start;
    552  1.1  mrg 		if (nvar != maxvar)
    553  1.1  mrg 			continue;
    554  1.1  mrg 		for (j = 0; j < scc->n; ++j) {
    555  1.1  mrg 			struct isl_sched_node *node = &scc->node[j];
    556  1.1  mrg 			int slack;
    557  1.1  mrg 
    558  1.1  mrg 			if (isl_sched_node_update_vmap(node) < 0)
    559  1.1  mrg 				return isl_size_error;
    560  1.1  mrg 			slack = node->nvar - node->rank;
    561  1.1  mrg 			if (slack > max_slack)
    562  1.1  mrg 				max_slack = slack;
    563  1.1  mrg 		}
    564  1.1  mrg 	}
    565  1.1  mrg 
    566  1.1  mrg 	return max_slack;
    567  1.1  mrg }
    568  1.1  mrg 
    569  1.1  mrg /* If there are any clusters where the dimension of the current band
    570  1.1  mrg  * (i.e., the band that is to be merged) is smaller than "maxvar" and
    571  1.1  mrg  * if there are any nodes in such a cluster where the number
    572  1.1  mrg  * of remaining schedule rows that still need to be computed
    573  1.1  mrg  * is greater than "max_slack", then return the smallest current band
    574  1.1  mrg  * dimension of all these clusters.  Otherwise return the original value
    575  1.1  mrg  * of "maxvar".  Return isl_size_error in case of any error.
    576  1.1  mrg  * Only clusters that are about to be merged are considered.
    577  1.1  mrg  * "c" contains information about the clusters.
    578  1.1  mrg  */
    579  1.1  mrg static isl_size limit_maxvar_to_slack(int maxvar, int max_slack,
    580  1.1  mrg 	struct isl_clustering *c)
    581  1.1  mrg {
    582  1.1  mrg 	int i, j;
    583  1.1  mrg 
    584  1.1  mrg 	for (i = 0; i < c->n; ++i) {
    585  1.1  mrg 		int nvar;
    586  1.1  mrg 		struct isl_sched_graph *scc;
    587  1.1  mrg 
    588  1.1  mrg 		if (!c->scc_in_merge[i])
    589  1.1  mrg 			continue;
    590  1.1  mrg 		scc = &c->scc[i];
    591  1.1  mrg 		nvar = scc->n_total_row - scc->band_start;
    592  1.1  mrg 		if (nvar >= maxvar)
    593  1.1  mrg 			continue;
    594  1.1  mrg 		for (j = 0; j < scc->n; ++j) {
    595  1.1  mrg 			struct isl_sched_node *node = &scc->node[j];
    596  1.1  mrg 			int slack;
    597  1.1  mrg 
    598  1.1  mrg 			if (isl_sched_node_update_vmap(node) < 0)
    599  1.1  mrg 				return isl_size_error;
    600  1.1  mrg 			slack = node->nvar - node->rank;
    601  1.1  mrg 			if (slack > max_slack) {
    602  1.1  mrg 				maxvar = nvar;
    603  1.1  mrg 				break;
    604  1.1  mrg 			}
    605  1.1  mrg 		}
    606  1.1  mrg 	}
    607  1.1  mrg 
    608  1.1  mrg 	return maxvar;
    609  1.1  mrg }
    610  1.1  mrg 
    611  1.1  mrg /* Adjust merge_graph->maxvar based on the number of remaining schedule rows
    612  1.1  mrg  * that still need to be computed.  In particular, if there is a node
    613  1.1  mrg  * in a cluster where the dimension of the current band is smaller
    614  1.1  mrg  * than merge_graph->maxvar, but the number of remaining schedule rows
    615  1.1  mrg  * is greater than that of any node in a cluster with the maximal
    616  1.1  mrg  * dimension for the current band (i.e., merge_graph->maxvar),
    617  1.1  mrg  * then adjust merge_graph->maxvar to the (smallest) current band dimension
    618  1.1  mrg  * of those clusters.  Without this adjustment, the total number of
    619  1.1  mrg  * schedule dimensions would be increased, resulting in a skewed view
    620  1.1  mrg  * of the number of coincident dimensions.
    621  1.1  mrg  * "c" contains information about the clusters.
    622  1.1  mrg  *
    623  1.1  mrg  * If the maximize_band_depth option is set and merge_graph->maxvar is reduced,
    624  1.1  mrg  * then there is no point in attempting any merge since it will be rejected
    625  1.1  mrg  * anyway.  Set merge_graph->maxvar to zero in such cases.
    626  1.1  mrg  */
    627  1.1  mrg static isl_stat adjust_maxvar_to_slack(isl_ctx *ctx,
    628  1.1  mrg 	struct isl_sched_graph *merge_graph, struct isl_clustering *c)
    629  1.1  mrg {
    630  1.1  mrg 	isl_size max_slack, maxvar;
    631  1.1  mrg 
    632  1.1  mrg 	max_slack = compute_maxvar_max_slack(merge_graph->maxvar, c);
    633  1.1  mrg 	if (max_slack < 0)
    634  1.1  mrg 		return isl_stat_error;
    635  1.1  mrg 	maxvar = limit_maxvar_to_slack(merge_graph->maxvar, max_slack, c);
    636  1.1  mrg 	if (maxvar < 0)
    637  1.1  mrg 		return isl_stat_error;
    638  1.1  mrg 
    639  1.1  mrg 	if (maxvar < merge_graph->maxvar) {
    640  1.1  mrg 		if (isl_options_get_schedule_maximize_band_depth(ctx))
    641  1.1  mrg 			merge_graph->maxvar = 0;
    642  1.1  mrg 		else
    643  1.1  mrg 			merge_graph->maxvar = maxvar;
    644  1.1  mrg 	}
    645  1.1  mrg 
    646  1.1  mrg 	return isl_stat_ok;
    647  1.1  mrg }
    648  1.1  mrg 
    649  1.1  mrg /* Return the number of coincident dimensions in the current band of "graph",
    650  1.1  mrg  * where the nodes of "graph" are assumed to be scheduled by a single band.
    651  1.1  mrg  */
    652  1.1  mrg static int get_n_coincident(struct isl_sched_graph *graph)
    653  1.1  mrg {
    654  1.1  mrg 	int i;
    655  1.1  mrg 
    656  1.1  mrg 	for (i = graph->band_start; i < graph->n_total_row; ++i)
    657  1.1  mrg 		if (!graph->node[0].coincident[i])
    658  1.1  mrg 			break;
    659  1.1  mrg 
    660  1.1  mrg 	return i - graph->band_start;
    661  1.1  mrg }
    662  1.1  mrg 
    663  1.1  mrg /* Should the clusters be merged based on the cluster schedule
    664  1.1  mrg  * in the current (and only) band of "merge_graph", given that
    665  1.1  mrg  * coincidence should be maximized?
    666  1.1  mrg  *
    667  1.1  mrg  * If the number of coincident schedule dimensions in the merged band
    668  1.1  mrg  * would be less than the maximal number of coincident schedule dimensions
    669  1.1  mrg  * in any of the merged clusters, then the clusters should not be merged.
    670  1.1  mrg  */
    671  1.1  mrg static isl_bool ok_to_merge_coincident(struct isl_clustering *c,
    672  1.1  mrg 	struct isl_sched_graph *merge_graph)
    673  1.1  mrg {
    674  1.1  mrg 	int i;
    675  1.1  mrg 	int n_coincident;
    676  1.1  mrg 	int max_coincident;
    677  1.1  mrg 
    678  1.1  mrg 	max_coincident = 0;
    679  1.1  mrg 	for (i = 0; i < c->n; ++i) {
    680  1.1  mrg 		if (!c->scc_in_merge[i])
    681  1.1  mrg 			continue;
    682  1.1  mrg 		n_coincident = get_n_coincident(&c->scc[i]);
    683  1.1  mrg 		if (n_coincident > max_coincident)
    684  1.1  mrg 			max_coincident = n_coincident;
    685  1.1  mrg 	}
    686  1.1  mrg 
    687  1.1  mrg 	n_coincident = get_n_coincident(merge_graph);
    688  1.1  mrg 
    689  1.1  mrg 	return isl_bool_ok(n_coincident >= max_coincident);
    690  1.1  mrg }
    691  1.1  mrg 
    692  1.1  mrg /* Return the transformation on "node" expressed by the current (and only)
    693  1.1  mrg  * band of "merge_graph" applied to the clusters in "c".
    694  1.1  mrg  *
    695  1.1  mrg  * First find the representation of "node" in its SCC in "c" and
    696  1.1  mrg  * extract the transformation expressed by the current band.
    697  1.1  mrg  * Then extract the transformation applied by "merge_graph"
    698  1.1  mrg  * to the cluster to which this SCC belongs.
    699  1.1  mrg  * Combine the two to obtain the complete transformation on the node.
    700  1.1  mrg  *
    701  1.1  mrg  * Note that the range of the first transformation is an anonymous space,
    702  1.1  mrg  * while the domain of the second is named "cluster_X".  The range
    703  1.1  mrg  * of the former therefore needs to be adjusted before the two
    704  1.1  mrg  * can be combined.
    705  1.1  mrg  */
    706  1.1  mrg static __isl_give isl_map *extract_node_transformation(isl_ctx *ctx,
    707  1.1  mrg 	struct isl_sched_node *node, struct isl_clustering *c,
    708  1.1  mrg 	struct isl_sched_graph *merge_graph)
    709  1.1  mrg {
    710  1.1  mrg 	struct isl_sched_node *scc_node, *cluster_node;
    711  1.1  mrg 	int start, n;
    712  1.1  mrg 	isl_id *id;
    713  1.1  mrg 	isl_space *space;
    714  1.1  mrg 	isl_multi_aff *ma, *ma2;
    715  1.1  mrg 
    716  1.1  mrg 	scc_node = isl_sched_graph_find_node(ctx, &c->scc[node->scc],
    717  1.1  mrg 						node->space);
    718  1.1  mrg 	if (scc_node && !isl_sched_graph_is_node(&c->scc[node->scc], scc_node))
    719  1.1  mrg 		isl_die(ctx, isl_error_internal, "unable to find node",
    720  1.1  mrg 			return NULL);
    721  1.1  mrg 	start = c->scc[node->scc].band_start;
    722  1.1  mrg 	n = c->scc[node->scc].n_total_row - start;
    723  1.1  mrg 	ma = isl_sched_node_extract_partial_schedule_multi_aff(scc_node,
    724  1.1  mrg 								start, n);
    725  1.1  mrg 	space = cluster_space(&c->scc[node->scc], c->scc_cluster[node->scc]);
    726  1.1  mrg 	cluster_node = isl_sched_graph_find_node(ctx, merge_graph, space);
    727  1.1  mrg 	if (cluster_node && !isl_sched_graph_is_node(merge_graph, cluster_node))
    728  1.1  mrg 		isl_die(ctx, isl_error_internal, "unable to find cluster",
    729  1.1  mrg 			space = isl_space_free(space));
    730  1.1  mrg 	id = isl_space_get_tuple_id(space, isl_dim_set);
    731  1.1  mrg 	ma = isl_multi_aff_set_tuple_id(ma, isl_dim_out, id);
    732  1.1  mrg 	isl_space_free(space);
    733  1.1  mrg 	n = merge_graph->n_total_row;
    734  1.1  mrg 	ma2 = isl_sched_node_extract_partial_schedule_multi_aff(cluster_node,
    735  1.1  mrg 								0, n);
    736  1.1  mrg 	ma = isl_multi_aff_pullback_multi_aff(ma2, ma);
    737  1.1  mrg 
    738  1.1  mrg 	return isl_map_from_multi_aff(ma);
    739  1.1  mrg }
    740  1.1  mrg 
    741  1.1  mrg /* Give a set of distances "set", are they bounded by a small constant
    742  1.1  mrg  * in direction "pos"?
    743  1.1  mrg  * In practice, check if they are bounded by 2 by checking that there
    744  1.1  mrg  * are no elements with a value greater than or equal to 3 or
    745  1.1  mrg  * smaller than or equal to -3.
    746  1.1  mrg  */
    747  1.1  mrg static isl_bool distance_is_bounded(__isl_keep isl_set *set, int pos)
    748  1.1  mrg {
    749  1.1  mrg 	isl_bool bounded;
    750  1.1  mrg 	isl_set *test;
    751  1.1  mrg 
    752  1.1  mrg 	if (!set)
    753  1.1  mrg 		return isl_bool_error;
    754  1.1  mrg 
    755  1.1  mrg 	test = isl_set_copy(set);
    756  1.1  mrg 	test = isl_set_lower_bound_si(test, isl_dim_set, pos, 3);
    757  1.1  mrg 	bounded = isl_set_is_empty(test);
    758  1.1  mrg 	isl_set_free(test);
    759  1.1  mrg 
    760  1.1  mrg 	if (bounded < 0 || !bounded)
    761  1.1  mrg 		return bounded;
    762  1.1  mrg 
    763  1.1  mrg 	test = isl_set_copy(set);
    764  1.1  mrg 	test = isl_set_upper_bound_si(test, isl_dim_set, pos, -3);
    765  1.1  mrg 	bounded = isl_set_is_empty(test);
    766  1.1  mrg 	isl_set_free(test);
    767  1.1  mrg 
    768  1.1  mrg 	return bounded;
    769  1.1  mrg }
    770  1.1  mrg 
    771  1.1  mrg /* Does the set "set" have a fixed (but possible parametric) value
    772  1.1  mrg  * at dimension "pos"?
    773  1.1  mrg  */
    774  1.1  mrg static isl_bool has_single_value(__isl_keep isl_set *set, int pos)
    775  1.1  mrg {
    776  1.1  mrg 	isl_size n;
    777  1.1  mrg 	isl_bool single;
    778  1.1  mrg 
    779  1.1  mrg 	n = isl_set_dim(set, isl_dim_set);
    780  1.1  mrg 	if (n < 0)
    781  1.1  mrg 		return isl_bool_error;
    782  1.1  mrg 	set = isl_set_copy(set);
    783  1.1  mrg 	set = isl_set_project_out(set, isl_dim_set, pos + 1, n - (pos + 1));
    784  1.1  mrg 	set = isl_set_project_out(set, isl_dim_set, 0, pos);
    785  1.1  mrg 	single = isl_set_is_singleton(set);
    786  1.1  mrg 	isl_set_free(set);
    787  1.1  mrg 
    788  1.1  mrg 	return single;
    789  1.1  mrg }
    790  1.1  mrg 
    791  1.1  mrg /* Does "map" have a fixed (but possible parametric) value
    792  1.1  mrg  * at dimension "pos" of either its domain or its range?
    793  1.1  mrg  */
    794  1.1  mrg static isl_bool has_singular_src_or_dst(__isl_keep isl_map *map, int pos)
    795  1.1  mrg {
    796  1.1  mrg 	isl_set *set;
    797  1.1  mrg 	isl_bool single;
    798  1.1  mrg 
    799  1.1  mrg 	set = isl_map_domain(isl_map_copy(map));
    800  1.1  mrg 	single = has_single_value(set, pos);
    801  1.1  mrg 	isl_set_free(set);
    802  1.1  mrg 
    803  1.1  mrg 	if (single < 0 || single)
    804  1.1  mrg 		return single;
    805  1.1  mrg 
    806  1.1  mrg 	set = isl_map_range(isl_map_copy(map));
    807  1.1  mrg 	single = has_single_value(set, pos);
    808  1.1  mrg 	isl_set_free(set);
    809  1.1  mrg 
    810  1.1  mrg 	return single;
    811  1.1  mrg }
    812  1.1  mrg 
    813  1.1  mrg /* Does the edge "edge" from "graph" have bounded dependence distances
    814  1.1  mrg  * in the merged graph "merge_graph" of a selection of clusters in "c"?
    815  1.1  mrg  *
    816  1.1  mrg  * Extract the complete transformations of the source and destination
    817  1.1  mrg  * nodes of the edge, apply them to the edge constraints and
    818  1.1  mrg  * compute the differences.  Finally, check if these differences are bounded
    819  1.1  mrg  * in each direction.
    820  1.1  mrg  *
    821  1.1  mrg  * If the dimension of the band is greater than the number of
    822  1.1  mrg  * dimensions that can be expected to be optimized by the edge
    823  1.1  mrg  * (based on its weight), then also allow the differences to be unbounded
    824  1.1  mrg  * in the remaining dimensions, but only if either the source or
    825  1.1  mrg  * the destination has a fixed value in that direction.
    826  1.1  mrg  * This allows a statement that produces values that are used by
    827  1.1  mrg  * several instances of another statement to be merged with that
    828  1.1  mrg  * other statement.
    829  1.1  mrg  * However, merging such clusters will introduce an inherently
    830  1.1  mrg  * large proximity distance inside the merged cluster, meaning
    831  1.1  mrg  * that proximity distances will no longer be optimized in
    832  1.1  mrg  * subsequent merges.  These merges are therefore only allowed
    833  1.1  mrg  * after all other possible merges have been tried.
    834  1.1  mrg  * The first time such a merge is encountered, the weight of the edge
    835  1.1  mrg  * is replaced by a negative weight.  The second time (i.e., after
    836  1.1  mrg  * all merges over edges with a non-negative weight have been tried),
    837  1.1  mrg  * the merge is allowed.
    838  1.1  mrg  */
    839  1.1  mrg static isl_bool has_bounded_distances(isl_ctx *ctx, struct isl_sched_edge *edge,
    840  1.1  mrg 	struct isl_sched_graph *graph, struct isl_clustering *c,
    841  1.1  mrg 	struct isl_sched_graph *merge_graph)
    842  1.1  mrg {
    843  1.1  mrg 	int i, n_slack;
    844  1.1  mrg 	isl_size n;
    845  1.1  mrg 	isl_bool bounded;
    846  1.1  mrg 	isl_map *map, *t;
    847  1.1  mrg 	isl_set *dist;
    848  1.1  mrg 
    849  1.1  mrg 	map = isl_map_copy(edge->map);
    850  1.1  mrg 	t = extract_node_transformation(ctx, edge->src, c, merge_graph);
    851  1.1  mrg 	map = isl_map_apply_domain(map, t);
    852  1.1  mrg 	t = extract_node_transformation(ctx, edge->dst, c, merge_graph);
    853  1.1  mrg 	map = isl_map_apply_range(map, t);
    854  1.1  mrg 	dist = isl_map_deltas(isl_map_copy(map));
    855  1.1  mrg 
    856  1.1  mrg 	bounded = isl_bool_true;
    857  1.1  mrg 	n = isl_set_dim(dist, isl_dim_set);
    858  1.1  mrg 	if (n < 0)
    859  1.1  mrg 		goto error;
    860  1.1  mrg 	n_slack = n - edge->weight;
    861  1.1  mrg 	if (edge->weight < 0)
    862  1.1  mrg 		n_slack -= graph->max_weight + 1;
    863  1.1  mrg 	for (i = 0; i < n; ++i) {
    864  1.1  mrg 		isl_bool bounded_i, singular_i;
    865  1.1  mrg 
    866  1.1  mrg 		bounded_i = distance_is_bounded(dist, i);
    867  1.1  mrg 		if (bounded_i < 0)
    868  1.1  mrg 			goto error;
    869  1.1  mrg 		if (bounded_i)
    870  1.1  mrg 			continue;
    871  1.1  mrg 		if (edge->weight >= 0)
    872  1.1  mrg 			bounded = isl_bool_false;
    873  1.1  mrg 		n_slack--;
    874  1.1  mrg 		if (n_slack < 0)
    875  1.1  mrg 			break;
    876  1.1  mrg 		singular_i = has_singular_src_or_dst(map, i);
    877  1.1  mrg 		if (singular_i < 0)
    878  1.1  mrg 			goto error;
    879  1.1  mrg 		if (singular_i)
    880  1.1  mrg 			continue;
    881  1.1  mrg 		bounded = isl_bool_false;
    882  1.1  mrg 		break;
    883  1.1  mrg 	}
    884  1.1  mrg 	if (!bounded && i >= n && edge->weight >= 0)
    885  1.1  mrg 		edge->weight -= graph->max_weight + 1;
    886  1.1  mrg 	isl_map_free(map);
    887  1.1  mrg 	isl_set_free(dist);
    888  1.1  mrg 
    889  1.1  mrg 	return bounded;
    890  1.1  mrg error:
    891  1.1  mrg 	isl_map_free(map);
    892  1.1  mrg 	isl_set_free(dist);
    893  1.1  mrg 	return isl_bool_error;
    894  1.1  mrg }
    895  1.1  mrg 
    896  1.1  mrg /* Should the clusters be merged based on the cluster schedule
    897  1.1  mrg  * in the current (and only) band of "merge_graph"?
    898  1.1  mrg  * "graph" is the original dependence graph, while "c" records
    899  1.1  mrg  * which SCCs are involved in the latest merge.
    900  1.1  mrg  *
    901  1.1  mrg  * In particular, is there at least one proximity constraint
    902  1.1  mrg  * that is optimized by the merge?
    903  1.1  mrg  *
    904  1.1  mrg  * A proximity constraint is considered to be optimized
    905  1.1  mrg  * if the dependence distances are small.
    906  1.1  mrg  */
    907  1.1  mrg static isl_bool ok_to_merge_proximity(isl_ctx *ctx,
    908  1.1  mrg 	struct isl_sched_graph *graph, struct isl_clustering *c,
    909  1.1  mrg 	struct isl_sched_graph *merge_graph)
    910  1.1  mrg {
    911  1.1  mrg 	int i;
    912  1.1  mrg 
    913  1.1  mrg 	for (i = 0; i < graph->n_edge; ++i) {
    914  1.1  mrg 		struct isl_sched_edge *edge = &graph->edge[i];
    915  1.1  mrg 		isl_bool bounded;
    916  1.1  mrg 
    917  1.1  mrg 		if (!isl_sched_edge_is_proximity(edge))
    918  1.1  mrg 			continue;
    919  1.1  mrg 		if (!c->scc_in_merge[edge->src->scc])
    920  1.1  mrg 			continue;
    921  1.1  mrg 		if (!c->scc_in_merge[edge->dst->scc])
    922  1.1  mrg 			continue;
    923  1.1  mrg 		if (c->scc_cluster[edge->dst->scc] ==
    924  1.1  mrg 		    c->scc_cluster[edge->src->scc])
    925  1.1  mrg 			continue;
    926  1.1  mrg 		bounded = has_bounded_distances(ctx, edge, graph, c,
    927  1.1  mrg 						merge_graph);
    928  1.1  mrg 		if (bounded < 0 || bounded)
    929  1.1  mrg 			return bounded;
    930  1.1  mrg 	}
    931  1.1  mrg 
    932  1.1  mrg 	return isl_bool_false;
    933  1.1  mrg }
    934  1.1  mrg 
    935  1.1  mrg /* Should the clusters be merged based on the cluster schedule
    936  1.1  mrg  * in the current (and only) band of "merge_graph"?
    937  1.1  mrg  * "graph" is the original dependence graph, while "c" records
    938  1.1  mrg  * which SCCs are involved in the latest merge.
    939  1.1  mrg  *
    940  1.1  mrg  * If the current band is empty, then the clusters should not be merged.
    941  1.1  mrg  *
    942  1.1  mrg  * If the band depth should be maximized and the merge schedule
    943  1.1  mrg  * is incomplete (meaning that the dimension of some of the schedule
    944  1.1  mrg  * bands in the original schedule will be reduced), then the clusters
    945  1.1  mrg  * should not be merged.
    946  1.1  mrg  *
    947  1.1  mrg  * If the schedule_maximize_coincidence option is set, then check that
    948  1.1  mrg  * the number of coincident schedule dimensions is not reduced.
    949  1.1  mrg  *
    950  1.1  mrg  * Finally, only allow the merge if at least one proximity
    951  1.1  mrg  * constraint is optimized.
    952  1.1  mrg  */
    953  1.1  mrg static isl_bool ok_to_merge(isl_ctx *ctx, struct isl_sched_graph *graph,
    954  1.1  mrg 	struct isl_clustering *c, struct isl_sched_graph *merge_graph)
    955  1.1  mrg {
    956  1.1  mrg 	if (merge_graph->n_total_row == merge_graph->band_start)
    957  1.1  mrg 		return isl_bool_false;
    958  1.1  mrg 
    959  1.1  mrg 	if (isl_options_get_schedule_maximize_band_depth(ctx) &&
    960  1.1  mrg 	    merge_graph->n_total_row < merge_graph->maxvar)
    961  1.1  mrg 		return isl_bool_false;
    962  1.1  mrg 
    963  1.1  mrg 	if (isl_options_get_schedule_maximize_coincidence(ctx)) {
    964  1.1  mrg 		isl_bool ok;
    965  1.1  mrg 
    966  1.1  mrg 		ok = ok_to_merge_coincident(c, merge_graph);
    967  1.1  mrg 		if (ok < 0 || !ok)
    968  1.1  mrg 			return ok;
    969  1.1  mrg 	}
    970  1.1  mrg 
    971  1.1  mrg 	return ok_to_merge_proximity(ctx, graph, c, merge_graph);
    972  1.1  mrg }
    973  1.1  mrg 
    974  1.1  mrg /* Apply the schedule in "t_node" to the "n" rows starting at "first"
    975  1.1  mrg  * of the schedule in "node" and return the result.
    976  1.1  mrg  *
    977  1.1  mrg  * That is, essentially compute
    978  1.1  mrg  *
    979  1.1  mrg  *	T * N(first:first+n-1)
    980  1.1  mrg  *
    981  1.1  mrg  * taking into account the constant term and the parameter coefficients
    982  1.1  mrg  * in "t_node".
    983  1.1  mrg  */
    984  1.1  mrg static __isl_give isl_mat *node_transformation(isl_ctx *ctx,
    985  1.1  mrg 	struct isl_sched_node *t_node, struct isl_sched_node *node,
    986  1.1  mrg 	int first, int n)
    987  1.1  mrg {
    988  1.1  mrg 	int i, j;
    989  1.1  mrg 	isl_mat *t;
    990  1.1  mrg 	isl_size n_row, n_col;
    991  1.1  mrg 	int n_param, n_var;
    992  1.1  mrg 
    993  1.1  mrg 	n_param = node->nparam;
    994  1.1  mrg 	n_var = node->nvar;
    995  1.1  mrg 	n_row = isl_mat_rows(t_node->sched);
    996  1.1  mrg 	n_col = isl_mat_cols(node->sched);
    997  1.1  mrg 	if (n_row < 0 || n_col < 0)
    998  1.1  mrg 		return NULL;
    999  1.1  mrg 	t = isl_mat_alloc(ctx, n_row, n_col);
   1000  1.1  mrg 	if (!t)
   1001  1.1  mrg 		return NULL;
   1002  1.1  mrg 	for (i = 0; i < n_row; ++i) {
   1003  1.1  mrg 		isl_seq_cpy(t->row[i], t_node->sched->row[i], 1 + n_param);
   1004  1.1  mrg 		isl_seq_clr(t->row[i] + 1 + n_param, n_var);
   1005  1.1  mrg 		for (j = 0; j < n; ++j)
   1006  1.1  mrg 			isl_seq_addmul(t->row[i],
   1007  1.1  mrg 					t_node->sched->row[i][1 + n_param + j],
   1008  1.1  mrg 					node->sched->row[first + j],
   1009  1.1  mrg 					1 + n_param + n_var);
   1010  1.1  mrg 	}
   1011  1.1  mrg 	return t;
   1012  1.1  mrg }
   1013  1.1  mrg 
   1014  1.1  mrg /* Apply the cluster schedule in "t_node" to the current band
   1015  1.1  mrg  * schedule of the nodes in "graph".
   1016  1.1  mrg  *
   1017  1.1  mrg  * In particular, replace the rows starting at band_start
   1018  1.1  mrg  * by the result of applying the cluster schedule in "t_node"
   1019  1.1  mrg  * to the original rows.
   1020  1.1  mrg  *
   1021  1.1  mrg  * The coincidence of the schedule is determined by the coincidence
   1022  1.1  mrg  * of the cluster schedule.
   1023  1.1  mrg  */
   1024  1.1  mrg static isl_stat transform(isl_ctx *ctx, struct isl_sched_graph *graph,
   1025  1.1  mrg 	struct isl_sched_node *t_node)
   1026  1.1  mrg {
   1027  1.1  mrg 	int i, j;
   1028  1.1  mrg 	isl_size n_new;
   1029  1.1  mrg 	int start, n;
   1030  1.1  mrg 
   1031  1.1  mrg 	start = graph->band_start;
   1032  1.1  mrg 	n = graph->n_total_row - start;
   1033  1.1  mrg 
   1034  1.1  mrg 	n_new = isl_mat_rows(t_node->sched);
   1035  1.1  mrg 	if (n_new < 0)
   1036  1.1  mrg 		return isl_stat_error;
   1037  1.1  mrg 	for (i = 0; i < graph->n; ++i) {
   1038  1.1  mrg 		struct isl_sched_node *node = &graph->node[i];
   1039  1.1  mrg 		isl_mat *t;
   1040  1.1  mrg 
   1041  1.1  mrg 		t = node_transformation(ctx, t_node, node, start, n);
   1042  1.1  mrg 		node->sched = isl_mat_drop_rows(node->sched, start, n);
   1043  1.1  mrg 		node->sched = isl_mat_concat(node->sched, t);
   1044  1.1  mrg 		node->sched_map = isl_map_free(node->sched_map);
   1045  1.1  mrg 		if (!node->sched)
   1046  1.1  mrg 			return isl_stat_error;
   1047  1.1  mrg 		for (j = 0; j < n_new; ++j)
   1048  1.1  mrg 			node->coincident[start + j] = t_node->coincident[j];
   1049  1.1  mrg 	}
   1050  1.1  mrg 	graph->n_total_row -= n;
   1051  1.1  mrg 	graph->n_row -= n;
   1052  1.1  mrg 	graph->n_total_row += n_new;
   1053  1.1  mrg 	graph->n_row += n_new;
   1054  1.1  mrg 
   1055  1.1  mrg 	return isl_stat_ok;
   1056  1.1  mrg }
   1057  1.1  mrg 
   1058  1.1  mrg /* Merge the clusters marked for merging in "c" into a single
   1059  1.1  mrg  * cluster using the cluster schedule in the current band of "merge_graph".
   1060  1.1  mrg  * The representative SCC for the new cluster is the SCC with
   1061  1.1  mrg  * the smallest index.
   1062  1.1  mrg  *
   1063  1.1  mrg  * The current band schedule of each SCC in the new cluster is obtained
   1064  1.1  mrg  * by applying the schedule of the corresponding original cluster
   1065  1.1  mrg  * to the original band schedule.
   1066  1.1  mrg  * All SCCs in the new cluster have the same number of schedule rows.
   1067  1.1  mrg  */
   1068  1.1  mrg static isl_stat merge(isl_ctx *ctx, struct isl_clustering *c,
   1069  1.1  mrg 	struct isl_sched_graph *merge_graph)
   1070  1.1  mrg {
   1071  1.1  mrg 	int i;
   1072  1.1  mrg 	int cluster = -1;
   1073  1.1  mrg 	isl_space *space;
   1074  1.1  mrg 
   1075  1.1  mrg 	for (i = 0; i < c->n; ++i) {
   1076  1.1  mrg 		struct isl_sched_node *node;
   1077  1.1  mrg 
   1078  1.1  mrg 		if (!c->scc_in_merge[i])
   1079  1.1  mrg 			continue;
   1080  1.1  mrg 		if (cluster < 0)
   1081  1.1  mrg 			cluster = i;
   1082  1.1  mrg 		space = cluster_space(&c->scc[i], c->scc_cluster[i]);
   1083  1.1  mrg 		node = isl_sched_graph_find_node(ctx, merge_graph, space);
   1084  1.1  mrg 		isl_space_free(space);
   1085  1.1  mrg 		if (!node)
   1086  1.1  mrg 			return isl_stat_error;
   1087  1.1  mrg 		if (!isl_sched_graph_is_node(merge_graph, node))
   1088  1.1  mrg 			isl_die(ctx, isl_error_internal,
   1089  1.1  mrg 				"unable to find cluster",
   1090  1.1  mrg 				return isl_stat_error);
   1091  1.1  mrg 		if (transform(ctx, &c->scc[i], node) < 0)
   1092  1.1  mrg 			return isl_stat_error;
   1093  1.1  mrg 		c->scc_cluster[i] = cluster;
   1094  1.1  mrg 	}
   1095  1.1  mrg 
   1096  1.1  mrg 	return isl_stat_ok;
   1097  1.1  mrg }
   1098  1.1  mrg 
   1099  1.1  mrg /* Try and merge the clusters of SCCs marked in c->scc_in_merge
   1100  1.1  mrg  * by scheduling the current cluster bands with respect to each other.
   1101  1.1  mrg  *
   1102  1.1  mrg  * Construct a dependence graph with a space for each cluster and
   1103  1.1  mrg  * with the coordinates of each space corresponding to the schedule
   1104  1.1  mrg  * dimensions of the current band of that cluster.
   1105  1.1  mrg  * Construct a cluster schedule in this cluster dependence graph and
   1106  1.1  mrg  * apply it to the current cluster bands if it is applicable
   1107  1.1  mrg  * according to ok_to_merge.
   1108  1.1  mrg  *
   1109  1.1  mrg  * If the number of remaining schedule dimensions in a cluster
   1110  1.1  mrg  * with a non-maximal current schedule dimension is greater than
   1111  1.1  mrg  * the number of remaining schedule dimensions in clusters
   1112  1.1  mrg  * with a maximal current schedule dimension, then restrict
   1113  1.1  mrg  * the number of rows to be computed in the cluster schedule
   1114  1.1  mrg  * to the minimal such non-maximal current schedule dimension.
   1115  1.1  mrg  * Do this by adjusting merge_graph.maxvar.
   1116  1.1  mrg  *
   1117  1.1  mrg  * Return isl_bool_true if the clusters have effectively been merged
   1118  1.1  mrg  * into a single cluster.
   1119  1.1  mrg  *
   1120  1.1  mrg  * Note that since the standard scheduling algorithm minimizes the maximal
   1121  1.1  mrg  * distance over proximity constraints, the proximity constraints between
   1122  1.1  mrg  * the merged clusters may not be optimized any further than what is
   1123  1.1  mrg  * sufficient to bring the distances within the limits of the internal
   1124  1.1  mrg  * proximity constraints inside the individual clusters.
   1125  1.1  mrg  * It may therefore make sense to perform an additional translation step
   1126  1.1  mrg  * to bring the clusters closer to each other, while maintaining
   1127  1.1  mrg  * the linear part of the merging schedule found using the standard
   1128  1.1  mrg  * scheduling algorithm.
   1129  1.1  mrg  */
   1130  1.1  mrg static isl_bool try_merge(isl_ctx *ctx, struct isl_sched_graph *graph,
   1131  1.1  mrg 	struct isl_clustering *c)
   1132  1.1  mrg {
   1133  1.1  mrg 	struct isl_sched_graph merge_graph = { 0 };
   1134  1.1  mrg 	isl_bool merged;
   1135  1.1  mrg 
   1136  1.1  mrg 	if (init_merge_graph(ctx, graph, c, &merge_graph) < 0)
   1137  1.1  mrg 		goto error;
   1138  1.1  mrg 
   1139  1.1  mrg 	if (isl_sched_graph_compute_maxvar(&merge_graph) < 0)
   1140  1.1  mrg 		goto error;
   1141  1.1  mrg 	if (adjust_maxvar_to_slack(ctx, &merge_graph,c) < 0)
   1142  1.1  mrg 		goto error;
   1143  1.1  mrg 	if (isl_schedule_node_compute_wcc_band(ctx, &merge_graph) < 0)
   1144  1.1  mrg 		goto error;
   1145  1.1  mrg 	merged = ok_to_merge(ctx, graph, c, &merge_graph);
   1146  1.1  mrg 	if (merged && merge(ctx, c, &merge_graph) < 0)
   1147  1.1  mrg 		goto error;
   1148  1.1  mrg 
   1149  1.1  mrg 	isl_sched_graph_free(ctx, &merge_graph);
   1150  1.1  mrg 	return merged;
   1151  1.1  mrg error:
   1152  1.1  mrg 	isl_sched_graph_free(ctx, &merge_graph);
   1153  1.1  mrg 	return isl_bool_error;
   1154  1.1  mrg }
   1155  1.1  mrg 
   1156  1.1  mrg /* Is there any edge marked "no_merge" between two SCCs that are
   1157  1.1  mrg  * about to be merged (i.e., that are set in "scc_in_merge")?
   1158  1.1  mrg  * "merge_edge" is the proximity edge along which the clusters of SCCs
   1159  1.1  mrg  * are going to be merged.
   1160  1.1  mrg  *
   1161  1.1  mrg  * If there is any edge between two SCCs with a negative weight,
   1162  1.1  mrg  * while the weight of "merge_edge" is non-negative, then this
   1163  1.1  mrg  * means that the edge was postponed.  "merge_edge" should then
   1164  1.1  mrg  * also be postponed since merging along the edge with negative weight should
   1165  1.1  mrg  * be postponed until all edges with non-negative weight have been tried.
   1166  1.1  mrg  * Replace the weight of "merge_edge" by a negative weight as well and
   1167  1.1  mrg  * tell the caller not to attempt a merge.
   1168  1.1  mrg  */
   1169  1.1  mrg static int any_no_merge(struct isl_sched_graph *graph, int *scc_in_merge,
   1170  1.1  mrg 	struct isl_sched_edge *merge_edge)
   1171  1.1  mrg {
   1172  1.1  mrg 	int i;
   1173  1.1  mrg 
   1174  1.1  mrg 	for (i = 0; i < graph->n_edge; ++i) {
   1175  1.1  mrg 		struct isl_sched_edge *edge = &graph->edge[i];
   1176  1.1  mrg 
   1177  1.1  mrg 		if (!scc_in_merge[edge->src->scc])
   1178  1.1  mrg 			continue;
   1179  1.1  mrg 		if (!scc_in_merge[edge->dst->scc])
   1180  1.1  mrg 			continue;
   1181  1.1  mrg 		if (edge->no_merge)
   1182  1.1  mrg 			return 1;
   1183  1.1  mrg 		if (merge_edge->weight >= 0 && edge->weight < 0) {
   1184  1.1  mrg 			merge_edge->weight -= graph->max_weight + 1;
   1185  1.1  mrg 			return 1;
   1186  1.1  mrg 		}
   1187  1.1  mrg 	}
   1188  1.1  mrg 
   1189  1.1  mrg 	return 0;
   1190  1.1  mrg }
   1191  1.1  mrg 
   1192  1.1  mrg /* Merge the two clusters in "c" connected by the edge in "graph"
   1193  1.1  mrg  * with index "edge" into a single cluster.
   1194  1.1  mrg  * If it turns out to be impossible to merge these two clusters,
   1195  1.1  mrg  * then mark the edge as "no_merge" such that it will not be
   1196  1.1  mrg  * considered again.
   1197  1.1  mrg  *
   1198  1.1  mrg  * First mark all SCCs that need to be merged.  This includes the SCCs
   1199  1.1  mrg  * in the two clusters, but it may also include the SCCs
   1200  1.1  mrg  * of intermediate clusters.
   1201  1.1  mrg  * If there is already a no_merge edge between any pair of such SCCs,
   1202  1.1  mrg  * then simply mark the current edge as no_merge as well.
   1203  1.1  mrg  * Likewise, if any of those edges was postponed by has_bounded_distances,
   1204  1.1  mrg  * then postpone the current edge as well.
   1205  1.1  mrg  * Otherwise, try and merge the clusters and mark "edge" as "no_merge"
   1206  1.1  mrg  * if the clusters did not end up getting merged, unless the non-merge
   1207  1.1  mrg  * is due to the fact that the edge was postponed.  This postponement
   1208  1.1  mrg  * can be recognized by a change in weight (from non-negative to negative).
   1209  1.1  mrg  */
   1210  1.1  mrg static isl_stat merge_clusters_along_edge(isl_ctx *ctx,
   1211  1.1  mrg 	struct isl_sched_graph *graph, int edge, struct isl_clustering *c)
   1212  1.1  mrg {
   1213  1.1  mrg 	isl_bool merged;
   1214  1.1  mrg 	int edge_weight = graph->edge[edge].weight;
   1215  1.1  mrg 
   1216  1.1  mrg 	if (mark_merge_sccs(ctx, graph, edge, c) < 0)
   1217  1.1  mrg 		return isl_stat_error;
   1218  1.1  mrg 
   1219  1.1  mrg 	if (any_no_merge(graph, c->scc_in_merge, &graph->edge[edge]))
   1220  1.1  mrg 		merged = isl_bool_false;
   1221  1.1  mrg 	else
   1222  1.1  mrg 		merged = try_merge(ctx, graph, c);
   1223  1.1  mrg 	if (merged < 0)
   1224  1.1  mrg 		return isl_stat_error;
   1225  1.1  mrg 	if (!merged && edge_weight == graph->edge[edge].weight)
   1226  1.1  mrg 		graph->edge[edge].no_merge = 1;
   1227  1.1  mrg 
   1228  1.1  mrg 	return isl_stat_ok;
   1229  1.1  mrg }
   1230  1.1  mrg 
   1231  1.1  mrg /* Does "node" belong to the cluster identified by "cluster"?
   1232  1.1  mrg  */
   1233  1.1  mrg static int node_cluster_exactly(struct isl_sched_node *node, int cluster)
   1234  1.1  mrg {
   1235  1.1  mrg 	return node->cluster == cluster;
   1236  1.1  mrg }
   1237  1.1  mrg 
   1238  1.1  mrg /* Does "edge" connect two nodes belonging to the cluster
   1239  1.1  mrg  * identified by "cluster"?
   1240  1.1  mrg  */
   1241  1.1  mrg static int edge_cluster_exactly(struct isl_sched_edge *edge, int cluster)
   1242  1.1  mrg {
   1243  1.1  mrg 	return edge->src->cluster == cluster && edge->dst->cluster == cluster;
   1244  1.1  mrg }
   1245  1.1  mrg 
   1246  1.1  mrg /* Swap the schedule of "node1" and "node2".
   1247  1.1  mrg  * Both nodes have been derived from the same node in a common parent graph.
   1248  1.1  mrg  * Since the "coincident" field is shared with that node
   1249  1.1  mrg  * in the parent graph, there is no need to also swap this field.
   1250  1.1  mrg  */
   1251  1.1  mrg static void swap_sched(struct isl_sched_node *node1,
   1252  1.1  mrg 	struct isl_sched_node *node2)
   1253  1.1  mrg {
   1254  1.1  mrg 	isl_mat *sched;
   1255  1.1  mrg 	isl_map *sched_map;
   1256  1.1  mrg 
   1257  1.1  mrg 	sched = node1->sched;
   1258  1.1  mrg 	node1->sched = node2->sched;
   1259  1.1  mrg 	node2->sched = sched;
   1260  1.1  mrg 
   1261  1.1  mrg 	sched_map = node1->sched_map;
   1262  1.1  mrg 	node1->sched_map = node2->sched_map;
   1263  1.1  mrg 	node2->sched_map = sched_map;
   1264  1.1  mrg }
   1265  1.1  mrg 
   1266  1.1  mrg /* Copy the current band schedule from the SCCs that form the cluster
   1267  1.1  mrg  * with index "pos" to the actual cluster at position "pos".
   1268  1.1  mrg  * By construction, the index of the first SCC that belongs to the cluster
   1269  1.1  mrg  * is also "pos".
   1270  1.1  mrg  *
   1271  1.1  mrg  * The order of the nodes inside both the SCCs and the cluster
   1272  1.1  mrg  * is assumed to be same as the order in the original "graph".
   1273  1.1  mrg  *
   1274  1.1  mrg  * Since the SCC graphs will no longer be used after this function,
   1275  1.1  mrg  * the schedules are actually swapped rather than copied.
   1276  1.1  mrg  */
   1277  1.1  mrg static isl_stat copy_partial(struct isl_sched_graph *graph,
   1278  1.1  mrg 	struct isl_clustering *c, int pos)
   1279  1.1  mrg {
   1280  1.1  mrg 	int i, j;
   1281  1.1  mrg 
   1282  1.1  mrg 	c->cluster[pos].n_total_row = c->scc[pos].n_total_row;
   1283  1.1  mrg 	c->cluster[pos].n_row = c->scc[pos].n_row;
   1284  1.1  mrg 	c->cluster[pos].maxvar = c->scc[pos].maxvar;
   1285  1.1  mrg 	j = 0;
   1286  1.1  mrg 	for (i = 0; i < graph->n; ++i) {
   1287  1.1  mrg 		int k;
   1288  1.1  mrg 		int s;
   1289  1.1  mrg 
   1290  1.1  mrg 		if (graph->node[i].cluster != pos)
   1291  1.1  mrg 			continue;
   1292  1.1  mrg 		s = graph->node[i].scc;
   1293  1.1  mrg 		k = c->scc_node[s]++;
   1294  1.1  mrg 		swap_sched(&c->cluster[pos].node[j], &c->scc[s].node[k]);
   1295  1.1  mrg 		if (c->scc[s].maxvar > c->cluster[pos].maxvar)
   1296  1.1  mrg 			c->cluster[pos].maxvar = c->scc[s].maxvar;
   1297  1.1  mrg 		++j;
   1298  1.1  mrg 	}
   1299  1.1  mrg 
   1300  1.1  mrg 	return isl_stat_ok;
   1301  1.1  mrg }
   1302  1.1  mrg 
   1303  1.1  mrg /* Is there a (conditional) validity dependence from node[j] to node[i],
   1304  1.1  mrg  * forcing node[i] to follow node[j] or do the nodes belong to the same
   1305  1.1  mrg  * cluster?
   1306  1.1  mrg  */
   1307  1.1  mrg static isl_bool node_follows_strong_or_same_cluster(int i, int j, void *user)
   1308  1.1  mrg {
   1309  1.1  mrg 	struct isl_sched_graph *graph = user;
   1310  1.1  mrg 
   1311  1.1  mrg 	if (graph->node[i].cluster == graph->node[j].cluster)
   1312  1.1  mrg 		return isl_bool_true;
   1313  1.1  mrg 	return isl_sched_graph_has_validity_edge(graph, &graph->node[j],
   1314  1.1  mrg 							&graph->node[i]);
   1315  1.1  mrg }
   1316  1.1  mrg 
   1317  1.1  mrg /* Extract the merged clusters of SCCs in "graph", sort them, and
   1318  1.1  mrg  * store them in c->clusters.  Update c->scc_cluster accordingly.
   1319  1.1  mrg  *
   1320  1.1  mrg  * First keep track of the cluster containing the SCC to which a node
   1321  1.1  mrg  * belongs in the node itself.
   1322  1.1  mrg  * Then extract the clusters into c->clusters, copying the current
   1323  1.1  mrg  * band schedule from the SCCs that belong to the cluster.
   1324  1.1  mrg  * Do this only once per cluster.
   1325  1.1  mrg  *
   1326  1.1  mrg  * Finally, topologically sort the clusters and update c->scc_cluster
   1327  1.1  mrg  * to match the new scc numbering.  While the SCCs were originally
   1328  1.1  mrg  * sorted already, some SCCs that depend on some other SCCs may
   1329  1.1  mrg  * have been merged with SCCs that appear before these other SCCs.
   1330  1.1  mrg  * A reordering may therefore be required.
   1331  1.1  mrg  */
   1332  1.1  mrg static isl_stat extract_clusters(isl_ctx *ctx, struct isl_sched_graph *graph,
   1333  1.1  mrg 	struct isl_clustering *c)
   1334  1.1  mrg {
   1335  1.1  mrg 	int i;
   1336  1.1  mrg 
   1337  1.1  mrg 	for (i = 0; i < graph->n; ++i)
   1338  1.1  mrg 		graph->node[i].cluster = c->scc_cluster[graph->node[i].scc];
   1339  1.1  mrg 
   1340  1.1  mrg 	for (i = 0; i < graph->scc; ++i) {
   1341  1.1  mrg 		if (c->scc_cluster[i] != i)
   1342  1.1  mrg 			continue;
   1343  1.1  mrg 		if (isl_sched_graph_extract_sub_graph(ctx, graph,
   1344  1.1  mrg 				&node_cluster_exactly,
   1345  1.1  mrg 				&edge_cluster_exactly, i, &c->cluster[i]) < 0)
   1346  1.1  mrg 			return isl_stat_error;
   1347  1.1  mrg 		c->cluster[i].src_scc = -1;
   1348  1.1  mrg 		c->cluster[i].dst_scc = -1;
   1349  1.1  mrg 		if (copy_partial(graph, c, i) < 0)
   1350  1.1  mrg 			return isl_stat_error;
   1351  1.1  mrg 	}
   1352  1.1  mrg 
   1353  1.1  mrg 	if (isl_sched_graph_detect_ccs(ctx, graph,
   1354  1.1  mrg 				&node_follows_strong_or_same_cluster) < 0)
   1355  1.1  mrg 		return isl_stat_error;
   1356  1.1  mrg 	for (i = 0; i < graph->n; ++i)
   1357  1.1  mrg 		c->scc_cluster[graph->node[i].scc] = graph->node[i].cluster;
   1358  1.1  mrg 
   1359  1.1  mrg 	return isl_stat_ok;
   1360  1.1  mrg }
   1361  1.1  mrg 
   1362  1.1  mrg /* Compute weights on the proximity edges of "graph" that can
   1363  1.1  mrg  * be used by find_proximity to find the most appropriate
   1364  1.1  mrg  * proximity edge to use to merge two clusters in "c".
   1365  1.1  mrg  * The weights are also used by has_bounded_distances to determine
   1366  1.1  mrg  * whether the merge should be allowed.
   1367  1.1  mrg  * Store the maximum of the computed weights in graph->max_weight.
   1368  1.1  mrg  *
   1369  1.1  mrg  * The computed weight is a measure for the number of remaining schedule
   1370  1.1  mrg  * dimensions that can still be completely aligned.
   1371  1.1  mrg  * In particular, compute the number of equalities between
   1372  1.1  mrg  * input dimensions and output dimensions in the proximity constraints.
   1373  1.1  mrg  * The directions that are already handled by outer schedule bands
   1374  1.1  mrg  * are projected out prior to determining this number.
   1375  1.1  mrg  *
   1376  1.1  mrg  * Edges that will never be considered by find_proximity are ignored.
   1377  1.1  mrg  */
   1378  1.1  mrg static isl_stat compute_weights(struct isl_sched_graph *graph,
   1379  1.1  mrg 	struct isl_clustering *c)
   1380  1.1  mrg {
   1381  1.1  mrg 	int i;
   1382  1.1  mrg 
   1383  1.1  mrg 	graph->max_weight = 0;
   1384  1.1  mrg 
   1385  1.1  mrg 	for (i = 0; i < graph->n_edge; ++i) {
   1386  1.1  mrg 		struct isl_sched_edge *edge = &graph->edge[i];
   1387  1.1  mrg 		struct isl_sched_node *src = edge->src;
   1388  1.1  mrg 		struct isl_sched_node *dst = edge->dst;
   1389  1.1  mrg 		isl_basic_map *hull;
   1390  1.1  mrg 		isl_bool prox;
   1391  1.1  mrg 		isl_size n_in, n_out, n;
   1392  1.1  mrg 
   1393  1.1  mrg 		prox = is_non_empty_proximity(edge);
   1394  1.1  mrg 		if (prox < 0)
   1395  1.1  mrg 			return isl_stat_error;
   1396  1.1  mrg 		if (!prox)
   1397  1.1  mrg 			continue;
   1398  1.1  mrg 		if (bad_cluster(&c->scc[edge->src->scc]) ||
   1399  1.1  mrg 		    bad_cluster(&c->scc[edge->dst->scc]))
   1400  1.1  mrg 			continue;
   1401  1.1  mrg 		if (c->scc_cluster[edge->dst->scc] ==
   1402  1.1  mrg 		    c->scc_cluster[edge->src->scc])
   1403  1.1  mrg 			continue;
   1404  1.1  mrg 
   1405  1.1  mrg 		hull = isl_map_affine_hull(isl_map_copy(edge->map));
   1406  1.1  mrg 		hull = isl_basic_map_transform_dims(hull, isl_dim_in, 0,
   1407  1.1  mrg 						    isl_mat_copy(src->vmap));
   1408  1.1  mrg 		hull = isl_basic_map_transform_dims(hull, isl_dim_out, 0,
   1409  1.1  mrg 						    isl_mat_copy(dst->vmap));
   1410  1.1  mrg 		hull = isl_basic_map_project_out(hull,
   1411  1.1  mrg 						isl_dim_in, 0, src->rank);
   1412  1.1  mrg 		hull = isl_basic_map_project_out(hull,
   1413  1.1  mrg 						isl_dim_out, 0, dst->rank);
   1414  1.1  mrg 		hull = isl_basic_map_remove_divs(hull);
   1415  1.1  mrg 		n_in = isl_basic_map_dim(hull, isl_dim_in);
   1416  1.1  mrg 		n_out = isl_basic_map_dim(hull, isl_dim_out);
   1417  1.1  mrg 		if (n_in < 0 || n_out < 0)
   1418  1.1  mrg 			hull = isl_basic_map_free(hull);
   1419  1.1  mrg 		hull = isl_basic_map_drop_constraints_not_involving_dims(hull,
   1420  1.1  mrg 							isl_dim_in, 0, n_in);
   1421  1.1  mrg 		hull = isl_basic_map_drop_constraints_not_involving_dims(hull,
   1422  1.1  mrg 							isl_dim_out, 0, n_out);
   1423  1.1  mrg 		n = isl_basic_map_n_equality(hull);
   1424  1.1  mrg 		isl_basic_map_free(hull);
   1425  1.1  mrg 		if (n < 0)
   1426  1.1  mrg 			return isl_stat_error;
   1427  1.1  mrg 		edge->weight = n;
   1428  1.1  mrg 
   1429  1.1  mrg 		if (edge->weight > graph->max_weight)
   1430  1.1  mrg 			graph->max_weight = edge->weight;
   1431  1.1  mrg 	}
   1432  1.1  mrg 
   1433  1.1  mrg 	return isl_stat_ok;
   1434  1.1  mrg }
   1435  1.1  mrg 
   1436  1.1  mrg /* Call isl_schedule_node_compute_finish_band on each of the clusters in "c" and
   1437  1.1  mrg  * update "node" to arrange for them to be executed in an order
   1438  1.1  mrg  * possibly involving set nodes that generalizes the topological order
   1439  1.1  mrg  * determined by the scc fields of the nodes in "graph".
   1440  1.1  mrg  *
   1441  1.1  mrg  * Note that at this stage, there are graph->scc clusters and
   1442  1.1  mrg  * their positions in c->cluster are determined by the values
   1443  1.1  mrg  * of c->scc_cluster.
   1444  1.1  mrg  *
   1445  1.1  mrg  * Construct an isl_scc_graph and perform the decomposition
   1446  1.1  mrg  * using this graph.
   1447  1.1  mrg  */
   1448  1.1  mrg static __isl_give isl_schedule_node *finish_bands_decompose(
   1449  1.1  mrg 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
   1450  1.1  mrg 	struct isl_clustering *c)
   1451  1.1  mrg {
   1452  1.1  mrg 	isl_ctx *ctx;
   1453  1.1  mrg 	struct isl_scc_graph *scc_graph;
   1454  1.1  mrg 
   1455  1.1  mrg 	ctx = isl_schedule_node_get_ctx(node);
   1456  1.1  mrg 
   1457  1.1  mrg 	scc_graph = isl_scc_graph_from_sched_graph(ctx, graph, c);
   1458  1.1  mrg 	node = isl_scc_graph_decompose(scc_graph, node);
   1459  1.1  mrg 	isl_scc_graph_free(scc_graph);
   1460  1.1  mrg 
   1461  1.1  mrg 	return node;
   1462  1.1  mrg }
   1463  1.1  mrg 
   1464  1.1  mrg /* Call isl_schedule_node_compute_finish_band on each of the clusters in "c"
   1465  1.1  mrg  * in their topological order.  This order is determined by the scc
   1466  1.1  mrg  * fields of the nodes in "graph".
   1467  1.1  mrg  * Combine the results in a sequence expressing the topological order.
   1468  1.1  mrg  *
   1469  1.1  mrg  * If there is only one cluster left, then there is no need to introduce
   1470  1.1  mrg  * a sequence node.  Also, in this case, the cluster necessarily contains
   1471  1.1  mrg  * the SCC at position 0 in the original graph and is therefore also
   1472  1.1  mrg  * stored in the first cluster of "c".
   1473  1.1  mrg  *
   1474  1.1  mrg  * If there are more than two clusters left, then some subsets of the clusters
   1475  1.1  mrg  * may still be independent of each other.  These could then still
   1476  1.1  mrg  * be reordered with respect to each other.  Call finish_bands_decompose
   1477  1.1  mrg  * to try and construct an ordering involving set and sequence nodes
   1478  1.1  mrg  * that generalizes the topological order.
   1479  1.1  mrg  * Note that at the outermost level there can be no independent components
   1480  1.1  mrg  * because isl_schedule_node_compute_wcc_clustering is called
   1481  1.1  mrg  * on a (weakly) connected component.
   1482  1.1  mrg  */
   1483  1.1  mrg static __isl_give isl_schedule_node *finish_bands_clustering(
   1484  1.1  mrg 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
   1485  1.1  mrg 	struct isl_clustering *c)
   1486  1.1  mrg {
   1487  1.1  mrg 	int i;
   1488  1.1  mrg 	isl_ctx *ctx;
   1489  1.1  mrg 	isl_union_set_list *filters;
   1490  1.1  mrg 
   1491  1.1  mrg 	if (graph->scc == 1)
   1492  1.1  mrg 		return isl_schedule_node_compute_finish_band(node,
   1493  1.1  mrg 							&c->cluster[0], 0);
   1494  1.1  mrg 	if (graph->scc > 2)
   1495  1.1  mrg 		return finish_bands_decompose(node, graph, c);
   1496  1.1  mrg 
   1497  1.1  mrg 	ctx = isl_schedule_node_get_ctx(node);
   1498  1.1  mrg 
   1499  1.1  mrg 	filters = isl_sched_graph_extract_sccs(ctx, graph);
   1500  1.1  mrg 	node = isl_schedule_node_insert_sequence(node, filters);
   1501  1.1  mrg 
   1502  1.1  mrg 	for (i = 0; i < graph->scc; ++i) {
   1503  1.1  mrg 		int j = c->scc_cluster[i];
   1504  1.1  mrg 		node = isl_schedule_node_grandchild(node, i, 0);
   1505  1.1  mrg 		node = isl_schedule_node_compute_finish_band(node,
   1506  1.1  mrg 							&c->cluster[j], 0);
   1507  1.1  mrg 		node = isl_schedule_node_grandparent(node);
   1508  1.1  mrg 	}
   1509  1.1  mrg 
   1510  1.1  mrg 	return node;
   1511  1.1  mrg }
   1512  1.1  mrg 
   1513  1.1  mrg /* Compute a schedule for a connected dependence graph by first considering
   1514  1.1  mrg  * each strongly connected component (SCC) in the graph separately and then
   1515  1.1  mrg  * incrementally combining them into clusters.
   1516  1.1  mrg  * Return the updated schedule node.
   1517  1.1  mrg  *
   1518  1.1  mrg  * Initially, each cluster consists of a single SCC, each with its
   1519  1.1  mrg  * own band schedule.  The algorithm then tries to merge pairs
   1520  1.1  mrg  * of clusters along a proximity edge until no more suitable
   1521  1.1  mrg  * proximity edges can be found.  During this merging, the schedule
   1522  1.1  mrg  * is maintained in the individual SCCs.
   1523  1.1  mrg  * After the merging is completed, the full resulting clusters
   1524  1.1  mrg  * are extracted and in finish_bands_clustering,
   1525  1.1  mrg  * isl_schedule_node_compute_finish_band is called on each of them to integrate
   1526  1.1  mrg  * the band into "node" and to continue the computation.
   1527  1.1  mrg  *
   1528  1.1  mrg  * compute_weights initializes the weights that are used by find_proximity.
   1529  1.1  mrg  */
   1530  1.1  mrg __isl_give isl_schedule_node *isl_schedule_node_compute_wcc_clustering(
   1531  1.1  mrg 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
   1532  1.1  mrg {
   1533  1.1  mrg 	isl_ctx *ctx;
   1534  1.1  mrg 	struct isl_clustering c;
   1535  1.1  mrg 	int i;
   1536  1.1  mrg 
   1537  1.1  mrg 	ctx = isl_schedule_node_get_ctx(node);
   1538  1.1  mrg 
   1539  1.1  mrg 	if (clustering_init(ctx, &c, graph) < 0)
   1540  1.1  mrg 		goto error;
   1541  1.1  mrg 
   1542  1.1  mrg 	if (compute_weights(graph, &c) < 0)
   1543  1.1  mrg 		goto error;
   1544  1.1  mrg 
   1545  1.1  mrg 	for (;;) {
   1546  1.1  mrg 		i = find_proximity(graph, &c);
   1547  1.1  mrg 		if (i < 0)
   1548  1.1  mrg 			goto error;
   1549  1.1  mrg 		if (i >= graph->n_edge)
   1550  1.1  mrg 			break;
   1551  1.1  mrg 		if (merge_clusters_along_edge(ctx, graph, i, &c) < 0)
   1552  1.1  mrg 			goto error;
   1553  1.1  mrg 	}
   1554  1.1  mrg 
   1555  1.1  mrg 	if (extract_clusters(ctx, graph, &c) < 0)
   1556  1.1  mrg 		goto error;
   1557  1.1  mrg 
   1558  1.1  mrg 	node = finish_bands_clustering(node, graph, &c);
   1559  1.1  mrg 
   1560  1.1  mrg 	clustering_free(ctx, &c);
   1561  1.1  mrg 	return node;
   1562  1.1  mrg error:
   1563  1.1  mrg 	clustering_free(ctx, &c);
   1564  1.1  mrg 	return isl_schedule_node_free(node);
   1565  1.1  mrg }
   1566