Home | History | Annotate | Line # | Download | only in dist
      1  1.1  mrg /*
      2  1.1  mrg  * Copyright 2012-2014 Ecole Normale Superieure
      3  1.1  mrg  * Copyright 2014      INRIA Rocquencourt
      4  1.1  mrg  *
      5  1.1  mrg  * Use of this software is governed by the MIT license
      6  1.1  mrg  *
      7  1.1  mrg  * Written by Sven Verdoolaege,
 * Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France
      9  1.1  mrg  * and Inria Paris - Rocquencourt, Domaine de Voluceau - Rocquencourt,
     10  1.1  mrg  * B.P. 105 - 78153 Le Chesnay, France
     11  1.1  mrg  */
     12  1.1  mrg 
     13  1.1  mrg #include <limits.h>
     14  1.1  mrg #include <isl/id.h>
     15  1.1  mrg #include <isl/val.h>
     16  1.1  mrg #include <isl/space.h>
     17  1.1  mrg #include <isl/aff.h>
     18  1.1  mrg #include <isl/constraint.h>
     19  1.1  mrg #include <isl/set.h>
     20  1.1  mrg #include <isl/ilp.h>
     21  1.1  mrg #include <isl/union_set.h>
     22  1.1  mrg #include <isl/union_map.h>
     23  1.1  mrg #include <isl/schedule_node.h>
     24  1.1  mrg #include <isl/options.h>
     25  1.1  mrg #include <isl_sort.h>
     26  1.1  mrg #include <isl_tarjan.h>
     27  1.1  mrg #include <isl_ast_private.h>
     28  1.1  mrg #include <isl_ast_build_expr.h>
     29  1.1  mrg #include <isl_ast_build_private.h>
     30  1.1  mrg #include <isl_ast_graft_private.h>
     31  1.1  mrg 
     32  1.1  mrg /* Try and reduce the number of disjuncts in the representation of "set",
     33  1.1  mrg  * without dropping explicit representations of local variables.
     34  1.1  mrg  */
     35  1.1  mrg static __isl_give isl_set *isl_set_coalesce_preserve(__isl_take isl_set *set)
     36  1.1  mrg {
     37  1.1  mrg 	isl_ctx *ctx;
     38  1.1  mrg 	int save_preserve;
     39  1.1  mrg 
     40  1.1  mrg 	if (!set)
     41  1.1  mrg 		return NULL;
     42  1.1  mrg 
     43  1.1  mrg 	ctx = isl_set_get_ctx(set);
     44  1.1  mrg 	save_preserve = isl_options_get_coalesce_preserve_locals(ctx);
     45  1.1  mrg 	isl_options_set_coalesce_preserve_locals(ctx, 1);
     46  1.1  mrg 	set = isl_set_coalesce(set);
     47  1.1  mrg 	isl_options_set_coalesce_preserve_locals(ctx, save_preserve);
     48  1.1  mrg 	return set;
     49  1.1  mrg }
     50  1.1  mrg 
/* Data used in generate_domain.
 *
 * "build" is the input build.
 * "list" collects the results.
 */
struct isl_generate_domain_data {
	/* The build within which the AST fragments are generated. */
	isl_ast_build *build;

	/* The list of grafts generated so far. */
	isl_ast_graft_list *list;
};
     61  1.1  mrg 
     62  1.1  mrg static __isl_give isl_ast_graft_list *generate_next_level(
     63  1.1  mrg 	__isl_take isl_union_map *executed,
     64  1.1  mrg 	__isl_take isl_ast_build *build);
     65  1.1  mrg static __isl_give isl_ast_graft_list *generate_code(
     66  1.1  mrg 	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build,
     67  1.1  mrg 	int internal);
     68  1.1  mrg 
/* Generate an AST for a single domain based on
 * the (non single valued) inverse schedule "executed".
 *
 * We extend the schedule with the iteration domain
 * and continue generating through a call to generate_code.
 *
 * In particular, if executed has the form
 *
 *	S -> D
 *
 * then we continue generating code on
 *
 *	[S -> D] -> D
 *
 * The extended inverse schedule is clearly single valued
 * ensuring that the nested generate_code will not reach this function,
 * but will instead create calls to all elements of D that need
 * to be executed from the current schedule domain.
 */
static isl_stat generate_non_single_valued(__isl_take isl_map *executed,
	struct isl_generate_domain_data *data)
{
	isl_map *identity;
	isl_ast_build *build;
	isl_ast_graft_list *list;

	build = isl_ast_build_copy(data->build);

	/* Turn "S -> D" into "[S -> D] -> D" by taking the domain product
	 * with the identity relation on the range D. */
	identity = isl_set_identity(isl_map_range(isl_map_copy(executed)));
	executed = isl_map_domain_product(executed, identity);
	/* Record that we are inside generate_non_single_valued so that
	 * generate_domain can avoid recursing back here indefinitely. */
	build = isl_ast_build_set_single_valued(build, 1);

	list = generate_code(isl_union_map_from_map(executed), build, 1);

	data->list = isl_ast_graft_list_concat(data->list, list);

	return isl_stat_ok;
}
    107  1.1  mrg 
/* Call the at_each_domain callback, if requested by the user,
 * after recording the current inverse schedule in the build.
 *
 * The callback may replace graft->node; if it returns NULL,
 * the entire graft is freed and NULL is returned.
 */
static __isl_give isl_ast_graft *at_each_domain(__isl_take isl_ast_graft *graft,
	__isl_keep isl_map *executed, __isl_keep isl_ast_build *build)
{
	if (!graft || !build)
		return isl_ast_graft_free(graft);
	if (!build->at_each_domain)
		return graft;

	/* Work on a copy of the build so that the caller's build
	 * is left untouched. */
	build = isl_ast_build_copy(build);
	build = isl_ast_build_set_executed(build,
			isl_union_map_from_map(isl_map_copy(executed)));
	if (!build)
		return isl_ast_graft_free(graft);

	graft->node = build->at_each_domain(graft->node,
					build, build->at_each_domain_user);
	isl_ast_build_free(build);

	/* The callback may have failed or consumed the node. */
	if (!graft->node)
		graft = isl_ast_graft_free(graft);

	return graft;
}
    134  1.1  mrg 
/* Generate a call expression for the single executed
 * domain element "map" and put a guard around it based its (simplified)
 * domain.  "executed" is the original inverse schedule from which "map"
 * has been derived.  In particular, "map" is either identical to "executed"
 * or it is the result of gisting "executed" with respect to the build domain.
 * "executed" is only used if there is an at_each_domain callback.
 *
 * At this stage, any pending constraints in the build can no longer
 * be simplified with respect to any enforced constraints since
 * the call node does not have any enforced constraints.
 * Since all pending constraints not covered by any enforced constraints
 * will be added as a guard to the graft in create_node_scaled,
 * even in the eliminated case, the pending constraints
 * can be considered to have been generated by outer constructs.
 *
 * If the user has set an at_each_domain callback, it is called
 * on the constructed call expression node.
 */
static isl_stat add_domain(__isl_take isl_map *executed,
	__isl_take isl_map *map, struct isl_generate_domain_data *data)
{
	isl_ast_build *build;
	isl_ast_graft *graft;
	isl_ast_graft_list *list;
	isl_set *guard, *pending;

	/* Move the pending constraints of the local copy of the build
	 * over to the guard (see comment above). */
	build = isl_ast_build_copy(data->build);
	pending = isl_ast_build_get_pending(build);
	build = isl_ast_build_replace_pending_by_guard(build, pending);

	/* Derive and simplify the guard from the domain of "map". */
	guard = isl_map_domain(isl_map_copy(map));
	guard = isl_set_compute_divs(guard);
	guard = isl_set_coalesce_preserve(guard);
	guard = isl_set_gist(guard, isl_ast_build_get_generated(build));
	guard = isl_ast_build_specialize(build, guard);

	/* "map" and "executed" are consumed here. */
	graft = isl_ast_graft_alloc_domain(map, build);
	graft = at_each_domain(graft, executed, build);
	isl_ast_build_free(build);
	isl_map_free(executed);
	graft = isl_ast_graft_add_guard(graft, guard, data->build);

	list = isl_ast_graft_list_from_ast_graft(graft);
	data->list = isl_ast_graft_list_concat(data->list, list);

	return isl_stat_ok;
}
    182  1.1  mrg 
/* Generate an AST for a single domain based on
 * the inverse schedule "executed" and add it to data->list.
 *
 * If there is more than one domain element associated to the current
 * schedule "time", then we need to continue the generation process
 * in generate_non_single_valued.
 * Note that the inverse schedule being single-valued may depend
 * on constraints that are only available in the original context
 * domain specified by the user.  We therefore first introduce
 * some of the constraints of data->build->domain.  In particular,
 * we intersect with a single-disjunct approximation of this set.
 * We perform this approximation to avoid further splitting up
 * the executed relation, possibly introducing a disjunctive guard
 * on the statement.
 *
 * On the other hand, we only perform the test after having taken the gist
 * of the domain as the resulting map is the one from which the call
 * expression is constructed.  Using this map to construct the call
 * expression usually yields simpler results in cases where the original
 * map is not obviously single-valued.
 * If the original map is obviously single-valued, then the gist
 * operation is skipped.
 *
 * Because we perform the single-valuedness test on the gisted map,
 * we may in rare cases fail to recognize that the inverse schedule
 * is single-valued.  This becomes problematic if this happens
 * from the recursive call through generate_non_single_valued
 * as we would then end up in an infinite recursion.
 * We therefore check if we are inside a call to generate_non_single_valued
 * and revert to the ungisted map if the gisted map turns out not to be
 * single-valued.
 *
 * Otherwise, call add_domain to generate a call expression (with guard) and
 * to call the at_each_domain callback, if any.
 */
static isl_stat generate_domain(__isl_take isl_map *executed, void *user)
{
	struct isl_generate_domain_data *data = user;
	isl_set *domain;
	isl_map *map = NULL;
	int empty, sv;

	/* Intersect with a single-disjunct approximation of the build
	 * domain (see comment above). */
	domain = isl_ast_build_get_domain(data->build);
	domain = isl_set_from_basic_set(isl_set_simple_hull(domain));
	executed = isl_map_intersect_domain(executed, domain);
	empty = isl_map_is_empty(executed);
	if (empty < 0)
		goto error;
	if (empty) {
		isl_map_free(executed);
		return isl_stat_ok;
	}

	/* If "executed" is obviously single-valued, skip the gist. */
	sv = isl_map_plain_is_single_valued(executed);
	if (sv < 0)
		goto error;
	if (sv)
		return add_domain(executed, isl_map_copy(executed), data);

	executed = isl_map_coalesce(executed);
	map = isl_map_copy(executed);
	map = isl_ast_build_compute_gist_map_domain(data->build, map);
	sv = isl_map_is_single_valued(map);
	if (sv < 0)
		goto error;
	if (!sv) {
		isl_map_free(map);
		/* Inside generate_non_single_valued, fall back to the
		 * ungisted map to avoid infinite recursion. */
		if (data->build->single_valued)
			map = isl_map_copy(executed);
		else
			return generate_non_single_valued(executed, data);
	}

	return add_domain(executed, map, data);
error:
	isl_map_free(map);
	isl_map_free(executed);
	return isl_stat_error;
}
    262  1.1  mrg 
/* Call build->create_leaf to a create "leaf" node in the AST,
 * encapsulate the result in an isl_ast_graft and return the result
 * as a 1-element list.
 *
 * Note that the node returned by the user may be an entire tree.
 *
 * Since the node itself cannot enforce any constraints, we turn
 * all pending constraints into guards and add them to the resulting
 * graft to ensure that they will be generated.
 *
 * Before we pass control to the user, we first clear some information
 * from the build that is (presumably) only meaningful
 * for the current code generation.
 * This includes the create_leaf callback itself, so we make a copy
 * of the build first.
 */
static __isl_give isl_ast_graft_list *call_create_leaf(
	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
{
	isl_set *guard;
	isl_ast_node *node;
	isl_ast_graft *graft;
	isl_ast_build *user_build;

	guard = isl_ast_build_get_pending(build);
	/* Prepare the copy of the build that is handed to the user:
	 * pending constraints become guards, the inverse schedule is
	 * recorded and local (callback) information is cleared. */
	user_build = isl_ast_build_copy(build);
	user_build = isl_ast_build_replace_pending_by_guard(user_build,
							isl_set_copy(guard));
	user_build = isl_ast_build_set_executed(user_build, executed);
	user_build = isl_ast_build_clear_local_info(user_build);
	if (!user_build)
		node = NULL;
	else
		node = build->create_leaf(user_build, build->create_leaf_user);
	graft = isl_ast_graft_alloc(node, build);
	graft = isl_ast_graft_add_guard(graft, guard, build);
	isl_ast_build_free(build);
	return isl_ast_graft_list_from_ast_graft(graft);
}
    302  1.1  mrg 
    303  1.1  mrg static __isl_give isl_ast_graft_list *build_ast_from_child(
    304  1.1  mrg 	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
    305  1.1  mrg 	__isl_take isl_union_map *executed);
    306  1.1  mrg 
/* Generate an AST after having handled the complete schedule
 * of this call to the code generator or the complete band
 * if we are generating an AST from a schedule tree.
 *
 * If we are inside a band node, then move on to the child of the band.
 *
 * If the user has specified a create_leaf callback, control
 * is passed to the user in call_create_leaf.
 *
 * Otherwise, we generate one or more calls for each individual
 * domain in generate_domain.
 */
static __isl_give isl_ast_graft_list *generate_inner_level(
	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
{
	isl_ctx *ctx;
	struct isl_generate_domain_data data = { build };

	if (!build || !executed)
		goto error;

	if (isl_ast_build_has_schedule_node(build)) {
		isl_schedule_node *node;
		node = isl_ast_build_get_schedule_node(build);
		build = isl_ast_build_reset_schedule_node(build);
		return build_ast_from_child(build, node, executed);
	}

	if (build->create_leaf)
		return call_create_leaf(executed, build);

	ctx = isl_union_map_get_ctx(executed);
	data.list = isl_ast_graft_list_alloc(ctx, 0);
	if (isl_union_map_foreach_map(executed, &generate_domain, &data) < 0)
		data.list = isl_ast_graft_list_free(data.list);

	/* The "if (0)" makes the error label fall through to the shared
	 * cleanup below with a NULL result, while being skipped on the
	 * normal path. */
	if (0)
error:		data.list = NULL;
	isl_ast_build_free(build);
	isl_union_map_free(executed);
	return data.list;
}
    349  1.1  mrg 
    350  1.1  mrg /* Call the before_each_for callback, if requested by the user.
    351  1.1  mrg  */
    352  1.1  mrg static __isl_give isl_ast_node *before_each_for(__isl_take isl_ast_node *node,
    353  1.1  mrg 	__isl_keep isl_ast_build *build)
    354  1.1  mrg {
    355  1.1  mrg 	isl_id *id;
    356  1.1  mrg 
    357  1.1  mrg 	if (!node || !build)
    358  1.1  mrg 		return isl_ast_node_free(node);
    359  1.1  mrg 	if (!build->before_each_for)
    360  1.1  mrg 		return node;
    361  1.1  mrg 	id = build->before_each_for(build, build->before_each_for_user);
    362  1.1  mrg 	node = isl_ast_node_set_annotation(node, id);
    363  1.1  mrg 	return node;
    364  1.1  mrg }
    365  1.1  mrg 
    366  1.1  mrg /* Call the after_each_for callback, if requested by the user.
    367  1.1  mrg  */
    368  1.1  mrg static __isl_give isl_ast_graft *after_each_for(__isl_take isl_ast_graft *graft,
    369  1.1  mrg 	__isl_keep isl_ast_build *build)
    370  1.1  mrg {
    371  1.1  mrg 	if (!graft || !build)
    372  1.1  mrg 		return isl_ast_graft_free(graft);
    373  1.1  mrg 	if (!build->after_each_for)
    374  1.1  mrg 		return graft;
    375  1.1  mrg 	graft->node = build->after_each_for(graft->node, build,
    376  1.1  mrg 						build->after_each_for_user);
    377  1.1  mrg 	if (!graft->node)
    378  1.1  mrg 		return isl_ast_graft_free(graft);
    379  1.1  mrg 	return graft;
    380  1.1  mrg }
    381  1.1  mrg 
/* Plug in all the known values of the current and outer dimensions
 * in the domain of "executed".  In principle, we only need to plug
 * in the known value of the current dimension since the values of
 * outer dimensions have been plugged in already.
 * However, it turns out to be easier to just plug in all known values.
 */
static __isl_give isl_union_map *plug_in_values(
	__isl_take isl_union_map *executed, __isl_keep isl_ast_build *build)
{
	return isl_ast_build_substitute_values_union_map_domain(build,
								    executed);
}
    394  1.1  mrg 
    395  1.1  mrg /* Check if the constraint "c" is a lower bound on dimension "pos",
    396  1.1  mrg  * an upper bound, or independent of dimension "pos".
    397  1.1  mrg  */
    398  1.1  mrg static int constraint_type(isl_constraint *c, int pos)
    399  1.1  mrg {
    400  1.1  mrg 	if (isl_constraint_is_lower_bound(c, isl_dim_set, pos))
    401  1.1  mrg 		return 1;
    402  1.1  mrg 	if (isl_constraint_is_upper_bound(c, isl_dim_set, pos))
    403  1.1  mrg 		return 2;
    404  1.1  mrg 	return 0;
    405  1.1  mrg }
    406  1.1  mrg 
    407  1.1  mrg /* Compare the types of the constraints "a" and "b",
    408  1.1  mrg  * resulting in constraints that are independent of "depth"
    409  1.1  mrg  * to be sorted before the lower bounds on "depth", which in
    410  1.1  mrg  * turn are sorted before the upper bounds on "depth".
    411  1.1  mrg  */
    412  1.1  mrg static int cmp_constraint(__isl_keep isl_constraint *a,
    413  1.1  mrg 	__isl_keep isl_constraint *b, void *user)
    414  1.1  mrg {
    415  1.1  mrg 	int *depth = user;
    416  1.1  mrg 	int t1 = constraint_type(a, *depth);
    417  1.1  mrg 	int t2 = constraint_type(b, *depth);
    418  1.1  mrg 
    419  1.1  mrg 	return t1 - t2;
    420  1.1  mrg }
    421  1.1  mrg 
/* Extract a lower bound on dimension "pos" from constraint "c".
 *
 * If the constraint is of the form
 *
 *	a x + f(...) >= 0
 *
 * then we essentially return
 *
 *	l = ceil(-f(...)/a)
 *
 * However, if the current dimension is strided, then we need to make
 * sure that the lower bound we construct is of the form
 *
 *	f + s a
 *
 * with f the offset and s the stride.
 * We therefore compute
 *
 *	f + s * ceil((l - f)/s)
 */
static __isl_give isl_aff *lower_bound(__isl_keep isl_constraint *c,
	int pos, __isl_keep isl_ast_build *build)
{
	isl_aff *aff;

	/* l = ceil(-f(...)/a) */
	aff = isl_constraint_get_bound(c, isl_dim_set, pos);
	aff = isl_aff_ceil(aff);

	if (isl_ast_build_has_stride(build, pos)) {
		isl_aff *offset;
		isl_val *stride;

		offset = isl_ast_build_get_offset(build, pos);
		stride = isl_ast_build_get_stride(build, pos);

		/* f + s * ceil((l - f)/s), with f the offset and
		 * s the stride. */
		aff = isl_aff_sub(aff, isl_aff_copy(offset));
		aff = isl_aff_scale_down_val(aff, isl_val_copy(stride));
		aff = isl_aff_ceil(aff);
		aff = isl_aff_scale_val(aff, stride);
		aff = isl_aff_add(aff, offset);
	}

	/* Simplify with respect to the build domain. */
	aff = isl_ast_build_compute_gist_aff(build, aff);

	return aff;
}
    468  1.1  mrg 
/* Return the exact lower bound (or upper bound if "upper" is set)
 * of "domain" as a piecewise affine expression.
 *
 * If we are computing a lower bound (of a strided dimension), then
 * we need to make sure it is of the form
 *
 *	f + s a
 *
 * where f is the offset and s is the stride.
 * We therefore need to include the stride constraint before computing
 * the minimum.
 */
static __isl_give isl_pw_aff *exact_bound(__isl_keep isl_set *domain,
	__isl_keep isl_ast_build *build, int upper)
{
	isl_set *stride;
	isl_map *it_map;
	isl_pw_aff *pa;
	isl_pw_multi_aff *pma;

	domain = isl_set_copy(domain);
	if (!upper) {
		/* Restrict to values satisfying the stride constraint
		 * (see comment above). */
		stride = isl_ast_build_get_stride_constraint(build);
		domain = isl_set_intersect(domain, stride);
	}
	/* Map the domain to the current iterator and take the extremal
	 * value of that iterator (output dimension 0). */
	it_map = isl_ast_build_map_to_iterator(build, domain);
	if (upper)
		pma = isl_map_lexmax_pw_multi_aff(it_map);
	else
		pma = isl_map_lexmin_pw_multi_aff(it_map);
	pa = isl_pw_multi_aff_get_pw_aff(pma, 0);
	isl_pw_multi_aff_free(pma);
	pa = isl_ast_build_compute_gist_pw_aff(build, pa);
	pa = isl_pw_aff_coalesce(pa);

	return pa;
}
    506  1.1  mrg 
/* Callback for sorting the isl_pw_aff_list passed to reduce_list and
 * remove_redundant_lower_bounds.
 *
 * The "user" argument is unused; ordering is the plain structural
 * comparison of the two piecewise affine expressions.
 */
static int reduce_list_cmp(__isl_keep isl_pw_aff *a, __isl_keep isl_pw_aff *b,
	void *user)
{
	return isl_pw_aff_plain_cmp(a, b);
}
    515  1.1  mrg 
/* Given a list of lower bounds "list", remove those that are redundant
 * with respect to the other bounds in "list" and the domain of "build".
 *
 * We first sort the bounds in the same way as they would be sorted
 * by set_for_node_expressions so that we can try and remove the last
 * bounds first.
 *
 * For a lower bound to be effective, there needs to be at least
 * one domain element for which it is larger than all other lower bounds.
 * For each lower bound we therefore intersect the domain with
 * the conditions that it is larger than all other bounds and
 * check whether the result is empty.  If so, the bound can be removed.
 */
static __isl_give isl_pw_aff_list *remove_redundant_lower_bounds(
	__isl_take isl_pw_aff_list *list, __isl_keep isl_ast_build *build)
{
	int i, j;
	isl_size n;
	isl_set *domain;

	list = isl_pw_aff_list_sort(list, &reduce_list_cmp, NULL);

	n = isl_pw_aff_list_n_pw_aff(list);
	if (n < 0)
		return isl_pw_aff_list_free(list);
	/* A single bound can never be redundant. */
	if (n <= 1)
		return list;

	domain = isl_ast_build_get_domain(build);

	/* Iterate backwards so that dropping element i does not
	 * disturb the positions still to be visited. */
	for (i = n - 1; i >= 0; --i) {
		isl_pw_aff *pa_i;
		isl_set *domain_i;
		int empty;

		domain_i = isl_set_copy(domain);
		pa_i = isl_pw_aff_list_get_pw_aff(list, i);

		/* Intersect with the conditions that bound i exceeds
		 * every other remaining bound. */
		for (j = 0; j < n; ++j) {
			isl_pw_aff *pa_j;
			isl_set *better;

			if (j == i)
				continue;

			pa_j = isl_pw_aff_list_get_pw_aff(list, j);
			better = isl_pw_aff_gt_set(isl_pw_aff_copy(pa_i), pa_j);
			domain_i = isl_set_intersect(domain_i, better);
		}

		empty = isl_set_is_empty(domain_i);

		isl_set_free(domain_i);
		isl_pw_aff_free(pa_i);

		if (empty < 0)
			goto error;
		if (!empty)
			continue;
		/* Bound i is never the strict maximum: drop it and
		 * keep "n" in sync with the shrunken list. */
		list = isl_pw_aff_list_drop(list, i, 1);
		n--;
	}

	isl_set_free(domain);

	return list;
error:
	isl_set_free(domain);
	return isl_pw_aff_list_free(list);
}
    586  1.1  mrg 
/* Extract a lower bound on dimension "pos" from each constraint
 * in "constraints" and return the list of lower bounds.
 * If "constraints" has zero elements, then we extract a lower bound
 * from "domain" instead.
 *
 * If the current dimension is strided, then the lower bound
 * is adjusted by lower_bound to match the stride information.
 * This modification may make one or more lower bounds redundant
 * with respect to the other lower bounds.  We therefore check
 * for this condition and remove the redundant lower bounds.
 */
static __isl_give isl_pw_aff_list *lower_bounds(
	__isl_keep isl_constraint_list *constraints, int pos,
	__isl_keep isl_set *domain, __isl_keep isl_ast_build *build)
{
	isl_ctx *ctx;
	isl_pw_aff_list *list;
	int i;
	isl_size n;

	if (!build)
		return NULL;

	n = isl_constraint_list_n_constraint(constraints);
	if (n < 0)
		return NULL;
	/* No explicit constraints: fall back to the exact lower bound
	 * of "domain". */
	if (n == 0) {
		isl_pw_aff *pa;
		pa = exact_bound(domain, build, 0);
		return isl_pw_aff_list_from_pw_aff(pa);
	}

	ctx = isl_ast_build_get_ctx(build);
	list = isl_pw_aff_list_alloc(ctx,n);

	for (i = 0; i < n; ++i) {
		isl_aff *aff;
		isl_constraint *c;

		c = isl_constraint_list_get_constraint(constraints, i);
		aff = lower_bound(c, pos, build);
		isl_constraint_free(c);
		list = isl_pw_aff_list_add(list, isl_pw_aff_from_aff(aff));
	}

	/* Stride adjustment may have made some bounds redundant. */
	if (isl_ast_build_has_stride(build, pos))
		list = remove_redundant_lower_bounds(list, build);

	return list;
}
    637  1.1  mrg 
/* Extract an upper bound on dimension "pos" from each constraint
 * in "constraints" and return the list of upper bounds.
 * If "constraints" has zero elements, then we extract an upper bound
 * from "domain" instead.
 */
static __isl_give isl_pw_aff_list *upper_bounds(
	__isl_keep isl_constraint_list *constraints, int pos,
	__isl_keep isl_set *domain, __isl_keep isl_ast_build *build)
{
	isl_ctx *ctx;
	isl_pw_aff_list *list;
	int i;
	isl_size n;

	n = isl_constraint_list_n_constraint(constraints);
	if (n < 0)
		return NULL;
	/* No explicit constraints: fall back to the exact upper bound
	 * of "domain". */
	if (n == 0) {
		isl_pw_aff *pa;
		pa = exact_bound(domain, build, 1);
		return isl_pw_aff_list_from_pw_aff(pa);
	}

	ctx = isl_ast_build_get_ctx(build);
	list = isl_pw_aff_list_alloc(ctx,n);

	for (i = 0; i < n; ++i) {
		isl_aff *aff;
		isl_constraint *c;

		c = isl_constraint_list_get_constraint(constraints, i);
		aff = isl_constraint_get_bound(c, isl_dim_set, pos);
		isl_constraint_free(c);
		/* Round the bound down to an integer value. */
		aff = isl_aff_floor(aff);
		list = isl_pw_aff_list_add(list, isl_pw_aff_from_aff(aff));
	}

	return list;
}
    677  1.1  mrg 
    678  1.1  mrg /* Return an isl_ast_expr that performs the reduction of type "type"
    679  1.1  mrg  * on AST expressions corresponding to the elements in "list".
    680  1.1  mrg  *
    681  1.1  mrg  * The list is assumed to contain at least one element.
    682  1.1  mrg  * If the list contains exactly one element, then the returned isl_ast_expr
    683  1.1  mrg  * simply computes that affine expression.
    684  1.1  mrg  * If the list contains more than one element, then we sort it
    685  1.1  mrg  * using a fairly arbitrary but hopefully reasonably stable order.
    686  1.1  mrg  */
static __isl_give isl_ast_expr *reduce_list(enum isl_ast_expr_op_type type,
	__isl_keep isl_pw_aff_list *list, __isl_keep isl_ast_build *build)
{
	int i;
	isl_size n;
	isl_ctx *ctx;
	isl_ast_expr *expr;

	n = isl_pw_aff_list_n_pw_aff(list);
	if (n < 0)
		return NULL;

	/* A single element needs no reduction operation. */
	if (n == 1)
		return isl_ast_build_expr_from_pw_aff_internal(build,
				isl_pw_aff_list_get_pw_aff(list, 0));

	ctx = isl_pw_aff_list_get_ctx(list);
	expr = isl_ast_expr_alloc_op(ctx, type, n);

	/* Sort a copy of the list so that the argument order of the
	 * reduction is stable across runs.
	 */
	list = isl_pw_aff_list_copy(list);
	list = isl_pw_aff_list_sort(list, &reduce_list_cmp, NULL);
	if (!list)
		return isl_ast_expr_free(expr);

	for (i = 0; i < n; ++i) {
		isl_ast_expr *expr_i;

		expr_i = isl_ast_build_expr_from_pw_aff_internal(build,
				isl_pw_aff_list_get_pw_aff(list, i));
		expr = isl_ast_expr_op_add_arg(expr, expr_i);
	}

	isl_pw_aff_list_free(list);
	return expr;
}
    722  1.1  mrg 
    723  1.1  mrg /* Add guards implied by the "generated constraints",
    724  1.1  mrg  * but not (necessarily) enforced by the generated AST to "guard".
 * In particular, if there are any stride constraints,
    726  1.1  mrg  * then add the guard implied by those constraints.
    727  1.1  mrg  * If we have generated a degenerate loop, then add the guard
    728  1.1  mrg  * implied by "bounds" on the outer dimensions, i.e., the guard
    729  1.1  mrg  * that ensures that the single value actually exists.
    730  1.1  mrg  * Since there may also be guards implied by a combination
    731  1.1  mrg  * of these constraints, we first combine them before
    732  1.1  mrg  * deriving the implied constraints.
    733  1.1  mrg  */
static __isl_give isl_set *add_implied_guards(__isl_take isl_set *guard,
	int degenerate, __isl_keep isl_basic_set *bounds,
	__isl_keep isl_ast_build *build)
{
	isl_size depth;
	isl_bool has_stride;
	isl_space *space;
	isl_set *dom, *set;

	depth = isl_ast_build_get_depth(build);
	has_stride = isl_ast_build_has_stride(build, depth);
	if (depth < 0 || has_stride < 0)
		return isl_set_free(guard);
	/* Without a stride and without degeneracy there is nothing to add. */
	if (!has_stride && !degenerate)
		return guard;

	space = isl_basic_set_get_space(bounds);
	dom = isl_set_universe(space);

	if (degenerate) {
		/* Only the constraints involving the current dimension
		 * can imply conditions on the existence of its single value.
		 */
		bounds = isl_basic_set_copy(bounds);
		bounds = isl_basic_set_drop_constraints_not_involving_dims(
					bounds, isl_dim_set, depth, 1);
		set = isl_set_from_basic_set(bounds);
		dom = isl_set_intersect(dom, set);
	}

	if (has_stride) {
		set = isl_ast_build_get_stride_constraint(build);
		dom = isl_set_intersect(dom, set);
	}

	/* Project out the current dimension to obtain conditions on the
	 * outer dimensions only, then simplify them with respect to
	 * what is already known in "build".
	 */
	dom = isl_set_eliminate(dom, isl_dim_set, depth, 1);
	dom = isl_ast_build_compute_gist(build, dom);
	guard = isl_set_intersect(guard, dom);

	return guard;
}
    772  1.1  mrg 
    773  1.1  mrg /* Update "graft" based on "sub_build" for the degenerate case.
    774  1.1  mrg  *
    775  1.1  mrg  * "build" is the build in which graft->node was created
    776  1.1  mrg  * "sub_build" contains information about the current level itself,
    777  1.1  mrg  * including the single value attained.
    778  1.1  mrg  *
    779  1.1  mrg  * We set the initialization part of the for loop to the single
    780  1.1  mrg  * value attained by the current dimension.
    781  1.1  mrg  * The increment and condition are not strictly needed as they are known
    782  1.1  mrg  * to be "1" and "iterator <= value" respectively.
    783  1.1  mrg  */
static __isl_give isl_ast_graft *refine_degenerate(
	__isl_take isl_ast_graft *graft, __isl_keep isl_ast_build *build,
	__isl_keep isl_ast_build *sub_build)
{
	isl_pw_aff *value;
	isl_ast_expr *init;

	if (!graft || !sub_build)
		return isl_ast_graft_free(graft);

	/* The single value attained by the current dimension,
	 * as recorded in "sub_build".
	 */
	value = isl_pw_aff_copy(sub_build->value);

	init = isl_ast_build_expr_from_pw_aff_internal(build, value);
	graft->node = isl_ast_node_for_set_init(graft->node, init);
	if (!graft->node)
		return isl_ast_graft_free(graft);

	return graft;
}
    803  1.1  mrg 
    804  1.1  mrg /* Return the intersection of constraints in "list" as a set.
    805  1.1  mrg  */
static __isl_give isl_set *intersect_constraints(
	__isl_keep isl_constraint_list *list)
{
	int i;
	isl_size n;
	isl_basic_set *bset;

	n = isl_constraint_list_n_constraint(list);
	if (n < 0)
		return NULL;
	/* The caller is expected to pass a non-empty list. */
	if (n < 1)
		isl_die(isl_constraint_list_get_ctx(list), isl_error_internal,
			"expecting at least one constraint", return NULL);

	/* Start from the first constraint and intersect in the others. */
	bset = isl_basic_set_from_constraint(
				isl_constraint_list_get_constraint(list, 0));
	for (i = 1; i < n; ++i) {
		isl_basic_set *bset_i;

		bset_i = isl_basic_set_from_constraint(
				isl_constraint_list_get_constraint(list, i));
		bset = isl_basic_set_intersect(bset, bset_i);
	}

	return isl_set_from_basic_set(bset);
}
    832  1.1  mrg 
    833  1.1  mrg /* Compute the constraints on the outer dimensions enforced by
    834  1.1  mrg  * graft->node and add those constraints to graft->enforced,
    835  1.1  mrg  * in case the upper bound is expressed as a set "upper".
    836  1.1  mrg  *
    837  1.1  mrg  * In particular, if l(...) is a lower bound in "lower", and
    838  1.1  mrg  *
    839  1.1  mrg  *	-a i + f(...) >= 0		or	a i <= f(...)
    840  1.1  mrg  *
 * is an upper bound constraint on the current dimension i,
    842  1.1  mrg  * then the for loop enforces the constraint
    843  1.1  mrg  *
    844  1.1  mrg  *	-a l(...) + f(...) >= 0		or	a l(...) <= f(...)
    845  1.1  mrg  *
    846  1.1  mrg  * We therefore simply take each lower bound in turn, plug it into
    847  1.1  mrg  * the upper bounds and compute the intersection over all lower bounds.
    848  1.1  mrg  *
    849  1.1  mrg  * If a lower bound is a rational expression, then
    850  1.1  mrg  * isl_basic_set_preimage_multi_aff will force this rational
    851  1.1  mrg  * expression to have only integer values.  However, the loop
    852  1.1  mrg  * itself does not enforce this integrality constraint.  We therefore
    853  1.1  mrg  * use the ceil of the lower bounds instead of the lower bounds themselves.
    854  1.1  mrg  * Other constraints will make sure that the for loop is only executed
    855  1.1  mrg  * when each of the lower bounds attains an integral value.
    856  1.1  mrg  * In particular, potentially rational values only occur in
    857  1.1  mrg  * lower_bound if the offset is a (seemingly) rational expression,
    858  1.1  mrg  * but then outer conditions will make sure that this rational expression
    859  1.1  mrg  * only attains integer values.
    860  1.1  mrg  */
static __isl_give isl_ast_graft *set_enforced_from_set(
	__isl_take isl_ast_graft *graft,
	__isl_keep isl_pw_aff_list *lower, int pos, __isl_keep isl_set *upper)
{
	isl_space *space;
	isl_basic_set *enforced;
	isl_pw_multi_aff *pma;
	int i;
	isl_size n;

	n = isl_pw_aff_list_n_pw_aff(lower);
	if (!graft || n < 0)
		return isl_ast_graft_free(graft);

	space = isl_set_get_space(upper);
	enforced = isl_basic_set_universe(isl_space_copy(space));

	/* Identity mapping that will have dimension "pos" replaced
	 * by each lower bound in turn.
	 */
	space = isl_space_map_from_set(space);
	pma = isl_pw_multi_aff_identity(space);

	for (i = 0; i < n; ++i) {
		isl_pw_aff *pa;
		isl_set *enforced_i;
		isl_basic_set *hull;
		isl_pw_multi_aff *pma_i;

		pa = isl_pw_aff_list_get_pw_aff(lower, i);
		/* Use the ceil of the lower bound so that the preimage does
		 * not impose an integrality constraint that the loop itself
		 * does not enforce (see the comment above this function).
		 */
		pa = isl_pw_aff_ceil(pa);
		pma_i = isl_pw_multi_aff_copy(pma);
		pma_i = isl_pw_multi_aff_set_pw_aff(pma_i, pos, pa);
		/* Plug the lower bound into the upper bound constraints. */
		enforced_i = isl_set_copy(upper);
		enforced_i = isl_set_preimage_pw_multi_aff(enforced_i, pma_i);
		hull = isl_set_simple_hull(enforced_i);
		enforced = isl_basic_set_intersect(enforced, hull);
	}

	isl_pw_multi_aff_free(pma);

	graft = isl_ast_graft_enforce(graft, enforced);

	return graft;
}
    903  1.1  mrg 
    904  1.1  mrg /* Compute the constraints on the outer dimensions enforced by
    905  1.1  mrg  * graft->node and add those constraints to graft->enforced,
    906  1.1  mrg  * in case the upper bound is expressed as
    907  1.1  mrg  * a list of affine expressions "upper".
    908  1.1  mrg  *
    909  1.1  mrg  * The enforced condition is that each lower bound expression is less
    910  1.1  mrg  * than or equal to each upper bound expression.
    911  1.1  mrg  */
static __isl_give isl_ast_graft *set_enforced_from_list(
	__isl_take isl_ast_graft *graft,
	__isl_keep isl_pw_aff_list *lower, __isl_keep isl_pw_aff_list *upper)
{
	isl_set *cond;
	isl_basic_set *enforced;

	/* Each lower bound <= each upper bound, turned into a single
	 * basic set by taking a simple hull.
	 */
	lower = isl_pw_aff_list_copy(lower);
	upper = isl_pw_aff_list_copy(upper);
	cond = isl_pw_aff_list_le_set(lower, upper);
	enforced = isl_set_simple_hull(cond);
	graft = isl_ast_graft_enforce(graft, enforced);

	return graft;
}
    927  1.1  mrg 
    928  1.1  mrg /* Does "aff" have a negative constant term?
    929  1.1  mrg  */
    930  1.1  mrg static isl_bool aff_constant_is_negative(__isl_keep isl_set *set,
    931  1.1  mrg 	__isl_keep isl_aff *aff, void *user)
    932  1.1  mrg {
    933  1.1  mrg 	isl_bool is_neg;
    934  1.1  mrg 	isl_val *v;
    935  1.1  mrg 
    936  1.1  mrg 	v = isl_aff_get_constant_val(aff);
    937  1.1  mrg 	is_neg = isl_val_is_neg(v);
    938  1.1  mrg 	isl_val_free(v);
    939  1.1  mrg 
    940  1.1  mrg 	return is_neg;
    941  1.1  mrg }
    942  1.1  mrg 
    943  1.1  mrg /* Does "pa" have a negative constant term over its entire domain?
    944  1.1  mrg  */
    945  1.1  mrg static isl_bool pw_aff_constant_is_negative(__isl_keep isl_pw_aff *pa,
    946  1.1  mrg 	void *user)
    947  1.1  mrg {
    948  1.1  mrg 	return isl_pw_aff_every_piece(pa, &aff_constant_is_negative, NULL);
    949  1.1  mrg }
    950  1.1  mrg 
    951  1.1  mrg /* Does each element in "list" have a negative constant term?
    952  1.1  mrg  */
    953  1.1  mrg static int list_constant_is_negative(__isl_keep isl_pw_aff_list *list)
    954  1.1  mrg {
    955  1.1  mrg 	return isl_pw_aff_list_every(list, &pw_aff_constant_is_negative, NULL);
    956  1.1  mrg }
    957  1.1  mrg 
    958  1.1  mrg /* Add 1 to each of the elements in "list", where each of these elements
    959  1.1  mrg  * is defined over the internal schedule space of "build".
    960  1.1  mrg  */
static __isl_give isl_pw_aff_list *list_add_one(
	__isl_take isl_pw_aff_list *list, __isl_keep isl_ast_build *build)
{
	int i;
	isl_size n;
	isl_space *space;
	isl_aff *aff;
	isl_pw_aff *one;

	n = isl_pw_aff_list_n_pw_aff(list);
	if (n < 0)
		return isl_pw_aff_list_free(list);

	/* Construct the constant "1" on the internal schedule space. */
	space = isl_ast_build_get_space(build, 1);
	aff = isl_aff_zero_on_domain(isl_local_space_from_space(space));
	aff = isl_aff_add_constant_si(aff, 1);
	one = isl_pw_aff_from_aff(aff);

	for (i = 0; i < n; ++i) {
		isl_pw_aff *pa;
		pa = isl_pw_aff_list_get_pw_aff(list, i);
		pa = isl_pw_aff_add(pa, isl_pw_aff_copy(one));
		list = isl_pw_aff_list_set_pw_aff(list, i, pa);
	}

	isl_pw_aff_free(one);

	return list;
}
    990  1.1  mrg 
    991  1.1  mrg /* Set the condition part of the for node graft->node in case
    992  1.1  mrg  * the upper bound is represented as a list of piecewise affine expressions.
    993  1.1  mrg  *
    994  1.1  mrg  * In particular, set the condition to
    995  1.1  mrg  *
    996  1.1  mrg  *	iterator <= min(list of upper bounds)
    997  1.1  mrg  *
    998  1.1  mrg  * If each of the upper bounds has a negative constant term, then
    999  1.1  mrg  * set the condition to
   1000  1.1  mrg  *
   1001  1.1  mrg  *	iterator < min(list of (upper bound + 1)s)
   1002  1.1  mrg  *
   1003  1.1  mrg  */
static __isl_give isl_ast_graft *set_for_cond_from_list(
	__isl_take isl_ast_graft *graft, __isl_keep isl_pw_aff_list *list,
	__isl_keep isl_ast_build *build)
{
	int neg;
	isl_ast_expr *bound, *iterator, *cond;
	enum isl_ast_expr_op_type type = isl_ast_expr_op_le;

	if (!graft || !list)
		return isl_ast_graft_free(graft);

	/* If every upper bound has a negative constant term, generate
	 * "iterator < min(bound + 1)" instead of "iterator <= min(bound)".
	 */
	neg = list_constant_is_negative(list);
	if (neg < 0)
		return isl_ast_graft_free(graft);
	list = isl_pw_aff_list_copy(list);
	if (neg) {
		list = list_add_one(list, build);
		type = isl_ast_expr_op_lt;
	}

	bound = reduce_list(isl_ast_expr_op_min, list, build);
	iterator = isl_ast_expr_copy(graft->node->u.f.iterator);
	cond = isl_ast_expr_alloc_binary(type, iterator, bound);
	graft->node = isl_ast_node_for_set_cond(graft->node, cond);

	isl_pw_aff_list_free(list);
	if (!graft->node)
		return isl_ast_graft_free(graft);
	return graft;
}
   1034  1.1  mrg 
   1035  1.1  mrg /* Set the condition part of the for node graft->node in case
   1036  1.1  mrg  * the upper bound is represented as a set.
   1037  1.1  mrg  */
static __isl_give isl_ast_graft *set_for_cond_from_set(
	__isl_take isl_ast_graft *graft, __isl_keep isl_set *set,
	__isl_keep isl_ast_build *build)
{
	isl_ast_expr *cond;

	if (!graft)
		return NULL;

	/* Turn "set" into an AST expression for the loop condition. */
	cond = isl_ast_build_expr_from_set_internal(build, isl_set_copy(set));
	graft->node = isl_ast_node_for_set_cond(graft->node, cond);
	if (!graft->node)
		return isl_ast_graft_free(graft);
	return graft;
}
   1053  1.1  mrg 
   1054  1.1  mrg /* Construct an isl_ast_expr for the increment (i.e., stride) of
   1055  1.1  mrg  * the current dimension.
   1056  1.1  mrg  */
   1057  1.1  mrg static __isl_give isl_ast_expr *for_inc(__isl_keep isl_ast_build *build)
   1058  1.1  mrg {
   1059  1.1  mrg 	isl_size depth;
   1060  1.1  mrg 	isl_val *v;
   1061  1.1  mrg 	isl_ctx *ctx;
   1062  1.1  mrg 
   1063  1.1  mrg 	depth = isl_ast_build_get_depth(build);
   1064  1.1  mrg 	if (depth < 0)
   1065  1.1  mrg 		return NULL;
   1066  1.1  mrg 	ctx = isl_ast_build_get_ctx(build);
   1067  1.1  mrg 
   1068  1.1  mrg 	if (!isl_ast_build_has_stride(build, depth))
   1069  1.1  mrg 		return isl_ast_expr_alloc_int_si(ctx, 1);
   1070  1.1  mrg 
   1071  1.1  mrg 	v = isl_ast_build_get_stride(build, depth);
   1072  1.1  mrg 	return isl_ast_expr_from_val(v);
   1073  1.1  mrg }
   1074  1.1  mrg 
   1075  1.1  mrg /* Should we express the loop condition as
   1076  1.1  mrg  *
   1077  1.1  mrg  *	iterator <= min(list of upper bounds)
   1078  1.1  mrg  *
   1079  1.1  mrg  * or as a conjunction of constraints?
   1080  1.1  mrg  *
   1081  1.1  mrg  * The first is constructed from a list of upper bounds.
   1082  1.1  mrg  * The second is constructed from a set.
   1083  1.1  mrg  *
   1084  1.1  mrg  * If there are no upper bounds in "constraints", then this could mean
   1085  1.1  mrg  * that "domain" simply doesn't have an upper bound or that we didn't
   1086  1.1  mrg  * pick any upper bound.  In the first case, we want to generate the
 * loop condition as a(n empty) conjunction of constraints.
   1088  1.1  mrg  * In the second case, we will compute
   1089  1.1  mrg  * a single upper bound from "domain" and so we use the list form.
   1090  1.1  mrg  *
   1091  1.1  mrg  * If there are upper bounds in "constraints",
   1092  1.1  mrg  * then we use the list form iff the atomic_upper_bound option is set.
   1093  1.1  mrg  */
   1094  1.1  mrg static int use_upper_bound_list(isl_ctx *ctx, int n_upper,
   1095  1.1  mrg 	__isl_keep isl_set *domain, int depth)
   1096  1.1  mrg {
   1097  1.1  mrg 	if (n_upper > 0)
   1098  1.1  mrg 		return isl_options_get_ast_build_atomic_upper_bound(ctx);
   1099  1.1  mrg 	else
   1100  1.1  mrg 		return isl_set_dim_has_upper_bound(domain, isl_dim_set, depth);
   1101  1.1  mrg }
   1102  1.1  mrg 
   1103  1.1  mrg /* Fill in the expressions of the for node in graft->node.
   1104  1.1  mrg  *
   1105  1.1  mrg  * In particular,
   1106  1.1  mrg  * - set the initialization part of the loop to the maximum of the lower bounds
   1107  1.1  mrg  * - extract the increment from the stride of the current dimension
   1108  1.1  mrg  * - construct the for condition either based on a list of upper bounds
   1109  1.1  mrg  *	or on a set of upper bound constraints.
   1110  1.1  mrg  */
static __isl_give isl_ast_graft *set_for_node_expressions(
	__isl_take isl_ast_graft *graft, __isl_keep isl_pw_aff_list *lower,
	int use_list, __isl_keep isl_pw_aff_list *upper_list,
	__isl_keep isl_set *upper_set, __isl_keep isl_ast_build *build)
{
	isl_ast_expr *init;

	if (!graft)
		return NULL;

	/* Initialization: the maximum of the lower bounds. */
	init = reduce_list(isl_ast_expr_op_max, lower, build);
	graft->node = isl_ast_node_for_set_init(graft->node, init);
	graft->node = isl_ast_node_for_set_inc(graft->node, for_inc(build));

	if (!graft->node)
		graft = isl_ast_graft_free(graft);

	/* Condition: either from a list of upper bounds or from
	 * a set of upper bound constraints.
	 */
	if (use_list)
		graft = set_for_cond_from_list(graft, upper_list, build);
	else
		graft = set_for_cond_from_set(graft, upper_set, build);

	return graft;
}
   1135  1.1  mrg 
   1136  1.1  mrg /* Update "graft" based on "bounds" and "domain" for the generic,
   1137  1.1  mrg  * non-degenerate, case.
   1138  1.1  mrg  *
   1139  1.1  mrg  * "c_lower" and "c_upper" contain the lower and upper bounds
   1140  1.1  mrg  * that the loop node should express.
   1141  1.1  mrg  * "domain" is the subset of the intersection of the constraints
   1142  1.1  mrg  * for which some code is executed.
   1143  1.1  mrg  *
   1144  1.1  mrg  * There may be zero lower bounds or zero upper bounds in "constraints"
   1145  1.1  mrg  * in case the list of constraints was created
   1146  1.1  mrg  * based on the atomic option or based on separation with explicit bounds.
   1147  1.1  mrg  * In that case, we use "domain" to derive lower and/or upper bounds.
   1148  1.1  mrg  *
   1149  1.1  mrg  * We first compute a list of one or more lower bounds.
   1150  1.1  mrg  *
   1151  1.1  mrg  * Then we decide if we want to express the condition as
   1152  1.1  mrg  *
   1153  1.1  mrg  *	iterator <= min(list of upper bounds)
   1154  1.1  mrg  *
   1155  1.1  mrg  * or as a conjunction of constraints.
   1156  1.1  mrg  *
   1157  1.1  mrg  * The set of enforced constraints is then computed either based on
   1158  1.1  mrg  * a list of upper bounds or on a set of upper bound constraints.
   1159  1.1  mrg  * We do not compute any enforced constraints if we were forced
   1160  1.1  mrg  * to compute a lower or upper bound using exact_bound.  The domains
   1161  1.1  mrg  * of the resulting expressions may imply some bounds on outer dimensions
   1162  1.1  mrg  * that we do not want to appear in the enforced constraints since
   1163  1.1  mrg  * they are not actually enforced by the corresponding code.
   1164  1.1  mrg  *
   1165  1.1  mrg  * Finally, we fill in the expressions of the for node.
   1166  1.1  mrg  */
static __isl_give isl_ast_graft *refine_generic_bounds(
	__isl_take isl_ast_graft *graft,
	__isl_take isl_constraint_list *c_lower,
	__isl_take isl_constraint_list *c_upper,
	__isl_keep isl_set *domain, __isl_keep isl_ast_build *build)
{
	isl_size depth;
	isl_ctx *ctx;
	isl_pw_aff_list *lower;
	int use_list;
	isl_set *upper_set = NULL;
	isl_pw_aff_list *upper_list = NULL;
	isl_size n_lower, n_upper;

	depth = isl_ast_build_get_depth(build);
	if (!graft || !c_lower || !c_upper || depth < 0)
		goto error;

	ctx = isl_ast_graft_get_ctx(graft);

	n_lower = isl_constraint_list_n_constraint(c_lower);
	n_upper = isl_constraint_list_n_constraint(c_upper);
	if (n_lower < 0 || n_upper < 0)
		goto error;

	/* Decide between "iterator <= min(upper bounds)" and
	 * a conjunction of constraints.
	 */
	use_list = use_upper_bound_list(ctx, n_upper, domain, depth);

	lower = lower_bounds(c_lower, depth, domain, build);

	if (use_list)
		upper_list = upper_bounds(c_upper, depth, domain, build);
	else if (n_upper > 0)
		upper_set = intersect_constraints(c_upper);
	else
		upper_set = isl_set_universe(isl_set_get_space(domain));

	/* Skip the enforced constraints when a bound had to be computed
	 * with exact_bound (n_lower == 0 or n_upper == 0): the domains of
	 * such bounds may imply conditions on outer dimensions that the
	 * generated code does not actually enforce.
	 */
	if (n_lower == 0 || n_upper == 0)
		;
	else if (use_list)
		graft = set_enforced_from_list(graft, lower, upper_list);
	else
		graft = set_enforced_from_set(graft, lower, depth, upper_set);

	graft = set_for_node_expressions(graft, lower, use_list, upper_list,
					upper_set, build);

	isl_pw_aff_list_free(lower);
	isl_pw_aff_list_free(upper_list);
	isl_set_free(upper_set);
	isl_constraint_list_free(c_lower);
	isl_constraint_list_free(c_upper);

	return graft;
error:
	isl_constraint_list_free(c_lower);
	isl_constraint_list_free(c_upper);
	return isl_ast_graft_free(graft);
}
   1225  1.1  mrg 
   1226  1.1  mrg /* Internal data structure used inside count_constraints to keep
   1227  1.1  mrg  * track of the number of constraints that are independent of dimension "pos",
   1228  1.1  mrg  * the lower bounds in "pos" and the upper bounds in "pos".
   1229  1.1  mrg  */
struct isl_ast_count_constraints_data {
	int pos;	/* position of the dimension under consideration */

	int n_indep;	/* number of constraints independent of "pos" */
	int n_lower;	/* number of lower bounds on "pos" */
	int n_upper;	/* number of upper bounds on "pos" */
};
   1237  1.1  mrg 
/* Increment data->n_indep, data->n_lower or data->n_upper depending
 * on whether "c" is independent of dimension data->pos,
   1240  1.1  mrg  * a lower bound or an upper bound.
   1241  1.1  mrg  */
static isl_stat count_constraints(__isl_take isl_constraint *c, void *user)
{
	struct isl_ast_count_constraints_data *data = user;

	/* Classify "c" with respect to dimension data->pos. */
	if (isl_constraint_is_lower_bound(c, isl_dim_set, data->pos))
		data->n_lower++;
	else if (isl_constraint_is_upper_bound(c, isl_dim_set, data->pos))
		data->n_upper++;
	else
		data->n_indep++;

	isl_constraint_free(c);

	return isl_stat_ok;
}
   1257  1.1  mrg 
   1258  1.1  mrg /* Update "graft" based on "bounds" and "domain" for the generic,
   1259  1.1  mrg  * non-degenerate, case.
   1260  1.1  mrg  *
   1261  1.1  mrg  * "list" respresent the list of bounds that need to be encoded by
   1262  1.1  mrg  * the for loop.  Only the constraints that involve the iterator
   1263  1.1  mrg  * are relevant here.  The other constraints are taken care of by
   1264  1.1  mrg  * the caller and are included in the generated constraints of "build".
   1265  1.1  mrg  * "domain" is the subset of the intersection of the constraints
   1266  1.1  mrg  * for which some code is executed.
   1267  1.1  mrg  * "build" is the build in which graft->node was created.
   1268  1.1  mrg  *
   1269  1.1  mrg  * We separate lower bounds, upper bounds and constraints that
   1270  1.1  mrg  * are independent of the loop iterator.
   1271  1.1  mrg  *
   1272  1.1  mrg  * The actual for loop bounds are generated in refine_generic_bounds.
   1273  1.1  mrg  */
static __isl_give isl_ast_graft *refine_generic_split(
	__isl_take isl_ast_graft *graft, __isl_take isl_constraint_list *list,
	__isl_keep isl_set *domain, __isl_keep isl_ast_build *build)
{
	struct isl_ast_count_constraints_data data;
	isl_size depth;
	isl_constraint_list *lower;
	isl_constraint_list *upper;

	depth = isl_ast_build_get_depth(build);
	if (depth < 0)
		list = isl_constraint_list_free(list);
	if (!list)
		return isl_ast_graft_free(graft);

	data.pos = depth;

	/* Sort the constraints so that the list can afterwards be split
	 * by position; cmp_constraint (defined elsewhere in this file)
	 * is expected to group independent constraints, lower bounds and
	 * upper bounds together, in that order — verify against its
	 * definition when modifying this code.
	 */
	list = isl_constraint_list_sort(list, &cmp_constraint, &data.pos);
	if (!list)
		return isl_ast_graft_free(graft);

	data.n_indep = data.n_lower = data.n_upper = 0;
	if (isl_constraint_list_foreach(list, &count_constraints, &data) < 0) {
		isl_constraint_list_free(list);
		return isl_ast_graft_free(graft);
	}

	/* Drop the independent constraints, then split the remainder
	 * into the lower bounds and the upper bounds.
	 */
	lower = isl_constraint_list_drop(list, 0, data.n_indep);
	upper = isl_constraint_list_copy(lower);
	lower = isl_constraint_list_drop(lower, data.n_lower, data.n_upper);
	upper = isl_constraint_list_drop(upper, 0, data.n_lower);

	return refine_generic_bounds(graft, lower, upper, domain, build);
}
   1308  1.1  mrg 
   1309  1.1  mrg /* Update "graft" based on "bounds" and "domain" for the generic,
   1310  1.1  mrg  * non-degenerate, case.
   1311  1.1  mrg  *
   1312  1.1  mrg  * "bounds" respresent the bounds that need to be encoded by
   1313  1.1  mrg  * the for loop (or a guard around the for loop).
   1314  1.1  mrg  * "domain" is the subset of "bounds" for which some code is executed.
   1315  1.1  mrg  * "build" is the build in which graft->node was created.
   1316  1.1  mrg  *
   1317  1.1  mrg  * We break up "bounds" into a list of constraints and continue with
   1318  1.1  mrg  * refine_generic_split.
   1319  1.1  mrg  */
static __isl_give isl_ast_graft *refine_generic(
	__isl_take isl_ast_graft *graft,
	__isl_keep isl_basic_set *bounds, __isl_keep isl_set *domain,
	__isl_keep isl_ast_build *build)
{
	isl_constraint_list *list;

	if (!build || !graft)
		return isl_ast_graft_free(graft);

	/* Break "bounds" up into individual constraints. */
	list = isl_basic_set_get_constraint_list(bounds);

	graft = refine_generic_split(graft, list, domain, build);

	return graft;
}
   1336  1.1  mrg 
   1337  1.1  mrg /* Create a for node for the current level.
   1338  1.1  mrg  *
   1339  1.1  mrg  * Mark the for node degenerate if "degenerate" is set.
   1340  1.1  mrg  */
   1341  1.1  mrg static __isl_give isl_ast_node *create_for(__isl_keep isl_ast_build *build,
   1342  1.1  mrg 	int degenerate)
   1343  1.1  mrg {
   1344  1.1  mrg 	isl_size depth;
   1345  1.1  mrg 	isl_id *id;
   1346  1.1  mrg 	isl_ast_node *node;
   1347  1.1  mrg 
   1348  1.1  mrg 	depth = isl_ast_build_get_depth(build);
   1349  1.1  mrg 	if (depth < 0)
   1350  1.1  mrg 		return NULL;
   1351  1.1  mrg 
   1352  1.1  mrg 	id = isl_ast_build_get_iterator_id(build, depth);
   1353  1.1  mrg 	node = isl_ast_node_alloc_for(id);
   1354  1.1  mrg 	if (degenerate)
   1355  1.1  mrg 		node = isl_ast_node_for_mark_degenerate(node);
   1356  1.1  mrg 
   1357  1.1  mrg 	return node;
   1358  1.1  mrg }
   1359  1.1  mrg 
   1360  1.1  mrg /* If the ast_build_exploit_nested_bounds option is set, then return
   1361  1.1  mrg  * the constraints enforced by all elements in "list".
   1362  1.1  mrg  * Otherwise, return the universe.
   1363  1.1  mrg  */
   1364  1.1  mrg static __isl_give isl_basic_set *extract_shared_enforced(
   1365  1.1  mrg 	__isl_keep isl_ast_graft_list *list, __isl_keep isl_ast_build *build)
   1366  1.1  mrg {
   1367  1.1  mrg 	isl_ctx *ctx;
   1368  1.1  mrg 	isl_space *space;
   1369  1.1  mrg 
   1370  1.1  mrg 	if (!list)
   1371  1.1  mrg 		return NULL;
   1372  1.1  mrg 
   1373  1.1  mrg 	ctx = isl_ast_graft_list_get_ctx(list);
   1374  1.1  mrg 	if (isl_options_get_ast_build_exploit_nested_bounds(ctx))
   1375  1.1  mrg 		return isl_ast_graft_list_extract_shared_enforced(list, build);
   1376  1.1  mrg 
   1377  1.1  mrg 	space = isl_ast_build_get_space(build, 1);
   1378  1.1  mrg 	return isl_basic_set_universe(space);
   1379  1.1  mrg }
   1380  1.1  mrg 
   1381  1.1  mrg /* Return the pending constraints of "build" that are not already taken
   1382  1.1  mrg  * care of (by a combination of "enforced" and the generated constraints
   1383  1.1  mrg  * of "build").
   1384  1.1  mrg  */
   1385  1.1  mrg static __isl_give isl_set *extract_pending(__isl_keep isl_ast_build *build,
   1386  1.1  mrg 	__isl_keep isl_basic_set *enforced)
   1387  1.1  mrg {
   1388  1.1  mrg 	isl_set *guard, *context;
   1389  1.1  mrg 
   1390  1.1  mrg 	guard = isl_ast_build_get_pending(build);
   1391  1.1  mrg 	context = isl_set_from_basic_set(isl_basic_set_copy(enforced));
   1392  1.1  mrg 	context = isl_set_intersect(context,
   1393  1.1  mrg 					isl_ast_build_get_generated(build));
   1394  1.1  mrg 	return isl_set_gist(guard, context);
   1395  1.1  mrg }
   1396  1.1  mrg 
   1397  1.1  mrg /* Create an AST node for the current dimension based on
   1398  1.1  mrg  * the schedule domain "bounds" and return the node encapsulated
   1399  1.1  mrg  * in an isl_ast_graft.
   1400  1.1  mrg  *
   1401  1.1  mrg  * "executed" is the current inverse schedule, taking into account
   1402  1.1  mrg  * the bounds in "bounds"
   1403  1.1  mrg  * "domain" is the domain of "executed", with inner dimensions projected out.
   1404  1.1  mrg  * It may be a strict subset of "bounds" in case "bounds" was created
   1405  1.1  mrg  * based on the atomic option or based on separation with explicit bounds.
   1406  1.1  mrg  *
   1407  1.1  mrg  * "domain" may satisfy additional equalities that result
   1408  1.1  mrg  * from intersecting "executed" with "bounds" in add_node.
   1409  1.1  mrg  * It may also satisfy some global constraints that were dropped out because
   1410  1.1  mrg  * we performed separation with explicit bounds.
   1411  1.1  mrg  * The very first step is then to copy these constraints to "bounds".
   1412  1.1  mrg  *
   1413  1.1  mrg  * Since we may be calling before_each_for and after_each_for
   1414  1.1  mrg  * callbacks, we record the current inverse schedule in the build.
   1415  1.1  mrg  *
   1416  1.1  mrg  * We consider three builds,
   1417  1.1  mrg  * "build" is the one in which the current level is created,
   1418  1.1  mrg  * "body_build" is the build in which the next level is created,
   1419  1.1  mrg  * "sub_build" is essentially the same as "body_build", except that
   1420  1.1  mrg  * the depth has not been increased yet.
   1421  1.1  mrg  *
   1422  1.1  mrg  * "build" already contains information (in strides and offsets)
   1423  1.1  mrg  * about the strides at the current level, but this information is not
   1424  1.1  mrg  * reflected in the build->domain.
   1425  1.1  mrg  * We first add this information and the "bounds" to the sub_build->domain.
   1426  1.1  mrg  * isl_ast_build_set_loop_bounds adds the stride information and
   1427  1.1  mrg  * checks whether the current dimension attains
   1428  1.1  mrg  * only a single value and whether this single value can be represented using
   1429  1.1  mrg  * a single affine expression.
   1430  1.1  mrg  * In the first case, the current level is considered "degenerate".
   1431  1.1  mrg  * In the second, sub-case, the current level is considered "eliminated".
   1432  1.1  mrg  * Eliminated levels don't need to be reflected in the AST since we can
   1433  1.1  mrg  * simply plug in the affine expression.  For degenerate, but non-eliminated,
   1434  1.1  mrg  * levels, we do introduce a for node, but mark is as degenerate so that
   1435  1.1  mrg  * it can be printed as an assignment of the single value to the loop
   1436  1.1  mrg  * "iterator".
   1437  1.1  mrg  *
   1438  1.1  mrg  * If the current level is eliminated, we explicitly plug in the value
   1439  1.1  mrg  * for the current level found by isl_ast_build_set_loop_bounds in the
   1440  1.1  mrg  * inverse schedule.  This ensures that if we are working on a slice
   1441  1.1  mrg  * of the domain based on information available in the inverse schedule
   1442  1.1  mrg  * and the build domain, that then this information is also reflected
   1443  1.1  mrg  * in the inverse schedule.  This operation also eliminates the current
   1444  1.1  mrg  * dimension from the inverse schedule making sure no inner dimensions depend
   1445  1.1  mrg  * on the current dimension.  Otherwise, we create a for node, marking
   1446  1.1  mrg  * it degenerate if appropriate.  The initial for node is still incomplete
   1447  1.1  mrg  * and will be completed in either refine_degenerate or refine_generic.
   1448  1.1  mrg  *
   1449  1.1  mrg  * We then generate a sequence of grafts for the next level,
   1450  1.1  mrg  * create a surrounding graft for the current level and insert
   1451  1.1  mrg  * the for node we created (if the current level is not eliminated).
   1452  1.1  mrg  * Before creating a graft for the current level, we first extract
   1453  1.1  mrg  * hoistable constraints from the child guards and combine them
   1454  1.1  mrg  * with the pending constraints in the build.  These constraints
   1455  1.1  mrg  * are used to simplify the child guards and then added to the guard
   1456  1.1  mrg  * of the current graft to ensure that they will be generated.
   1457  1.1  mrg  * If the hoisted guard is a disjunction, then we use it directly
   1458  1.1  mrg  * to gist the guards on the children before intersect it with the
   1459  1.1  mrg  * pending constraints.  We do so because this disjunction is typically
   1460  1.1  mrg  * identical to the guards on the children such that these guards
   1461  1.1  mrg  * can be effectively removed completely.  After the intersection,
   1462  1.1  mrg  * the gist operation would have a harder time figuring this out.
   1463  1.1  mrg  *
   1464  1.1  mrg  * Finally, we set the bounds of the for loop in either
   1465  1.1  mrg  * refine_degenerate or refine_generic.
   1466  1.1  mrg  * We do so in a context where the pending constraints of the build
   1467  1.1  mrg  * have been replaced by the guard of the current graft.
   1468  1.1  mrg  */
   1469  1.1  mrg static __isl_give isl_ast_graft *create_node_scaled(
   1470  1.1  mrg 	__isl_take isl_union_map *executed,
   1471  1.1  mrg 	__isl_take isl_basic_set *bounds, __isl_take isl_set *domain,
   1472  1.1  mrg 	__isl_take isl_ast_build *build)
   1473  1.1  mrg {
   1474  1.1  mrg 	isl_size depth;
   1475  1.1  mrg 	int degenerate;
   1476  1.1  mrg 	isl_bool eliminated;
   1477  1.1  mrg 	isl_size n;
   1478  1.1  mrg 	isl_basic_set *hull;
   1479  1.1  mrg 	isl_basic_set *enforced;
   1480  1.1  mrg 	isl_set *guard, *hoisted;
   1481  1.1  mrg 	isl_ast_node *node = NULL;
   1482  1.1  mrg 	isl_ast_graft *graft;
   1483  1.1  mrg 	isl_ast_graft_list *children;
   1484  1.1  mrg 	isl_ast_build *sub_build;
   1485  1.1  mrg 	isl_ast_build *body_build;
   1486  1.1  mrg 
   1487  1.1  mrg 	domain = isl_ast_build_eliminate_divs(build, domain);
   1488  1.1  mrg 	domain = isl_set_detect_equalities(domain);
   1489  1.1  mrg 	hull = isl_set_unshifted_simple_hull(isl_set_copy(domain));
   1490  1.1  mrg 	bounds = isl_basic_set_intersect(bounds, hull);
   1491  1.1  mrg 	build = isl_ast_build_set_executed(build, isl_union_map_copy(executed));
   1492  1.1  mrg 
   1493  1.1  mrg 	depth = isl_ast_build_get_depth(build);
   1494  1.1  mrg 	if (depth < 0)
   1495  1.1  mrg 		build = isl_ast_build_free(build);
   1496  1.1  mrg 	sub_build = isl_ast_build_copy(build);
   1497  1.1  mrg 	bounds = isl_basic_set_remove_redundancies(bounds);
   1498  1.1  mrg 	bounds = isl_ast_build_specialize_basic_set(sub_build, bounds);
   1499  1.1  mrg 	sub_build = isl_ast_build_set_loop_bounds(sub_build,
   1500  1.1  mrg 						isl_basic_set_copy(bounds));
   1501  1.1  mrg 	degenerate = isl_ast_build_has_value(sub_build);
   1502  1.1  mrg 	eliminated = isl_ast_build_has_affine_value(sub_build, depth);
   1503  1.1  mrg 	if (degenerate < 0 || eliminated < 0)
   1504  1.1  mrg 		executed = isl_union_map_free(executed);
   1505  1.1  mrg 	if (!degenerate)
   1506  1.1  mrg 		bounds = isl_ast_build_compute_gist_basic_set(build, bounds);
   1507  1.1  mrg 	sub_build = isl_ast_build_set_pending_generated(sub_build,
   1508  1.1  mrg 						isl_basic_set_copy(bounds));
   1509  1.1  mrg 	if (eliminated)
   1510  1.1  mrg 		executed = plug_in_values(executed, sub_build);
   1511  1.1  mrg 	else
   1512  1.1  mrg 		node = create_for(build, degenerate);
   1513  1.1  mrg 
   1514  1.1  mrg 	body_build = isl_ast_build_copy(sub_build);
   1515  1.1  mrg 	body_build = isl_ast_build_increase_depth(body_build);
   1516  1.1  mrg 	if (!eliminated)
   1517  1.1  mrg 		node = before_each_for(node, body_build);
   1518  1.1  mrg 	children = generate_next_level(executed,
   1519  1.1  mrg 				    isl_ast_build_copy(body_build));
   1520  1.1  mrg 
   1521  1.1  mrg 	enforced = extract_shared_enforced(children, build);
   1522  1.1  mrg 	guard = extract_pending(sub_build, enforced);
   1523  1.1  mrg 	hoisted = isl_ast_graft_list_extract_hoistable_guard(children, build);
   1524  1.1  mrg 	n = isl_set_n_basic_set(hoisted);
   1525  1.1  mrg 	if (n < 0)
   1526  1.1  mrg 		children = isl_ast_graft_list_free(children);
   1527  1.1  mrg 	if (n > 1)
   1528  1.1  mrg 		children = isl_ast_graft_list_gist_guards(children,
   1529  1.1  mrg 						    isl_set_copy(hoisted));
   1530  1.1  mrg 	guard = isl_set_intersect(guard, hoisted);
   1531  1.1  mrg 	if (!eliminated)
   1532  1.1  mrg 		guard = add_implied_guards(guard, degenerate, bounds, build);
   1533  1.1  mrg 
   1534  1.1  mrg 	graft = isl_ast_graft_alloc_from_children(children,
   1535  1.1  mrg 			    isl_set_copy(guard), enforced, build, sub_build);
   1536  1.1  mrg 
   1537  1.1  mrg 	if (!eliminated) {
   1538  1.1  mrg 		isl_ast_build *for_build;
   1539  1.1  mrg 
   1540  1.1  mrg 		graft = isl_ast_graft_insert_for(graft, node);
   1541  1.1  mrg 		for_build = isl_ast_build_copy(build);
   1542  1.1  mrg 		for_build = isl_ast_build_replace_pending_by_guard(for_build,
   1543  1.1  mrg 							isl_set_copy(guard));
   1544  1.1  mrg 		if (degenerate)
   1545  1.1  mrg 			graft = refine_degenerate(graft, for_build, sub_build);
   1546  1.1  mrg 		else
   1547  1.1  mrg 			graft = refine_generic(graft, bounds,
   1548  1.1  mrg 					domain, for_build);
   1549  1.1  mrg 		isl_ast_build_free(for_build);
   1550  1.1  mrg 	}
   1551  1.1  mrg 	isl_set_free(guard);
   1552  1.1  mrg 	if (!eliminated)
   1553  1.1  mrg 		graft = after_each_for(graft, body_build);
   1554  1.1  mrg 
   1555  1.1  mrg 	isl_ast_build_free(body_build);
   1556  1.1  mrg 	isl_ast_build_free(sub_build);
   1557  1.1  mrg 	isl_ast_build_free(build);
   1558  1.1  mrg 	isl_basic_set_free(bounds);
   1559  1.1  mrg 	isl_set_free(domain);
   1560  1.1  mrg 
   1561  1.1  mrg 	return graft;
   1562  1.1  mrg }
   1563  1.1  mrg 
   1564  1.1  mrg /* Internal data structure for checking if all constraints involving
   1565  1.1  mrg  * the input dimension "depth" are such that the other coefficients
   1566  1.1  mrg  * are multiples of "m", reducing "m" if they are not.
   1567  1.1  mrg  * If "m" is reduced all the way down to "1", then the check has failed
   1568  1.1  mrg  * and we break out of the iteration.
   1569  1.1  mrg  */
   1570  1.1  mrg struct isl_check_scaled_data {
   1571  1.1  mrg 	int depth;
   1572  1.1  mrg 	isl_val *m;
   1573  1.1  mrg };
   1574  1.1  mrg 
   1575  1.1  mrg /* If constraint "c" involves the input dimension data->depth,
   1576  1.1  mrg  * then make sure that all the other coefficients are multiples of data->m,
   1577  1.1  mrg  * reducing data->m if needed.
   1578  1.1  mrg  * Break out of the iteration if data->m has become equal to "1".
   1579  1.1  mrg  */
   1580  1.1  mrg static isl_stat constraint_check_scaled(__isl_take isl_constraint *c,
   1581  1.1  mrg 	void *user)
   1582  1.1  mrg {
   1583  1.1  mrg 	struct isl_check_scaled_data *data = user;
   1584  1.1  mrg 	int i, j;
   1585  1.1  mrg 	isl_size n;
   1586  1.1  mrg 	enum isl_dim_type t[] = { isl_dim_param, isl_dim_in, isl_dim_out,
   1587  1.1  mrg 				    isl_dim_div };
   1588  1.1  mrg 
   1589  1.1  mrg 	if (!isl_constraint_involves_dims(c, isl_dim_in, data->depth, 1)) {
   1590  1.1  mrg 		isl_constraint_free(c);
   1591  1.1  mrg 		return isl_stat_ok;
   1592  1.1  mrg 	}
   1593  1.1  mrg 
   1594  1.1  mrg 	for (i = 0; i < 4; ++i) {
   1595  1.1  mrg 		n = isl_constraint_dim(c, t[i]);
   1596  1.1  mrg 		if (n < 0)
   1597  1.1  mrg 			break;
   1598  1.1  mrg 		for (j = 0; j < n; ++j) {
   1599  1.1  mrg 			isl_val *d;
   1600  1.1  mrg 
   1601  1.1  mrg 			if (t[i] == isl_dim_in && j == data->depth)
   1602  1.1  mrg 				continue;
   1603  1.1  mrg 			if (!isl_constraint_involves_dims(c, t[i], j, 1))
   1604  1.1  mrg 				continue;
   1605  1.1  mrg 			d = isl_constraint_get_coefficient_val(c, t[i], j);
   1606  1.1  mrg 			data->m = isl_val_gcd(data->m, d);
   1607  1.1  mrg 			if (isl_val_is_one(data->m))
   1608  1.1  mrg 				break;
   1609  1.1  mrg 		}
   1610  1.1  mrg 		if (j < n)
   1611  1.1  mrg 			break;
   1612  1.1  mrg 	}
   1613  1.1  mrg 
   1614  1.1  mrg 	isl_constraint_free(c);
   1615  1.1  mrg 
   1616  1.1  mrg 	return i < 4 ? isl_stat_error : isl_stat_ok;
   1617  1.1  mrg }
   1618  1.1  mrg 
   1619  1.1  mrg /* For each constraint of "bmap" that involves the input dimension data->depth,
   1620  1.1  mrg  * make sure that all the other coefficients are multiples of data->m,
   1621  1.1  mrg  * reducing data->m if needed.
   1622  1.1  mrg  * Break out of the iteration if data->m has become equal to "1".
   1623  1.1  mrg  */
   1624  1.1  mrg static isl_stat basic_map_check_scaled(__isl_take isl_basic_map *bmap,
   1625  1.1  mrg 	void *user)
   1626  1.1  mrg {
   1627  1.1  mrg 	isl_stat r;
   1628  1.1  mrg 
   1629  1.1  mrg 	r = isl_basic_map_foreach_constraint(bmap,
   1630  1.1  mrg 						&constraint_check_scaled, user);
   1631  1.1  mrg 	isl_basic_map_free(bmap);
   1632  1.1  mrg 
   1633  1.1  mrg 	return r;
   1634  1.1  mrg }
   1635  1.1  mrg 
   1636  1.1  mrg /* For each constraint of "map" that involves the input dimension data->depth,
   1637  1.1  mrg  * make sure that all the other coefficients are multiples of data->m,
   1638  1.1  mrg  * reducing data->m if needed.
   1639  1.1  mrg  * Break out of the iteration if data->m has become equal to "1".
   1640  1.1  mrg  */
   1641  1.1  mrg static isl_stat map_check_scaled(__isl_take isl_map *map, void *user)
   1642  1.1  mrg {
   1643  1.1  mrg 	isl_stat r;
   1644  1.1  mrg 
   1645  1.1  mrg 	r = isl_map_foreach_basic_map(map, &basic_map_check_scaled, user);
   1646  1.1  mrg 	isl_map_free(map);
   1647  1.1  mrg 
   1648  1.1  mrg 	return r;
   1649  1.1  mrg }
   1650  1.1  mrg 
   1651  1.1  mrg /* Create an AST node for the current dimension based on
   1652  1.1  mrg  * the schedule domain "bounds" and return the node encapsulated
   1653  1.1  mrg  * in an isl_ast_graft.
   1654  1.1  mrg  *
   1655  1.1  mrg  * "executed" is the current inverse schedule, taking into account
   1656  1.1  mrg  * the bounds in "bounds"
   1657  1.1  mrg  * "domain" is the domain of "executed", with inner dimensions projected out.
   1658  1.1  mrg  *
   1659  1.1  mrg  *
   1660  1.1  mrg  * Before moving on to the actual AST node construction in create_node_scaled,
   1661  1.1  mrg  * we first check if the current dimension is strided and if we can scale
   1662  1.1  mrg  * down this stride.  Note that we only do this if the ast_build_scale_strides
   1663  1.1  mrg  * option is set.
   1664  1.1  mrg  *
   1665  1.1  mrg  * In particular, let the current dimension take on values
   1666  1.1  mrg  *
   1667  1.1  mrg  *	f + s a
   1668  1.1  mrg  *
   1669  1.1  mrg  * with a an integer.  We check if we can find an integer m that (obviously)
   1670  1.1  mrg  * divides both f and s.
   1671  1.1  mrg  *
   1672  1.1  mrg  * If so, we check if the current dimension only appears in constraints
   1673  1.1  mrg  * where the coefficients of the other variables are multiples of m.
   1674  1.1  mrg  * We perform this extra check to avoid the risk of introducing
   1675  1.1  mrg  * divisions by scaling down the current dimension.
   1676  1.1  mrg  *
   1677  1.1  mrg  * If so, we scale the current dimension down by a factor of m.
   1678  1.1  mrg  * That is, we plug in
   1679  1.1  mrg  *
   1680  1.1  mrg  *	i = m i'							(1)
   1681  1.1  mrg  *
   1682  1.1  mrg  * Note that in principle we could always scale down strided loops
   1683  1.1  mrg  * by plugging in
   1684  1.1  mrg  *
   1685  1.1  mrg  *	i = f + s i'
   1686  1.1  mrg  *
   1687  1.1  mrg  * but this may result in i' taking on larger values than the original i,
   1688  1.1  mrg  * due to the shift by "f".
   1689  1.1  mrg  * By constrast, the scaling in (1) can only reduce the (absolute) value "i".
   1690  1.1  mrg  */
   1691  1.1  mrg static __isl_give isl_ast_graft *create_node(__isl_take isl_union_map *executed,
   1692  1.1  mrg 	__isl_take isl_basic_set *bounds, __isl_take isl_set *domain,
   1693  1.1  mrg 	__isl_take isl_ast_build *build)
   1694  1.1  mrg {
   1695  1.1  mrg 	struct isl_check_scaled_data data;
   1696  1.1  mrg 	isl_size depth;
   1697  1.1  mrg 	isl_ctx *ctx;
   1698  1.1  mrg 	isl_aff *offset;
   1699  1.1  mrg 	isl_val *d;
   1700  1.1  mrg 
   1701  1.1  mrg 	ctx = isl_ast_build_get_ctx(build);
   1702  1.1  mrg 	if (!isl_options_get_ast_build_scale_strides(ctx))
   1703  1.1  mrg 		return create_node_scaled(executed, bounds, domain, build);
   1704  1.1  mrg 
   1705  1.1  mrg 	depth = isl_ast_build_get_depth(build);
   1706  1.1  mrg 	if (depth < 0)
   1707  1.1  mrg 		build = isl_ast_build_free(build);
   1708  1.1  mrg 	data.depth = depth;
   1709  1.1  mrg 	if (!isl_ast_build_has_stride(build, data.depth))
   1710  1.1  mrg 		return create_node_scaled(executed, bounds, domain, build);
   1711  1.1  mrg 
   1712  1.1  mrg 	offset = isl_ast_build_get_offset(build, data.depth);
   1713  1.1  mrg 	data.m = isl_ast_build_get_stride(build, data.depth);
   1714  1.1  mrg 	if (!data.m)
   1715  1.1  mrg 		offset = isl_aff_free(offset);
   1716  1.1  mrg 	offset = isl_aff_scale_down_val(offset, isl_val_copy(data.m));
   1717  1.1  mrg 	d = isl_aff_get_denominator_val(offset);
   1718  1.1  mrg 	if (!d)
   1719  1.1  mrg 		executed = isl_union_map_free(executed);
   1720  1.1  mrg 
   1721  1.1  mrg 	if (executed && isl_val_is_divisible_by(data.m, d))
   1722  1.1  mrg 		data.m = isl_val_div(data.m, d);
   1723  1.1  mrg 	else {
   1724  1.1  mrg 		data.m = isl_val_set_si(data.m, 1);
   1725  1.1  mrg 		isl_val_free(d);
   1726  1.1  mrg 	}
   1727  1.1  mrg 
   1728  1.1  mrg 	if (!isl_val_is_one(data.m)) {
   1729  1.1  mrg 		if (isl_union_map_foreach_map(executed, &map_check_scaled,
   1730  1.1  mrg 						&data) < 0 &&
   1731  1.1  mrg 		    !isl_val_is_one(data.m))
   1732  1.1  mrg 			executed = isl_union_map_free(executed);
   1733  1.1  mrg 	}
   1734  1.1  mrg 
   1735  1.1  mrg 	if (!isl_val_is_one(data.m)) {
   1736  1.1  mrg 		isl_space *space;
   1737  1.1  mrg 		isl_multi_aff *ma;
   1738  1.1  mrg 		isl_aff *aff;
   1739  1.1  mrg 		isl_map *map;
   1740  1.1  mrg 		isl_union_map *umap;
   1741  1.1  mrg 
   1742  1.1  mrg 		space = isl_ast_build_get_space(build, 1);
   1743  1.1  mrg 		space = isl_space_map_from_set(space);
   1744  1.1  mrg 		ma = isl_multi_aff_identity(space);
   1745  1.1  mrg 		aff = isl_multi_aff_get_aff(ma, data.depth);
   1746  1.1  mrg 		aff = isl_aff_scale_val(aff, isl_val_copy(data.m));
   1747  1.1  mrg 		ma = isl_multi_aff_set_aff(ma, data.depth, aff);
   1748  1.1  mrg 
   1749  1.1  mrg 		bounds = isl_basic_set_preimage_multi_aff(bounds,
   1750  1.1  mrg 						isl_multi_aff_copy(ma));
   1751  1.1  mrg 		domain = isl_set_preimage_multi_aff(domain,
   1752  1.1  mrg 						isl_multi_aff_copy(ma));
   1753  1.1  mrg 		map = isl_map_reverse(isl_map_from_multi_aff(ma));
   1754  1.1  mrg 		umap = isl_union_map_from_map(map);
   1755  1.1  mrg 		executed = isl_union_map_apply_domain(executed,
   1756  1.1  mrg 						isl_union_map_copy(umap));
   1757  1.1  mrg 		build = isl_ast_build_scale_down(build, isl_val_copy(data.m),
   1758  1.1  mrg 						umap);
   1759  1.1  mrg 	}
   1760  1.1  mrg 	isl_aff_free(offset);
   1761  1.1  mrg 	isl_val_free(data.m);
   1762  1.1  mrg 
   1763  1.1  mrg 	return create_node_scaled(executed, bounds, domain, build);
   1764  1.1  mrg }
   1765  1.1  mrg 
   1766  1.1  mrg /* Add the basic set to the list that "user" points to.
   1767  1.1  mrg  */
   1768  1.1  mrg static isl_stat collect_basic_set(__isl_take isl_basic_set *bset, void *user)
   1769  1.1  mrg {
   1770  1.1  mrg 	isl_basic_set_list **list = user;
   1771  1.1  mrg 
   1772  1.1  mrg 	*list = isl_basic_set_list_add(*list, bset);
   1773  1.1  mrg 
   1774  1.1  mrg 	return isl_stat_ok;
   1775  1.1  mrg }
   1776  1.1  mrg 
   1777  1.1  mrg /* Extract the basic sets of "set" and collect them in an isl_basic_set_list.
   1778  1.1  mrg  */
   1779  1.1  mrg static __isl_give isl_basic_set_list *isl_basic_set_list_from_set(
   1780  1.1  mrg 	__isl_take isl_set *set)
   1781  1.1  mrg {
   1782  1.1  mrg 	isl_size n;
   1783  1.1  mrg 	isl_ctx *ctx;
   1784  1.1  mrg 	isl_basic_set_list *list;
   1785  1.1  mrg 
   1786  1.1  mrg 	n = isl_set_n_basic_set(set);
   1787  1.1  mrg 	if (n < 0)
   1788  1.1  mrg 		set = isl_set_free(set);
   1789  1.1  mrg 	if (!set)
   1790  1.1  mrg 		return NULL;
   1791  1.1  mrg 
   1792  1.1  mrg 	ctx = isl_set_get_ctx(set);
   1793  1.1  mrg 
   1794  1.1  mrg 	list = isl_basic_set_list_alloc(ctx, n);
   1795  1.1  mrg 	if (isl_set_foreach_basic_set(set, &collect_basic_set, &list) < 0)
   1796  1.1  mrg 		list = isl_basic_set_list_free(list);
   1797  1.1  mrg 
   1798  1.1  mrg 	isl_set_free(set);
   1799  1.1  mrg 	return list;
   1800  1.1  mrg }
   1801  1.1  mrg 
   1802  1.1  mrg /* Generate code for the schedule domain "bounds"
   1803  1.1  mrg  * and add the result to "list".
   1804  1.1  mrg  *
   1805  1.1  mrg  * We mainly detect strides here and check if the bounds do not
   1806  1.1  mrg  * conflict with the current build domain
   1807  1.1  mrg  * and then pass over control to create_node.
   1808  1.1  mrg  *
   1809  1.1  mrg  * "bounds" reflects the bounds on the current dimension and possibly
   1810  1.1  mrg  * some extra conditions on outer dimensions.
   1811  1.1  mrg  * It does not, however, include any divs involving the current dimension,
   1812  1.1  mrg  * so it does not capture any stride constraints.
   1813  1.1  mrg  * We therefore need to compute that part of the schedule domain that
   1814  1.1  mrg  * intersects with "bounds" and derive the strides from the result.
   1815  1.1  mrg  */
   1816  1.1  mrg static __isl_give isl_ast_graft_list *add_node(
   1817  1.1  mrg 	__isl_take isl_ast_graft_list *list, __isl_take isl_union_map *executed,
   1818  1.1  mrg 	__isl_take isl_basic_set *bounds, __isl_take isl_ast_build *build)
   1819  1.1  mrg {
   1820  1.1  mrg 	isl_ast_graft *graft;
   1821  1.1  mrg 	isl_set *domain = NULL;
   1822  1.1  mrg 	isl_union_set *uset;
   1823  1.1  mrg 	int empty, disjoint;
   1824  1.1  mrg 
   1825  1.1  mrg 	uset = isl_union_set_from_basic_set(isl_basic_set_copy(bounds));
   1826  1.1  mrg 	executed = isl_union_map_intersect_domain(executed, uset);
   1827  1.1  mrg 	empty = isl_union_map_is_empty(executed);
   1828  1.1  mrg 	if (empty < 0)
   1829  1.1  mrg 		goto error;
   1830  1.1  mrg 	if (empty)
   1831  1.1  mrg 		goto done;
   1832  1.1  mrg 
   1833  1.1  mrg 	uset = isl_union_map_domain(isl_union_map_copy(executed));
   1834  1.1  mrg 	domain = isl_set_from_union_set(uset);
   1835  1.1  mrg 	domain = isl_ast_build_specialize(build, domain);
   1836  1.1  mrg 
   1837  1.1  mrg 	domain = isl_set_compute_divs(domain);
   1838  1.1  mrg 	domain = isl_ast_build_eliminate_inner(build, domain);
   1839  1.1  mrg 	disjoint = isl_set_is_disjoint(domain, build->domain);
   1840  1.1  mrg 	if (disjoint < 0)
   1841  1.1  mrg 		goto error;
   1842  1.1  mrg 	if (disjoint)
   1843  1.1  mrg 		goto done;
   1844  1.1  mrg 
   1845  1.1  mrg 	build = isl_ast_build_detect_strides(build, isl_set_copy(domain));
   1846  1.1  mrg 
   1847  1.1  mrg 	graft = create_node(executed, bounds, domain,
   1848  1.1  mrg 				isl_ast_build_copy(build));
   1849  1.1  mrg 	list = isl_ast_graft_list_add(list, graft);
   1850  1.1  mrg 	isl_ast_build_free(build);
   1851  1.1  mrg 	return list;
   1852  1.1  mrg error:
   1853  1.1  mrg 	list = isl_ast_graft_list_free(list);
   1854  1.1  mrg done:
   1855  1.1  mrg 	isl_set_free(domain);
   1856  1.1  mrg 	isl_basic_set_free(bounds);
   1857  1.1  mrg 	isl_union_map_free(executed);
   1858  1.1  mrg 	isl_ast_build_free(build);
   1859  1.1  mrg 	return list;
   1860  1.1  mrg }
   1861  1.1  mrg 
   1862  1.1  mrg /* Does any element of i follow or coincide with any element of j
   1863  1.1  mrg  * at the current depth for equal values of the outer dimensions?
   1864  1.1  mrg  */
   1865  1.1  mrg static isl_bool domain_follows_at_depth(__isl_keep isl_basic_set *i,
   1866  1.1  mrg 	__isl_keep isl_basic_set *j, void *user)
   1867  1.1  mrg {
   1868  1.1  mrg 	int depth = *(int *) user;
   1869  1.1  mrg 	isl_basic_map *test;
   1870  1.1  mrg 	isl_bool empty;
   1871  1.1  mrg 	int l;
   1872  1.1  mrg 
   1873  1.1  mrg 	test = isl_basic_map_from_domain_and_range(isl_basic_set_copy(i),
   1874  1.1  mrg 						    isl_basic_set_copy(j));
   1875  1.1  mrg 	for (l = 0; l < depth; ++l)
   1876  1.1  mrg 		test = isl_basic_map_equate(test, isl_dim_in, l,
   1877  1.1  mrg 						isl_dim_out, l);
   1878  1.1  mrg 	test = isl_basic_map_order_ge(test, isl_dim_in, depth,
   1879  1.1  mrg 					isl_dim_out, depth);
   1880  1.1  mrg 	empty = isl_basic_map_is_empty(test);
   1881  1.1  mrg 	isl_basic_map_free(test);
   1882  1.1  mrg 
   1883  1.1  mrg 	return isl_bool_not(empty);
   1884  1.1  mrg }
   1885  1.1  mrg 
   1886  1.1  mrg /* Split up each element of "list" into a part that is related to "bset"
   1887  1.1  mrg  * according to "gt" and a part that is not.
   1888  1.1  mrg  * Return a list that consist of "bset" and all the pieces.
   1889  1.1  mrg  */
   1890  1.1  mrg static __isl_give isl_basic_set_list *add_split_on(
   1891  1.1  mrg 	__isl_take isl_basic_set_list *list, __isl_take isl_basic_set *bset,
   1892  1.1  mrg 	__isl_keep isl_basic_map *gt)
   1893  1.1  mrg {
   1894  1.1  mrg 	int i;
   1895  1.1  mrg 	isl_size n;
   1896  1.1  mrg 	isl_basic_set_list *res;
   1897  1.1  mrg 
   1898  1.1  mrg 	n = isl_basic_set_list_n_basic_set(list);
   1899  1.1  mrg 	if (n < 0)
   1900  1.1  mrg 		bset = isl_basic_set_free(bset);
   1901  1.1  mrg 
   1902  1.1  mrg 	gt = isl_basic_map_copy(gt);
   1903  1.1  mrg 	gt = isl_basic_map_intersect_domain(gt, isl_basic_set_copy(bset));
   1904  1.1  mrg 	res = isl_basic_set_list_from_basic_set(bset);
   1905  1.1  mrg 	for (i = 0; res && i < n; ++i) {
   1906  1.1  mrg 		isl_basic_set *bset;
   1907  1.1  mrg 		isl_set *set1, *set2;
   1908  1.1  mrg 		isl_basic_map *bmap;
   1909  1.1  mrg 		int empty;
   1910  1.1  mrg 
   1911  1.1  mrg 		bset = isl_basic_set_list_get_basic_set(list, i);
   1912  1.1  mrg 		bmap = isl_basic_map_copy(gt);
   1913  1.1  mrg 		bmap = isl_basic_map_intersect_range(bmap, bset);
   1914  1.1  mrg 		bset = isl_basic_map_range(bmap);
   1915  1.1  mrg 		empty = isl_basic_set_is_empty(bset);
   1916  1.1  mrg 		if (empty < 0)
   1917  1.1  mrg 			res = isl_basic_set_list_free(res);
   1918  1.1  mrg 		if (empty)  {
   1919  1.1  mrg 			isl_basic_set_free(bset);
   1920  1.1  mrg 			bset = isl_basic_set_list_get_basic_set(list, i);
   1921  1.1  mrg 			res = isl_basic_set_list_add(res, bset);
   1922  1.1  mrg 			continue;
   1923  1.1  mrg 		}
   1924  1.1  mrg 
   1925  1.1  mrg 		res = isl_basic_set_list_add(res, isl_basic_set_copy(bset));
   1926  1.1  mrg 		set1 = isl_set_from_basic_set(bset);
   1927  1.1  mrg 		bset = isl_basic_set_list_get_basic_set(list, i);
   1928  1.1  mrg 		set2 = isl_set_from_basic_set(bset);
   1929  1.1  mrg 		set1 = isl_set_subtract(set2, set1);
   1930  1.1  mrg 		set1 = isl_set_make_disjoint(set1);
   1931  1.1  mrg 
   1932  1.1  mrg 		res = isl_basic_set_list_concat(res,
   1933  1.1  mrg 					    isl_basic_set_list_from_set(set1));
   1934  1.1  mrg 	}
   1935  1.1  mrg 	isl_basic_map_free(gt);
   1936  1.1  mrg 	isl_basic_set_list_free(list);
   1937  1.1  mrg 	return res;
   1938  1.1  mrg }
   1939  1.1  mrg 
   1940  1.1  mrg static __isl_give isl_ast_graft_list *generate_sorted_domains(
   1941  1.1  mrg 	__isl_keep isl_basic_set_list *domain_list,
   1942  1.1  mrg 	__isl_keep isl_union_map *executed,
   1943  1.1  mrg 	__isl_keep isl_ast_build *build);
   1944  1.1  mrg 
   1945  1.1  mrg /* Internal data structure for add_nodes.
   1946  1.1  mrg  *
   1947  1.1  mrg  * "executed" and "build" are extra arguments to be passed to add_node.
   1948  1.1  mrg  * "list" collects the results.
   1949  1.1  mrg  */
   1950  1.1  mrg struct isl_add_nodes_data {
   1951  1.1  mrg 	isl_union_map *executed;
   1952  1.1  mrg 	isl_ast_build *build;
   1953  1.1  mrg 
   1954  1.1  mrg 	isl_ast_graft_list *list;
   1955  1.1  mrg };
   1956  1.1  mrg 
   1957  1.1  mrg /* Generate code for the schedule domains in "scc"
   1958  1.1  mrg  * and add the results to "list".
   1959  1.1  mrg  *
   1960  1.1  mrg  * The domains in "scc" form a strongly connected component in the ordering.
   1961  1.1  mrg  * If the number of domains in "scc" is larger than 1, then this means
   1962  1.1  mrg  * that we cannot determine a valid ordering for the domains in the component.
   1963  1.1  mrg  * This should be fairly rare because the individual domains
   1964  1.1  mrg  * have been made disjoint first.
   1965  1.1  mrg  * The problem is that the domains may be integrally disjoint but not
   1966  1.1  mrg  * rationally disjoint.  For example, we may have domains
   1967  1.1  mrg  *
   1968  1.1  mrg  *	{ [i,i] : 0 <= i <= 1 }		and	{ [i,1-i] : 0 <= i <= 1 }
   1969  1.1  mrg  *
   1970  1.1  mrg  * These two domains have an empty intersection, but their rational
   1971  1.1  mrg  * relaxations do intersect.  It is impossible to order these domains
   1972  1.1  mrg  * in the second dimension because the first should be ordered before
   1973  1.1  mrg  * the second for outer dimension equal to 0, while it should be ordered
   1974  1.1  mrg  * after for outer dimension equal to 1.
   1975  1.1  mrg  *
   1976  1.1  mrg  * This may happen in particular in case of unrolling since the domain
   1977  1.1  mrg  * of each slice is replaced by its simple hull.
   1978  1.1  mrg  *
   1979  1.1  mrg  * For each basic set i in "scc" and for each of the following basic sets j,
   1980  1.1  mrg  * we split off that part of the basic set i that shares the outer dimensions
   1981  1.1  mrg  * with j and lies before j in the current dimension.
   1982  1.1  mrg  * We collect all the pieces in a new list that replaces "scc".
   1983  1.1  mrg  *
   1984  1.1  mrg  * While the elements in "scc" should be disjoint, we double-check
   1985  1.1  mrg  * this property to avoid running into an infinite recursion in case
   1986  1.1  mrg  * they intersect due to some internal error.
   1987  1.1  mrg  */
static isl_stat add_nodes(__isl_take isl_basic_set_list *scc, void *user)
{
	struct isl_add_nodes_data *data = user;
	int i;
	isl_size depth;
	isl_size n;
	isl_basic_set *bset, *first;
	isl_basic_set_list *list;
	isl_space *space;
	isl_basic_map *gt;

	n = isl_basic_set_list_n_basic_set(scc);
	if (n < 0)
		goto error;
	bset = isl_basic_set_list_get_basic_set(scc, 0);
	if (n == 1) {
		/* Trivial component: directly generate code
		 * for the single domain.
		 */
		isl_basic_set_list_free(scc);
		data->list = add_node(data->list,
				isl_union_map_copy(data->executed), bset,
				isl_ast_build_copy(data->build));
		return data->list ? isl_stat_ok : isl_stat_error;
	}

	depth = isl_ast_build_get_depth(data->build);
	if (depth < 0)
		bset = isl_basic_set_free(bset);
	/* Construct the relation "gt" ordering elements that share
	 * the outer dimensions by their value at the current depth.
	 */
	space = isl_basic_set_get_space(bset);
	space = isl_space_map_from_set(space);
	gt = isl_basic_map_universe(space);
	for (i = 0; i < depth; ++i)
		gt = isl_basic_map_equate(gt, isl_dim_in, i, isl_dim_out, i);
	gt = isl_basic_map_order_gt(gt, isl_dim_in, depth, isl_dim_out, depth);

	first = isl_basic_set_copy(bset);
	list = isl_basic_set_list_from_basic_set(bset);
	for (i = 1; i < n; ++i) {
		int disjoint;

		bset = isl_basic_set_list_get_basic_set(scc, i);

		/* Double-check disjointness to avoid an infinite recursion
		 * in case of some internal error.
		 */
		disjoint = isl_basic_set_is_disjoint(bset, first);
		if (disjoint < 0)
			list = isl_basic_set_list_free(list);
		else if (!disjoint)
			isl_die(isl_basic_set_list_get_ctx(scc),
				isl_error_internal,
				"basic sets in scc are assumed to be disjoint",
				list = isl_basic_set_list_free(list));

		list = add_split_on(list, bset, gt);
	}
	isl_basic_set_free(first);
	isl_basic_map_free(gt);
	isl_basic_set_list_free(scc);
	/* Retry the sorting on the split-up list. */
	scc = list;
	data->list = isl_ast_graft_list_concat(data->list,
		    generate_sorted_domains(scc, data->executed, data->build));
	isl_basic_set_list_free(scc);

	return data->list ? isl_stat_ok : isl_stat_error;
error:
	isl_basic_set_list_free(scc);
	return isl_stat_error;
}
   2052  1.1  mrg 
   2053  1.1  mrg /* Sort the domains in "domain_list" according to the execution order
   2054  1.1  mrg  * at the current depth (for equal values of the outer dimensions),
   2055  1.1  mrg  * generate code for each of them, collecting the results in a list.
   2056  1.1  mrg  * If no code is generated (because the intersection of the inverse schedule
   2057  1.1  mrg  * with the domains turns out to be empty), then an empty list is returned.
   2058  1.1  mrg  *
   2059  1.1  mrg  * The caller is responsible for ensuring that the basic sets in "domain_list"
   2060  1.1  mrg  * are pair-wise disjoint.  It can, however, in principle happen that
   2061  1.1  mrg  * two basic sets should be ordered one way for one value of the outer
   2062  1.1  mrg  * dimensions and the other way for some other value of the outer dimensions.
   2063  1.1  mrg  * We therefore play safe and look for strongly connected components.
   2064  1.1  mrg  * The function add_nodes takes care of handling non-trivial components.
   2065  1.1  mrg  */
static __isl_give isl_ast_graft_list *generate_sorted_domains(
	__isl_keep isl_basic_set_list *domain_list,
	__isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build)
{
	isl_ctx *ctx;
	struct isl_add_nodes_data data;
	isl_size depth;
	isl_size n;

	n = isl_basic_set_list_n_basic_set(domain_list);
	if (n < 0)
		return NULL;

	ctx = isl_basic_set_list_get_ctx(domain_list);
	data.list = isl_ast_graft_list_alloc(ctx, n);
	if (n == 0)
		return data.list;
	if (n == 1)
		/* A single domain does not need to be sorted. */
		return add_node(data.list, isl_union_map_copy(executed),
			isl_basic_set_list_get_basic_set(domain_list, 0),
			isl_ast_build_copy(build));

	/* Look for strongly connected components with respect to the
	 * execution order at the current depth; add_nodes generates
	 * code for each component, handling non-trivial ones specially.
	 */
	depth = isl_ast_build_get_depth(build);
	data.executed = executed;
	data.build = build;
	if (depth < 0 || isl_basic_set_list_foreach_scc(domain_list,
					&domain_follows_at_depth, &depth,
					&add_nodes, &data) < 0)
		data.list = isl_ast_graft_list_free(data.list);

	return data.list;
}
   2098  1.1  mrg 
   2099  1.1  mrg /* Do i and j share any values for the outer dimensions?
   2100  1.1  mrg  */
   2101  1.1  mrg static isl_bool shared_outer(__isl_keep isl_basic_set *i,
   2102  1.1  mrg 	__isl_keep isl_basic_set *j, void *user)
   2103  1.1  mrg {
   2104  1.1  mrg 	int depth = *(int *) user;
   2105  1.1  mrg 	isl_basic_map *test;
   2106  1.1  mrg 	isl_bool empty;
   2107  1.1  mrg 	int l;
   2108  1.1  mrg 
   2109  1.1  mrg 	test = isl_basic_map_from_domain_and_range(isl_basic_set_copy(i),
   2110  1.1  mrg 						    isl_basic_set_copy(j));
   2111  1.1  mrg 	for (l = 0; l < depth; ++l)
   2112  1.1  mrg 		test = isl_basic_map_equate(test, isl_dim_in, l,
   2113  1.1  mrg 						isl_dim_out, l);
   2114  1.1  mrg 	empty = isl_basic_map_is_empty(test);
   2115  1.1  mrg 	isl_basic_map_free(test);
   2116  1.1  mrg 
   2117  1.1  mrg 	return isl_bool_not(empty);
   2118  1.1  mrg }
   2119  1.1  mrg 
   2120  1.1  mrg /* Internal data structure for generate_sorted_domains_wrap.
   2121  1.1  mrg  *
   2122  1.1  mrg  * "n" is the total number of basic sets
   2123  1.1  mrg  * "executed" and "build" are extra arguments to be passed
   2124  1.1  mrg  *	to generate_sorted_domains.
   2125  1.1  mrg  *
   2126  1.1  mrg  * "single" is set to 1 by generate_sorted_domains_wrap if there
   2127  1.1  mrg  * is only a single component.
   2128  1.1  mrg  * "list" collects the results.
   2129  1.1  mrg  */
struct isl_ast_generate_parallel_domains_data {
	isl_size n;			/* total number of basic sets */
	isl_union_map *executed;	/* inverse schedule */
	isl_ast_build *build;		/* current build */

	int single;		/* set to 1 if there is only one component */
	isl_ast_graft_list *list;	/* collects the results */
};
   2138  1.1  mrg 
   2139  1.1  mrg /* Call generate_sorted_domains on "scc", fuse the result into a list
 * with either zero or one graft and collect these single-element
   2141  1.1  mrg  * lists into data->list.
   2142  1.1  mrg  *
   2143  1.1  mrg  * If there is only one component, i.e., if the number of basic sets
   2144  1.1  mrg  * in the current component is equal to the total number of basic sets,
   2145  1.1  mrg  * then data->single is set to 1 and the result of generate_sorted_domains
   2146  1.1  mrg  * is not fused.
   2147  1.1  mrg  */
static isl_stat generate_sorted_domains_wrap(__isl_take isl_basic_set_list *scc,
	void *user)
{
	struct isl_ast_generate_parallel_domains_data *data = user;
	isl_ast_graft_list *list;
	isl_size n;

	n = isl_basic_set_list_n_basic_set(scc);
	if (n < 0)
		scc = isl_basic_set_list_free(scc);
	list = generate_sorted_domains(scc, data->executed, data->build);
	/* If this component contains all basic sets, then there is
	 * only a single component and the result is not fused.
	 */
	data->single = n == data->n;
	if (!data->single)
		list = isl_ast_graft_list_fuse(list, data->build);
	if (!data->list)
		data->list = list;
	else
		data->list = isl_ast_graft_list_concat(data->list, list);

	isl_basic_set_list_free(scc);
	if (!data->list)
		return isl_stat_error;

	return isl_stat_ok;
}
   2173  1.1  mrg 
   2174  1.1  mrg /* Look for any (weakly connected) components in the "domain_list"
   2175  1.1  mrg  * of domains that share some values of the outer dimensions.
   2176  1.1  mrg  * That is, domains in different components do not share any values
   2177  1.1  mrg  * of the outer dimensions.  This means that these components
   2178  1.1  mrg  * can be freely reordered.
   2179  1.1  mrg  * Within each of the components, we sort the domains according
   2180  1.1  mrg  * to the execution order at the current depth.
   2181  1.1  mrg  *
   2182  1.1  mrg  * If there is more than one component, then generate_sorted_domains_wrap
   2183  1.1  mrg  * fuses the result of each call to generate_sorted_domains
   2184  1.1  mrg  * into a list with either zero or one graft and collects these (at most)
   2185  1.1  mrg  * single element lists into a bigger list. This means that the elements of the
   2186  1.1  mrg  * final list can be freely reordered.  In particular, we sort them
   2187  1.1  mrg  * according to an arbitrary but fixed ordering to ease merging of
   2188  1.1  mrg  * graft lists from different components.
   2189  1.1  mrg  */
static __isl_give isl_ast_graft_list *generate_parallel_domains(
	__isl_keep isl_basic_set_list *domain_list,
	__isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build)
{
	isl_size depth;
	struct isl_ast_generate_parallel_domains_data data;

	data.n = isl_basic_set_list_n_basic_set(domain_list);
	if (data.n < 0)
		return NULL;

	/* With at most one domain there is nothing to reorder. */
	if (data.n <= 1)
		return generate_sorted_domains(domain_list, executed, build);

	depth = isl_ast_build_get_depth(build);
	if (depth < 0)
		return NULL;
	data.list = NULL;
	data.executed = executed;
	data.build = build;
	data.single = 0;
	/* Split the domains into (weakly connected) components that
	 * do not share any values of the outer dimensions and
	 * sort the domains within each component.
	 */
	if (isl_basic_set_list_foreach_scc(domain_list, &shared_outer, &depth,
					    &generate_sorted_domains_wrap,
					    &data) < 0)
		data.list = isl_ast_graft_list_free(data.list);

	/* If there was more than one component, sort the collected
	 * single-element lists according to an arbitrary but fixed
	 * ordering to ease merging of graft lists from other components.
	 */
	if (!data.single)
		data.list = isl_ast_graft_list_sort_guard(data.list);

	return data.list;
}
   2221  1.1  mrg 
   2222  1.1  mrg /* Internal data for separate_domain.
   2223  1.1  mrg  *
   2224  1.1  mrg  * "explicit" is set if we only want to use explicit bounds.
   2225  1.1  mrg  *
   2226  1.1  mrg  * "domain" collects the separated domains.
   2227  1.1  mrg  */
struct isl_separate_domain_data {
	isl_ast_build *build;	/* current build */
	int explicit;		/* only use explicit bounds? */
	isl_set *domain;	/* collects the separated domains */
};
   2233  1.1  mrg 
   2234  1.1  mrg /* Extract implicit bounds on the current dimension for the executed "map".
   2235  1.1  mrg  *
   2236  1.1  mrg  * The domain of "map" may involve inner dimensions, so we
   2237  1.1  mrg  * need to eliminate them.
   2238  1.1  mrg  */
   2239  1.1  mrg static __isl_give isl_set *implicit_bounds(__isl_take isl_map *map,
   2240  1.1  mrg 	__isl_keep isl_ast_build *build)
   2241  1.1  mrg {
   2242  1.1  mrg 	isl_set *domain;
   2243  1.1  mrg 
   2244  1.1  mrg 	domain = isl_map_domain(map);
   2245  1.1  mrg 	domain = isl_ast_build_eliminate(build, domain);
   2246  1.1  mrg 
   2247  1.1  mrg 	return domain;
   2248  1.1  mrg }
   2249  1.1  mrg 
   2250  1.1  mrg /* Extract explicit bounds on the current dimension for the executed "map".
   2251  1.1  mrg  *
   2252  1.1  mrg  * Rather than eliminating the inner dimensions as in implicit_bounds,
   2253  1.1  mrg  * we simply drop any constraints involving those inner dimensions.
   2254  1.1  mrg  * The idea is that most bounds that are implied by constraints on the
   2255  1.1  mrg  * inner dimensions will be enforced by for loops and not by explicit guards.
   2256  1.1  mrg  * There is then no need to separate along those bounds.
   2257  1.1  mrg  */
static __isl_give isl_set *explicit_bounds(__isl_take isl_map *map,
	__isl_keep isl_ast_build *build)
{
	isl_set *domain;
	isl_size depth;
	isl_size dim;

	depth = isl_ast_build_get_depth(build);
	dim = isl_map_dim(map, isl_dim_out);
	if (depth < 0 || dim < 0)
		return isl_map_domain(isl_map_free(map));
	/* Drop all constraints involving the inner (range) dimensions. */
	map = isl_map_drop_constraints_involving_dims(map, isl_dim_out, 0, dim);

	domain = isl_map_domain(map);
	/* "dim" is reused for the dimension of the extracted domain. */
	dim = isl_set_dim(domain, isl_dim_set);
	domain = isl_set_detect_equalities(domain);
	/* Also drop constraints involving the dimensions after the current
	 * depth, as well as integer divisions that involve the current
	 * dimension or that have no known explicit representation.
	 */
	domain = isl_set_drop_constraints_involving_dims(domain,
				isl_dim_set, depth + 1, dim - (depth + 1));
	domain = isl_set_remove_divs_involving_dims(domain,
				isl_dim_set, depth, 1);
	domain = isl_set_remove_unknown_divs(domain);

	return domain;
}
   2282  1.1  mrg 
   2283  1.1  mrg /* Split data->domain into pieces that intersect with the range of "map"
   2284  1.1  mrg  * and pieces that do not intersect with the range of "map"
   2285  1.1  mrg  * and then add that part of the range of "map" that does not intersect
   2286  1.1  mrg  * with data->domain.
   2287  1.1  mrg  */
   2288  1.1  mrg static isl_stat separate_domain(__isl_take isl_map *map, void *user)
   2289  1.1  mrg {
   2290  1.1  mrg 	struct isl_separate_domain_data *data = user;
   2291  1.1  mrg 	isl_set *domain;
   2292  1.1  mrg 	isl_set *d1, *d2;
   2293  1.1  mrg 
   2294  1.1  mrg 	if (data->explicit)
   2295  1.1  mrg 		domain = explicit_bounds(map, data->build);
   2296  1.1  mrg 	else
   2297  1.1  mrg 		domain = implicit_bounds(map, data->build);
   2298  1.1  mrg 
   2299  1.1  mrg 	domain = isl_set_coalesce(domain);
   2300  1.1  mrg 	domain = isl_set_make_disjoint(domain);
   2301  1.1  mrg 	d1 = isl_set_subtract(isl_set_copy(domain), isl_set_copy(data->domain));
   2302  1.1  mrg 	d2 = isl_set_subtract(isl_set_copy(data->domain), isl_set_copy(domain));
   2303  1.1  mrg 	data->domain = isl_set_intersect(data->domain, domain);
   2304  1.1  mrg 	data->domain = isl_set_union(data->domain, d1);
   2305  1.1  mrg 	data->domain = isl_set_union(data->domain, d2);
   2306  1.1  mrg 
   2307  1.1  mrg 	return isl_stat_ok;
   2308  1.1  mrg }
   2309  1.1  mrg 
   2310  1.1  mrg /* Separate the schedule domains of "executed".
   2311  1.1  mrg  *
   2312  1.1  mrg  * That is, break up the domain of "executed" into basic sets,
   2313  1.1  mrg  * such that for each basic set S, every element in S is associated with
   2314  1.1  mrg  * the same domain spaces.
   2315  1.1  mrg  *
   2316  1.1  mrg  * "space" is the (single) domain space of "executed".
   2317  1.1  mrg  */
static __isl_give isl_set *separate_schedule_domains(
	__isl_take isl_space *space, __isl_take isl_union_map *executed,
	__isl_keep isl_ast_build *build)
{
	struct isl_separate_domain_data data = { build };
	isl_ctx *ctx;

	ctx = isl_ast_build_get_ctx(build);
	/* The separation-bounds option determines whether only explicit
	 * bounds are taken into account during the separation.
	 */
	data.explicit = isl_options_get_ast_build_separation_bounds(ctx) ==
				    ISL_AST_BUILD_SEPARATION_BOUNDS_EXPLICIT;
	data.domain = isl_set_empty(space);
	if (isl_union_map_foreach_map(executed, &separate_domain, &data) < 0)
		data.domain = isl_set_free(data.domain);

	isl_union_map_free(executed);
	return data.domain;
}
   2335  1.1  mrg 
   2336  1.1  mrg /* Temporary data used during the search for a lower bound for unrolling.
   2337  1.1  mrg  *
   2338  1.1  mrg  * "build" is the build in which the unrolling will be performed
   2339  1.1  mrg  * "domain" is the original set for which to find a lower bound
 * "depth" is the dimension for which to find a lower bound
   2341  1.1  mrg  * "expansion" is the expansion that needs to be applied to "domain"
   2342  1.1  mrg  * in the unrolling that will be performed
   2343  1.1  mrg  *
   2344  1.1  mrg  * "lower" is the best lower bound found so far.  It is NULL if we have not
   2345  1.1  mrg  * found any yet.
   2346  1.1  mrg  * "n" is the corresponding size.  If lower is NULL, then the value of n
   2347  1.1  mrg  * is undefined.
   2348  1.1  mrg  * "n_div" is the maximal number of integer divisions in the first
   2349  1.1  mrg  * unrolled iteration (after expansion).  It is set to -1 if it hasn't
   2350  1.1  mrg  * been computed yet.
   2351  1.1  mrg  */
struct isl_find_unroll_data {
	isl_ast_build *build;		/* build in which to unroll */
	isl_set *domain;		/* set for which to find a lower bound */
	int depth;			/* dimension to bound */
	isl_basic_map *expansion;	/* expansion to apply to "domain" */

	isl_aff *lower;		/* best lower bound so far, NULL if none */
	int *n;			/* size corresponding to "lower" */
	int n_div;		/* divisions in first slice, -1 if uncomputed */
};
   2362  1.1  mrg 
   2363  1.1  mrg /* Return the constraint
   2364  1.1  mrg  *
   2365  1.1  mrg  *	i_"depth" = aff + offset
   2366  1.1  mrg  */
   2367  1.1  mrg static __isl_give isl_constraint *at_offset(int depth, __isl_keep isl_aff *aff,
   2368  1.1  mrg 	int offset)
   2369  1.1  mrg {
   2370  1.1  mrg 	aff = isl_aff_copy(aff);
   2371  1.1  mrg 	aff = isl_aff_add_coefficient_si(aff, isl_dim_in, depth, -1);
   2372  1.1  mrg 	aff = isl_aff_add_constant_si(aff, offset);
   2373  1.1  mrg 	return isl_equality_from_aff(aff);
   2374  1.1  mrg }
   2375  1.1  mrg 
   2376  1.1  mrg /* Update *user to the number of integer divisions in the first element
   2377  1.1  mrg  * of "ma", if it is larger than the current value.
   2378  1.1  mrg  */
   2379  1.1  mrg static isl_stat update_n_div(__isl_take isl_set *set,
   2380  1.1  mrg 	__isl_take isl_multi_aff *ma, void *user)
   2381  1.1  mrg {
   2382  1.1  mrg 	isl_aff *aff;
   2383  1.1  mrg 	int *n = user;
   2384  1.1  mrg 	isl_size n_div;
   2385  1.1  mrg 
   2386  1.1  mrg 	aff = isl_multi_aff_get_aff(ma, 0);
   2387  1.1  mrg 	n_div = isl_aff_dim(aff, isl_dim_div);
   2388  1.1  mrg 	isl_aff_free(aff);
   2389  1.1  mrg 	isl_multi_aff_free(ma);
   2390  1.1  mrg 	isl_set_free(set);
   2391  1.1  mrg 
   2392  1.1  mrg 	if (n_div > *n)
   2393  1.1  mrg 		*n = n_div;
   2394  1.1  mrg 
   2395  1.1  mrg 	return n_div >= 0 ? isl_stat_ok : isl_stat_error;
   2396  1.1  mrg }
   2397  1.1  mrg 
   2398  1.1  mrg /* Get the number of integer divisions in the expression for the iterator
   2399  1.1  mrg  * value at the first slice in the unrolling based on lower bound "lower",
   2400  1.1  mrg  * taking into account the expansion that needs to be performed on this slice.
   2401  1.1  mrg  */
static int get_expanded_n_div(struct isl_find_unroll_data *data,
	__isl_keep isl_aff *lower)
{
	isl_constraint *c;
	isl_set *set;
	isl_map *it_map, *expansion;
	isl_pw_multi_aff *pma;
	int n;

	/* Restrict the domain to the first slice, i.e., to the elements
	 * where the current dimension equals "lower", and expand it.
	 */
	c = at_offset(data->depth, lower, 0);
	set = isl_set_copy(data->domain);
	set = isl_set_add_constraint(set, c);
	expansion = isl_map_from_basic_map(isl_basic_map_copy(data->expansion));
	set = isl_set_apply(set, expansion);
	/* Take the maximal number of integer divisions over all pieces
	 * of the expression for the iterator value; -1 on error.
	 */
	it_map = isl_ast_build_map_to_iterator(data->build, set);
	pma = isl_pw_multi_aff_from_map(it_map);
	n = 0;
	if (isl_pw_multi_aff_foreach_piece(pma, &update_n_div, &n) < 0)
		n = -1;
	isl_pw_multi_aff_free(pma);

	return n;
}
   2425  1.1  mrg 
   2426  1.1  mrg /* Is the lower bound "lower" with corresponding iteration count "n"
   2427  1.1  mrg  * better than the one stored in "data"?
   2428  1.1  mrg  * If there is no upper bound on the iteration count ("n" is infinity) or
   2429  1.1  mrg  * if the count is too large, then we cannot use this lower bound.
   2430  1.1  mrg  * Otherwise, if there was no previous lower bound or
   2431  1.1  mrg  * if the iteration count of the new lower bound is smaller than
   2432  1.1  mrg  * the iteration count of the previous lower bound, then we consider
   2433  1.1  mrg  * the new lower bound to be better.
   2434  1.1  mrg  * If the iteration count is the same, then compare the number
   2435  1.1  mrg  * of integer divisions that would be needed to express
   2436  1.1  mrg  * the iterator value at the first slice in the unrolling
   2437  1.1  mrg  * according to the lower bound.  If we end up computing this
   2438  1.1  mrg  * number, then store the lowest value in data->n_div.
   2439  1.1  mrg  */
static int is_better_lower_bound(struct isl_find_unroll_data *data,
	__isl_keep isl_aff *lower, __isl_keep isl_val *n)
{
	int cmp;
	int n_div;

	if (!n)
		return -1;
	/* An unbounded or too large iteration count cannot be used. */
	if (isl_val_is_infty(n))
		return 0;
	if (isl_val_cmp_si(n, INT_MAX) > 0)
		return 0;
	/* Any usable bound is better than no bound at all. */
	if (!data->lower)
		return 1;
	cmp = isl_val_cmp_si(n, *data->n);
	if (cmp < 0)
		return 1;
	if (cmp > 0)
		return 0;
	/* Equal iteration counts: break the tie on the number of
	 * integer divisions needed at the first slice, caching
	 * the count for the current best bound in data->n_div.
	 */
	if (data->n_div < 0)
		data->n_div = get_expanded_n_div(data, data->lower);
	if (data->n_div < 0)
		return -1;
	if (data->n_div == 0)
		return 0;
	n_div = get_expanded_n_div(data, lower);
	if (n_div < 0)
		return -1;
	if (n_div >= data->n_div)
		return 0;
	data->n_div = n_div;

	return 1;
}
   2474  1.1  mrg 
   2475  1.1  mrg /* Check if we can use "c" as a lower bound and if it is better than
   2476  1.1  mrg  * any previously found lower bound.
   2477  1.1  mrg  *
   2478  1.1  mrg  * If "c" does not involve the dimension at the current depth,
   2479  1.1  mrg  * then we cannot use it.
   2480  1.1  mrg  * Otherwise, let "c" be of the form
   2481  1.1  mrg  *
   2482  1.1  mrg  *	i >= f(j)/a
   2483  1.1  mrg  *
   2484  1.1  mrg  * We compute the maximal value of
   2485  1.1  mrg  *
   2486  1.1  mrg  *	-ceil(f(j)/a)) + i + 1
   2487  1.1  mrg  *
   2488  1.1  mrg  * over the domain.  If there is such a value "n", then we know
   2489  1.1  mrg  *
   2490  1.1  mrg  *	-ceil(f(j)/a)) + i + 1 <= n
   2491  1.1  mrg  *
   2492  1.1  mrg  * or
   2493  1.1  mrg  *
   2494  1.1  mrg  *	i < ceil(f(j)/a)) + n
   2495  1.1  mrg  *
   2496  1.1  mrg  * meaning that we can use ceil(f(j)/a)) as a lower bound for unrolling.
   2497  1.1  mrg  * We just need to check if we have found any lower bound before and
   2498  1.1  mrg  * if the new lower bound is better (smaller n or fewer integer divisions)
   2499  1.1  mrg  * than the previously found lower bounds.
   2500  1.1  mrg  */
static isl_stat update_unrolling_lower_bound(struct isl_find_unroll_data *data,
	__isl_keep isl_constraint *c)
{
	isl_aff *aff, *lower;
	isl_val *max;
	int better;

	/* Only constraints that bound the current dimension from below
	 * are of interest.
	 */
	if (!isl_constraint_is_lower_bound(c, isl_dim_set, data->depth))
		return isl_stat_ok;

	/* Compute the maximum of -ceil(f(j)/a) + i + 1 over the domain,
	 * i.e., the number of iterations needed when unrolling from
	 * the candidate lower bound ceil(f(j)/a).
	 */
	lower = isl_constraint_get_bound(c, isl_dim_set, data->depth);
	lower = isl_aff_ceil(lower);
	aff = isl_aff_copy(lower);
	aff = isl_aff_neg(aff);
	aff = isl_aff_add_coefficient_si(aff, isl_dim_in, data->depth, 1);
	aff = isl_aff_add_constant_si(aff, 1);
	max = isl_set_max_val(data->domain, aff);
	isl_aff_free(aff);

	better = is_better_lower_bound(data, lower, max);
	if (better < 0 || !better) {
		isl_val_free(max);
		isl_aff_free(lower);
		return better < 0 ? isl_stat_error : isl_stat_ok;
	}

	/* Record the new best lower bound and its iteration count. */
	isl_aff_free(data->lower);
	data->lower = lower;
	*data->n = isl_val_get_num_si(max);
	isl_val_free(max);

	return isl_stat_ok;
}
   2534  1.1  mrg 
   2535  1.1  mrg /* Check if we can use "c" as a lower bound and if it is better than
   2536  1.1  mrg  * any previously found lower bound.
   2537  1.1  mrg  */
   2538  1.1  mrg static isl_stat constraint_find_unroll(__isl_take isl_constraint *c, void *user)
   2539  1.1  mrg {
   2540  1.1  mrg 	struct isl_find_unroll_data *data;
   2541  1.1  mrg 	isl_stat r;
   2542  1.1  mrg 
   2543  1.1  mrg 	data = (struct isl_find_unroll_data *) user;
   2544  1.1  mrg 	r = update_unrolling_lower_bound(data, c);
   2545  1.1  mrg 	isl_constraint_free(c);
   2546  1.1  mrg 
   2547  1.1  mrg 	return r;
   2548  1.1  mrg }
   2549  1.1  mrg 
   2550  1.1  mrg /* Look for a lower bound l(i) on the dimension at "depth"
   2551  1.1  mrg  * and a size n such that "domain" is a subset of
   2552  1.1  mrg  *
   2553  1.1  mrg  *	{ [i] : l(i) <= i_d < l(i) + n }
   2554  1.1  mrg  *
   2555  1.1  mrg  * where d is "depth" and l(i) depends only on earlier dimensions.
   2556  1.1  mrg  * Furthermore, try and find a lower bound such that n is as small as possible.
   2557  1.1  mrg  * In particular, "n" needs to be finite.
   2558  1.1  mrg  * "build" is the build in which the unrolling will be performed.
   2559  1.1  mrg  * "expansion" is the expansion that needs to be applied to "domain"
   2560  1.1  mrg  * in the unrolling that will be performed.
   2561  1.1  mrg  *
   2562  1.1  mrg  * Inner dimensions have been eliminated from "domain" by the caller.
   2563  1.1  mrg  *
   2564  1.1  mrg  * We first construct a collection of lower bounds on the input set
   2565  1.1  mrg  * by computing its simple hull.  We then iterate through them,
   2566  1.1  mrg  * discarding those that we cannot use (either because they do not
   2567  1.1  mrg  * involve the dimension at "depth" or because they have no corresponding
   2568  1.1  mrg  * upper bound, meaning that "n" would be unbounded) and pick out the
   2569  1.1  mrg  * best from the remaining ones.
   2570  1.1  mrg  *
   2571  1.1  mrg  * If we cannot find a suitable lower bound, then we consider that
   2572  1.1  mrg  * to be an error.
   2573  1.1  mrg  */
static __isl_give isl_aff *find_unroll_lower_bound(
	__isl_keep isl_ast_build *build, __isl_keep isl_set *domain,
	int depth, __isl_keep isl_basic_map *expansion, int *n)
{
	struct isl_find_unroll_data data =
			{ build, domain, depth, expansion, NULL, n, -1 };
	isl_basic_set *hull;

	/* The constraints of the simple hull form the collection
	 * of candidate lower bounds.
	 */
	hull = isl_set_simple_hull(isl_set_copy(domain));

	if (isl_basic_set_foreach_constraint(hull,
					    &constraint_find_unroll, &data) < 0)
		goto error;

	isl_basic_set_free(hull);

	/* Not finding any suitable lower bound is considered an error. */
	if (!data.lower)
		isl_die(isl_set_get_ctx(domain), isl_error_invalid,
			"cannot find lower bound for unrolling", return NULL);

	return data.lower;
error:
	isl_basic_set_free(hull);
	return isl_aff_free(data.lower);
}
   2599  1.1  mrg 
   2600  1.1  mrg /* Call "fn" on each iteration of the current dimension of "domain".
   2601  1.1  mrg  * If "init" is not NULL, then it is called with the number of
   2602  1.1  mrg  * iterations before any call to "fn".
   2603  1.1  mrg  * Return -1 on failure.
   2604  1.1  mrg  *
   2605  1.1  mrg  * Since we are going to be iterating over the individual values,
   2606  1.1  mrg  * we first check if there are any strides on the current dimension.
   2607  1.1  mrg  * If there is, we rewrite the current dimension i as
   2608  1.1  mrg  *
   2609  1.1  mrg  *		i = stride i' + offset
   2610  1.1  mrg  *
   2611  1.1  mrg  * and then iterate over individual values of i' instead.
   2612  1.1  mrg  *
   2613  1.1  mrg  * We then look for a lower bound on i' and a size such that the domain
   2614  1.1  mrg  * is a subset of
   2615  1.1  mrg  *
   2616  1.1  mrg  *	{ [j,i'] : l(j) <= i' < l(j) + n }
   2617  1.1  mrg  *
   2618  1.1  mrg  * and then take slices of the domain at values of i'
   2619  1.1  mrg  * between l(j) and l(j) + n - 1.
   2620  1.1  mrg  *
   2621  1.1  mrg  * We compute the unshifted simple hull of each slice to ensure that
   2622  1.1  mrg  * we have a single basic set per offset.  The slicing constraint
   2623  1.1  mrg  * may get simplified away before the unshifted simple hull is taken
   2624  1.1  mrg  * and may therefore in some rare cases disappear from the result.
   2625  1.1  mrg  * We therefore explicitly add the constraint back after computing
   2626  1.1  mrg  * the unshifted simple hull to ensure that the basic sets
   2627  1.1  mrg  * remain disjoint.  The constraints that are dropped by taking the hull
   2628  1.1  mrg  * will be taken into account at the next level, as in the case of the
   2629  1.1  mrg  * atomic option.
   2630  1.1  mrg  *
   2631  1.1  mrg  * Finally, we map i' back to i and call "fn".
   2632  1.1  mrg  */
static int foreach_iteration(__isl_take isl_set *domain,
	__isl_keep isl_ast_build *build, int (*init)(int n, void *user),
	int (*fn)(__isl_take isl_basic_set *bset, void *user), void *user)
{
	int i, n;
	isl_bool empty;
	isl_size depth;
	isl_multi_aff *expansion;
	isl_basic_map *bmap;
	isl_aff *lower = NULL;
	isl_ast_build *stride_build;

	depth = isl_ast_build_get_depth(build);
	if (depth < 0)
		domain = isl_set_free(domain);

	/* Detect strides on the current dimension and rewrite it as
	 * i = stride i' + offset, so that individual values of i'
	 * can be enumerated below.
	 */
	domain = isl_ast_build_eliminate_inner(build, domain);
	domain = isl_set_intersect(domain, isl_ast_build_get_domain(build));
	stride_build = isl_ast_build_copy(build);
	stride_build = isl_ast_build_detect_strides(stride_build,
							isl_set_copy(domain));
	expansion = isl_ast_build_get_stride_expansion(stride_build);

	domain = isl_set_preimage_multi_aff(domain,
					    isl_multi_aff_copy(expansion));
	domain = isl_ast_build_eliminate_divs(stride_build, domain);
	isl_ast_build_free(stride_build);

	/* "bmap" maps the stride-compressed i' back to the original i. */
	bmap = isl_basic_map_from_multi_aff(expansion);

	/* Compute the number of iterations "n": -1 signals an error,
	 * 0 an empty domain; otherwise "n" is determined together with
	 * the lower bound "lower".
	 */
	empty = isl_set_is_empty(domain);
	if (empty < 0) {
		n = -1;
	} else if (empty) {
		n = 0;
	} else {
		lower = find_unroll_lower_bound(build, domain, depth, bmap, &n);
		if (!lower)
			n = -1;
	}
	if (n >= 0 && init && init(n, user) < 0)
		n = -1;
	for (i = 0; i < n; ++i) {
		isl_set *set;
		isl_basic_set *bset;
		isl_constraint *slice;

		/* Slice the domain at offset "i" above the lower bound.
		 * The slicing constraint is explicitly added back after
		 * taking the unshifted simple hull because it may have
		 * been simplified away, and it is what keeps the
		 * resulting basic sets disjoint.
		 */
		slice = at_offset(depth, lower, i);
		set = isl_set_copy(domain);
		set = isl_set_add_constraint(set, isl_constraint_copy(slice));
		bset = isl_set_unshifted_simple_hull(set);
		bset = isl_basic_set_add_constraint(bset, slice);
		/* Map i' back to the original iterator i before calling "fn". */
		bset = isl_basic_set_apply(bset, isl_basic_map_copy(bmap));

		if (fn(bset, user) < 0)
			break;
	}

	isl_aff_free(lower);
	isl_set_free(domain);
	isl_basic_map_free(bmap);

	/* Failure if "n" could not be computed or if "fn" aborted the loop. */
	return n < 0 || i < n ? -1 : 0;
}
   2697  1.1  mrg 
   2698  1.1  mrg /* Data structure for storing the results and the intermediate objects
   2699  1.1  mrg  * of compute_domains.
   2700  1.1  mrg  *
   2701  1.1  mrg  * "list" is the main result of the function and contains a list
   2702  1.1  mrg  * of disjoint basic sets for which code should be generated.
   2703  1.1  mrg  *
   2704  1.1  mrg  * "executed" and "build" are inputs to compute_domains.
   2705  1.1  mrg  * "schedule_domain" is the domain of "executed".
   2706  1.1  mrg  *
 * "option" contains the domains at the current depth that should be
   2708  1.1  mrg  * atomic, separated or unrolled.  These domains are as specified by
   2709  1.1  mrg  * the user, except that inner dimensions have been eliminated and
   2710  1.1  mrg  * that they have been made pair-wise disjoint.
   2711  1.1  mrg  *
   2712  1.1  mrg  * "sep_class" contains the user-specified split into separation classes
   2713  1.1  mrg  * specialized to the current depth.
   2714  1.1  mrg  * "done" contains the union of the separation domains that have already
   2715  1.1  mrg  * been handled.
   2716  1.1  mrg  */
struct isl_codegen_domains {
	/* Disjoint basic sets for which code should be generated. */
	isl_basic_set_list *list;

	/* Inputs to compute_domains. */
	isl_union_map *executed;
	isl_ast_build *build;
	/* Domain of "executed". */
	isl_set *schedule_domain;

	/* Pairwise disjoint option domains at the current depth,
	 * indexed by enum isl_ast_loop_type.
	 */
	isl_set *option[4];

	/* User-specified separation classes, specialized to this depth. */
	isl_map *sep_class;
	/* Union of the separation domains already handled. */
	isl_set *done;
};
   2729  1.1  mrg 
   2730  1.1  mrg /* Internal data structure for do_unroll.
   2731  1.1  mrg  *
   2732  1.1  mrg  * "domains" stores the results of compute_domains.
   2733  1.1  mrg  * "class_domain" is the original class domain passed to do_unroll.
   2734  1.1  mrg  * "unroll_domain" collects the unrolled iterations.
   2735  1.1  mrg  */
struct isl_ast_unroll_data {
	/* Stores the results of compute_domains. */
	struct isl_codegen_domains *domains;
	/* The original class domain passed to do_unroll. */
	isl_set *class_domain;
	/* Union of the unrolled iterations collected so far. */
	isl_set *unroll_domain;
};
   2741  1.1  mrg 
   2742  1.1  mrg /* Given an iteration of an unrolled domain represented by "bset",
   2743  1.1  mrg  * add it to data->domains->list.
   2744  1.1  mrg  * Since we may have dropped some constraints, we intersect with
   2745  1.1  mrg  * the class domain again to ensure that each element in the list
   2746  1.1  mrg  * is disjoint from the other class domains.
   2747  1.1  mrg  */
   2748  1.1  mrg static int do_unroll_iteration(__isl_take isl_basic_set *bset, void *user)
   2749  1.1  mrg {
   2750  1.1  mrg 	struct isl_ast_unroll_data *data = user;
   2751  1.1  mrg 	isl_set *set;
   2752  1.1  mrg 	isl_basic_set_list *list;
   2753  1.1  mrg 
   2754  1.1  mrg 	set = isl_set_from_basic_set(bset);
   2755  1.1  mrg 	data->unroll_domain = isl_set_union(data->unroll_domain,
   2756  1.1  mrg 					    isl_set_copy(set));
   2757  1.1  mrg 	set = isl_set_intersect(set, isl_set_copy(data->class_domain));
   2758  1.1  mrg 	set = isl_set_make_disjoint(set);
   2759  1.1  mrg 	list = isl_basic_set_list_from_set(set);
   2760  1.1  mrg 	data->domains->list = isl_basic_set_list_concat(data->domains->list,
   2761  1.1  mrg 							list);
   2762  1.1  mrg 
   2763  1.1  mrg 	return 0;
   2764  1.1  mrg }
   2765  1.1  mrg 
   2766  1.1  mrg /* Extend domains->list with a list of basic sets, one for each value
   2767  1.1  mrg  * of the current dimension in "domain" and remove the corresponding
   2768  1.1  mrg  * sets from the class domain.  Return the updated class domain.
   2769  1.1  mrg  * The divs that involve the current dimension have not been projected out
   2770  1.1  mrg  * from this domain.
   2771  1.1  mrg  *
   2772  1.1  mrg  * We call foreach_iteration to iterate over the individual values and
   2773  1.1  mrg  * in do_unroll_iteration we collect the individual basic sets in
   2774  1.1  mrg  * domains->list and their union in data->unroll_domain, which is then
   2775  1.1  mrg  * used to update the class domain.
   2776  1.1  mrg  */
   2777  1.1  mrg static __isl_give isl_set *do_unroll(struct isl_codegen_domains *domains,
   2778  1.1  mrg 	__isl_take isl_set *domain, __isl_take isl_set *class_domain)
   2779  1.1  mrg {
   2780  1.1  mrg 	struct isl_ast_unroll_data data;
   2781  1.1  mrg 
   2782  1.1  mrg 	if (!domain)
   2783  1.1  mrg 		return isl_set_free(class_domain);
   2784  1.1  mrg 	if (!class_domain)
   2785  1.1  mrg 		return isl_set_free(domain);
   2786  1.1  mrg 
   2787  1.1  mrg 	data.domains = domains;
   2788  1.1  mrg 	data.class_domain = class_domain;
   2789  1.1  mrg 	data.unroll_domain = isl_set_empty(isl_set_get_space(domain));
   2790  1.1  mrg 
   2791  1.1  mrg 	if (foreach_iteration(domain, domains->build, NULL,
   2792  1.1  mrg 				&do_unroll_iteration, &data) < 0)
   2793  1.1  mrg 		data.unroll_domain = isl_set_free(data.unroll_domain);
   2794  1.1  mrg 
   2795  1.1  mrg 	class_domain = isl_set_subtract(class_domain, data.unroll_domain);
   2796  1.1  mrg 
   2797  1.1  mrg 	return class_domain;
   2798  1.1  mrg }
   2799  1.1  mrg 
   2800  1.1  mrg /* Add domains to domains->list for each individual value of the current
   2801  1.1  mrg  * dimension, for that part of the schedule domain that lies in the
   2802  1.1  mrg  * intersection of the option domain and the class domain.
   2803  1.1  mrg  * Remove the corresponding sets from the class domain and
   2804  1.1  mrg  * return the updated class domain.
   2805  1.1  mrg  *
   2806  1.1  mrg  * We first break up the unroll option domain into individual pieces
   2807  1.1  mrg  * and then handle each of them separately.  The unroll option domain
   2808  1.1  mrg  * has been made disjoint in compute_domains_init_options,
   2809  1.1  mrg  *
   2810  1.1  mrg  * Note that we actively want to combine different pieces of the
   2811  1.1  mrg  * schedule domain that have the same value at the current dimension.
   2812  1.1  mrg  * We therefore need to break up the unroll option domain before
   2813  1.1  mrg  * intersecting with class and schedule domain, hoping that the
   2814  1.1  mrg  * unroll option domain specified by the user is relatively simple.
   2815  1.1  mrg  */
   2816  1.1  mrg static __isl_give isl_set *compute_unroll_domains(
   2817  1.1  mrg 	struct isl_codegen_domains *domains, __isl_take isl_set *class_domain)
   2818  1.1  mrg {
   2819  1.1  mrg 	isl_set *unroll_domain;
   2820  1.1  mrg 	isl_basic_set_list *unroll_list;
   2821  1.1  mrg 	int i;
   2822  1.1  mrg 	isl_size n;
   2823  1.1  mrg 	isl_bool empty;
   2824  1.1  mrg 
   2825  1.1  mrg 	empty = isl_set_is_empty(domains->option[isl_ast_loop_unroll]);
   2826  1.1  mrg 	if (empty < 0)
   2827  1.1  mrg 		return isl_set_free(class_domain);
   2828  1.1  mrg 	if (empty)
   2829  1.1  mrg 		return class_domain;
   2830  1.1  mrg 
   2831  1.1  mrg 	unroll_domain = isl_set_copy(domains->option[isl_ast_loop_unroll]);
   2832  1.1  mrg 	unroll_list = isl_basic_set_list_from_set(unroll_domain);
   2833  1.1  mrg 
   2834  1.1  mrg 	n = isl_basic_set_list_n_basic_set(unroll_list);
   2835  1.1  mrg 	if (n < 0)
   2836  1.1  mrg 		class_domain = isl_set_free(class_domain);
   2837  1.1  mrg 	for (i = 0; i < n; ++i) {
   2838  1.1  mrg 		isl_basic_set *bset;
   2839  1.1  mrg 
   2840  1.1  mrg 		bset = isl_basic_set_list_get_basic_set(unroll_list, i);
   2841  1.1  mrg 		unroll_domain = isl_set_from_basic_set(bset);
   2842  1.1  mrg 		unroll_domain = isl_set_intersect(unroll_domain,
   2843  1.1  mrg 						    isl_set_copy(class_domain));
   2844  1.1  mrg 		unroll_domain = isl_set_intersect(unroll_domain,
   2845  1.1  mrg 					isl_set_copy(domains->schedule_domain));
   2846  1.1  mrg 
   2847  1.1  mrg 		empty = isl_set_is_empty(unroll_domain);
   2848  1.1  mrg 		if (empty >= 0 && empty) {
   2849  1.1  mrg 			isl_set_free(unroll_domain);
   2850  1.1  mrg 			continue;
   2851  1.1  mrg 		}
   2852  1.1  mrg 
   2853  1.1  mrg 		class_domain = do_unroll(domains, unroll_domain, class_domain);
   2854  1.1  mrg 	}
   2855  1.1  mrg 
   2856  1.1  mrg 	isl_basic_set_list_free(unroll_list);
   2857  1.1  mrg 
   2858  1.1  mrg 	return class_domain;
   2859  1.1  mrg }
   2860  1.1  mrg 
   2861  1.1  mrg /* Try and construct a single basic set that includes the intersection of
   2862  1.1  mrg  * the schedule domain, the atomic option domain and the class domain.
   2863  1.1  mrg  * Add the resulting basic set(s) to domains->list and remove them
   2864  1.1  mrg  * from class_domain.  Return the updated class domain.
   2865  1.1  mrg  *
   2866  1.1  mrg  * We construct a single domain rather than trying to combine
   2867  1.1  mrg  * the schedule domains of individual domains because we are working
   2868  1.1  mrg  * within a single component so that non-overlapping schedule domains
   2869  1.1  mrg  * should already have been separated.
   2870  1.1  mrg  * We do however need to make sure that this single domains is a subset
   2871  1.1  mrg  * of the class domain so that it would not intersect with any other
   2872  1.1  mrg  * class domains.  This means that we may end up splitting up the atomic
   2873  1.1  mrg  * domain in case separation classes are being used.
   2874  1.1  mrg  *
   2875  1.1  mrg  * "domain" is the intersection of the schedule domain and the class domain,
   2876  1.1  mrg  * with inner dimensions projected out.
   2877  1.1  mrg  */
static __isl_give isl_set *compute_atomic_domain(
	struct isl_codegen_domains *domains, __isl_take isl_set *class_domain)
{
	isl_basic_set *bset;
	isl_basic_set_list *list;
	isl_set *domain, *atomic_domain;
	int empty;

	/* Intersect the atomic option domain with the class and
	 * schedule domains.
	 */
	domain = isl_set_copy(domains->option[isl_ast_loop_atomic]);
	domain = isl_set_intersect(domain, isl_set_copy(class_domain));
	domain = isl_set_intersect(domain,
				isl_set_copy(domains->schedule_domain));
	empty = isl_set_is_empty(domain);
	if (empty < 0)
		class_domain = isl_set_free(class_domain);
	/* Note: on error "empty" is negative and therefore truthy,
	 * so this branch also cleans up "domain" and returns the
	 * already-freed (NULL) class domain.
	 */
	if (empty) {
		isl_set_free(domain);
		return class_domain;
	}

	/* Collapse the atomic part into a single basic set and remove
	 * this (possibly larger) hull from the class domain so that
	 * domains computed later cannot overlap with it.
	 */
	domain = isl_ast_build_eliminate(domains->build, domain);
	domain = isl_set_coalesce_preserve(domain);
	bset = isl_set_unshifted_simple_hull(domain);
	domain = isl_set_from_basic_set(bset);
	atomic_domain = isl_set_copy(domain);
	domain = isl_set_intersect(domain, isl_set_copy(class_domain));
	class_domain = isl_set_subtract(class_domain, atomic_domain);
	domain = isl_set_make_disjoint(domain);
	list = isl_basic_set_list_from_set(domain);
	domains->list = isl_basic_set_list_concat(domains->list, list);

	return class_domain;
}
   2911  1.1  mrg 
   2912  1.1  mrg /* Split up the schedule domain into uniform basic sets,
   2913  1.1  mrg  * in the sense that each element in a basic set is associated to
   2914  1.1  mrg  * elements of the same domains, and add the result to domains->list.
   2915  1.1  mrg  * Do this for that part of the schedule domain that lies in the
   2916  1.1  mrg  * intersection of "class_domain" and the separate option domain.
   2917  1.1  mrg  *
   2918  1.1  mrg  * "class_domain" may or may not include the constraints
   2919  1.1  mrg  * of the schedule domain, but this does not make a difference
   2920  1.1  mrg  * since we are going to intersect it with the domain of the inverse schedule.
   2921  1.1  mrg  * If it includes schedule domain constraints, then they may involve
   2922  1.1  mrg  * inner dimensions, but we will eliminate them in separation_domain.
   2923  1.1  mrg  */
   2924  1.1  mrg static int compute_separate_domain(struct isl_codegen_domains *domains,
   2925  1.1  mrg 	__isl_keep isl_set *class_domain)
   2926  1.1  mrg {
   2927  1.1  mrg 	isl_space *space;
   2928  1.1  mrg 	isl_set *domain;
   2929  1.1  mrg 	isl_union_map *executed;
   2930  1.1  mrg 	isl_basic_set_list *list;
   2931  1.1  mrg 	int empty;
   2932  1.1  mrg 
   2933  1.1  mrg 	domain = isl_set_copy(domains->option[isl_ast_loop_separate]);
   2934  1.1  mrg 	domain = isl_set_intersect(domain, isl_set_copy(class_domain));
   2935  1.1  mrg 	executed = isl_union_map_copy(domains->executed);
   2936  1.1  mrg 	executed = isl_union_map_intersect_domain(executed,
   2937  1.1  mrg 				    isl_union_set_from_set(domain));
   2938  1.1  mrg 	empty = isl_union_map_is_empty(executed);
   2939  1.1  mrg 	if (empty < 0 || empty) {
   2940  1.1  mrg 		isl_union_map_free(executed);
   2941  1.1  mrg 		return empty < 0 ? -1 : 0;
   2942  1.1  mrg 	}
   2943  1.1  mrg 
   2944  1.1  mrg 	space = isl_set_get_space(class_domain);
   2945  1.1  mrg 	domain = separate_schedule_domains(space, executed, domains->build);
   2946  1.1  mrg 
   2947  1.1  mrg 	list = isl_basic_set_list_from_set(domain);
   2948  1.1  mrg 	domains->list = isl_basic_set_list_concat(domains->list, list);
   2949  1.1  mrg 
   2950  1.1  mrg 	return 0;
   2951  1.1  mrg }
   2952  1.1  mrg 
   2953  1.1  mrg /* Split up the domain at the current depth into disjoint
   2954  1.1  mrg  * basic sets for which code should be generated separately
   2955  1.1  mrg  * for the given separation class domain.
   2956  1.1  mrg  *
   2957  1.1  mrg  * If any separation classes have been defined, then "class_domain"
   2958  1.1  mrg  * is the domain of the current class and does not refer to inner dimensions.
   2959  1.1  mrg  * Otherwise, "class_domain" is the universe domain.
   2960  1.1  mrg  *
   2961  1.1  mrg  * We first make sure that the class domain is disjoint from
   2962  1.1  mrg  * previously considered class domains.
   2963  1.1  mrg  *
   2964  1.1  mrg  * The separate domains can be computed directly from the "class_domain".
   2965  1.1  mrg  *
   2966  1.1  mrg  * The unroll, atomic and remainder domains need the constraints
   2967  1.1  mrg  * from the schedule domain.
   2968  1.1  mrg  *
   2969  1.1  mrg  * For unrolling, the actual schedule domain is needed (with divs that
   2970  1.1  mrg  * may refer to the current dimension) so that stride detection can be
   2971  1.1  mrg  * performed.
   2972  1.1  mrg  *
   2973  1.1  mrg  * For atomic and remainder domains, inner dimensions and divs involving
   2974  1.1  mrg  * the current dimensions should be eliminated.
   2975  1.1  mrg  * In case we are working within a separation class, we need to intersect
   2976  1.1  mrg  * the result with the current "class_domain" to ensure that the domains
   2977  1.1  mrg  * are disjoint from those generated from other class domains.
   2978  1.1  mrg  *
   2979  1.1  mrg  * The domain that has been made atomic may be larger than specified
   2980  1.1  mrg  * by the user since it needs to be representable as a single basic set.
   2981  1.1  mrg  * This possibly larger domain is removed from class_domain by
   2982  1.1  mrg  * compute_atomic_domain.  It is computed first so that the extended domain
   2983  1.1  mrg  * would not overlap with any domains computed before.
 * Similarly, the unrolled domains may have some constraints removed and
   2985  1.1  mrg  * may therefore also be larger than specified by the user.
   2986  1.1  mrg  *
   2987  1.1  mrg  * If anything is left after handling separate, unroll and atomic,
   2988  1.1  mrg  * we split it up into basic sets and append the basic sets to domains->list.
   2989  1.1  mrg  */
static isl_stat compute_partial_domains(struct isl_codegen_domains *domains,
	__isl_take isl_set *class_domain)
{
	isl_basic_set_list *list;
	isl_set *domain;

	/* Make the class domain disjoint from the class domains handled
	 * so far and record it as handled.
	 */
	class_domain = isl_set_subtract(class_domain,
					isl_set_copy(domains->done));
	domains->done = isl_set_union(domains->done,
					isl_set_copy(class_domain));

	/* Atomic is handled first so that its possibly enlarged domain
	 * is removed from "class_domain" before the other domains are
	 * computed; the unroll domains are removed as well.
	 */
	class_domain = compute_atomic_domain(domains, class_domain);
	class_domain = compute_unroll_domains(domains, class_domain);

	domain = isl_set_copy(class_domain);

	if (compute_separate_domain(domains, domain) < 0)
		goto error;
	domain = isl_set_subtract(domain,
			isl_set_copy(domains->option[isl_ast_loop_separate]));

	/* The remainder needs the schedule domain constraints,
	 * with inner dimensions and divs involving the current
	 * dimension eliminated.
	 */
	domain = isl_set_intersect(domain,
				isl_set_copy(domains->schedule_domain));

	domain = isl_ast_build_eliminate(domains->build, domain);
	domain = isl_set_intersect(domain, isl_set_copy(class_domain));

	domain = isl_set_coalesce_preserve(domain);
	domain = isl_set_make_disjoint(domain);

	list = isl_basic_set_list_from_set(domain);
	domains->list = isl_basic_set_list_concat(domains->list, list);

	isl_set_free(class_domain);

	return isl_stat_ok;
error:
	isl_set_free(domain);
	isl_set_free(class_domain);
	return isl_stat_error;
}
   3031  1.1  mrg 
   3032  1.1  mrg /* Split up the domain at the current depth into disjoint
   3033  1.1  mrg  * basic sets for which code should be generated separately
   3034  1.1  mrg  * for the separation class identified by "pnt".
   3035  1.1  mrg  *
   3036  1.1  mrg  * We extract the corresponding class domain from domains->sep_class,
   3037  1.1  mrg  * eliminate inner dimensions and pass control to compute_partial_domains.
   3038  1.1  mrg  */
   3039  1.1  mrg static isl_stat compute_class_domains(__isl_take isl_point *pnt, void *user)
   3040  1.1  mrg {
   3041  1.1  mrg 	struct isl_codegen_domains *domains = user;
   3042  1.1  mrg 	isl_set *class_set;
   3043  1.1  mrg 	isl_set *domain;
   3044  1.1  mrg 	int disjoint;
   3045  1.1  mrg 
   3046  1.1  mrg 	class_set = isl_set_from_point(pnt);
   3047  1.1  mrg 	domain = isl_map_domain(isl_map_intersect_range(
   3048  1.1  mrg 				isl_map_copy(domains->sep_class), class_set));
   3049  1.1  mrg 	domain = isl_ast_build_compute_gist(domains->build, domain);
   3050  1.1  mrg 	domain = isl_ast_build_eliminate(domains->build, domain);
   3051  1.1  mrg 
   3052  1.1  mrg 	disjoint = isl_set_plain_is_disjoint(domain, domains->schedule_domain);
   3053  1.1  mrg 	if (disjoint < 0)
   3054  1.1  mrg 		return isl_stat_error;
   3055  1.1  mrg 	if (disjoint) {
   3056  1.1  mrg 		isl_set_free(domain);
   3057  1.1  mrg 		return isl_stat_ok;
   3058  1.1  mrg 	}
   3059  1.1  mrg 
   3060  1.1  mrg 	return compute_partial_domains(domains, domain);
   3061  1.1  mrg }
   3062  1.1  mrg 
   3063  1.1  mrg /* Extract the domains at the current depth that should be atomic,
   3064  1.1  mrg  * separated or unrolled and store them in option.
   3065  1.1  mrg  *
   3066  1.1  mrg  * The domains specified by the user might overlap, so we make
   3067  1.1  mrg  * them disjoint by subtracting earlier domains from later domains.
   3068  1.1  mrg  */
   3069  1.1  mrg static void compute_domains_init_options(isl_set *option[4],
   3070  1.1  mrg 	__isl_keep isl_ast_build *build)
   3071  1.1  mrg {
   3072  1.1  mrg 	enum isl_ast_loop_type type, type2;
   3073  1.1  mrg 	isl_set *unroll;
   3074  1.1  mrg 
   3075  1.1  mrg 	for (type = isl_ast_loop_atomic;
   3076  1.1  mrg 	    type <= isl_ast_loop_separate; ++type) {
   3077  1.1  mrg 		option[type] = isl_ast_build_get_option_domain(build, type);
   3078  1.1  mrg 		for (type2 = isl_ast_loop_atomic; type2 < type; ++type2)
   3079  1.1  mrg 			option[type] = isl_set_subtract(option[type],
   3080  1.1  mrg 						isl_set_copy(option[type2]));
   3081  1.1  mrg 	}
   3082  1.1  mrg 
   3083  1.1  mrg 	unroll = option[isl_ast_loop_unroll];
   3084  1.1  mrg 	unroll = isl_set_coalesce(unroll);
   3085  1.1  mrg 	unroll = isl_set_make_disjoint(unroll);
   3086  1.1  mrg 	option[isl_ast_loop_unroll] = unroll;
   3087  1.1  mrg }
   3088  1.1  mrg 
   3089  1.1  mrg /* Split up the domain at the current depth into disjoint
   3090  1.1  mrg  * basic sets for which code should be generated separately,
   3091  1.1  mrg  * based on the user-specified options.
   3092  1.1  mrg  * Return the list of disjoint basic sets.
   3093  1.1  mrg  *
   3094  1.1  mrg  * There are three kinds of domains that we need to keep track of.
   3095  1.1  mrg  * - the "schedule domain" is the domain of "executed"
 * - the "class domain" is the domain corresponding to the current
   3097  1.1  mrg  *	separation class
   3098  1.1  mrg  * - the "option domain" is the domain corresponding to one of the options
   3099  1.1  mrg  *	atomic, unroll or separate
   3100  1.1  mrg  *
 * We first consider the individual values of the separation classes
   3102  1.1  mrg  * and split up the domain for each of them separately.
   3103  1.1  mrg  * Finally, we consider the remainder.  If no separation classes were
   3104  1.1  mrg  * specified, then we call compute_partial_domains with the universe
   3105  1.1  mrg  * "class_domain".  Otherwise, we take the "schedule_domain" as "class_domain",
   3106  1.1  mrg  * with inner dimensions removed.  We do this because we want to
   3107  1.1  mrg  * avoid computing the complement of the class domains (i.e., the difference
   3108  1.1  mrg  * between the universe and domains->done).
   3109  1.1  mrg  */
static __isl_give isl_basic_set_list *compute_domains(
	__isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build)
{
	struct isl_codegen_domains domains;
	isl_ctx *ctx;
	isl_set *domain;
	isl_union_set *schedule_domain;
	isl_set *classes;
	isl_space *space;
	int n_param;
	enum isl_ast_loop_type type;
	isl_bool empty;

	if (!executed)
		return NULL;

	ctx = isl_union_map_get_ctx(executed);
	domains.list = isl_basic_set_list_alloc(ctx, 0);

	schedule_domain = isl_union_map_domain(isl_union_map_copy(executed));
	domain = isl_set_from_union_set(schedule_domain);

	compute_domains_init_options(domains.option, build);

	/* The separation class values live in the range of "sep_class";
	 * project out the parameters so that the foreach_point call below
	 * enumerates only the class values themselves.
	 * On error, "classes" is NULL and the negative "n_param" is
	 * harmless since isl functions propagate NULL inputs.
	 */
	domains.sep_class = isl_ast_build_get_separation_class(build);
	classes = isl_map_range(isl_map_copy(domains.sep_class));
	n_param = isl_set_dim(classes, isl_dim_param);
	if (n_param < 0)
		classes = isl_set_free(classes);
	classes = isl_set_project_out(classes, isl_dim_param, 0, n_param);

	space = isl_set_get_space(domain);
	domains.build = build;
	domains.schedule_domain = isl_set_copy(domain);
	domains.executed = executed;
	domains.done = isl_set_empty(space);

	/* Split up the domain for each separation class value. */
	if (isl_set_foreach_point(classes, &compute_class_domains, &domains) < 0)
		domains.list = isl_basic_set_list_free(domains.list);
	isl_set_free(classes);

	/* Handle the remainder.  If no separation classes were handled
	 * ("domains.done" still empty), use the universe as class domain;
	 * otherwise use the schedule domain with inner dimensions removed,
	 * to avoid computing the complement of "domains.done".
	 */
	empty = isl_set_is_empty(domains.done);
	if (empty < 0) {
		domains.list = isl_basic_set_list_free(domains.list);
		domain = isl_set_free(domain);
	} else if (empty) {
		isl_set_free(domain);
		domain = isl_set_universe(isl_set_get_space(domains.done));
	} else {
		domain = isl_ast_build_eliminate(build, domain);
	}
	if (compute_partial_domains(&domains, domain) < 0)
		domains.list = isl_basic_set_list_free(domains.list);

	isl_set_free(domains.schedule_domain);
	isl_set_free(domains.done);
	isl_map_free(domains.sep_class);
	for (type = isl_ast_loop_atomic; type <= isl_ast_loop_separate; ++type)
		isl_set_free(domains.option[type]);

	return domains.list;
}
   3172  1.1  mrg 
   3173  1.1  mrg /* Generate code for a single component, after shifting (if any)
   3174  1.1  mrg  * has been applied, in case the schedule was specified as a union map.
   3175  1.1  mrg  *
   3176  1.1  mrg  * We first split up the domain at the current depth into disjoint
   3177  1.1  mrg  * basic sets based on the user-specified options.
   3178  1.1  mrg  * Then we generated code for each of them and concatenate the results.
   3179  1.1  mrg  */
   3180  1.1  mrg static __isl_give isl_ast_graft_list *generate_shifted_component_flat(
   3181  1.1  mrg 	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
   3182  1.1  mrg {
   3183  1.1  mrg 	isl_basic_set_list *domain_list;
   3184  1.1  mrg 	isl_ast_graft_list *list = NULL;
   3185  1.1  mrg 
   3186  1.1  mrg 	domain_list = compute_domains(executed, build);
   3187  1.1  mrg 	list = generate_parallel_domains(domain_list, executed, build);
   3188  1.1  mrg 
   3189  1.1  mrg 	isl_basic_set_list_free(domain_list);
   3190  1.1  mrg 	isl_union_map_free(executed);
   3191  1.1  mrg 	isl_ast_build_free(build);
   3192  1.1  mrg 
   3193  1.1  mrg 	return list;
   3194  1.1  mrg }
   3195  1.1  mrg 
   3196  1.1  mrg /* Generate code for a single component, after shifting (if any)
   3197  1.1  mrg  * has been applied, in case the schedule was specified as a schedule tree
   3198  1.1  mrg  * and the separate option was specified.
   3199  1.1  mrg  *
   3200  1.1  mrg  * We perform separation on the domain of "executed" and then generate
   3201  1.1  mrg  * an AST for each of the resulting disjoint basic sets.
   3202  1.1  mrg  */
   3203  1.1  mrg static __isl_give isl_ast_graft_list *generate_shifted_component_tree_separate(
   3204  1.1  mrg 	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
   3205  1.1  mrg {
   3206  1.1  mrg 	isl_space *space;
   3207  1.1  mrg 	isl_set *domain;
   3208  1.1  mrg 	isl_basic_set_list *domain_list;
   3209  1.1  mrg 	isl_ast_graft_list *list;
   3210  1.1  mrg 
   3211  1.1  mrg 	space = isl_ast_build_get_space(build, 1);
   3212  1.1  mrg 	domain = separate_schedule_domains(space,
   3213  1.1  mrg 					isl_union_map_copy(executed), build);
   3214  1.1  mrg 	domain_list = isl_basic_set_list_from_set(domain);
   3215  1.1  mrg 
   3216  1.1  mrg 	list = generate_parallel_domains(domain_list, executed, build);
   3217  1.1  mrg 
   3218  1.1  mrg 	isl_basic_set_list_free(domain_list);
   3219  1.1  mrg 	isl_union_map_free(executed);
   3220  1.1  mrg 	isl_ast_build_free(build);
   3221  1.1  mrg 
   3222  1.1  mrg 	return list;
   3223  1.1  mrg }
   3224  1.1  mrg 
/* Internal data structure for generate_shifted_component_tree_unroll.
 *
 * "executed" and "build" are inputs to generate_shifted_component_tree_unroll.
 * "list" collects the constructed grafts, one per unrolled iteration.
 */
struct isl_ast_unroll_tree_data {
	isl_union_map *executed;	/* inverse schedule being unrolled */
	isl_ast_build *build;		/* current AST build */
	isl_ast_graft_list *list;	/* collected results */
};
   3235  1.1  mrg 
   3236  1.1  mrg /* Initialize data->list to a list of "n" elements.
   3237  1.1  mrg  */
   3238  1.1  mrg static int init_unroll_tree(int n, void *user)
   3239  1.1  mrg {
   3240  1.1  mrg 	struct isl_ast_unroll_tree_data *data = user;
   3241  1.1  mrg 	isl_ctx *ctx;
   3242  1.1  mrg 
   3243  1.1  mrg 	ctx = isl_ast_build_get_ctx(data->build);
   3244  1.1  mrg 	data->list = isl_ast_graft_list_alloc(ctx, n);
   3245  1.1  mrg 
   3246  1.1  mrg 	return 0;
   3247  1.1  mrg }
   3248  1.1  mrg 
   3249  1.1  mrg /* Given an iteration of an unrolled domain represented by "bset",
   3250  1.1  mrg  * generate the corresponding AST and add the result to data->list.
   3251  1.1  mrg  */
   3252  1.1  mrg static int do_unroll_tree_iteration(__isl_take isl_basic_set *bset, void *user)
   3253  1.1  mrg {
   3254  1.1  mrg 	struct isl_ast_unroll_tree_data *data = user;
   3255  1.1  mrg 
   3256  1.1  mrg 	data->list = add_node(data->list, isl_union_map_copy(data->executed),
   3257  1.1  mrg 				bset, isl_ast_build_copy(data->build));
   3258  1.1  mrg 
   3259  1.1  mrg 	return 0;
   3260  1.1  mrg }
   3261  1.1  mrg 
   3262  1.1  mrg /* Generate code for a single component, after shifting (if any)
   3263  1.1  mrg  * has been applied, in case the schedule was specified as a schedule tree
   3264  1.1  mrg  * and the unroll option was specified.
   3265  1.1  mrg  *
   3266  1.1  mrg  * We call foreach_iteration to iterate over the individual values and
   3267  1.1  mrg  * construct and collect the corresponding grafts in do_unroll_tree_iteration.
   3268  1.1  mrg  */
   3269  1.1  mrg static __isl_give isl_ast_graft_list *generate_shifted_component_tree_unroll(
   3270  1.1  mrg 	__isl_take isl_union_map *executed, __isl_take isl_set *domain,
   3271  1.1  mrg 	__isl_take isl_ast_build *build)
   3272  1.1  mrg {
   3273  1.1  mrg 	struct isl_ast_unroll_tree_data data = { executed, build, NULL };
   3274  1.1  mrg 
   3275  1.1  mrg 	if (foreach_iteration(domain, build, &init_unroll_tree,
   3276  1.1  mrg 				&do_unroll_tree_iteration, &data) < 0)
   3277  1.1  mrg 		data.list = isl_ast_graft_list_free(data.list);
   3278  1.1  mrg 
   3279  1.1  mrg 	isl_union_map_free(executed);
   3280  1.1  mrg 	isl_ast_build_free(build);
   3281  1.1  mrg 
   3282  1.1  mrg 	return data.list;
   3283  1.1  mrg }
   3284  1.1  mrg 
   3285  1.1  mrg /* Does "domain" involve a disjunction that is purely based on
   3286  1.1  mrg  * constraints involving only outer dimension?
   3287  1.1  mrg  *
   3288  1.1  mrg  * In particular, is there a disjunction such that the constraints
   3289  1.1  mrg  * involving the current and later dimensions are the same over
   3290  1.1  mrg  * all the disjuncts?
   3291  1.1  mrg  */
   3292  1.1  mrg static isl_bool has_pure_outer_disjunction(__isl_keep isl_set *domain,
   3293  1.1  mrg 	__isl_keep isl_ast_build *build)
   3294  1.1  mrg {
   3295  1.1  mrg 	isl_basic_set *hull;
   3296  1.1  mrg 	isl_set *shared, *inner;
   3297  1.1  mrg 	isl_bool equal;
   3298  1.1  mrg 	isl_size depth;
   3299  1.1  mrg 	isl_size n;
   3300  1.1  mrg 	isl_size dim;
   3301  1.1  mrg 
   3302  1.1  mrg 	n = isl_set_n_basic_set(domain);
   3303  1.1  mrg 	if (n < 0)
   3304  1.1  mrg 		return isl_bool_error;
   3305  1.1  mrg 	if (n <= 1)
   3306  1.1  mrg 		return isl_bool_false;
   3307  1.1  mrg 	dim = isl_set_dim(domain, isl_dim_set);
   3308  1.1  mrg 	depth = isl_ast_build_get_depth(build);
   3309  1.1  mrg 	if (dim < 0 || depth < 0)
   3310  1.1  mrg 		return isl_bool_error;
   3311  1.1  mrg 
   3312  1.1  mrg 	inner = isl_set_copy(domain);
   3313  1.1  mrg 	inner = isl_set_drop_constraints_not_involving_dims(inner,
   3314  1.1  mrg 					    isl_dim_set, depth, dim - depth);
   3315  1.1  mrg 	hull = isl_set_plain_unshifted_simple_hull(isl_set_copy(inner));
   3316  1.1  mrg 	shared = isl_set_from_basic_set(hull);
   3317  1.1  mrg 	equal = isl_set_plain_is_equal(inner, shared);
   3318  1.1  mrg 	isl_set_free(inner);
   3319  1.1  mrg 	isl_set_free(shared);
   3320  1.1  mrg 
   3321  1.1  mrg 	return equal;
   3322  1.1  mrg }
   3323  1.1  mrg 
/* Generate code for a single component, after shifting (if any)
 * has been applied, in case the schedule was specified as a schedule tree.
 * In particular, handle the base case where there is either no isolated
 * set or we are within the isolated set (in which case "isolated" is set)
 * or the iterations that precede or follow the isolated set.
 *
 * The schedule domain is broken up or combined into basic sets
 * according to the AST generation option specified in the current
 * schedule node, which may be either atomic, separate, unroll or
 * unspecified.  If the option is unspecified, then we currently simply
 * split the schedule domain into disjoint basic sets.
 *
 * In case the separate option is specified, the AST generation is
 * handled by generate_shifted_component_tree_separate.
 * In the other cases, we need the global schedule domain.
 * In the unroll case, the AST generation is then handled by
 * generate_shifted_component_tree_unroll which needs the actual
 * schedule domain (with divs that may refer to the current dimension)
 * so that stride detection can be performed.
 * In the atomic or unspecified case, inner dimensions and divs involving
 * the current dimensions should be eliminated.
 * The result is then either combined into a single basic set or
 * split up into disjoint basic sets.
 * Finally an AST is generated for each basic set and the results are
 * concatenated.
 *
 * If the schedule domain involves a disjunction that is purely based on
 * constraints involving only outer dimension, then it is treated as
 * if atomic was specified.  This ensures that only a single loop
 * is generated instead of a sequence of identical loops with
 * different guards.
 */
static __isl_give isl_ast_graft_list *generate_shifted_component_tree_base(
	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build,
	int isolated)
{
	isl_bool outer_disjunction;
	isl_union_set *schedule_domain;
	isl_set *domain;
	isl_basic_set_list *domain_list;
	isl_ast_graft_list *list;
	enum isl_ast_loop_type type;

	type = isl_ast_build_get_loop_type(build, isolated);
	if (type < 0)
		goto error;

	/* The separate case does not need the global schedule domain. */
	if (type == isl_ast_loop_separate)
		return generate_shifted_component_tree_separate(executed,
								build);

	schedule_domain = isl_union_map_domain(isl_union_map_copy(executed));
	domain = isl_set_from_union_set(schedule_domain);

	/* Unrolling needs the original domain, including divs that may
	 * refer to the current dimension, for stride detection.
	 */
	if (type == isl_ast_loop_unroll)
		return generate_shifted_component_tree_unroll(executed, domain,
								build);

	/* Atomic or unspecified: eliminate inner dimensions and divs
	 * involving the current dimension.
	 */
	domain = isl_ast_build_eliminate(build, domain);
	domain = isl_set_coalesce_preserve(domain);

	outer_disjunction = has_pure_outer_disjunction(domain, build);
	if (outer_disjunction < 0)
		domain = isl_set_free(domain);

	/* Combine into a single basic set (atomic, or a pure outer
	 * disjunction that should produce a single loop) or
	 * split up into disjoint basic sets (unspecified).
	 */
	if (outer_disjunction || type == isl_ast_loop_atomic) {
		isl_basic_set *hull;
		hull = isl_set_unshifted_simple_hull(domain);
		domain_list = isl_basic_set_list_from_basic_set(hull);
	} else {
		domain = isl_set_make_disjoint(domain);
		domain_list = isl_basic_set_list_from_set(domain);
	}

	list = generate_parallel_domains(domain_list, executed, build);

	isl_basic_set_list_free(domain_list);
	isl_union_map_free(executed);
	isl_ast_build_free(build);

	return list;
error:
	isl_union_map_free(executed);
	isl_ast_build_free(build);
	return NULL;
}
   3410  1.1  mrg 
   3411  1.1  mrg /* Extract out the disjunction imposed by "domain" on the outer
   3412  1.1  mrg  * schedule dimensions.
   3413  1.1  mrg  *
   3414  1.1  mrg  * In particular, remove all inner dimensions from "domain" (including
   3415  1.1  mrg  * the current dimension) and then remove the constraints that are shared
   3416  1.1  mrg  * by all disjuncts in the result.
   3417  1.1  mrg  */
   3418  1.1  mrg static __isl_give isl_set *extract_disjunction(__isl_take isl_set *domain,
   3419  1.1  mrg 	__isl_keep isl_ast_build *build)
   3420  1.1  mrg {
   3421  1.1  mrg 	isl_set *hull;
   3422  1.1  mrg 	isl_size depth;
   3423  1.1  mrg 	isl_size dim;
   3424  1.1  mrg 
   3425  1.1  mrg 	domain = isl_ast_build_specialize(build, domain);
   3426  1.1  mrg 	depth = isl_ast_build_get_depth(build);
   3427  1.1  mrg 	dim = isl_set_dim(domain, isl_dim_set);
   3428  1.1  mrg 	if (depth < 0 || dim < 0)
   3429  1.1  mrg 		return isl_set_free(domain);
   3430  1.1  mrg 	domain = isl_set_eliminate(domain, isl_dim_set, depth, dim - depth);
   3431  1.1  mrg 	domain = isl_set_remove_unknown_divs(domain);
   3432  1.1  mrg 	hull = isl_set_copy(domain);
   3433  1.1  mrg 	hull = isl_set_from_basic_set(isl_set_unshifted_simple_hull(hull));
   3434  1.1  mrg 	domain = isl_set_gist(domain, hull);
   3435  1.1  mrg 
   3436  1.1  mrg 	return domain;
   3437  1.1  mrg }
   3438  1.1  mrg 
   3439  1.1  mrg /* Add "guard" to the grafts in "list".
   3440  1.1  mrg  * "build" is the outer AST build, while "sub_build" includes "guard"
   3441  1.1  mrg  * in its generated domain.
   3442  1.1  mrg  *
   3443  1.1  mrg  * First combine the grafts into a single graft and then add the guard.
   3444  1.1  mrg  * If the list is empty, or if some error occurred, then simply return
   3445  1.1  mrg  * the list.
   3446  1.1  mrg  */
   3447  1.1  mrg static __isl_give isl_ast_graft_list *list_add_guard(
   3448  1.1  mrg 	__isl_take isl_ast_graft_list *list, __isl_keep isl_set *guard,
   3449  1.1  mrg 	__isl_keep isl_ast_build *build, __isl_keep isl_ast_build *sub_build)
   3450  1.1  mrg {
   3451  1.1  mrg 	isl_ast_graft *graft;
   3452  1.1  mrg 	isl_size n;
   3453  1.1  mrg 
   3454  1.1  mrg 	list = isl_ast_graft_list_fuse(list, sub_build);
   3455  1.1  mrg 
   3456  1.1  mrg 	n = isl_ast_graft_list_n_ast_graft(list);
   3457  1.1  mrg 	if (n < 0)
   3458  1.1  mrg 		return isl_ast_graft_list_free(list);
   3459  1.1  mrg 	if (n != 1)
   3460  1.1  mrg 		return list;
   3461  1.1  mrg 
   3462  1.1  mrg 	graft = isl_ast_graft_list_get_ast_graft(list, 0);
   3463  1.1  mrg 	graft = isl_ast_graft_add_guard(graft, isl_set_copy(guard), build);
   3464  1.1  mrg 	list = isl_ast_graft_list_set_ast_graft(list, 0, graft);
   3465  1.1  mrg 
   3466  1.1  mrg 	return list;
   3467  1.1  mrg }
   3468  1.1  mrg 
/* Generate code for a single component, after shifting (if any)
 * has been applied, in case the schedule was specified as a schedule tree.
 * In particular, do so for the specified subset of the schedule domain.
 *
 * If we are outside of the isolated part, then "domain" may include
 * a disjunction.  Explicitly generate this disjunction at this point
 * instead of relying on the disjunction getting hoisted back up
 * to this level.
 */
static __isl_give isl_ast_graft_list *generate_shifted_component_tree_part(
	__isl_keep isl_union_map *executed, __isl_take isl_set *domain,
	__isl_keep isl_ast_build *build, int isolated)
{
	isl_union_set *uset;
	isl_ast_graft_list *list;
	isl_ast_build *sub_build;
	int empty;

	/* Restrict the inverse schedule to the given part of the domain. */
	uset = isl_union_set_from_set(isl_set_copy(domain));
	executed = isl_union_map_copy(executed);
	executed = isl_union_map_intersect_domain(executed, uset);
	empty = isl_union_map_is_empty(executed);
	if (empty < 0)
		goto error;
	if (empty) {
		/* Nothing to generate for this part: return an empty list. */
		isl_ctx *ctx;
		isl_union_map_free(executed);
		isl_set_free(domain);
		ctx = isl_ast_build_get_ctx(build);
		return isl_ast_graft_list_alloc(ctx, 0);
	}

	sub_build = isl_ast_build_copy(build);
	if (!isolated) {
		/* Extract the outer disjunction and record it in the
		 * build used for the subtree, so that it can be attached
		 * explicitly as a guard by list_add_guard below.
		 */
		domain = extract_disjunction(domain, build);
		sub_build = isl_ast_build_restrict_generated(sub_build,
							isl_set_copy(domain));
	}
	list = generate_shifted_component_tree_base(executed,
				isl_ast_build_copy(sub_build), isolated);
	if (!isolated)
		list = list_add_guard(list, domain, build, sub_build);
	isl_ast_build_free(sub_build);
	isl_set_free(domain);
	return list;
error:
	isl_union_map_free(executed);
	isl_set_free(domain);
	return NULL;
}
   3519  1.1  mrg 
   3520  1.1  mrg /* Generate code for a single component, after shifting (if any)
   3521  1.1  mrg  * has been applied, in case the schedule was specified as a schedule tree.
   3522  1.1  mrg  * In particular, do so for the specified sequence of subsets
   3523  1.1  mrg  * of the schedule domain, "before", "isolated", "after" and "other",
   3524  1.1  mrg  * where only the "isolated" part is considered to be isolated.
   3525  1.1  mrg  */
   3526  1.1  mrg static __isl_give isl_ast_graft_list *generate_shifted_component_parts(
   3527  1.1  mrg 	__isl_take isl_union_map *executed, __isl_take isl_set *before,
   3528  1.1  mrg 	__isl_take isl_set *isolated, __isl_take isl_set *after,
   3529  1.1  mrg 	__isl_take isl_set *other, __isl_take isl_ast_build *build)
   3530  1.1  mrg {
   3531  1.1  mrg 	isl_ast_graft_list *list, *res;
   3532  1.1  mrg 
   3533  1.1  mrg 	res = generate_shifted_component_tree_part(executed, before, build, 0);
   3534  1.1  mrg 	list = generate_shifted_component_tree_part(executed, isolated,
   3535  1.1  mrg 						    build, 1);
   3536  1.1  mrg 	res = isl_ast_graft_list_concat(res, list);
   3537  1.1  mrg 	list = generate_shifted_component_tree_part(executed, after, build, 0);
   3538  1.1  mrg 	res = isl_ast_graft_list_concat(res, list);
   3539  1.1  mrg 	list = generate_shifted_component_tree_part(executed, other, build, 0);
   3540  1.1  mrg 	res = isl_ast_graft_list_concat(res, list);
   3541  1.1  mrg 
   3542  1.1  mrg 	isl_union_map_free(executed);
   3543  1.1  mrg 	isl_ast_build_free(build);
   3544  1.1  mrg 
   3545  1.1  mrg 	return res;
   3546  1.1  mrg }
   3547  1.1  mrg 
   3548  1.1  mrg /* Does "set" intersect "first", but not "second"?
   3549  1.1  mrg  */
   3550  1.1  mrg static isl_bool only_intersects_first(__isl_keep isl_set *set,
   3551  1.1  mrg 	__isl_keep isl_set *first, __isl_keep isl_set *second)
   3552  1.1  mrg {
   3553  1.1  mrg 	isl_bool disjoint;
   3554  1.1  mrg 
   3555  1.1  mrg 	disjoint = isl_set_is_disjoint(set, first);
   3556  1.1  mrg 	if (disjoint < 0)
   3557  1.1  mrg 		return isl_bool_error;
   3558  1.1  mrg 	if (disjoint)
   3559  1.1  mrg 		return isl_bool_false;
   3560  1.1  mrg 
   3561  1.1  mrg 	return isl_set_is_disjoint(set, second);
   3562  1.1  mrg }
   3563  1.1  mrg 
   3564  1.1  mrg /* Generate code for a single component, after shifting (if any)
   3565  1.1  mrg  * has been applied, in case the schedule was specified as a schedule tree.
   3566  1.1  mrg  * In particular, do so in case of isolation where there is
   3567  1.1  mrg  * only an "isolated" part and an "after" part.
   3568  1.1  mrg  * "dead1" and "dead2" are freed by this function in order to simplify
   3569  1.1  mrg  * the caller.
   3570  1.1  mrg  *
   3571  1.1  mrg  * The "before" and "other" parts are set to empty sets.
   3572  1.1  mrg  */
   3573  1.1  mrg static __isl_give isl_ast_graft_list *generate_shifted_component_only_after(
   3574  1.1  mrg 	__isl_take isl_union_map *executed, __isl_take isl_set *isolated,
   3575  1.1  mrg 	__isl_take isl_set *after, __isl_take isl_ast_build *build,
   3576  1.1  mrg 	__isl_take isl_set *dead1, __isl_take isl_set *dead2)
   3577  1.1  mrg {
   3578  1.1  mrg 	isl_set *empty;
   3579  1.1  mrg 
   3580  1.1  mrg 	empty = isl_set_empty(isl_set_get_space(after));
   3581  1.1  mrg 	isl_set_free(dead1);
   3582  1.1  mrg 	isl_set_free(dead2);
   3583  1.1  mrg 	return generate_shifted_component_parts(executed, isl_set_copy(empty),
   3584  1.1  mrg 						isolated, after, empty, build);
   3585  1.1  mrg }
   3586  1.1  mrg 
/* Generate code for a single component, after shifting (if any)
 * has been applied, in case the schedule was specified as a schedule tree.
 *
 * We first check if the user has specified an isolated schedule domain
 * and that we are not already outside of this isolated schedule domain.
 * If so, we break up the schedule domain into iterations that
 * precede the isolated domain, the isolated domain itself,
 * the iterations that follow the isolated domain and
 * the remaining iterations (those that are incomparable
 * to the isolated domain).
 * We generate an AST for each piece and concatenate the results.
 *
 * If the isolated domain is not convex, then it is replaced
 * by a convex superset to ensure that the sets of preceding and
 * following iterations are properly defined and, in particular,
 * that there are no intermediate iterations that do not belong
 * to the isolated domain.
 *
 * In the special case where at least one element of the schedule
 * domain that does not belong to the isolated domain needs
 * to be scheduled after this isolated domain, but none of those
 * elements need to be scheduled before, break up the schedule domain
 * in only two parts, the isolated domain, and a part that will be
 * scheduled after the isolated domain.
 *
 * If no isolated set has been specified, then we generate an
 * AST for the entire inverse schedule.
 */
static __isl_give isl_ast_graft_list *generate_shifted_component_tree(
	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
{
	int i;
	isl_size depth;
	int empty, has_isolate;
	isl_space *space;
	isl_union_set *schedule_domain;
	isl_set *domain;
	isl_basic_set *hull;
	isl_set *isolated, *before, *after, *test;
	isl_map *gt, *lt;
	isl_bool pure;

	build = isl_ast_build_extract_isolated(build);
	has_isolate = isl_ast_build_has_isolated(build);
	if (has_isolate < 0)
		executed = isl_union_map_free(executed);
	else if (!has_isolate)
		return generate_shifted_component_tree_base(executed, build, 0);

	schedule_domain = isl_union_map_domain(isl_union_map_copy(executed));
	domain = isl_set_from_union_set(schedule_domain);

	isolated = isl_ast_build_get_isolated(build);
	isolated = isl_set_intersect(isolated, isl_set_copy(domain));
	/* If the isolated set is unreachable under the constraints already
	 * imposed by the build, no special isolation handling is needed.
	 */
	test = isl_ast_build_specialize(build, isl_set_copy(isolated));
	empty = isl_set_is_empty(test);
	isl_set_free(test);
	if (empty < 0)
		goto error;
	if (empty) {
		isl_set_free(isolated);
		isl_set_free(domain);
		return generate_shifted_component_tree_base(executed, build, 0);
	}
	depth = isl_ast_build_get_depth(build);
	if (depth < 0)
		goto error;

	/* Replace the isolated domain by a convex superset. */
	isolated = isl_ast_build_eliminate(build, isolated);
	hull = isl_set_unshifted_simple_hull(isolated);
	isolated = isl_set_from_basic_set(hull);

	/* Construct order relations that equate the outer dimensions and
	 * order the current dimension, and apply them to "isolated" to
	 * obtain the iterations before and after the isolated domain.
	 */
	space = isl_space_map_from_set(isl_set_get_space(isolated));
	gt = isl_map_universe(space);
	for (i = 0; i < depth; ++i)
		gt = isl_map_equate(gt, isl_dim_in, i, isl_dim_out, i);
	gt = isl_map_order_gt(gt, isl_dim_in, depth, isl_dim_out, depth);
	lt = isl_map_reverse(isl_map_copy(gt));
	before = isl_set_apply(isl_set_copy(isolated), gt);
	after = isl_set_apply(isl_set_copy(isolated), lt);

	domain = isl_set_subtract(domain, isl_set_copy(isolated));
	/* Special case: the remaining iterations only need to be
	 * scheduled after the isolated domain, never before.
	 */
	pure = only_intersects_first(domain, after, before);
	if (pure < 0)
		executed = isl_union_map_free(executed);
	else if (pure)
		return generate_shifted_component_only_after(executed, isolated,
						domain, build, before, after);
	/* Make the four parts pairwise disjoint. */
	domain = isl_set_subtract(domain, isl_set_copy(before));
	domain = isl_set_subtract(domain, isl_set_copy(after));
	after = isl_set_subtract(after, isl_set_copy(isolated));
	after = isl_set_subtract(after, isl_set_copy(before));
	before = isl_set_subtract(before, isl_set_copy(isolated));

	return generate_shifted_component_parts(executed, before, isolated,
						after, domain, build);
error:
	isl_set_free(domain);
	isl_set_free(isolated);
	isl_union_map_free(executed);
	isl_ast_build_free(build);
	return NULL;
}
   3690  1.1  mrg 
   3691  1.1  mrg /* Generate code for a single component, after shifting (if any)
   3692  1.1  mrg  * has been applied.
   3693  1.1  mrg  *
   3694  1.1  mrg  * Call generate_shifted_component_tree or generate_shifted_component_flat
   3695  1.1  mrg  * depending on whether the schedule was specified as a schedule tree.
   3696  1.1  mrg  */
   3697  1.1  mrg static __isl_give isl_ast_graft_list *generate_shifted_component(
   3698  1.1  mrg 	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
   3699  1.1  mrg {
   3700  1.1  mrg 	if (isl_ast_build_has_schedule_node(build))
   3701  1.1  mrg 		return generate_shifted_component_tree(executed, build);
   3702  1.1  mrg 	else
   3703  1.1  mrg 		return generate_shifted_component_flat(executed, build);
   3704  1.1  mrg }
   3705  1.1  mrg 
/* A pair of an isl_set and an isl_map.
 * In the component generation functions below, "map" holds
 * (part of) an inverse schedule and "set" an associated domain.
 */
struct isl_set_map_pair {
	isl_set *set;
	isl_map *map;
};
   3710  1.1  mrg 
   3711  1.1  mrg /* Given an array "domain" of isl_set_map_pairs and an array "order"
   3712  1.1  mrg  * of indices into the "domain" array,
   3713  1.1  mrg  * return the union of the "map" fields of the elements
   3714  1.1  mrg  * indexed by the first "n" elements of "order".
   3715  1.1  mrg  */
   3716  1.1  mrg static __isl_give isl_union_map *construct_component_executed(
   3717  1.1  mrg 	struct isl_set_map_pair *domain, int *order, int n)
   3718  1.1  mrg {
   3719  1.1  mrg 	int i;
   3720  1.1  mrg 	isl_map *map;
   3721  1.1  mrg 	isl_union_map *executed;
   3722  1.1  mrg 
   3723  1.1  mrg 	map = isl_map_copy(domain[order[0]].map);
   3724  1.1  mrg 	executed = isl_union_map_from_map(map);
   3725  1.1  mrg 	for (i = 1; i < n; ++i) {
   3726  1.1  mrg 		map = isl_map_copy(domain[order[i]].map);
   3727  1.1  mrg 		executed = isl_union_map_add_map(executed, map);
   3728  1.1  mrg 	}
   3729  1.1  mrg 
   3730  1.1  mrg 	return executed;
   3731  1.1  mrg }
   3732  1.1  mrg 
   3733  1.1  mrg /* Generate code for a single component, after shifting (if any)
   3734  1.1  mrg  * has been applied.
   3735  1.1  mrg  *
   3736  1.1  mrg  * The component inverse schedule is specified as the "map" fields
   3737  1.1  mrg  * of the elements of "domain" indexed by the first "n" elements of "order".
   3738  1.1  mrg  */
   3739  1.1  mrg static __isl_give isl_ast_graft_list *generate_shifted_component_from_list(
   3740  1.1  mrg 	struct isl_set_map_pair *domain, int *order, int n,
   3741  1.1  mrg 	__isl_take isl_ast_build *build)
   3742  1.1  mrg {
   3743  1.1  mrg 	isl_union_map *executed;
   3744  1.1  mrg 
   3745  1.1  mrg 	executed = construct_component_executed(domain, order, n);
   3746  1.1  mrg 	return generate_shifted_component(executed, build);
   3747  1.1  mrg }
   3748  1.1  mrg 
   3749  1.1  mrg /* Does set dimension "pos" of "set" have an obviously fixed value?
   3750  1.1  mrg  */
   3751  1.1  mrg static int dim_is_fixed(__isl_keep isl_set *set, int pos)
   3752  1.1  mrg {
   3753  1.1  mrg 	int fixed;
   3754  1.1  mrg 	isl_val *v;
   3755  1.1  mrg 
   3756  1.1  mrg 	v = isl_set_plain_get_val_if_fixed(set, isl_dim_set, pos);
   3757  1.1  mrg 	if (!v)
   3758  1.1  mrg 		return -1;
   3759  1.1  mrg 	fixed = !isl_val_is_nan(v);
   3760  1.1  mrg 	isl_val_free(v);
   3761  1.1  mrg 
   3762  1.1  mrg 	return fixed;
   3763  1.1  mrg }
   3764  1.1  mrg 
   3765  1.1  mrg /* Given an array "domain" of isl_set_map_pairs and an array "order"
   3766  1.1  mrg  * of indices into the "domain" array,
   3767  1.1  mrg  * do all (except for at most one) of the "set" field of the elements
   3768  1.1  mrg  * indexed by the first "n" elements of "order" have a fixed value
   3769  1.1  mrg  * at position "depth"?
   3770  1.1  mrg  */
   3771  1.1  mrg static int at_most_one_non_fixed(struct isl_set_map_pair *domain,
   3772  1.1  mrg 	int *order, int n, int depth)
   3773  1.1  mrg {
   3774  1.1  mrg 	int i;
   3775  1.1  mrg 	int non_fixed = -1;
   3776  1.1  mrg 
   3777  1.1  mrg 	for (i = 0; i < n; ++i) {
   3778  1.1  mrg 		int f;
   3779  1.1  mrg 
   3780  1.1  mrg 		f = dim_is_fixed(domain[order[i]].set, depth);
   3781  1.1  mrg 		if (f < 0)
   3782  1.1  mrg 			return -1;
   3783  1.1  mrg 		if (f)
   3784  1.1  mrg 			continue;
   3785  1.1  mrg 		if (non_fixed >= 0)
   3786  1.1  mrg 			return 0;
   3787  1.1  mrg 		non_fixed = i;
   3788  1.1  mrg 	}
   3789  1.1  mrg 
   3790  1.1  mrg 	return 1;
   3791  1.1  mrg }
   3792  1.1  mrg 
/* Given an array "domain" of isl_set_map_pairs and an array "order"
 * of indices into the "domain" array,
 * eliminate the inner dimensions from the "set" field of the elements
 * indexed by the first "n" elements of "order", provided the current
 * dimension does not have a fixed value.
 *
 * Return the index of the first element in "order" with a corresponding
 * "set" field that does not have an (obviously) fixed value.
 * Return -1 on error.
 */
static int eliminate_non_fixed(struct isl_set_map_pair *domain,
	int *order, int n, int depth, __isl_keep isl_ast_build *build)
{
	int i;
	int base = -1;

	/* Iterate downward so that "base" ends up holding the smallest
	 * (i.e., first) index with a non-fixed value at position "depth".
	 */
	for (i = n - 1; i >= 0; --i) {
		int f;
		f = dim_is_fixed(domain[order[i]].set, depth);
		if (f < 0)
			return -1;
		if (f)
			continue;	/* fixed value: leave set untouched */
		/* Note: the "set" field is modified in place. */
		domain[order[i]].set = isl_ast_build_eliminate_inner(build,
							domain[order[i]].set);
		base = i;
	}

	return base;
}
   3822  1.1  mrg 
/* Given an array "domain" of isl_set_map_pairs and an array "order"
 * of indices into the "domain" array,
 * find the element of "domain" (amongst those indexed by the first "n"
 * elements of "order") with the "set" field that has the smallest
 * value for the current iterator.
 *
 * Note that the domain with the smallest value may depend on the parameters
 * and/or outer loop dimension.  Since the result of this function is only
 * used as heuristic, we only make a reasonable attempt at finding the best
 * domain, one that should work in case a single domain provides the smallest
 * value for the current dimension over all values of the parameters
 * and outer dimensions.
 *
 * In particular, we compute the smallest value of the first domain
 * and replace it by that of any later domain if that later domain
 * has a smallest value that is smaller for at least some value
 * of the parameters and outer dimensions.
 *
 * Return -1 on error.
 */
static int first_offset(struct isl_set_map_pair *domain, int *order, int n,
	__isl_keep isl_ast_build *build)
{
	int i;
	isl_map *min_first;	/* lexmin of the best candidate seen so far */
	int first = 0;

	min_first = isl_ast_build_map_to_iterator(build,
					isl_set_copy(domain[order[0]].set));
	min_first = isl_map_lexmin(min_first);

	for (i = 1; i < n; ++i) {
		isl_map *min, *test;
		int empty;

		min = isl_ast_build_map_to_iterator(build,
					isl_set_copy(domain[order[i]].set));
		min = isl_map_lexmin(min);
		/* Is the minimum of candidate "i" smaller than the current
		 * best minimum for at least some parameter/outer values?
		 */
		test = isl_map_copy(min);
		test = isl_map_apply_domain(isl_map_copy(min_first), test);
		test = isl_map_order_lt(test, isl_dim_in, 0, isl_dim_out, 0);
		empty = isl_map_is_empty(test);
		isl_map_free(test);
		if (empty >= 0 && !empty) {
			/* Candidate "i" becomes the new best. */
			isl_map_free(min_first);
			first = i;
			min_first = min;
		} else
			isl_map_free(min);

		if (empty < 0)
			break;	/* error; detected via i < n below */
	}

	isl_map_free(min_first);

	/* An early exit from the loop (i < n) signals an error. */
	return i < n ? -1 : first;
}
   3879  1.1  mrg 
   3880  1.1  mrg /* Construct a shifted inverse schedule based on the original inverse schedule,
   3881  1.1  mrg  * the stride and the offset.
   3882  1.1  mrg  *
   3883  1.1  mrg  * The original inverse schedule is specified as the "map" fields
   3884  1.1  mrg  * of the elements of "domain" indexed by the first "n" elements of "order".
   3885  1.1  mrg  *
   3886  1.1  mrg  * "stride" and "offset" are such that the difference
   3887  1.1  mrg  * between the values of the current dimension of domain "i"
   3888  1.1  mrg  * and the values of the current dimension for some reference domain are
   3889  1.1  mrg  * equal to
   3890  1.1  mrg  *
   3891  1.1  mrg  *	stride * integer + offset[i]
   3892  1.1  mrg  *
   3893  1.1  mrg  * Moreover, 0 <= offset[i] < stride.
   3894  1.1  mrg  *
   3895  1.1  mrg  * For each domain, we create a map
   3896  1.1  mrg  *
   3897  1.1  mrg  *	{ [..., j, ...] -> [..., j - offset[i], offset[i], ....] }
   3898  1.1  mrg  *
   3899  1.1  mrg  * where j refers to the current dimension and the other dimensions are
   3900  1.1  mrg  * unchanged, and apply this map to the original schedule domain.
   3901  1.1  mrg  *
   3902  1.1  mrg  * For example, for the original schedule
   3903  1.1  mrg  *
   3904  1.1  mrg  *	{ A[i] -> [2i]: 0 <= i < 10; B[i] -> [2i+1] : 0 <= i < 10 }
   3905  1.1  mrg  *
   3906  1.1  mrg  * and assuming the offset is 0 for the A domain and 1 for the B domain,
   3907  1.1  mrg  * we apply the mapping
   3908  1.1  mrg  *
   3909  1.1  mrg  *	{ [j] -> [j, 0] }
   3910  1.1  mrg  *
   3911  1.1  mrg  * to the schedule of the "A" domain and the mapping
   3912  1.1  mrg  *
   3913  1.1  mrg  *	{ [j - 1] -> [j, 1] }
   3914  1.1  mrg  *
   3915  1.1  mrg  * to the schedule of the "B" domain.
   3916  1.1  mrg  *
   3917  1.1  mrg  *
   3918  1.1  mrg  * Note that after the transformation, the differences between pairs
   3919  1.1  mrg  * of values of the current dimension over all domains are multiples
   3920  1.1  mrg  * of stride and that we have therefore exposed the stride.
   3921  1.1  mrg  *
   3922  1.1  mrg  *
   3923  1.1  mrg  * To see that the mapping preserves the lexicographic order,
   3924  1.1  mrg  * first note that each of the individual maps above preserves the order.
   3925  1.1  mrg  * If the value of the current iterator is j1 in one domain and j2 in another,
   3926  1.1  mrg  * then if j1 = j2, we know that the same map is applied to both domains
   3927  1.1  mrg  * and the order is preserved.
   3928  1.1  mrg  * Otherwise, let us assume, without loss of generality, that j1 < j2.
   3929  1.1  mrg  * If c1 >= c2 (with c1 and c2 the corresponding offsets), then
   3930  1.1  mrg  *
   3931  1.1  mrg  *	j1 - c1 < j2 - c2
   3932  1.1  mrg  *
   3933  1.1  mrg  * and the order is preserved.
   3934  1.1  mrg  * If c1 < c2, then we know
   3935  1.1  mrg  *
   3936  1.1  mrg  *	0 <= c2 - c1 < s
   3937  1.1  mrg  *
   3938  1.1  mrg  * We also have
   3939  1.1  mrg  *
   3940  1.1  mrg  *	j2 - j1 = n * s + r
   3941  1.1  mrg  *
   3942  1.1  mrg  * with n >= 0 and 0 <= r < s.
   3943  1.1  mrg  * In other words, r = c2 - c1.
   3944  1.1  mrg  * If n > 0, then
   3945  1.1  mrg  *
   3946  1.1  mrg  *	j1 - c1 < j2 - c2
   3947  1.1  mrg  *
   3948  1.1  mrg  * If n = 0, then
   3949  1.1  mrg  *
   3950  1.1  mrg  *	j1 - c1 = j2 - c2
   3951  1.1  mrg  *
   3952  1.1  mrg  * and so
   3953  1.1  mrg  *
   3954  1.1  mrg  *	(j1 - c1, c1) << (j2 - c2, c2)
   3955  1.1  mrg  *
   3956  1.1  mrg  * with "<<" the lexicographic order, proving that the order is preserved
   3957  1.1  mrg  * in all cases.
   3958  1.1  mrg  */
   3959  1.1  mrg static __isl_give isl_union_map *construct_shifted_executed(
   3960  1.1  mrg 	struct isl_set_map_pair *domain, int *order, int n,
   3961  1.1  mrg 	__isl_keep isl_val *stride, __isl_keep isl_multi_val *offset,
   3962  1.1  mrg 	__isl_keep isl_ast_build *build)
   3963  1.1  mrg {
   3964  1.1  mrg 	int i;
   3965  1.1  mrg 	isl_union_map *executed;
   3966  1.1  mrg 	isl_space *space;
   3967  1.1  mrg 	isl_map *map;
   3968  1.1  mrg 	isl_size depth;
   3969  1.1  mrg 	isl_constraint *c;
   3970  1.1  mrg 
   3971  1.1  mrg 	depth = isl_ast_build_get_depth(build);
   3972  1.1  mrg 	if (depth < 0)
   3973  1.1  mrg 		return NULL;
   3974  1.1  mrg 	space = isl_ast_build_get_space(build, 1);
   3975  1.1  mrg 	executed = isl_union_map_empty(isl_space_copy(space));
   3976  1.1  mrg 	space = isl_space_map_from_set(space);
   3977  1.1  mrg 	map = isl_map_identity(isl_space_copy(space));
   3978  1.1  mrg 	map = isl_map_eliminate(map, isl_dim_out, depth, 1);
   3979  1.1  mrg 	map = isl_map_insert_dims(map, isl_dim_out, depth + 1, 1);
   3980  1.1  mrg 	space = isl_space_insert_dims(space, isl_dim_out, depth + 1, 1);
   3981  1.1  mrg 
   3982  1.1  mrg 	c = isl_constraint_alloc_equality(isl_local_space_from_space(space));
   3983  1.1  mrg 	c = isl_constraint_set_coefficient_si(c, isl_dim_in, depth, 1);
   3984  1.1  mrg 	c = isl_constraint_set_coefficient_si(c, isl_dim_out, depth, -1);
   3985  1.1  mrg 
   3986  1.1  mrg 	for (i = 0; i < n; ++i) {
   3987  1.1  mrg 		isl_map *map_i;
   3988  1.1  mrg 		isl_val *v;
   3989  1.1  mrg 
   3990  1.1  mrg 		v = isl_multi_val_get_val(offset, i);
   3991  1.1  mrg 		if (!v)
   3992  1.1  mrg 			break;
   3993  1.1  mrg 		map_i = isl_map_copy(map);
   3994  1.1  mrg 		map_i = isl_map_fix_val(map_i, isl_dim_out, depth + 1,
   3995  1.1  mrg 					isl_val_copy(v));
   3996  1.1  mrg 		v = isl_val_neg(v);
   3997  1.1  mrg 		c = isl_constraint_set_constant_val(c, v);
   3998  1.1  mrg 		map_i = isl_map_add_constraint(map_i, isl_constraint_copy(c));
   3999  1.1  mrg 
   4000  1.1  mrg 		map_i = isl_map_apply_domain(isl_map_copy(domain[order[i]].map),
   4001  1.1  mrg 						map_i);
   4002  1.1  mrg 		executed = isl_union_map_add_map(executed, map_i);
   4003  1.1  mrg 	}
   4004  1.1  mrg 
   4005  1.1  mrg 	isl_constraint_free(c);
   4006  1.1  mrg 	isl_map_free(map);
   4007  1.1  mrg 
   4008  1.1  mrg 	if (i < n)
   4009  1.1  mrg 		executed = isl_union_map_free(executed);
   4010  1.1  mrg 
   4011  1.1  mrg 	return executed;
   4012  1.1  mrg }
   4013  1.1  mrg 
   4014  1.1  mrg /* Generate code for a single component, after exposing the stride,
   4015  1.1  mrg  * given that the schedule domain is "shifted strided".
   4016  1.1  mrg  *
   4017  1.1  mrg  * The component inverse schedule is specified as the "map" fields
   4018  1.1  mrg  * of the elements of "domain" indexed by the first "n" elements of "order".
   4019  1.1  mrg  *
   4020  1.1  mrg  * The schedule domain being "shifted strided" means that the differences
   4021  1.1  mrg  * between the values of the current dimension of domain "i"
   4022  1.1  mrg  * and the values of the current dimension for some reference domain are
   4023  1.1  mrg  * equal to
   4024  1.1  mrg  *
   4025  1.1  mrg  *	stride * integer + offset[i]
   4026  1.1  mrg  *
   4027  1.1  mrg  * We first look for the domain with the "smallest" value for the current
   4028  1.1  mrg  * dimension and adjust the offsets such that the offset of the "smallest"
   4029  1.1  mrg  * domain is equal to zero.  The other offsets are reduced modulo stride.
   4030  1.1  mrg  *
   4031  1.1  mrg  * Based on this information, we construct a new inverse schedule in
   4032  1.1  mrg  * construct_shifted_executed that exposes the stride.
   4033  1.1  mrg  * Since this involves the introduction of a new schedule dimension,
   4034  1.1  mrg  * the build needs to be changed accordingly.
   4035  1.1  mrg  * After computing the AST, the newly introduced dimension needs
   4036  1.1  mrg  * to be removed again from the list of grafts.  We do this by plugging
   4037  1.1  mrg  * in a mapping that represents the new schedule domain in terms of the
   4038  1.1  mrg  * old schedule domain.
   4039  1.1  mrg  */
   4040  1.1  mrg static __isl_give isl_ast_graft_list *generate_shift_component(
   4041  1.1  mrg 	struct isl_set_map_pair *domain, int *order, int n,
   4042  1.1  mrg 	__isl_keep isl_val *stride, __isl_keep isl_multi_val *offset,
   4043  1.1  mrg 	__isl_take isl_ast_build *build)
   4044  1.1  mrg {
   4045  1.1  mrg 	isl_ast_graft_list *list;
   4046  1.1  mrg 	int first;
   4047  1.1  mrg 	isl_size depth;
   4048  1.1  mrg 	isl_val *val;
   4049  1.1  mrg 	isl_multi_val *mv;
   4050  1.1  mrg 	isl_space *space;
   4051  1.1  mrg 	isl_multi_aff *ma, *zero;
   4052  1.1  mrg 	isl_union_map *executed;
   4053  1.1  mrg 
   4054  1.1  mrg 	depth = isl_ast_build_get_depth(build);
   4055  1.1  mrg 
   4056  1.1  mrg 	first = first_offset(domain, order, n, build);
   4057  1.1  mrg 	if (depth < 0 || first < 0)
   4058  1.1  mrg 		goto error;
   4059  1.1  mrg 
   4060  1.1  mrg 	mv = isl_multi_val_copy(offset);
   4061  1.1  mrg 	val = isl_multi_val_get_val(offset, first);
   4062  1.1  mrg 	val = isl_val_neg(val);
   4063  1.1  mrg 	mv = isl_multi_val_add_val(mv, val);
   4064  1.1  mrg 	mv = isl_multi_val_mod_val(mv, isl_val_copy(stride));
   4065  1.1  mrg 
   4066  1.1  mrg 	executed = construct_shifted_executed(domain, order, n, stride, mv,
   4067  1.1  mrg 						build);
   4068  1.1  mrg 	space = isl_ast_build_get_space(build, 1);
   4069  1.1  mrg 	space = isl_space_map_from_set(space);
   4070  1.1  mrg 	ma = isl_multi_aff_identity(isl_space_copy(space));
   4071  1.1  mrg 	space = isl_space_from_domain(isl_space_domain(space));
   4072  1.1  mrg 	space = isl_space_add_dims(space, isl_dim_out, 1);
   4073  1.1  mrg 	zero = isl_multi_aff_zero(space);
   4074  1.1  mrg 	ma = isl_multi_aff_range_splice(ma, depth + 1, zero);
   4075  1.1  mrg 	build = isl_ast_build_insert_dim(build, depth + 1);
   4076  1.1  mrg 	list = generate_shifted_component(executed, build);
   4077  1.1  mrg 
   4078  1.1  mrg 	list = isl_ast_graft_list_preimage_multi_aff(list, ma);
   4079  1.1  mrg 
   4080  1.1  mrg 	isl_multi_val_free(mv);
   4081  1.1  mrg 
   4082  1.1  mrg 	return list;
   4083  1.1  mrg error:
   4084  1.1  mrg 	isl_ast_build_free(build);
   4085  1.1  mrg 	return NULL;
   4086  1.1  mrg }
   4087  1.1  mrg 
   4088  1.1  mrg /* Does any node in the schedule tree rooted at the current schedule node
   4089  1.1  mrg  * of "build" depend on outer schedule nodes?
   4090  1.1  mrg  */
   4091  1.1  mrg static int has_anchored_subtree(__isl_keep isl_ast_build *build)
   4092  1.1  mrg {
   4093  1.1  mrg 	isl_schedule_node *node;
   4094  1.1  mrg 	int dependent = 0;
   4095  1.1  mrg 
   4096  1.1  mrg 	node = isl_ast_build_get_schedule_node(build);
   4097  1.1  mrg 	dependent = isl_schedule_node_is_subtree_anchored(node);
   4098  1.1  mrg 	isl_schedule_node_free(node);
   4099  1.1  mrg 
   4100  1.1  mrg 	return dependent;
   4101  1.1  mrg }
   4102  1.1  mrg 
   4103  1.1  mrg /* Generate code for a single component.
   4104  1.1  mrg  *
   4105  1.1  mrg  * The component inverse schedule is specified as the "map" fields
   4106  1.1  mrg  * of the elements of "domain" indexed by the first "n" elements of "order".
   4107  1.1  mrg  *
   4108  1.1  mrg  * This function may modify the "set" fields of "domain".
   4109  1.1  mrg  *
   4110  1.1  mrg  * Before proceeding with the actual code generation for the component,
   4111  1.1  mrg  * we first check if there are any "shifted" strides, meaning that
   4112  1.1  mrg  * the schedule domains of the individual domains are all strided,
   4113  1.1  mrg  * but that they have different offsets, resulting in the union
   4114  1.1  mrg  * of schedule domains not being strided anymore.
   4115  1.1  mrg  *
   4116  1.1  mrg  * The simplest example is the schedule
   4117  1.1  mrg  *
   4118  1.1  mrg  *	{ A[i] -> [2i]: 0 <= i < 10; B[i] -> [2i+1] : 0 <= i < 10 }
   4119  1.1  mrg  *
   4120  1.1  mrg  * Both schedule domains are strided, but their union is not.
   4121  1.1  mrg  * This function detects such cases and then rewrites the schedule to
   4122  1.1  mrg  *
   4123  1.1  mrg  *	{ A[i] -> [2i, 0]: 0 <= i < 10; B[i] -> [2i, 1] : 0 <= i < 10 }
   4124  1.1  mrg  *
   4125  1.1  mrg  * In the new schedule, the schedule domains have the same offset (modulo
   4126  1.1  mrg  * the stride), ensuring that the union of schedule domains is also strided.
   4127  1.1  mrg  *
   4128  1.1  mrg  *
   4129  1.1  mrg  * If there is only a single domain in the component, then there is
   4130  1.1  mrg  * nothing to do.   Similarly, if the current schedule dimension has
   4131  1.1  mrg  * a fixed value for almost all domains then there is nothing to be done.
   4132  1.1  mrg  * In particular, we need at least two domains where the current schedule
   4133  1.1  mrg  * dimension does not have a fixed value.
   4134  1.1  mrg  * Finally, in case of a schedule map input,
   4135  1.1  mrg  * if any of the options refer to the current schedule dimension,
   4136  1.1  mrg  * then we bail out as well.  It would be possible to reformulate the options
   4137  1.1  mrg  * in terms of the new schedule domain, but that would introduce constraints
   4138  1.1  mrg  * that separate the domains in the options and that is something we would
   4139  1.1  mrg  * like to avoid.
   4140  1.1  mrg  * In the case of a schedule tree input, we bail out if any of
   4141  1.1  mrg  * the descendants of the current schedule node refer to outer
   4142  1.1  mrg  * schedule nodes in any way.
   4143  1.1  mrg  *
   4144  1.1  mrg  *
   4145  1.1  mrg  * To see if there is any shifted stride, we look at the differences
   4146  1.1  mrg  * between the values of the current dimension in pairs of domains
   4147  1.1  mrg  * for equal values of outer dimensions.  These differences should be
   4148  1.1  mrg  * of the form
   4149  1.1  mrg  *
   4150  1.1  mrg  *	m x + r
   4151  1.1  mrg  *
   4152  1.1  mrg  * with "m" the stride and "r" a constant.  Note that we cannot perform
   4153  1.1  mrg  * this analysis on individual domains as the lower bound in each domain
   4154  1.1  mrg  * may depend on parameters or outer dimensions and so the current dimension
   4155  1.1  mrg  * itself may not have a fixed remainder on division by the stride.
   4156  1.1  mrg  *
   4157  1.1  mrg  * In particular, we compare the first domain that does not have an
   4158  1.1  mrg  * obviously fixed value for the current dimension to itself and all
   4159  1.1  mrg  * other domains and collect the offsets and the gcd of the strides.
   4160  1.1  mrg  * If the gcd becomes one, then we failed to find shifted strides.
   4161  1.1  mrg  * If the gcd is zero, then the differences were all fixed, meaning
   4162  1.1  mrg  * that some domains had non-obviously fixed values for the current dimension.
   4163  1.1  mrg  * If all the offsets are the same (for those domains that do not have
   4164  1.1  mrg  * an obviously fixed value for the current dimension), then we do not
   4165  1.1  mrg  * apply the transformation.
   4166  1.1  mrg  * If none of the domains were skipped, then there is nothing to do.
   4167  1.1  mrg  * If some of them were skipped, then if we apply separation, the schedule
   4168  1.1  mrg  * domain should get split in pieces with a (non-shifted) stride.
   4169  1.1  mrg  *
   4170  1.1  mrg  * Otherwise, we apply a shift to expose the stride in
   4171  1.1  mrg  * generate_shift_component.
   4172  1.1  mrg  */
   4173  1.1  mrg static __isl_give isl_ast_graft_list *generate_component(
   4174  1.1  mrg 	struct isl_set_map_pair *domain, int *order, int n,
   4175  1.1  mrg 	__isl_take isl_ast_build *build)
   4176  1.1  mrg {
   4177  1.1  mrg 	int i, d;
   4178  1.1  mrg 	isl_size depth;
   4179  1.1  mrg 	isl_ctx *ctx;
   4180  1.1  mrg 	isl_map *map;
   4181  1.1  mrg 	isl_set *deltas;
   4182  1.1  mrg 	isl_val *gcd = NULL;
   4183  1.1  mrg 	isl_multi_val *mv;
   4184  1.1  mrg 	int fixed, skip;
   4185  1.1  mrg 	int base;
   4186  1.1  mrg 	isl_ast_graft_list *list;
   4187  1.1  mrg 	int res = 0;
   4188  1.1  mrg 
   4189  1.1  mrg 	depth = isl_ast_build_get_depth(build);
   4190  1.1  mrg 	if (depth < 0)
   4191  1.1  mrg 		goto error;
   4192  1.1  mrg 
   4193  1.1  mrg 	skip = n == 1;
   4194  1.1  mrg 	if (skip >= 0 && !skip)
   4195  1.1  mrg 		skip = at_most_one_non_fixed(domain, order, n, depth);
   4196  1.1  mrg 	if (skip >= 0 && !skip) {
   4197  1.1  mrg 		if (isl_ast_build_has_schedule_node(build))
   4198  1.1  mrg 			skip = has_anchored_subtree(build);
   4199  1.1  mrg 		else
   4200  1.1  mrg 			skip = isl_ast_build_options_involve_depth(build);
   4201  1.1  mrg 	}
   4202  1.1  mrg 	if (skip < 0)
   4203  1.1  mrg 		goto error;
   4204  1.1  mrg 	if (skip)
   4205  1.1  mrg 		return generate_shifted_component_from_list(domain,
   4206  1.1  mrg 							    order, n, build);
   4207  1.1  mrg 
   4208  1.1  mrg 	base = eliminate_non_fixed(domain, order, n, depth, build);
   4209  1.1  mrg 	if (base < 0)
   4210  1.1  mrg 		goto error;
   4211  1.1  mrg 
   4212  1.1  mrg 	ctx = isl_ast_build_get_ctx(build);
   4213  1.1  mrg 
   4214  1.1  mrg 	mv = isl_multi_val_zero(isl_space_set_alloc(ctx, 0, n));
   4215  1.1  mrg 
   4216  1.1  mrg 	fixed = 1;
   4217  1.1  mrg 	for (i = 0; i < n; ++i) {
   4218  1.1  mrg 		isl_val *r, *m;
   4219  1.1  mrg 
   4220  1.1  mrg 		map = isl_map_from_domain_and_range(
   4221  1.1  mrg 					isl_set_copy(domain[order[base]].set),
   4222  1.1  mrg 					isl_set_copy(domain[order[i]].set));
   4223  1.1  mrg 		for (d = 0; d < depth; ++d)
   4224  1.1  mrg 			map = isl_map_equate(map, isl_dim_in, d,
   4225  1.1  mrg 						    isl_dim_out, d);
   4226  1.1  mrg 		deltas = isl_map_deltas(map);
   4227  1.1  mrg 		res = isl_set_dim_residue_class_val(deltas, depth, &m, &r);
   4228  1.1  mrg 		isl_set_free(deltas);
   4229  1.1  mrg 		if (res < 0)
   4230  1.1  mrg 			break;
   4231  1.1  mrg 
   4232  1.1  mrg 		if (i == 0)
   4233  1.1  mrg 			gcd = m;
   4234  1.1  mrg 		else
   4235  1.1  mrg 			gcd = isl_val_gcd(gcd, m);
   4236  1.1  mrg 		if (isl_val_is_one(gcd)) {
   4237  1.1  mrg 			isl_val_free(r);
   4238  1.1  mrg 			break;
   4239  1.1  mrg 		}
   4240  1.1  mrg 		mv = isl_multi_val_set_val(mv, i, r);
   4241  1.1  mrg 
   4242  1.1  mrg 		res = dim_is_fixed(domain[order[i]].set, depth);
   4243  1.1  mrg 		if (res < 0)
   4244  1.1  mrg 			break;
   4245  1.1  mrg 		if (res)
   4246  1.1  mrg 			continue;
   4247  1.1  mrg 
   4248  1.1  mrg 		if (fixed && i > base) {
   4249  1.1  mrg 			isl_val *a, *b;
   4250  1.1  mrg 			a = isl_multi_val_get_val(mv, i);
   4251  1.1  mrg 			b = isl_multi_val_get_val(mv, base);
   4252  1.1  mrg 			if (isl_val_ne(a, b))
   4253  1.1  mrg 				fixed = 0;
   4254  1.1  mrg 			isl_val_free(a);
   4255  1.1  mrg 			isl_val_free(b);
   4256  1.1  mrg 		}
   4257  1.1  mrg 	}
   4258  1.1  mrg 
   4259  1.1  mrg 	if (res < 0 || !gcd) {
   4260  1.1  mrg 		isl_ast_build_free(build);
   4261  1.1  mrg 		list = NULL;
   4262  1.1  mrg 	} else if (i < n || fixed || isl_val_is_zero(gcd)) {
   4263  1.1  mrg 		list = generate_shifted_component_from_list(domain,
   4264  1.1  mrg 							    order, n, build);
   4265  1.1  mrg 	} else {
   4266  1.1  mrg 		list = generate_shift_component(domain, order, n, gcd, mv,
   4267  1.1  mrg 						build);
   4268  1.1  mrg 	}
   4269  1.1  mrg 
   4270  1.1  mrg 	isl_val_free(gcd);
   4271  1.1  mrg 	isl_multi_val_free(mv);
   4272  1.1  mrg 
   4273  1.1  mrg 	return list;
   4274  1.1  mrg error:
   4275  1.1  mrg 	isl_ast_build_free(build);
   4276  1.1  mrg 	return NULL;
   4277  1.1  mrg }
   4278  1.1  mrg 
   4279  1.1  mrg /* Store both "map" itself and its domain in the
   4280  1.1  mrg  * structure pointed to by *next and advance to the next array element.
   4281  1.1  mrg  */
   4282  1.1  mrg static isl_stat extract_domain(__isl_take isl_map *map, void *user)
   4283  1.1  mrg {
   4284  1.1  mrg 	struct isl_set_map_pair **next = user;
   4285  1.1  mrg 
   4286  1.1  mrg 	(*next)->map = isl_map_copy(map);
   4287  1.1  mrg 	(*next)->set = isl_map_domain(map);
   4288  1.1  mrg 	(*next)++;
   4289  1.1  mrg 
   4290  1.1  mrg 	return isl_stat_ok;
   4291  1.1  mrg }
   4292  1.1  mrg 
   4293  1.1  mrg static isl_bool after_in_tree(__isl_keep isl_union_map *umap,
   4294  1.1  mrg 	__isl_keep isl_schedule_node *node);
   4295  1.1  mrg 
   4296  1.1  mrg /* Is any domain element of "umap" scheduled after any of
   4297  1.1  mrg  * the corresponding image elements by the tree rooted at
   4298  1.1  mrg  * the child of "node"?
   4299  1.1  mrg  */
   4300  1.1  mrg static isl_bool after_in_child(__isl_keep isl_union_map *umap,
   4301  1.1  mrg 	__isl_keep isl_schedule_node *node)
   4302  1.1  mrg {
   4303  1.1  mrg 	isl_schedule_node *child;
   4304  1.1  mrg 	isl_bool after;
   4305  1.1  mrg 
   4306  1.1  mrg 	child = isl_schedule_node_get_child(node, 0);
   4307  1.1  mrg 	after = after_in_tree(umap, child);
   4308  1.1  mrg 	isl_schedule_node_free(child);
   4309  1.1  mrg 
   4310  1.1  mrg 	return after;
   4311  1.1  mrg }
   4312  1.1  mrg 
   4313  1.1  mrg /* Is any domain element of "umap" scheduled after any of
   4314  1.1  mrg  * the corresponding image elements by the tree rooted at
   4315  1.1  mrg  * the band node "node"?
   4316  1.1  mrg  *
   4317  1.1  mrg  * We first check if any domain element is scheduled after any
   4318  1.1  mrg  * of the corresponding image elements by the band node itself.
   4319  1.1  mrg  * If not, we restrict "map" to those pairs of element that
   4320  1.1  mrg  * are scheduled together by the band node and continue with
   4321  1.1  mrg  * the child of the band node.
   4322  1.1  mrg  * If there are no such pairs then the map passed to after_in_child
   4323  1.1  mrg  * will be empty causing it to return 0.
   4324  1.1  mrg  */
   4325  1.1  mrg static isl_bool after_in_band(__isl_keep isl_union_map *umap,
   4326  1.1  mrg 	__isl_keep isl_schedule_node *node)
   4327  1.1  mrg {
   4328  1.1  mrg 	isl_multi_union_pw_aff *mupa;
   4329  1.1  mrg 	isl_union_map *partial, *test, *gt, *universe, *umap1, *umap2;
   4330  1.1  mrg 	isl_union_set *domain, *range;
   4331  1.1  mrg 	isl_space *space;
   4332  1.1  mrg 	isl_bool empty;
   4333  1.1  mrg 	isl_bool after;
   4334  1.1  mrg 	isl_size n;
   4335  1.1  mrg 
   4336  1.1  mrg 	n = isl_schedule_node_band_n_member(node);
   4337  1.1  mrg 	if (n < 0)
   4338  1.1  mrg 		return isl_bool_error;
   4339  1.1  mrg 	if (n == 0)
   4340  1.1  mrg 		return after_in_child(umap, node);
   4341  1.1  mrg 
   4342  1.1  mrg 	mupa = isl_schedule_node_band_get_partial_schedule(node);
   4343  1.1  mrg 	space = isl_multi_union_pw_aff_get_space(mupa);
   4344  1.1  mrg 	partial = isl_union_map_from_multi_union_pw_aff(mupa);
   4345  1.1  mrg 	test = isl_union_map_copy(umap);
   4346  1.1  mrg 	test = isl_union_map_apply_domain(test, isl_union_map_copy(partial));
   4347  1.1  mrg 	test = isl_union_map_apply_range(test, isl_union_map_copy(partial));
   4348  1.1  mrg 	gt = isl_union_map_from_map(isl_map_lex_gt(space));
   4349  1.1  mrg 	test = isl_union_map_intersect(test, gt);
   4350  1.1  mrg 	empty = isl_union_map_is_empty(test);
   4351  1.1  mrg 	isl_union_map_free(test);
   4352  1.1  mrg 
   4353  1.1  mrg 	if (empty < 0 || !empty) {
   4354  1.1  mrg 		isl_union_map_free(partial);
   4355  1.1  mrg 		return isl_bool_not(empty);
   4356  1.1  mrg 	}
   4357  1.1  mrg 
   4358  1.1  mrg 	universe = isl_union_map_universe(isl_union_map_copy(umap));
   4359  1.1  mrg 	domain = isl_union_map_domain(isl_union_map_copy(universe));
   4360  1.1  mrg 	range = isl_union_map_range(universe);
   4361  1.1  mrg 	umap1 = isl_union_map_copy(partial);
   4362  1.1  mrg 	umap1 = isl_union_map_intersect_domain(umap1, domain);
   4363  1.1  mrg 	umap2 = isl_union_map_intersect_domain(partial, range);
   4364  1.1  mrg 	test = isl_union_map_apply_range(umap1, isl_union_map_reverse(umap2));
   4365  1.1  mrg 	test = isl_union_map_intersect(test, isl_union_map_copy(umap));
   4366  1.1  mrg 	after = after_in_child(test, node);
   4367  1.1  mrg 	isl_union_map_free(test);
   4368  1.1  mrg 	return after;
   4369  1.1  mrg }
   4370  1.1  mrg 
   4371  1.1  mrg /* Is any domain element of "umap" scheduled after any of
   4372  1.1  mrg  * the corresponding image elements by the tree rooted at
   4373  1.1  mrg  * the context node "node"?
   4374  1.1  mrg  *
   4375  1.1  mrg  * The context constraints apply to the schedule domain,
   4376  1.1  mrg  * so we cannot apply them directly to "umap", which contains
   4377  1.1  mrg  * pairs of statement instances.  Instead, we add them
   4378  1.1  mrg  * to the range of the prefix schedule for both domain and
   4379  1.1  mrg  * range of "umap".
   4380  1.1  mrg  */
   4381  1.1  mrg static isl_bool after_in_context(__isl_keep isl_union_map *umap,
   4382  1.1  mrg 	__isl_keep isl_schedule_node *node)
   4383  1.1  mrg {
   4384  1.1  mrg 	isl_union_map *prefix, *universe, *umap1, *umap2;
   4385  1.1  mrg 	isl_union_set *domain, *range;
   4386  1.1  mrg 	isl_set *context;
   4387  1.1  mrg 	isl_bool after;
   4388  1.1  mrg 
   4389  1.1  mrg 	umap = isl_union_map_copy(umap);
   4390  1.1  mrg 	context = isl_schedule_node_context_get_context(node);
   4391  1.1  mrg 	prefix = isl_schedule_node_get_prefix_schedule_union_map(node);
   4392  1.1  mrg 	universe = isl_union_map_universe(isl_union_map_copy(umap));
   4393  1.1  mrg 	domain = isl_union_map_domain(isl_union_map_copy(universe));
   4394  1.1  mrg 	range = isl_union_map_range(universe);
   4395  1.1  mrg 	umap1 = isl_union_map_copy(prefix);
   4396  1.1  mrg 	umap1 = isl_union_map_intersect_domain(umap1, domain);
   4397  1.1  mrg 	umap2 = isl_union_map_intersect_domain(prefix, range);
   4398  1.1  mrg 	umap1 = isl_union_map_intersect_range(umap1,
   4399  1.1  mrg 					    isl_union_set_from_set(context));
   4400  1.1  mrg 	umap1 = isl_union_map_apply_range(umap1, isl_union_map_reverse(umap2));
   4401  1.1  mrg 	umap = isl_union_map_intersect(umap, umap1);
   4402  1.1  mrg 
   4403  1.1  mrg 	after = after_in_child(umap, node);
   4404  1.1  mrg 
   4405  1.1  mrg 	isl_union_map_free(umap);
   4406  1.1  mrg 
   4407  1.1  mrg 	return after;
   4408  1.1  mrg }
   4409  1.1  mrg 
   4410  1.1  mrg /* Is any domain element of "umap" scheduled after any of
   4411  1.1  mrg  * the corresponding image elements by the tree rooted at
   4412  1.1  mrg  * the expansion node "node"?
   4413  1.1  mrg  *
   4414  1.1  mrg  * We apply the expansion to domain and range of "umap" and
   4415  1.1  mrg  * continue with its child.
   4416  1.1  mrg  */
   4417  1.1  mrg static isl_bool after_in_expansion(__isl_keep isl_union_map *umap,
   4418  1.1  mrg 	__isl_keep isl_schedule_node *node)
   4419  1.1  mrg {
   4420  1.1  mrg 	isl_union_map *expansion;
   4421  1.1  mrg 	isl_bool after;
   4422  1.1  mrg 
   4423  1.1  mrg 	expansion = isl_schedule_node_expansion_get_expansion(node);
   4424  1.1  mrg 	umap = isl_union_map_copy(umap);
   4425  1.1  mrg 	umap = isl_union_map_apply_domain(umap, isl_union_map_copy(expansion));
   4426  1.1  mrg 	umap = isl_union_map_apply_range(umap, expansion);
   4427  1.1  mrg 
   4428  1.1  mrg 	after = after_in_child(umap, node);
   4429  1.1  mrg 
   4430  1.1  mrg 	isl_union_map_free(umap);
   4431  1.1  mrg 
   4432  1.1  mrg 	return after;
   4433  1.1  mrg }
   4434  1.1  mrg 
   4435  1.1  mrg /* Is any domain element of "umap" scheduled after any of
   4436  1.1  mrg  * the corresponding image elements by the tree rooted at
   4437  1.1  mrg  * the extension node "node"?
   4438  1.1  mrg  *
   4439  1.1  mrg  * Since the extension node may add statement instances before or
   4440  1.1  mrg  * after the pairs of statement instances in "umap", we return isl_bool_true
   4441  1.1  mrg  * to ensure that these pairs are not broken up.
   4442  1.1  mrg  */
   4443  1.1  mrg static isl_bool after_in_extension(__isl_keep isl_union_map *umap,
   4444  1.1  mrg 	__isl_keep isl_schedule_node *node)
   4445  1.1  mrg {
   4446  1.1  mrg 	return isl_bool_true;
   4447  1.1  mrg }
   4448  1.1  mrg 
   4449  1.1  mrg /* Is any domain element of "umap" scheduled after any of
   4450  1.1  mrg  * the corresponding image elements by the tree rooted at
   4451  1.1  mrg  * the filter node "node"?
   4452  1.1  mrg  *
   4453  1.1  mrg  * We intersect domain and range of "umap" with the filter and
   4454  1.1  mrg  * continue with its child.
   4455  1.1  mrg  */
   4456  1.1  mrg static isl_bool after_in_filter(__isl_keep isl_union_map *umap,
   4457  1.1  mrg 	__isl_keep isl_schedule_node *node)
   4458  1.1  mrg {
   4459  1.1  mrg 	isl_union_set *filter;
   4460  1.1  mrg 	isl_bool after;
   4461  1.1  mrg 
   4462  1.1  mrg 	umap = isl_union_map_copy(umap);
   4463  1.1  mrg 	filter = isl_schedule_node_filter_get_filter(node);
   4464  1.1  mrg 	umap = isl_union_map_intersect_domain(umap, isl_union_set_copy(filter));
   4465  1.1  mrg 	umap = isl_union_map_intersect_range(umap, filter);
   4466  1.1  mrg 
   4467  1.1  mrg 	after = after_in_child(umap, node);
   4468  1.1  mrg 
   4469  1.1  mrg 	isl_union_map_free(umap);
   4470  1.1  mrg 
   4471  1.1  mrg 	return after;
   4472  1.1  mrg }
   4473  1.1  mrg 
   4474  1.1  mrg /* Is any domain element of "umap" scheduled after any of
   4475  1.1  mrg  * the corresponding image elements by the tree rooted at
   4476  1.1  mrg  * the set node "node"?
   4477  1.1  mrg  *
   4478  1.1  mrg  * This is only the case if this condition holds in any
   4479  1.1  mrg  * of the (filter) children of the set node.
   4480  1.1  mrg  * In particular, if the domain and the range of "umap"
   4481  1.1  mrg  * are contained in different children, then the condition
   4482  1.1  mrg  * does not hold.
   4483  1.1  mrg  */
   4484  1.1  mrg static isl_bool after_in_set(__isl_keep isl_union_map *umap,
   4485  1.1  mrg 	__isl_keep isl_schedule_node *node)
   4486  1.1  mrg {
   4487  1.1  mrg 	int i;
   4488  1.1  mrg 	isl_size n;
   4489  1.1  mrg 
   4490  1.1  mrg 	n = isl_schedule_node_n_children(node);
   4491  1.1  mrg 	if (n < 0)
   4492  1.1  mrg 		return isl_bool_error;
   4493  1.1  mrg 	for (i = 0; i < n; ++i) {
   4494  1.1  mrg 		isl_schedule_node *child;
   4495  1.1  mrg 		isl_bool after;
   4496  1.1  mrg 
   4497  1.1  mrg 		child = isl_schedule_node_get_child(node, i);
   4498  1.1  mrg 		after = after_in_tree(umap, child);
   4499  1.1  mrg 		isl_schedule_node_free(child);
   4500  1.1  mrg 
   4501  1.1  mrg 		if (after < 0 || after)
   4502  1.1  mrg 			return after;
   4503  1.1  mrg 	}
   4504  1.1  mrg 
   4505  1.1  mrg 	return isl_bool_false;
   4506  1.1  mrg }
   4507  1.1  mrg 
   4508  1.1  mrg /* Return the filter of child "i" of "node".
   4509  1.1  mrg  */
   4510  1.1  mrg static __isl_give isl_union_set *child_filter(
   4511  1.1  mrg 	__isl_keep isl_schedule_node *node, int i)
   4512  1.1  mrg {
   4513  1.1  mrg 	isl_schedule_node *child;
   4514  1.1  mrg 	isl_union_set *filter;
   4515  1.1  mrg 
   4516  1.1  mrg 	child = isl_schedule_node_get_child(node, i);
   4517  1.1  mrg 	filter = isl_schedule_node_filter_get_filter(child);
   4518  1.1  mrg 	isl_schedule_node_free(child);
   4519  1.1  mrg 
   4520  1.1  mrg 	return filter;
   4521  1.1  mrg }
   4522  1.1  mrg 
   4523  1.1  mrg /* Is any domain element of "umap" scheduled after any of
   4524  1.1  mrg  * the corresponding image elements by the tree rooted at
   4525  1.1  mrg  * the sequence node "node"?
   4526  1.1  mrg  *
   4527  1.1  mrg  * This happens in particular if any domain element is
   4528  1.1  mrg  * contained in a later child than one containing a range element or
   4529  1.1  mrg  * if the condition holds within a given child in the sequence.
   4530  1.1  mrg  * The later part of the condition is checked by after_in_set.
   4531  1.1  mrg  */
   4532  1.1  mrg static isl_bool after_in_sequence(__isl_keep isl_union_map *umap,
   4533  1.1  mrg 	__isl_keep isl_schedule_node *node)
   4534  1.1  mrg {
   4535  1.1  mrg 	int i, j;
   4536  1.1  mrg 	isl_size n;
   4537  1.1  mrg 	isl_union_map *umap_i;
   4538  1.1  mrg 	isl_bool empty;
   4539  1.1  mrg 	isl_bool after = isl_bool_false;
   4540  1.1  mrg 
   4541  1.1  mrg 	n = isl_schedule_node_n_children(node);
   4542  1.1  mrg 	if (n < 0)
   4543  1.1  mrg 		return isl_bool_error;
   4544  1.1  mrg 	for (i = 1; i < n; ++i) {
   4545  1.1  mrg 		isl_union_set *filter_i;
   4546  1.1  mrg 
   4547  1.1  mrg 		umap_i = isl_union_map_copy(umap);
   4548  1.1  mrg 		filter_i = child_filter(node, i);
   4549  1.1  mrg 		umap_i = isl_union_map_intersect_domain(umap_i, filter_i);
   4550  1.1  mrg 		empty = isl_union_map_is_empty(umap_i);
   4551  1.1  mrg 		if (empty < 0)
   4552  1.1  mrg 			goto error;
   4553  1.1  mrg 		if (empty) {
   4554  1.1  mrg 			isl_union_map_free(umap_i);
   4555  1.1  mrg 			continue;
   4556  1.1  mrg 		}
   4557  1.1  mrg 
   4558  1.1  mrg 		for (j = 0; j < i; ++j) {
   4559  1.1  mrg 			isl_union_set *filter_j;
   4560  1.1  mrg 			isl_union_map *umap_ij;
   4561  1.1  mrg 
   4562  1.1  mrg 			umap_ij = isl_union_map_copy(umap_i);
   4563  1.1  mrg 			filter_j = child_filter(node, j);
   4564  1.1  mrg 			umap_ij = isl_union_map_intersect_range(umap_ij,
   4565  1.1  mrg 								filter_j);
   4566  1.1  mrg 			empty = isl_union_map_is_empty(umap_ij);
   4567  1.1  mrg 			isl_union_map_free(umap_ij);
   4568  1.1  mrg 
   4569  1.1  mrg 			if (empty < 0)
   4570  1.1  mrg 				goto error;
   4571  1.1  mrg 			if (!empty)
   4572  1.1  mrg 				after = isl_bool_true;
   4573  1.1  mrg 			if (after)
   4574  1.1  mrg 				break;
   4575  1.1  mrg 		}
   4576  1.1  mrg 
   4577  1.1  mrg 		isl_union_map_free(umap_i);
   4578  1.1  mrg 		if (after)
   4579  1.1  mrg 			break;
   4580  1.1  mrg 	}
   4581  1.1  mrg 
   4582  1.1  mrg 	if (after < 0 || after)
   4583  1.1  mrg 		return after;
   4584  1.1  mrg 
   4585  1.1  mrg 	return after_in_set(umap, node);
   4586  1.1  mrg error:
   4587  1.1  mrg 	isl_union_map_free(umap_i);
   4588  1.1  mrg 	return isl_bool_error;
   4589  1.1  mrg }
   4590  1.1  mrg 
   4591  1.1  mrg /* Is any domain element of "umap" scheduled after any of
   4592  1.1  mrg  * the corresponding image elements by the tree rooted at "node"?
   4593  1.1  mrg  *
   4594  1.1  mrg  * If "umap" is empty, then clearly there is no such element.
   4595  1.1  mrg  * Otherwise, consider the different types of nodes separately.
   4596  1.1  mrg  */
   4597  1.1  mrg static isl_bool after_in_tree(__isl_keep isl_union_map *umap,
   4598  1.1  mrg 	__isl_keep isl_schedule_node *node)
   4599  1.1  mrg {
   4600  1.1  mrg 	isl_bool empty;
   4601  1.1  mrg 	enum isl_schedule_node_type type;
   4602  1.1  mrg 
   4603  1.1  mrg 	empty = isl_union_map_is_empty(umap);
   4604  1.1  mrg 	if (empty < 0)
   4605  1.1  mrg 		return isl_bool_error;
   4606  1.1  mrg 	if (empty)
   4607  1.1  mrg 		return isl_bool_false;
   4608  1.1  mrg 	if (!node)
   4609  1.1  mrg 		return isl_bool_error;
   4610  1.1  mrg 
   4611  1.1  mrg 	type = isl_schedule_node_get_type(node);
   4612  1.1  mrg 	switch (type) {
   4613  1.1  mrg 	case isl_schedule_node_error:
   4614  1.1  mrg 		return isl_bool_error;
   4615  1.1  mrg 	case isl_schedule_node_leaf:
   4616  1.1  mrg 		return isl_bool_false;
   4617  1.1  mrg 	case isl_schedule_node_band:
   4618  1.1  mrg 		return after_in_band(umap, node);
   4619  1.1  mrg 	case isl_schedule_node_domain:
   4620  1.1  mrg 		isl_die(isl_schedule_node_get_ctx(node), isl_error_internal,
   4621  1.1  mrg 			"unexpected internal domain node",
   4622  1.1  mrg 			return isl_bool_error);
   4623  1.1  mrg 	case isl_schedule_node_context:
   4624  1.1  mrg 		return after_in_context(umap, node);
   4625  1.1  mrg 	case isl_schedule_node_expansion:
   4626  1.1  mrg 		return after_in_expansion(umap, node);
   4627  1.1  mrg 	case isl_schedule_node_extension:
   4628  1.1  mrg 		return after_in_extension(umap, node);
   4629  1.1  mrg 	case isl_schedule_node_filter:
   4630  1.1  mrg 		return after_in_filter(umap, node);
   4631  1.1  mrg 	case isl_schedule_node_guard:
   4632  1.1  mrg 	case isl_schedule_node_mark:
   4633  1.1  mrg 		return after_in_child(umap, node);
   4634  1.1  mrg 	case isl_schedule_node_set:
   4635  1.1  mrg 		return after_in_set(umap, node);
   4636  1.1  mrg 	case isl_schedule_node_sequence:
   4637  1.1  mrg 		return after_in_sequence(umap, node);
   4638  1.1  mrg 	}
   4639  1.1  mrg 
   4640  1.1  mrg 	return isl_bool_true;
   4641  1.1  mrg }
   4642  1.1  mrg 
   4643  1.1  mrg /* Is any domain element of "map1" scheduled after any domain
   4644  1.1  mrg  * element of "map2" by the subtree underneath the current band node,
   4645  1.1  mrg  * while at the same time being scheduled together by the current
   4646  1.1  mrg  * band node, i.e., by "map1" and "map2?
   4647  1.1  mrg  *
   4648  1.1  mrg  * If the child of the current band node is a leaf, then
   4649  1.1  mrg  * no element can be scheduled after any other element.
   4650  1.1  mrg  *
   4651  1.1  mrg  * Otherwise, we construct a relation between domain elements
   4652  1.1  mrg  * of "map1" and domain elements of "map2" that are scheduled
   4653  1.1  mrg  * together and then check if the subtree underneath the current
   4654  1.1  mrg  * band node determines their relative order.
   4655  1.1  mrg  */
   4656  1.1  mrg static isl_bool after_in_subtree(__isl_keep isl_ast_build *build,
   4657  1.1  mrg 	__isl_keep isl_map *map1, __isl_keep isl_map *map2)
   4658  1.1  mrg {
   4659  1.1  mrg 	isl_schedule_node *node;
   4660  1.1  mrg 	isl_map *map;
   4661  1.1  mrg 	isl_union_map *umap;
   4662  1.1  mrg 	isl_bool after;
   4663  1.1  mrg 
   4664  1.1  mrg 	node = isl_ast_build_get_schedule_node(build);
   4665  1.1  mrg 	if (!node)
   4666  1.1  mrg 		return isl_bool_error;
   4667  1.1  mrg 	node = isl_schedule_node_child(node, 0);
   4668  1.1  mrg 	if (isl_schedule_node_get_type(node) == isl_schedule_node_leaf) {
   4669  1.1  mrg 		isl_schedule_node_free(node);
   4670  1.1  mrg 		return isl_bool_false;
   4671  1.1  mrg 	}
   4672  1.1  mrg 	map = isl_map_copy(map2);
   4673  1.1  mrg 	map = isl_map_apply_domain(map, isl_map_copy(map1));
   4674  1.1  mrg 	umap = isl_union_map_from_map(map);
   4675  1.1  mrg 	after = after_in_tree(umap, node);
   4676  1.1  mrg 	isl_union_map_free(umap);
   4677  1.1  mrg 	isl_schedule_node_free(node);
   4678  1.1  mrg 	return after;
   4679  1.1  mrg }
   4680  1.1  mrg 
   4681  1.1  mrg /* Internal data for any_scheduled_after.
   4682  1.1  mrg  *
   4683  1.1  mrg  * "build" is the build in which the AST is constructed.
   4684  1.1  mrg  * "depth" is the number of loops that have already been generated
   4685  1.1  mrg  * "group_coscheduled" is a local copy of options->ast_build_group_coscheduled
   4686  1.1  mrg  * "domain" is an array of set-map pairs corresponding to the different
   4687  1.1  mrg  * iteration domains.  The set is the schedule domain, i.e., the domain
   4688  1.1  mrg  * of the inverse schedule, while the map is the inverse schedule itself.
   4689  1.1  mrg  */
   4690  1.1  mrg struct isl_any_scheduled_after_data {
   4691  1.1  mrg 	isl_ast_build *build;
   4692  1.1  mrg 	int depth;
   4693  1.1  mrg 	int group_coscheduled;
   4694  1.1  mrg 	struct isl_set_map_pair *domain;
   4695  1.1  mrg };
   4696  1.1  mrg 
   4697  1.1  mrg /* Is any element of domain "i" scheduled after any element of domain "j"
   4698  1.1  mrg  * (for a common iteration of the first data->depth loops)?
   4699  1.1  mrg  *
   4700  1.1  mrg  * data->domain[i].set contains the domain of the inverse schedule
   4701  1.1  mrg  * for domain "i", i.e., elements in the schedule domain.
   4702  1.1  mrg  *
   4703  1.1  mrg  * If we are inside a band of a schedule tree and there is a pair
   4704  1.1  mrg  * of elements in the two domains that is schedule together by
   4705  1.1  mrg  * the current band, then we check if any element of "i" may be schedule
   4706  1.1  mrg  * after element of "j" by the descendants of the band node.
   4707  1.1  mrg  *
   4708  1.1  mrg  * If data->group_coscheduled is set, then we also return 1 if there
   4709  1.1  mrg  * is any pair of elements in the two domains that are scheduled together.
   4710  1.1  mrg  */
   4711  1.1  mrg static isl_bool any_scheduled_after(int i, int j, void *user)
   4712  1.1  mrg {
   4713  1.1  mrg 	struct isl_any_scheduled_after_data *data = user;
   4714  1.1  mrg 	isl_size dim = isl_set_dim(data->domain[i].set, isl_dim_set);
   4715  1.1  mrg 	int pos;
   4716  1.1  mrg 
   4717  1.1  mrg 	if (dim < 0)
   4718  1.1  mrg 		return isl_bool_error;
   4719  1.1  mrg 
   4720  1.1  mrg 	for (pos = data->depth; pos < dim; ++pos) {
   4721  1.1  mrg 		int follows;
   4722  1.1  mrg 
   4723  1.1  mrg 		follows = isl_set_follows_at(data->domain[i].set,
   4724  1.1  mrg 						data->domain[j].set, pos);
   4725  1.1  mrg 
   4726  1.1  mrg 		if (follows < -1)
   4727  1.1  mrg 			return isl_bool_error;
   4728  1.1  mrg 		if (follows > 0)
   4729  1.1  mrg 			return isl_bool_true;
   4730  1.1  mrg 		if (follows < 0)
   4731  1.1  mrg 			return isl_bool_false;
   4732  1.1  mrg 	}
   4733  1.1  mrg 
   4734  1.1  mrg 	if (isl_ast_build_has_schedule_node(data->build)) {
   4735  1.1  mrg 		isl_bool after;
   4736  1.1  mrg 
   4737  1.1  mrg 		after = after_in_subtree(data->build, data->domain[i].map,
   4738  1.1  mrg 					    data->domain[j].map);
   4739  1.1  mrg 		if (after < 0 || after)
   4740  1.1  mrg 			return after;
   4741  1.1  mrg 	}
   4742  1.1  mrg 
   4743  1.1  mrg 	return isl_bool_ok(data->group_coscheduled);
   4744  1.1  mrg }
   4745  1.1  mrg 
   4746  1.1  mrg /* Look for independent components at the current depth and generate code
   4747  1.1  mrg  * for each component separately.  The resulting lists of grafts are
   4748  1.1  mrg  * merged in an attempt to combine grafts with identical guards.
   4749  1.1  mrg  *
   4750  1.1  mrg  * Code for two domains can be generated separately if all the elements
   4751  1.1  mrg  * of one domain are scheduled before (or together with) all the elements
   4752  1.1  mrg  * of the other domain.  We therefore consider the graph with as nodes
   4753  1.1  mrg  * the domains and an edge between two nodes if any element of the first
   4754  1.1  mrg  * node is scheduled after any element of the second node.
   4755  1.1  mrg  * If the ast_build_group_coscheduled is set, then we also add an edge if
   4756  1.1  mrg  * there is any pair of elements in the two domains that are scheduled
   4757  1.1  mrg  * together.
   4758  1.1  mrg  * Code is then generated (by generate_component)
   4759  1.1  mrg  * for each of the strongly connected components in this graph
   4760  1.1  mrg  * in their topological order.
   4761  1.1  mrg  *
   4762  1.1  mrg  * Since the test is performed on the domain of the inverse schedules of
   4763  1.1  mrg  * the different domains, we precompute these domains and store
   4764  1.1  mrg  * them in data.domain.
   4765  1.1  mrg  */
   4766  1.1  mrg static __isl_give isl_ast_graft_list *generate_components(
   4767  1.1  mrg 	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
   4768  1.1  mrg {
   4769  1.1  mrg 	int i;
   4770  1.1  mrg 	isl_ctx *ctx = isl_ast_build_get_ctx(build);
   4771  1.1  mrg 	isl_size n = isl_union_map_n_map(executed);
   4772  1.1  mrg 	isl_size depth;
   4773  1.1  mrg 	struct isl_any_scheduled_after_data data;
   4774  1.1  mrg 	struct isl_set_map_pair *next;
   4775  1.1  mrg 	struct isl_tarjan_graph *g = NULL;
   4776  1.1  mrg 	isl_ast_graft_list *list = NULL;
   4777  1.1  mrg 	int n_domain = 0;
   4778  1.1  mrg 
   4779  1.1  mrg 	data.domain = NULL;
   4780  1.1  mrg 	if (n < 0)
   4781  1.1  mrg 		goto error;
   4782  1.1  mrg 	data.domain = isl_calloc_array(ctx, struct isl_set_map_pair, n);
   4783  1.1  mrg 	if (!data.domain)
   4784  1.1  mrg 		goto error;
   4785  1.1  mrg 	n_domain = n;
   4786  1.1  mrg 
   4787  1.1  mrg 	next = data.domain;
   4788  1.1  mrg 	if (isl_union_map_foreach_map(executed, &extract_domain, &next) < 0)
   4789  1.1  mrg 		goto error;
   4790  1.1  mrg 
   4791  1.1  mrg 	depth = isl_ast_build_get_depth(build);
   4792  1.1  mrg 	if (depth < 0)
   4793  1.1  mrg 		goto error;
   4794  1.1  mrg 	data.build = build;
   4795  1.1  mrg 	data.depth = depth;
   4796  1.1  mrg 	data.group_coscheduled = isl_options_get_ast_build_group_coscheduled(ctx);
   4797  1.1  mrg 	g = isl_tarjan_graph_init(ctx, n, &any_scheduled_after, &data);
   4798  1.1  mrg 	if (!g)
   4799  1.1  mrg 		goto error;
   4800  1.1  mrg 
   4801  1.1  mrg 	list = isl_ast_graft_list_alloc(ctx, 0);
   4802  1.1  mrg 
   4803  1.1  mrg 	i = 0;
   4804  1.1  mrg 	while (list && n) {
   4805  1.1  mrg 		isl_ast_graft_list *list_c;
   4806  1.1  mrg 		int first = i;
   4807  1.1  mrg 
   4808  1.1  mrg 		if (g->order[i] == -1)
   4809  1.1  mrg 			isl_die(ctx, isl_error_internal, "cannot happen",
   4810  1.1  mrg 				goto error);
   4811  1.1  mrg 		++i; --n;
   4812  1.1  mrg 		while (g->order[i] != -1) {
   4813  1.1  mrg 			++i; --n;
   4814  1.1  mrg 		}
   4815  1.1  mrg 
   4816  1.1  mrg 		list_c = generate_component(data.domain,
   4817  1.1  mrg 					    g->order + first, i - first,
   4818  1.1  mrg 					    isl_ast_build_copy(build));
   4819  1.1  mrg 		list = isl_ast_graft_list_merge(list, list_c, build);
   4820  1.1  mrg 
   4821  1.1  mrg 		++i;
   4822  1.1  mrg 	}
   4823  1.1  mrg 
   4824  1.1  mrg 	if (0)
   4825  1.1  mrg error:		list = isl_ast_graft_list_free(list);
   4826  1.1  mrg 	isl_tarjan_graph_free(g);
   4827  1.1  mrg 	for (i = 0; i < n_domain; ++i) {
   4828  1.1  mrg 		isl_map_free(data.domain[i].map);
   4829  1.1  mrg 		isl_set_free(data.domain[i].set);
   4830  1.1  mrg 	}
   4831  1.1  mrg 	free(data.domain);
   4832  1.1  mrg 	isl_union_map_free(executed);
   4833  1.1  mrg 	isl_ast_build_free(build);
   4834  1.1  mrg 
   4835  1.1  mrg 	return list;
   4836  1.1  mrg }
   4837  1.1  mrg 
   4838  1.1  mrg /* Generate code for the next level (and all inner levels).
   4839  1.1  mrg  *
   4840  1.1  mrg  * If "executed" is empty, i.e., no code needs to be generated,
   4841  1.1  mrg  * then we return an empty list.
   4842  1.1  mrg  *
   4843  1.1  mrg  * If we have already generated code for all loop levels, then we pass
   4844  1.1  mrg  * control to generate_inner_level.
   4845  1.1  mrg  *
   4846  1.1  mrg  * If "executed" lives in a single space, i.e., if code needs to be
   4847  1.1  mrg  * generated for a single domain, then there can only be a single
   4848  1.1  mrg  * component and we go directly to generate_shifted_component.
   4849  1.1  mrg  * Otherwise, we call generate_components to detect the components
   4850  1.1  mrg  * and to call generate_component on each of them separately.
   4851  1.1  mrg  */
   4852  1.1  mrg static __isl_give isl_ast_graft_list *generate_next_level(
   4853  1.1  mrg 	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
   4854  1.1  mrg {
   4855  1.1  mrg 	isl_size depth;
   4856  1.1  mrg 	isl_size dim;
   4857  1.1  mrg 	isl_size n;
   4858  1.1  mrg 
   4859  1.1  mrg 	if (!build || !executed)
   4860  1.1  mrg 		goto error;
   4861  1.1  mrg 
   4862  1.1  mrg 	if (isl_union_map_is_empty(executed)) {
   4863  1.1  mrg 		isl_ctx *ctx = isl_ast_build_get_ctx(build);
   4864  1.1  mrg 		isl_union_map_free(executed);
   4865  1.1  mrg 		isl_ast_build_free(build);
   4866  1.1  mrg 		return isl_ast_graft_list_alloc(ctx, 0);
   4867  1.1  mrg 	}
   4868  1.1  mrg 
   4869  1.1  mrg 	depth = isl_ast_build_get_depth(build);
   4870  1.1  mrg 	dim = isl_ast_build_dim(build, isl_dim_set);
   4871  1.1  mrg 	if (depth < 0 || dim < 0)
   4872  1.1  mrg 		goto error;
   4873  1.1  mrg 	if (depth >= dim)
   4874  1.1  mrg 		return generate_inner_level(executed, build);
   4875  1.1  mrg 
   4876  1.1  mrg 	n = isl_union_map_n_map(executed);
   4877  1.1  mrg 	if (n < 0)
   4878  1.1  mrg 		goto error;
   4879  1.1  mrg 	if (n == 1)
   4880  1.1  mrg 		return generate_shifted_component(executed, build);
   4881  1.1  mrg 
   4882  1.1  mrg 	return generate_components(executed, build);
   4883  1.1  mrg error:
   4884  1.1  mrg 	isl_union_map_free(executed);
   4885  1.1  mrg 	isl_ast_build_free(build);
   4886  1.1  mrg 	return NULL;
   4887  1.1  mrg }
   4888  1.1  mrg 
   4889  1.1  mrg /* Internal data structure used by isl_ast_build_node_from_schedule_map.
   4890  1.1  mrg  * internal, executed and build are the inputs to generate_code.
   4891  1.1  mrg  * list collects the output.
   4892  1.1  mrg  */
   4893  1.1  mrg struct isl_generate_code_data {
   4894  1.1  mrg 	int internal;
   4895  1.1  mrg 	isl_union_map *executed;
   4896  1.1  mrg 	isl_ast_build *build;
   4897  1.1  mrg 
   4898  1.1  mrg 	isl_ast_graft_list *list;
   4899  1.1  mrg };
   4900  1.1  mrg 
   4901  1.1  mrg /* Given an inverse schedule in terms of the external build schedule, i.e.,
   4902  1.1  mrg  *
   4903  1.1  mrg  *	[E -> S] -> D
   4904  1.1  mrg  *
   4905  1.1  mrg  * with E the external build schedule and S the additional schedule "space",
   4906  1.1  mrg  * reformulate the inverse schedule in terms of the internal schedule domain,
   4907  1.1  mrg  * i.e., return
   4908  1.1  mrg  *
   4909  1.1  mrg  *	[I -> S] -> D
   4910  1.1  mrg  *
   4911  1.1  mrg  * We first obtain a mapping
   4912  1.1  mrg  *
   4913  1.1  mrg  *	I -> E
   4914  1.1  mrg  *
   4915  1.1  mrg  * take the inverse and the product with S -> S, resulting in
   4916  1.1  mrg  *
   4917  1.1  mrg  *	[I -> S] -> [E -> S]
   4918  1.1  mrg  *
   4919  1.1  mrg  * Applying the map to the input produces the desired result.
   4920  1.1  mrg  */
   4921  1.1  mrg static __isl_give isl_union_map *internal_executed(
   4922  1.1  mrg 	__isl_take isl_union_map *executed, __isl_keep isl_space *space,
   4923  1.1  mrg 	__isl_keep isl_ast_build *build)
   4924  1.1  mrg {
   4925  1.1  mrg 	isl_map *id, *proj;
   4926  1.1  mrg 
   4927  1.1  mrg 	proj = isl_ast_build_get_schedule_map(build);
   4928  1.1  mrg 	proj = isl_map_reverse(proj);
   4929  1.1  mrg 	space = isl_space_map_from_set(isl_space_copy(space));
   4930  1.1  mrg 	id = isl_map_identity(space);
   4931  1.1  mrg 	proj = isl_map_product(proj, id);
   4932  1.1  mrg 	executed = isl_union_map_apply_domain(executed,
   4933  1.1  mrg 						isl_union_map_from_map(proj));
   4934  1.1  mrg 	return executed;
   4935  1.1  mrg }
   4936  1.1  mrg 
   4937  1.1  mrg /* Generate an AST that visits the elements in the range of data->executed
   4938  1.1  mrg  * in the relative order specified by the corresponding domain element(s)
   4939  1.1  mrg  * for those domain elements that belong to "set".
   4940  1.1  mrg  * Add the result to data->list.
   4941  1.1  mrg  *
   4942  1.1  mrg  * The caller ensures that "set" is a universe domain.
   4943  1.1  mrg  * "space" is the space of the additional part of the schedule.
   4944  1.1  mrg  * It is equal to the space of "set" if build->domain is parametric.
   4945  1.1  mrg  * Otherwise, it is equal to the range of the wrapped space of "set".
   4946  1.1  mrg  *
   4947  1.1  mrg  * If the build space is not parametric and
   4948  1.1  mrg  * if isl_ast_build_node_from_schedule_map
   4949  1.1  mrg  * was called from an outside user (data->internal not set), then
   4950  1.1  mrg  * the (inverse) schedule refers to the external build domain and needs to
   4951  1.1  mrg  * be transformed to refer to the internal build domain.
   4952  1.1  mrg  *
   4953  1.1  mrg  * If the build space is parametric, then we add some of the parameter
   4954  1.1  mrg  * constraints to the executed relation.  Adding these constraints
   4955  1.1  mrg  * allows for an earlier detection of conflicts in some cases.
   4956  1.1  mrg  * However, we do not want to divide the executed relation into
   4957  1.1  mrg  * more disjuncts than necessary.  We therefore approximate
   4958  1.1  mrg  * the constraints on the parameters by a single disjunct set.
   4959  1.1  mrg  *
   4960  1.1  mrg  * The build is extended to include the additional part of the schedule.
   4961  1.1  mrg  * If the original build space was not parametric, then the options
   4962  1.1  mrg  * in data->build refer only to the additional part of the schedule
   4963  1.1  mrg  * and they need to be adjusted to refer to the complete AST build
   4964  1.1  mrg  * domain.
   4965  1.1  mrg  *
   4966  1.1  mrg  * After having adjusted inverse schedule and build, we start generating
   4967  1.1  mrg  * code with the outer loop of the current code generation
   4968  1.1  mrg  * in generate_next_level.
   4969  1.1  mrg  *
   4970  1.1  mrg  * If the original build space was not parametric, we undo the embedding
   4971  1.1  mrg  * on the resulting isl_ast_node_list so that it can be used within
   4972  1.1  mrg  * the outer AST build.
   4973  1.1  mrg  */
   4974  1.1  mrg static isl_stat generate_code_in_space(struct isl_generate_code_data *data,
   4975  1.1  mrg 	__isl_take isl_set *set, __isl_take isl_space *space)
   4976  1.1  mrg {
   4977  1.1  mrg 	isl_union_map *executed;
   4978  1.1  mrg 	isl_ast_build *build;
   4979  1.1  mrg 	isl_ast_graft_list *list;
   4980  1.1  mrg 	int embed;
   4981  1.1  mrg 
   4982  1.1  mrg 	executed = isl_union_map_copy(data->executed);
   4983  1.1  mrg 	executed = isl_union_map_intersect_domain(executed,
   4984  1.1  mrg 						 isl_union_set_from_set(set));
   4985  1.1  mrg 
   4986  1.1  mrg 	embed = !isl_set_is_params(data->build->domain);
   4987  1.1  mrg 	if (embed && !data->internal)
   4988  1.1  mrg 		executed = internal_executed(executed, space, data->build);
   4989  1.1  mrg 	if (!embed) {
   4990  1.1  mrg 		isl_set *domain;
   4991  1.1  mrg 		domain = isl_ast_build_get_domain(data->build);
   4992  1.1  mrg 		domain = isl_set_from_basic_set(isl_set_simple_hull(domain));
   4993  1.1  mrg 		executed = isl_union_map_intersect_params(executed, domain);
   4994  1.1  mrg 	}
   4995  1.1  mrg 
   4996  1.1  mrg 	build = isl_ast_build_copy(data->build);
   4997  1.1  mrg 	build = isl_ast_build_product(build, space);
   4998  1.1  mrg 
   4999  1.1  mrg 	list = generate_next_level(executed, build);
   5000  1.1  mrg 
   5001  1.1  mrg 	list = isl_ast_graft_list_unembed(list, embed);
   5002  1.1  mrg 
   5003  1.1  mrg 	data->list = isl_ast_graft_list_concat(data->list, list);
   5004  1.1  mrg 
   5005  1.1  mrg 	return isl_stat_ok;
   5006  1.1  mrg }
   5007  1.1  mrg 
   5008  1.1  mrg /* Generate an AST that visits the elements in the range of data->executed
   5009  1.1  mrg  * in the relative order specified by the corresponding domain element(s)
   5010  1.1  mrg  * for those domain elements that belong to "set".
   5011  1.1  mrg  * Add the result to data->list.
   5012  1.1  mrg  *
   5013  1.1  mrg  * The caller ensures that "set" is a universe domain.
   5014  1.1  mrg  *
   5015  1.1  mrg  * If the build space S is not parametric, then the space of "set"
   5016  1.1  mrg  * need to be a wrapped relation with S as domain.  That is, it needs
   5017  1.1  mrg  * to be of the form
   5018  1.1  mrg  *
   5019  1.1  mrg  *	[S -> T]
   5020  1.1  mrg  *
   5021  1.1  mrg  * Check this property and pass control to generate_code_in_space
   5022  1.1  mrg  * passing along T.
   5023  1.1  mrg  * If the build space is not parametric, then T is the space of "set".
   5024  1.1  mrg  */
   5025  1.1  mrg static isl_stat generate_code_set(__isl_take isl_set *set, void *user)
   5026  1.1  mrg {
   5027  1.1  mrg 	struct isl_generate_code_data *data = user;
   5028  1.1  mrg 	isl_space *space, *build_space;
   5029  1.1  mrg 	int is_domain;
   5030  1.1  mrg 
   5031  1.1  mrg 	space = isl_set_get_space(set);
   5032  1.1  mrg 
   5033  1.1  mrg 	if (isl_set_is_params(data->build->domain))
   5034  1.1  mrg 		return generate_code_in_space(data, set, space);
   5035  1.1  mrg 
   5036  1.1  mrg 	build_space = isl_ast_build_get_space(data->build, data->internal);
   5037  1.1  mrg 	space = isl_space_unwrap(space);
   5038  1.1  mrg 	is_domain = isl_space_is_domain(build_space, space);
   5039  1.1  mrg 	isl_space_free(build_space);
   5040  1.1  mrg 	space = isl_space_range(space);
   5041  1.1  mrg 
   5042  1.1  mrg 	if (is_domain < 0)
   5043  1.1  mrg 		goto error;
   5044  1.1  mrg 	if (!is_domain)
   5045  1.1  mrg 		isl_die(isl_set_get_ctx(set), isl_error_invalid,
   5046  1.1  mrg 			"invalid nested schedule space", goto error);
   5047  1.1  mrg 
   5048  1.1  mrg 	return generate_code_in_space(data, set, space);
   5049  1.1  mrg error:
   5050  1.1  mrg 	isl_set_free(set);
   5051  1.1  mrg 	isl_space_free(space);
   5052  1.1  mrg 	return isl_stat_error;
   5053  1.1  mrg }
   5054  1.1  mrg 
   5055  1.1  mrg /* Generate an AST that visits the elements in the range of "executed"
   5056  1.1  mrg  * in the relative order specified by the corresponding domain element(s).
   5057  1.1  mrg  *
   5058  1.1  mrg  * "build" is an isl_ast_build that has either been constructed by
   5059  1.1  mrg  * isl_ast_build_from_context or passed to a callback set by
   5060  1.1  mrg  * isl_ast_build_set_create_leaf.
   5061  1.1  mrg  * In the first case, the space of the isl_ast_build is typically
   5062  1.1  mrg  * a parametric space, although this is currently not enforced.
   5063  1.1  mrg  * In the second case, the space is never a parametric space.
   5064  1.1  mrg  * If the space S is not parametric, then the domain space(s) of "executed"
   5065  1.1  mrg  * need to be wrapped relations with S as domain.
   5066  1.1  mrg  *
   5067  1.1  mrg  * If the domain of "executed" consists of several spaces, then an AST
   5068  1.1  mrg  * is generated for each of them (in arbitrary order) and the results
   5069  1.1  mrg  * are concatenated.
   5070  1.1  mrg  *
   5071  1.1  mrg  * If "internal" is set, then the domain "S" above refers to the internal
   5072  1.1  mrg  * schedule domain representation.  Otherwise, it refers to the external
   5073  1.1  mrg  * representation, as returned by isl_ast_build_get_schedule_space.
   5074  1.1  mrg  *
   5075  1.1  mrg  * We essentially run over all the spaces in the domain of "executed"
   5076  1.1  mrg  * and call generate_code_set on each of them.
   5077  1.1  mrg  */
   5078  1.1  mrg static __isl_give isl_ast_graft_list *generate_code(
   5079  1.1  mrg 	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build,
   5080  1.1  mrg 	int internal)
   5081  1.1  mrg {
   5082  1.1  mrg 	isl_ctx *ctx;
   5083  1.1  mrg 	struct isl_generate_code_data data = { 0 };
   5084  1.1  mrg 	isl_space *space;
   5085  1.1  mrg 	isl_union_set *schedule_domain;
   5086  1.1  mrg 	isl_union_map *universe;
   5087  1.1  mrg 
   5088  1.1  mrg 	if (!build)
   5089  1.1  mrg 		goto error;
   5090  1.1  mrg 	space = isl_ast_build_get_space(build, 1);
   5091  1.1  mrg 	space = isl_space_align_params(space,
   5092  1.1  mrg 				    isl_union_map_get_space(executed));
   5093  1.1  mrg 	space = isl_space_align_params(space,
   5094  1.1  mrg 				    isl_union_map_get_space(build->options));
   5095  1.1  mrg 	build = isl_ast_build_align_params(build, isl_space_copy(space));
   5096  1.1  mrg 	executed = isl_union_map_align_params(executed, space);
   5097  1.1  mrg 	if (!executed || !build)
   5098  1.1  mrg 		goto error;
   5099  1.1  mrg 
   5100  1.1  mrg 	ctx = isl_ast_build_get_ctx(build);
   5101  1.1  mrg 
   5102  1.1  mrg 	data.internal = internal;
   5103  1.1  mrg 	data.executed = executed;
   5104  1.1  mrg 	data.build = build;
   5105  1.1  mrg 	data.list = isl_ast_graft_list_alloc(ctx, 0);
   5106  1.1  mrg 
   5107  1.1  mrg 	universe = isl_union_map_universe(isl_union_map_copy(executed));
   5108  1.1  mrg 	schedule_domain = isl_union_map_domain(universe);
   5109  1.1  mrg 	if (isl_union_set_foreach_set(schedule_domain, &generate_code_set,
   5110  1.1  mrg 					&data) < 0)
   5111  1.1  mrg 		data.list = isl_ast_graft_list_free(data.list);
   5112  1.1  mrg 
   5113  1.1  mrg 	isl_union_set_free(schedule_domain);
   5114  1.1  mrg 	isl_union_map_free(executed);
   5115  1.1  mrg 
   5116  1.1  mrg 	isl_ast_build_free(build);
   5117  1.1  mrg 	return data.list;
   5118  1.1  mrg error:
   5119  1.1  mrg 	isl_union_map_free(executed);
   5120  1.1  mrg 	isl_ast_build_free(build);
   5121  1.1  mrg 	return NULL;
   5122  1.1  mrg }
   5123  1.1  mrg 
   5124  1.1  mrg /* Generate an AST that visits the elements in the domain of "schedule"
   5125  1.1  mrg  * in the relative order specified by the corresponding image element(s).
   5126  1.1  mrg  *
   5127  1.1  mrg  * "build" is an isl_ast_build that has either been constructed by
   5128  1.1  mrg  * isl_ast_build_from_context or passed to a callback set by
   5129  1.1  mrg  * isl_ast_build_set_create_leaf.
   5130  1.1  mrg  * In the first case, the space of the isl_ast_build is typically
   5131  1.1  mrg  * a parametric space, although this is currently not enforced.
   5132  1.1  mrg  * In the second case, the space is never a parametric space.
   5133  1.1  mrg  * If the space S is not parametric, then the range space(s) of "schedule"
   5134  1.1  mrg  * need to be wrapped relations with S as domain.
   5135  1.1  mrg  *
   5136  1.1  mrg  * If the range of "schedule" consists of several spaces, then an AST
   5137  1.1  mrg  * is generated for each of them (in arbitrary order) and the results
   5138  1.1  mrg  * are concatenated.
   5139  1.1  mrg  *
   5140  1.1  mrg  * We first initialize the local copies of the relevant options.
   5141  1.1  mrg  * We do this here rather than when the isl_ast_build is created
   5142  1.1  mrg  * because the options may have changed between the construction
   5143  1.1  mrg  * of the isl_ast_build and the call to isl_generate_code.
   5144  1.1  mrg  *
   5145  1.1  mrg  * The main computation is performed on an inverse schedule (with
   5146  1.1  mrg  * the schedule domain in the domain and the elements to be executed
   5147  1.1  mrg  * in the range) called "executed".
   5148  1.1  mrg  */
   5149  1.1  mrg __isl_give isl_ast_node *isl_ast_build_node_from_schedule_map(
   5150  1.1  mrg 	__isl_keep isl_ast_build *build, __isl_take isl_union_map *schedule)
   5151  1.1  mrg {
   5152  1.1  mrg 	isl_ast_graft_list *list;
   5153  1.1  mrg 	isl_ast_node *node;
   5154  1.1  mrg 	isl_union_map *executed;
   5155  1.1  mrg 
   5156  1.1  mrg 	build = isl_ast_build_copy(build);
   5157  1.1  mrg 	build = isl_ast_build_set_single_valued(build, 0);
   5158  1.1  mrg 	schedule = isl_union_map_coalesce(schedule);
   5159  1.1  mrg 	schedule = isl_union_map_remove_redundancies(schedule);
   5160  1.1  mrg 	executed = isl_union_map_reverse(schedule);
   5161  1.1  mrg 	list = generate_code(executed, isl_ast_build_copy(build), 0);
   5162  1.1  mrg 	node = isl_ast_node_from_graft_list(list, build);
   5163  1.1  mrg 	isl_ast_build_free(build);
   5164  1.1  mrg 
   5165  1.1  mrg 	return node;
   5166  1.1  mrg }
   5167  1.1  mrg 
   5168  1.1  mrg /* The old name for isl_ast_build_node_from_schedule_map.
   5169  1.1  mrg  * It is being kept for backward compatibility, but
   5170  1.1  mrg  * it will be removed in the future.
   5171  1.1  mrg  */
   5172  1.1  mrg __isl_give isl_ast_node *isl_ast_build_ast_from_schedule(
   5173  1.1  mrg 	__isl_keep isl_ast_build *build, __isl_take isl_union_map *schedule)
   5174  1.1  mrg {
   5175  1.1  mrg 	return isl_ast_build_node_from_schedule_map(build, schedule);
   5176  1.1  mrg }
   5177  1.1  mrg 
   5178  1.1  mrg /* Generate an AST that visits the elements in the domain of "executed"
   5179  1.1  mrg  * in the relative order specified by the leaf node "node".
   5180  1.1  mrg  *
   5181  1.1  mrg  * The relation "executed" maps the outer generated loop iterators
   5182  1.1  mrg  * to the domain elements executed by those iterations.
   5183  1.1  mrg  *
   5184  1.1  mrg  * Simply pass control to generate_inner_level.
   5185  1.1  mrg  * Note that the current build does not refer to any band node, so
   5186  1.1  mrg  * that generate_inner_level will not try to visit the child of
   5187  1.1  mrg  * the leaf node.
   5188  1.1  mrg  *
   5189  1.1  mrg  * If multiple statement instances reach a leaf,
   5190  1.1  mrg  * then they can be executed in any order.
   5191  1.1  mrg  * Group the list of grafts based on shared guards
   5192  1.1  mrg  * such that identical guards are only generated once
   5193  1.1  mrg  * when the list is eventually passed on to isl_ast_graft_list_fuse.
   5194  1.1  mrg  */
   5195  1.1  mrg static __isl_give isl_ast_graft_list *build_ast_from_leaf(
   5196  1.1  mrg 	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
   5197  1.1  mrg 	__isl_take isl_union_map *executed)
   5198  1.1  mrg {
   5199  1.1  mrg 	isl_ast_graft_list *list;
   5200  1.1  mrg 
   5201  1.1  mrg 	isl_schedule_node_free(node);
   5202  1.1  mrg 	list = generate_inner_level(executed, isl_ast_build_copy(build));
   5203  1.1  mrg 	list = isl_ast_graft_list_group_on_guard(list, build);
   5204  1.1  mrg 	isl_ast_build_free(build);
   5205  1.1  mrg 
   5206  1.1  mrg 	return list;
   5207  1.1  mrg }
   5208  1.1  mrg 
   5209  1.1  mrg /* Check that the band partial schedule "partial" does not filter out
   5210  1.1  mrg  * any statement instances, as specified by the range of "executed".
   5211  1.1  mrg  */
   5212  1.1  mrg static isl_stat check_band_schedule_total_on_instances(
   5213  1.1  mrg 	__isl_keep isl_multi_union_pw_aff *partial,
   5214  1.1  mrg 	__isl_keep isl_union_map *executed)
   5215  1.1  mrg {
   5216  1.1  mrg 	isl_bool subset;
   5217  1.1  mrg 	isl_union_set *domain, *instances;
   5218  1.1  mrg 
   5219  1.1  mrg 	instances = isl_union_map_range(isl_union_map_copy(executed));
   5220  1.1  mrg 	partial = isl_multi_union_pw_aff_copy(partial);
   5221  1.1  mrg 	domain = isl_multi_union_pw_aff_domain(partial);
   5222  1.1  mrg 	subset = isl_union_set_is_subset(instances, domain);
   5223  1.1  mrg 	isl_union_set_free(domain);
   5224  1.1  mrg 	isl_union_set_free(instances);
   5225  1.1  mrg 
   5226  1.1  mrg 	if (subset < 0)
   5227  1.1  mrg 		return isl_stat_error;
   5228  1.1  mrg 	if (!subset)
   5229  1.1  mrg 		isl_die(isl_union_map_get_ctx(executed), isl_error_invalid,
   5230  1.1  mrg 			"band node is not allowed to drop statement instances",
   5231  1.1  mrg 			return isl_stat_error);
   5232  1.1  mrg 	return isl_stat_ok;
   5233  1.1  mrg }
   5234  1.1  mrg 
   5235  1.1  mrg /* Generate an AST that visits the elements in the domain of "executed"
   5236  1.1  mrg  * in the relative order specified by the band node "node" and its descendants.
   5237  1.1  mrg  *
   5238  1.1  mrg  * The relation "executed" maps the outer generated loop iterators
   5239  1.1  mrg  * to the domain elements executed by those iterations.
   5240  1.1  mrg  *
   5241  1.1  mrg  * If the band is empty, we continue with its descendants.
   5242  1.1  mrg  * Otherwise, we extend the build and the inverse schedule with
   5243  1.1  mrg  * the additional space/partial schedule and continue generating
   5244  1.1  mrg  * an AST in generate_next_level.
   5245  1.1  mrg  * As soon as we have extended the inverse schedule with the additional
   5246  1.1  mrg  * partial schedule, we look for equalities that may exists between
   5247  1.1  mrg  * the old and the new part.
   5248  1.1  mrg  */
   5249  1.1  mrg static __isl_give isl_ast_graft_list *build_ast_from_band(
   5250  1.1  mrg 	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
   5251  1.1  mrg 	__isl_take isl_union_map *executed)
   5252  1.1  mrg {
   5253  1.1  mrg 	isl_space *space;
   5254  1.1  mrg 	isl_multi_union_pw_aff *extra;
   5255  1.1  mrg 	isl_union_map *extra_umap;
   5256  1.1  mrg 	isl_ast_graft_list *list;
   5257  1.1  mrg 	isl_size n1, n2;
   5258  1.1  mrg 	isl_size n;
   5259  1.1  mrg 
   5260  1.1  mrg 	n = isl_schedule_node_band_n_member(node);
   5261  1.1  mrg 	if (!build || n < 0 || !executed)
   5262  1.1  mrg 		goto error;
   5263  1.1  mrg 
   5264  1.1  mrg 	if (n == 0)
   5265  1.1  mrg 		return build_ast_from_child(build, node, executed);
   5266  1.1  mrg 
   5267  1.1  mrg 	extra = isl_schedule_node_band_get_partial_schedule(node);
   5268  1.1  mrg 	extra = isl_multi_union_pw_aff_align_params(extra,
   5269  1.1  mrg 				isl_ast_build_get_space(build, 1));
   5270  1.1  mrg 	space = isl_multi_union_pw_aff_get_space(extra);
   5271  1.1  mrg 
   5272  1.1  mrg 	if (check_band_schedule_total_on_instances(extra, executed) < 0)
   5273  1.1  mrg 		executed = isl_union_map_free(executed);
   5274  1.1  mrg 
   5275  1.1  mrg 	extra_umap = isl_union_map_from_multi_union_pw_aff(extra);
   5276  1.1  mrg 	extra_umap = isl_union_map_reverse(extra_umap);
   5277  1.1  mrg 
   5278  1.1  mrg 	executed = isl_union_map_domain_product(executed, extra_umap);
   5279  1.1  mrg 	executed = isl_union_map_detect_equalities(executed);
   5280  1.1  mrg 
   5281  1.1  mrg 	n1 = isl_ast_build_dim(build, isl_dim_param);
   5282  1.1  mrg 	build = isl_ast_build_product(build, space);
   5283  1.1  mrg 	n2 = isl_ast_build_dim(build, isl_dim_param);
   5284  1.1  mrg 	if (n1 < 0 || n2 < 0)
   5285  1.1  mrg 		build = isl_ast_build_free(build);
   5286  1.1  mrg 	else if (n2 > n1)
   5287  1.1  mrg 		isl_die(isl_ast_build_get_ctx(build), isl_error_invalid,
   5288  1.1  mrg 			"band node is not allowed to introduce new parameters",
   5289  1.1  mrg 			build = isl_ast_build_free(build));
   5290  1.1  mrg 	build = isl_ast_build_set_schedule_node(build, node);
   5291  1.1  mrg 
   5292  1.1  mrg 	list = generate_next_level(executed, build);
   5293  1.1  mrg 
   5294  1.1  mrg 	list = isl_ast_graft_list_unembed(list, 1);
   5295  1.1  mrg 
   5296  1.1  mrg 	return list;
   5297  1.1  mrg error:
   5298  1.1  mrg 	isl_schedule_node_free(node);
   5299  1.1  mrg 	isl_union_map_free(executed);
   5300  1.1  mrg 	isl_ast_build_free(build);
   5301  1.1  mrg 	return NULL;
   5302  1.1  mrg }
   5303  1.1  mrg 
   5304  1.1  mrg /* Hoist a list of grafts (in practice containing a single graft)
   5305  1.1  mrg  * from "sub_build" (which includes extra context information)
   5306  1.1  mrg  * to "build".
   5307  1.1  mrg  *
   5308  1.1  mrg  * In particular, project out all additional parameters introduced
   5309  1.1  mrg  * by the context node from the enforced constraints and the guard
   5310  1.1  mrg  * of the single graft.
   5311  1.1  mrg  */
   5312  1.1  mrg static __isl_give isl_ast_graft_list *hoist_out_of_context(
   5313  1.1  mrg 	__isl_take isl_ast_graft_list *list, __isl_keep isl_ast_build *build,
   5314  1.1  mrg 	__isl_keep isl_ast_build *sub_build)
   5315  1.1  mrg {
   5316  1.1  mrg 	isl_ast_graft *graft;
   5317  1.1  mrg 	isl_basic_set *enforced;
   5318  1.1  mrg 	isl_set *guard;
   5319  1.1  mrg 	isl_size n_param, extra_param;
   5320  1.1  mrg 
   5321  1.1  mrg 	n_param = isl_ast_build_dim(build, isl_dim_param);
   5322  1.1  mrg 	extra_param = isl_ast_build_dim(sub_build, isl_dim_param);
   5323  1.1  mrg 	if (n_param < 0 || extra_param < 0)
   5324  1.1  mrg 		return isl_ast_graft_list_free(list);
   5325  1.1  mrg 
   5326  1.1  mrg 	if (extra_param == n_param)
   5327  1.1  mrg 		return list;
   5328  1.1  mrg 
   5329  1.1  mrg 	extra_param -= n_param;
   5330  1.1  mrg 	enforced = isl_ast_graft_list_extract_shared_enforced(list, sub_build);
   5331  1.1  mrg 	enforced = isl_basic_set_project_out(enforced, isl_dim_param,
   5332  1.1  mrg 							n_param, extra_param);
   5333  1.1  mrg 	enforced = isl_basic_set_remove_unknown_divs(enforced);
   5334  1.1  mrg 	guard = isl_ast_graft_list_extract_hoistable_guard(list, sub_build);
   5335  1.1  mrg 	guard = isl_set_remove_divs_involving_dims(guard, isl_dim_param,
   5336  1.1  mrg 							n_param, extra_param);
   5337  1.1  mrg 	guard = isl_set_project_out(guard, isl_dim_param, n_param, extra_param);
   5338  1.1  mrg 	guard = isl_set_compute_divs(guard);
   5339  1.1  mrg 	graft = isl_ast_graft_alloc_from_children(list, guard, enforced,
   5340  1.1  mrg 							build, sub_build);
   5341  1.1  mrg 	list = isl_ast_graft_list_from_ast_graft(graft);
   5342  1.1  mrg 
   5343  1.1  mrg 	return list;
   5344  1.1  mrg }
   5345  1.1  mrg 
   5346  1.1  mrg /* Generate an AST that visits the elements in the domain of "executed"
   5347  1.1  mrg  * in the relative order specified by the context node "node"
   5348  1.1  mrg  * and its descendants.
   5349  1.1  mrg  *
   5350  1.1  mrg  * The relation "executed" maps the outer generated loop iterators
   5351  1.1  mrg  * to the domain elements executed by those iterations.
   5352  1.1  mrg  *
   5353  1.1  mrg  * The context node may introduce additional parameters as well as
   5354  1.1  mrg  * constraints on the outer schedule dimensions or original parameters.
   5355  1.1  mrg  *
   5356  1.1  mrg  * We add the extra parameters to a new build and the context
   5357  1.1  mrg  * constraints to both the build and (as a single disjunct)
   5358  1.1  mrg  * to the domain of "executed".  Since the context constraints
   5359  1.1  mrg  * are specified in terms of the input schedule, we first need
   5360  1.1  mrg  * to map them to the internal schedule domain.
   5361  1.1  mrg  *
   5362  1.1  mrg  * After constructing the AST from the descendants of "node",
   5363  1.1  mrg  * we combine the list of grafts into a single graft within
   5364  1.1  mrg  * the new build, in order to be able to exploit the additional
   5365  1.1  mrg  * context constraints during this combination.
   5366  1.1  mrg  *
   5367  1.1  mrg  * Additionally, if the current node is the outermost node in
   5368  1.1  mrg  * the schedule tree (apart from the root domain node), we generate
   5369  1.1  mrg  * all pending guards, again to be able to exploit the additional
   5370  1.1  mrg  * context constraints.  We currently do not do this for internal
   5371  1.1  mrg  * context nodes since we may still want to hoist conditions
   5372  1.1  mrg  * to outer AST nodes.
   5373  1.1  mrg  *
   5374  1.1  mrg  * If the context node introduced any new parameters, then they
   5375  1.1  mrg  * are removed from the set of enforced constraints and guard
   5376  1.1  mrg  * in hoist_out_of_context.
   5377  1.1  mrg  */
   5378  1.1  mrg static __isl_give isl_ast_graft_list *build_ast_from_context(
   5379  1.1  mrg 	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
   5380  1.1  mrg 	__isl_take isl_union_map *executed)
   5381  1.1  mrg {
   5382  1.1  mrg 	isl_set *context;
   5383  1.1  mrg 	isl_space *space;
   5384  1.1  mrg 	isl_multi_aff *internal2input;
   5385  1.1  mrg 	isl_ast_build *sub_build;
   5386  1.1  mrg 	isl_ast_graft_list *list;
   5387  1.1  mrg 	isl_size n;
   5388  1.1  mrg 	isl_size depth;
   5389  1.1  mrg 
   5390  1.1  mrg 	depth = isl_schedule_node_get_tree_depth(node);
   5391  1.1  mrg 	if (depth < 0)
   5392  1.1  mrg 		build = isl_ast_build_free(build);
   5393  1.1  mrg 	space = isl_ast_build_get_space(build, 1);
   5394  1.1  mrg 	context = isl_schedule_node_context_get_context(node);
   5395  1.1  mrg 	context = isl_set_align_params(context, space);
   5396  1.1  mrg 	sub_build = isl_ast_build_copy(build);
   5397  1.1  mrg 	space = isl_set_get_space(context);
   5398  1.1  mrg 	sub_build = isl_ast_build_align_params(sub_build, space);
   5399  1.1  mrg 	internal2input = isl_ast_build_get_internal2input(sub_build);
   5400  1.1  mrg 	context = isl_set_preimage_multi_aff(context, internal2input);
   5401  1.1  mrg 	sub_build = isl_ast_build_restrict_generated(sub_build,
   5402  1.1  mrg 					isl_set_copy(context));
   5403  1.1  mrg 	context = isl_set_from_basic_set(isl_set_simple_hull(context));
   5404  1.1  mrg 	executed = isl_union_map_intersect_domain(executed,
   5405  1.1  mrg 					isl_union_set_from_set(context));
   5406  1.1  mrg 
   5407  1.1  mrg 	list = build_ast_from_child(isl_ast_build_copy(sub_build),
   5408  1.1  mrg 						node, executed);
   5409  1.1  mrg 	n = isl_ast_graft_list_n_ast_graft(list);
   5410  1.1  mrg 	if (n < 0)
   5411  1.1  mrg 		list = isl_ast_graft_list_free(list);
   5412  1.1  mrg 
   5413  1.1  mrg 	list = isl_ast_graft_list_fuse(list, sub_build);
   5414  1.1  mrg 	if (depth == 1)
   5415  1.1  mrg 		list = isl_ast_graft_list_insert_pending_guard_nodes(list,
   5416  1.1  mrg 								sub_build);
   5417  1.1  mrg 	if (n >= 1)
   5418  1.1  mrg 		list = hoist_out_of_context(list, build, sub_build);
   5419  1.1  mrg 
   5420  1.1  mrg 	isl_ast_build_free(build);
   5421  1.1  mrg 	isl_ast_build_free(sub_build);
   5422  1.1  mrg 
   5423  1.1  mrg 	return list;
   5424  1.1  mrg }
   5425  1.1  mrg 
   5426  1.1  mrg /* Generate an AST that visits the elements in the domain of "executed"
   5427  1.1  mrg  * in the relative order specified by the expansion node "node" and
   5428  1.1  mrg  * its descendants.
   5429  1.1  mrg  *
   5430  1.1  mrg  * The relation "executed" maps the outer generated loop iterators
   5431  1.1  mrg  * to the domain elements executed by those iterations.
   5432  1.1  mrg  *
   5433  1.1  mrg  * We expand the domain elements by the expansion and
   5434  1.1  mrg  * continue with the descendants of the node.
   5435  1.1  mrg  */
   5436  1.1  mrg static __isl_give isl_ast_graft_list *build_ast_from_expansion(
   5437  1.1  mrg 	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
   5438  1.1  mrg 	__isl_take isl_union_map *executed)
   5439  1.1  mrg {
   5440  1.1  mrg 	isl_union_map *expansion;
   5441  1.1  mrg 	isl_size n1, n2;
   5442  1.1  mrg 
   5443  1.1  mrg 	expansion = isl_schedule_node_expansion_get_expansion(node);
   5444  1.1  mrg 	expansion = isl_union_map_align_params(expansion,
   5445  1.1  mrg 				isl_union_map_get_space(executed));
   5446  1.1  mrg 
   5447  1.1  mrg 	n1 = isl_union_map_dim(executed, isl_dim_param);
   5448  1.1  mrg 	executed = isl_union_map_apply_range(executed, expansion);
   5449  1.1  mrg 	n2 = isl_union_map_dim(executed, isl_dim_param);
   5450  1.1  mrg 	if (n1 < 0 || n2 < 0)
   5451  1.1  mrg 		goto error;
   5452  1.1  mrg 	if (n2 > n1)
   5453  1.1  mrg 		isl_die(isl_ast_build_get_ctx(build), isl_error_invalid,
   5454  1.1  mrg 			"expansion node is not allowed to introduce "
   5455  1.1  mrg 			"new parameters", goto error);
   5456  1.1  mrg 
   5457  1.1  mrg 	return build_ast_from_child(build, node, executed);
   5458  1.1  mrg error:
   5459  1.1  mrg 	isl_ast_build_free(build);
   5460  1.1  mrg 	isl_schedule_node_free(node);
   5461  1.1  mrg 	isl_union_map_free(executed);
   5462  1.1  mrg 	return NULL;
   5463  1.1  mrg }
   5464  1.1  mrg 
   5465  1.1  mrg /* Generate an AST that visits the elements in the domain of "executed"
   5466  1.1  mrg  * in the relative order specified by the extension node "node" and
   5467  1.1  mrg  * its descendants.
   5468  1.1  mrg  *
   5469  1.1  mrg  * The relation "executed" maps the outer generated loop iterators
   5470  1.1  mrg  * to the domain elements executed by those iterations.
   5471  1.1  mrg  *
   5472  1.1  mrg  * Extend the inverse schedule with the extension applied to current
   5473  1.1  mrg  * set of generated constraints.  Since the extension if formulated
   5474  1.1  mrg  * in terms of the input schedule, it first needs to be transformed
   5475  1.1  mrg  * to refer to the internal schedule.
   5476  1.1  mrg  */
   5477  1.1  mrg static __isl_give isl_ast_graft_list *build_ast_from_extension(
   5478  1.1  mrg 	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
   5479  1.1  mrg 	__isl_take isl_union_map *executed)
   5480  1.1  mrg {
   5481  1.1  mrg 	isl_union_set *schedule_domain;
   5482  1.1  mrg 	isl_union_map *extension;
   5483  1.1  mrg 	isl_set *set;
   5484  1.1  mrg 
   5485  1.1  mrg 	set = isl_ast_build_get_generated(build);
   5486  1.1  mrg 	set = isl_set_from_basic_set(isl_set_simple_hull(set));
   5487  1.1  mrg 	schedule_domain = isl_union_set_from_set(set);
   5488  1.1  mrg 
   5489  1.1  mrg 	extension = isl_schedule_node_extension_get_extension(node);
   5490  1.1  mrg 
   5491  1.1  mrg 	extension = isl_union_map_preimage_domain_multi_aff(extension,
   5492  1.1  mrg 			isl_multi_aff_copy(build->internal2input));
   5493  1.1  mrg 	extension = isl_union_map_intersect_domain(extension, schedule_domain);
   5494  1.1  mrg 	extension = isl_ast_build_substitute_values_union_map_domain(build,
   5495  1.1  mrg 								    extension);
   5496  1.1  mrg 	executed = isl_union_map_union(executed, extension);
   5497  1.1  mrg 
   5498  1.1  mrg 	return build_ast_from_child(build, node, executed);
   5499  1.1  mrg }
   5500  1.1  mrg 
   5501  1.1  mrg /* Generate an AST that visits the elements in the domain of "executed"
   5502  1.1  mrg  * in the relative order specified by the filter node "node" and
   5503  1.1  mrg  * its descendants.
   5504  1.1  mrg  *
   5505  1.1  mrg  * The relation "executed" maps the outer generated loop iterators
   5506  1.1  mrg  * to the domain elements executed by those iterations.
   5507  1.1  mrg  *
   5508  1.1  mrg  * We simply intersect the iteration domain (i.e., the range of "executed")
   5509  1.1  mrg  * with the filter and continue with the descendants of the node,
   5510  1.1  mrg  * unless the resulting inverse schedule is empty, in which
   5511  1.1  mrg  * case we return an empty list.
   5512  1.1  mrg  *
   5513  1.1  mrg  * If the result of the intersection is equal to the original "executed"
   5514  1.1  mrg  * relation, then keep the original representation since the intersection
   5515  1.1  mrg  * may have unnecessarily broken up the relation into a greater number
   5516  1.1  mrg  * of disjuncts.
   5517  1.1  mrg  */
   5518  1.1  mrg static __isl_give isl_ast_graft_list *build_ast_from_filter(
   5519  1.1  mrg 	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
   5520  1.1  mrg 	__isl_take isl_union_map *executed)
   5521  1.1  mrg {
   5522  1.1  mrg 	isl_ctx *ctx;
   5523  1.1  mrg 	isl_union_set *filter;
   5524  1.1  mrg 	isl_union_map *orig;
   5525  1.1  mrg 	isl_ast_graft_list *list;
   5526  1.1  mrg 	int empty;
   5527  1.1  mrg 	isl_bool unchanged;
   5528  1.1  mrg 	isl_size n1, n2;
   5529  1.1  mrg 
   5530  1.1  mrg 	orig = isl_union_map_copy(executed);
   5531  1.1  mrg 	if (!build || !node || !executed)
   5532  1.1  mrg 		goto error;
   5533  1.1  mrg 
   5534  1.1  mrg 	filter = isl_schedule_node_filter_get_filter(node);
   5535  1.1  mrg 	filter = isl_union_set_align_params(filter,
   5536  1.1  mrg 				isl_union_map_get_space(executed));
   5537  1.1  mrg 	n1 = isl_union_map_dim(executed, isl_dim_param);
   5538  1.1  mrg 	executed = isl_union_map_intersect_range(executed, filter);
   5539  1.1  mrg 	n2 = isl_union_map_dim(executed, isl_dim_param);
   5540  1.1  mrg 	if (n1 < 0 || n2 < 0)
   5541  1.1  mrg 		goto error;
   5542  1.1  mrg 	if (n2 > n1)
   5543  1.1  mrg 		isl_die(isl_ast_build_get_ctx(build), isl_error_invalid,
   5544  1.1  mrg 			"filter node is not allowed to introduce "
   5545  1.1  mrg 			"new parameters", goto error);
   5546  1.1  mrg 
   5547  1.1  mrg 	unchanged = isl_union_map_is_subset(orig, executed);
   5548  1.1  mrg 	empty = isl_union_map_is_empty(executed);
   5549  1.1  mrg 	if (unchanged < 0 || empty < 0)
   5550  1.1  mrg 		goto error;
   5551  1.1  mrg 	if (unchanged) {
   5552  1.1  mrg 		isl_union_map_free(executed);
   5553  1.1  mrg 		return build_ast_from_child(build, node, orig);
   5554  1.1  mrg 	}
   5555  1.1  mrg 	isl_union_map_free(orig);
   5556  1.1  mrg 	if (!empty)
   5557  1.1  mrg 		return build_ast_from_child(build, node, executed);
   5558  1.1  mrg 
   5559  1.1  mrg 	ctx = isl_ast_build_get_ctx(build);
   5560  1.1  mrg 	list = isl_ast_graft_list_alloc(ctx, 0);
   5561  1.1  mrg 	isl_ast_build_free(build);
   5562  1.1  mrg 	isl_schedule_node_free(node);
   5563  1.1  mrg 	isl_union_map_free(executed);
   5564  1.1  mrg 	return list;
   5565  1.1  mrg error:
   5566  1.1  mrg 	isl_ast_build_free(build);
   5567  1.1  mrg 	isl_schedule_node_free(node);
   5568  1.1  mrg 	isl_union_map_free(executed);
   5569  1.1  mrg 	isl_union_map_free(orig);
   5570  1.1  mrg 	return NULL;
   5571  1.1  mrg }
   5572  1.1  mrg 
   5573  1.1  mrg /* Generate an AST that visits the elements in the domain of "executed"
   5574  1.1  mrg  * in the relative order specified by the guard node "node" and
   5575  1.1  mrg  * its descendants.
   5576  1.1  mrg  *
   5577  1.1  mrg  * The relation "executed" maps the outer generated loop iterators
   5578  1.1  mrg  * to the domain elements executed by those iterations.
   5579  1.1  mrg  *
   5580  1.1  mrg  * Ensure that the associated guard is enforced by the outer AST
   5581  1.1  mrg  * constructs by adding it to the guard of the graft.
   5582  1.1  mrg  * Since we know that we will enforce the guard, we can also include it
   5583  1.1  mrg  * in the generated constraints used to construct an AST for
   5584  1.1  mrg  * the descendant nodes.
   5585  1.1  mrg  */
   5586  1.1  mrg static __isl_give isl_ast_graft_list *build_ast_from_guard(
   5587  1.1  mrg 	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
   5588  1.1  mrg 	__isl_take isl_union_map *executed)
   5589  1.1  mrg {
   5590  1.1  mrg 	isl_space *space;
   5591  1.1  mrg 	isl_set *guard, *hoisted;
   5592  1.1  mrg 	isl_basic_set *enforced;
   5593  1.1  mrg 	isl_ast_build *sub_build;
   5594  1.1  mrg 	isl_ast_graft *graft;
   5595  1.1  mrg 	isl_ast_graft_list *list;
   5596  1.1  mrg 	isl_size n1, n2, n;
   5597  1.1  mrg 
   5598  1.1  mrg 	space = isl_ast_build_get_space(build, 1);
   5599  1.1  mrg 	guard = isl_schedule_node_guard_get_guard(node);
   5600  1.1  mrg 	n1 = isl_space_dim(space, isl_dim_param);
   5601  1.1  mrg 	guard = isl_set_align_params(guard, space);
   5602  1.1  mrg 	n2 = isl_set_dim(guard, isl_dim_param);
   5603  1.1  mrg 	if (n1 < 0 || n2 < 0)
   5604  1.1  mrg 		guard = isl_set_free(guard);
   5605  1.1  mrg 	else if (n2 > n1)
   5606  1.1  mrg 		isl_die(isl_ast_build_get_ctx(build), isl_error_invalid,
   5607  1.1  mrg 			"guard node is not allowed to introduce "
   5608  1.1  mrg 			"new parameters", guard = isl_set_free(guard));
   5609  1.1  mrg 	guard = isl_set_preimage_multi_aff(guard,
   5610  1.1  mrg 			isl_multi_aff_copy(build->internal2input));
   5611  1.1  mrg 	guard = isl_ast_build_specialize(build, guard);
   5612  1.1  mrg 	guard = isl_set_gist(guard, isl_set_copy(build->generated));
   5613  1.1  mrg 
   5614  1.1  mrg 	sub_build = isl_ast_build_copy(build);
   5615  1.1  mrg 	sub_build = isl_ast_build_restrict_generated(sub_build,
   5616  1.1  mrg 							isl_set_copy(guard));
   5617  1.1  mrg 
   5618  1.1  mrg 	list = build_ast_from_child(isl_ast_build_copy(sub_build),
   5619  1.1  mrg 							node, executed);
   5620  1.1  mrg 
   5621  1.1  mrg 	hoisted = isl_ast_graft_list_extract_hoistable_guard(list, sub_build);
   5622  1.1  mrg 	n = isl_set_n_basic_set(hoisted);
   5623  1.1  mrg 	if (n < 0)
   5624  1.1  mrg 		list = isl_ast_graft_list_free(list);
   5625  1.1  mrg 	if (n > 1)
   5626  1.1  mrg 		list = isl_ast_graft_list_gist_guards(list,
   5627  1.1  mrg 						    isl_set_copy(hoisted));
   5628  1.1  mrg 	guard = isl_set_intersect(guard, hoisted);
   5629  1.1  mrg 	enforced = extract_shared_enforced(list, build);
   5630  1.1  mrg 	graft = isl_ast_graft_alloc_from_children(list, guard, enforced,
   5631  1.1  mrg 						    build, sub_build);
   5632  1.1  mrg 
   5633  1.1  mrg 	isl_ast_build_free(sub_build);
   5634  1.1  mrg 	isl_ast_build_free(build);
   5635  1.1  mrg 	return isl_ast_graft_list_from_ast_graft(graft);
   5636  1.1  mrg }
   5637  1.1  mrg 
   5638  1.1  mrg /* Call the before_each_mark callback, if requested by the user.
   5639  1.1  mrg  *
   5640  1.1  mrg  * Return 0 on success and -1 on error.
   5641  1.1  mrg  *
   5642  1.1  mrg  * The caller is responsible for recording the current inverse schedule
   5643  1.1  mrg  * in "build".
   5644  1.1  mrg  */
   5645  1.1  mrg static isl_stat before_each_mark(__isl_keep isl_id *mark,
   5646  1.1  mrg 	__isl_keep isl_ast_build *build)
   5647  1.1  mrg {
   5648  1.1  mrg 	if (!build)
   5649  1.1  mrg 		return isl_stat_error;
   5650  1.1  mrg 	if (!build->before_each_mark)
   5651  1.1  mrg 		return isl_stat_ok;
   5652  1.1  mrg 	return build->before_each_mark(mark, build,
   5653  1.1  mrg 					build->before_each_mark_user);
   5654  1.1  mrg }
   5655  1.1  mrg 
   5656  1.1  mrg /* Call the after_each_mark callback, if requested by the user.
   5657  1.1  mrg  *
   5658  1.1  mrg  * The caller is responsible for recording the current inverse schedule
   5659  1.1  mrg  * in "build".
   5660  1.1  mrg  */
   5661  1.1  mrg static __isl_give isl_ast_graft *after_each_mark(
   5662  1.1  mrg 	__isl_take isl_ast_graft *graft, __isl_keep isl_ast_build *build)
   5663  1.1  mrg {
   5664  1.1  mrg 	if (!graft || !build)
   5665  1.1  mrg 		return isl_ast_graft_free(graft);
   5666  1.1  mrg 	if (!build->after_each_mark)
   5667  1.1  mrg 		return graft;
   5668  1.1  mrg 	graft->node = build->after_each_mark(graft->node, build,
   5669  1.1  mrg 						build->after_each_mark_user);
   5670  1.1  mrg 	if (!graft->node)
   5671  1.1  mrg 		return isl_ast_graft_free(graft);
   5672  1.1  mrg 	return graft;
   5673  1.1  mrg }
   5674  1.1  mrg 
   5675  1.1  mrg 
   5676  1.1  mrg /* Generate an AST that visits the elements in the domain of "executed"
   5677  1.1  mrg  * in the relative order specified by the mark node "node" and
   5678  1.1  mrg  * its descendants.
   5679  1.1  mrg  *
   5680  1.1  mrg  * The relation "executed" maps the outer generated loop iterators
   5681  1.1  mrg  * to the domain elements executed by those iterations.
   5682  1.1  mrg 
   5683  1.1  mrg  * Since we may be calling before_each_mark and after_each_mark
   5684  1.1  mrg  * callbacks, we record the current inverse schedule in the build.
   5685  1.1  mrg  *
   5686  1.1  mrg  * We generate an AST for the child of the mark node, combine
   5687  1.1  mrg  * the graft list into a single graft and then insert the mark
   5688  1.1  mrg  * in the AST of that single graft.
   5689  1.1  mrg  */
   5690  1.1  mrg static __isl_give isl_ast_graft_list *build_ast_from_mark(
   5691  1.1  mrg 	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
   5692  1.1  mrg 	__isl_take isl_union_map *executed)
   5693  1.1  mrg {
   5694  1.1  mrg 	isl_id *mark;
   5695  1.1  mrg 	isl_ast_graft *graft;
   5696  1.1  mrg 	isl_ast_graft_list *list;
   5697  1.1  mrg 	isl_size n;
   5698  1.1  mrg 
   5699  1.1  mrg 	build = isl_ast_build_set_executed(build, isl_union_map_copy(executed));
   5700  1.1  mrg 
   5701  1.1  mrg 	mark = isl_schedule_node_mark_get_id(node);
   5702  1.1  mrg 	if (before_each_mark(mark, build) < 0)
   5703  1.1  mrg 		node = isl_schedule_node_free(node);
   5704  1.1  mrg 
   5705  1.1  mrg 	list = build_ast_from_child(isl_ast_build_copy(build), node, executed);
   5706  1.1  mrg 	list = isl_ast_graft_list_fuse(list, build);
   5707  1.1  mrg 	n = isl_ast_graft_list_n_ast_graft(list);
   5708  1.1  mrg 	if (n < 0)
   5709  1.1  mrg 		list = isl_ast_graft_list_free(list);
   5710  1.1  mrg 	if (n == 0) {
   5711  1.1  mrg 		isl_id_free(mark);
   5712  1.1  mrg 	} else {
   5713  1.1  mrg 		graft = isl_ast_graft_list_get_ast_graft(list, 0);
   5714  1.1  mrg 		graft = isl_ast_graft_insert_mark(graft, mark);
   5715  1.1  mrg 		graft = after_each_mark(graft, build);
   5716  1.1  mrg 		list = isl_ast_graft_list_set_ast_graft(list, 0, graft);
   5717  1.1  mrg 	}
   5718  1.1  mrg 	isl_ast_build_free(build);
   5719  1.1  mrg 
   5720  1.1  mrg 	return list;
   5721  1.1  mrg }
   5722  1.1  mrg 
   5723  1.1  mrg static __isl_give isl_ast_graft_list *build_ast_from_schedule_node(
   5724  1.1  mrg 	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
   5725  1.1  mrg 	__isl_take isl_union_map *executed);
   5726  1.1  mrg 
   5727  1.1  mrg /* Generate an AST that visits the elements in the domain of "executed"
   5728  1.1  mrg  * in the relative order specified by the sequence (or set) node "node" and
   5729  1.1  mrg  * its descendants.
   5730  1.1  mrg  *
   5731  1.1  mrg  * The relation "executed" maps the outer generated loop iterators
   5732  1.1  mrg  * to the domain elements executed by those iterations.
   5733  1.1  mrg  *
   5734  1.1  mrg  * We simply generate an AST for each of the children and concatenate
   5735  1.1  mrg  * the results.
   5736  1.1  mrg  */
   5737  1.1  mrg static __isl_give isl_ast_graft_list *build_ast_from_sequence(
   5738  1.1  mrg 	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
   5739  1.1  mrg 	__isl_take isl_union_map *executed)
   5740  1.1  mrg {
   5741  1.1  mrg 	int i;
   5742  1.1  mrg 	isl_size n;
   5743  1.1  mrg 	isl_ctx *ctx;
   5744  1.1  mrg 	isl_ast_graft_list *list;
   5745  1.1  mrg 
   5746  1.1  mrg 	ctx = isl_ast_build_get_ctx(build);
   5747  1.1  mrg 	list = isl_ast_graft_list_alloc(ctx, 0);
   5748  1.1  mrg 
   5749  1.1  mrg 	n = isl_schedule_node_n_children(node);
   5750  1.1  mrg 	if (n < 0)
   5751  1.1  mrg 		list = isl_ast_graft_list_free(list);
   5752  1.1  mrg 	for (i = 0; i < n; ++i) {
   5753  1.1  mrg 		isl_schedule_node *child;
   5754  1.1  mrg 		isl_ast_graft_list *list_i;
   5755  1.1  mrg 
   5756  1.1  mrg 		child = isl_schedule_node_get_child(node, i);
   5757  1.1  mrg 		list_i = build_ast_from_schedule_node(isl_ast_build_copy(build),
   5758  1.1  mrg 					child, isl_union_map_copy(executed));
   5759  1.1  mrg 		list = isl_ast_graft_list_concat(list, list_i);
   5760  1.1  mrg 	}
   5761  1.1  mrg 	isl_ast_build_free(build);
   5762  1.1  mrg 	isl_schedule_node_free(node);
   5763  1.1  mrg 	isl_union_map_free(executed);
   5764  1.1  mrg 
   5765  1.1  mrg 	return list;
   5766  1.1  mrg }
   5767  1.1  mrg 
   5768  1.1  mrg /* Generate an AST that visits the elements in the domain of "executed"
   5769  1.1  mrg  * in the relative order specified by the node "node" and its descendants.
   5770  1.1  mrg  *
   5771  1.1  mrg  * The relation "executed" maps the outer generated loop iterators
   5772  1.1  mrg  * to the domain elements executed by those iterations.
   5773  1.1  mrg  *
   5774  1.1  mrg  * The node types are handled in separate functions.
   5775  1.1  mrg  * Set nodes are currently treated in the same way as sequence nodes.
   5776  1.1  mrg  * The children of a set node may be executed in any order,
   5777  1.1  mrg  * including the order of the children.
   5778  1.1  mrg  */
   5779  1.1  mrg static __isl_give isl_ast_graft_list *build_ast_from_schedule_node(
   5780  1.1  mrg 	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
   5781  1.1  mrg 	__isl_take isl_union_map *executed)
   5782  1.1  mrg {
   5783  1.1  mrg 	enum isl_schedule_node_type type;
   5784  1.1  mrg 
   5785  1.1  mrg 	type = isl_schedule_node_get_type(node);
   5786  1.1  mrg 
   5787  1.1  mrg 	switch (type) {
   5788  1.1  mrg 	case isl_schedule_node_error:
   5789  1.1  mrg 		goto error;
   5790  1.1  mrg 	case isl_schedule_node_leaf:
   5791  1.1  mrg 		return build_ast_from_leaf(build, node, executed);
   5792  1.1  mrg 	case isl_schedule_node_band:
   5793  1.1  mrg 		return build_ast_from_band(build, node, executed);
   5794  1.1  mrg 	case isl_schedule_node_context:
   5795  1.1  mrg 		return build_ast_from_context(build, node, executed);
   5796  1.1  mrg 	case isl_schedule_node_domain:
   5797  1.1  mrg 		isl_die(isl_schedule_node_get_ctx(node), isl_error_unsupported,
   5798  1.1  mrg 			"unexpected internal domain node", goto error);
   5799  1.1  mrg 	case isl_schedule_node_expansion:
   5800  1.1  mrg 		return build_ast_from_expansion(build, node, executed);
   5801  1.1  mrg 	case isl_schedule_node_extension:
   5802  1.1  mrg 		return build_ast_from_extension(build, node, executed);
   5803  1.1  mrg 	case isl_schedule_node_filter:
   5804  1.1  mrg 		return build_ast_from_filter(build, node, executed);
   5805  1.1  mrg 	case isl_schedule_node_guard:
   5806  1.1  mrg 		return build_ast_from_guard(build, node, executed);
   5807  1.1  mrg 	case isl_schedule_node_mark:
   5808  1.1  mrg 		return build_ast_from_mark(build, node, executed);
   5809  1.1  mrg 	case isl_schedule_node_sequence:
   5810  1.1  mrg 	case isl_schedule_node_set:
   5811  1.1  mrg 		return build_ast_from_sequence(build, node, executed);
   5812  1.1  mrg 	}
   5813  1.1  mrg 
   5814  1.1  mrg 	isl_die(isl_ast_build_get_ctx(build), isl_error_internal,
   5815  1.1  mrg 		"unhandled type", goto error);
   5816  1.1  mrg error:
   5817  1.1  mrg 	isl_union_map_free(executed);
   5818  1.1  mrg 	isl_schedule_node_free(node);
   5819  1.1  mrg 	isl_ast_build_free(build);
   5820  1.1  mrg 
   5821  1.1  mrg 	return NULL;
   5822  1.1  mrg }
   5823  1.1  mrg 
   5824  1.1  mrg /* Generate an AST that visits the elements in the domain of "executed"
   5825  1.1  mrg  * in the relative order specified by the (single) child of "node" and
   5826  1.1  mrg  * its descendants.
   5827  1.1  mrg  *
   5828  1.1  mrg  * The relation "executed" maps the outer generated loop iterators
   5829  1.1  mrg  * to the domain elements executed by those iterations.
   5830  1.1  mrg  *
   5831  1.1  mrg  * This function is never called on a leaf, set or sequence node,
   5832  1.1  mrg  * so the node always has exactly one child.
   5833  1.1  mrg  */
   5834  1.1  mrg static __isl_give isl_ast_graft_list *build_ast_from_child(
   5835  1.1  mrg 	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
   5836  1.1  mrg 	__isl_take isl_union_map *executed)
   5837  1.1  mrg {
   5838  1.1  mrg 	node = isl_schedule_node_child(node, 0);
   5839  1.1  mrg 	return build_ast_from_schedule_node(build, node, executed);
   5840  1.1  mrg }
   5841  1.1  mrg 
   5842  1.1  mrg /* Generate an AST that visits the elements in the domain of the domain
   5843  1.1  mrg  * node "node" in the relative order specified by its descendants.
   5844  1.1  mrg  *
   5845  1.1  mrg  * An initial inverse schedule is created that maps a zero-dimensional
   5846  1.1  mrg  * schedule space to the node domain.
   5847  1.1  mrg  * The input "build" is assumed to have a parametric domain and
   5848  1.1  mrg  * is replaced by the same zero-dimensional schedule space.
   5849  1.1  mrg  *
   5850  1.1  mrg  * We also add some of the parameter constraints in the build domain
   5851  1.1  mrg  * to the executed relation.  Adding these constraints
   5852  1.1  mrg  * allows for an earlier detection of conflicts in some cases.
   5853  1.1  mrg  * However, we do not want to divide the executed relation into
   5854  1.1  mrg  * more disjuncts than necessary.  We therefore approximate
   5855  1.1  mrg  * the constraints on the parameters by a single disjunct set.
   5856  1.1  mrg  */
   5857  1.1  mrg static __isl_give isl_ast_node *build_ast_from_domain(
   5858  1.1  mrg 	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node)
   5859  1.1  mrg {
   5860  1.1  mrg 	isl_ctx *ctx;
   5861  1.1  mrg 	isl_union_set *domain, *schedule_domain;
   5862  1.1  mrg 	isl_union_map *executed;
   5863  1.1  mrg 	isl_space *space;
   5864  1.1  mrg 	isl_set *set;
   5865  1.1  mrg 	isl_ast_graft_list *list;
   5866  1.1  mrg 	isl_ast_node *ast;
   5867  1.1  mrg 	int is_params;
   5868  1.1  mrg 
   5869  1.1  mrg 	if (!build)
   5870  1.1  mrg 		goto error;
   5871  1.1  mrg 
   5872  1.1  mrg 	ctx = isl_ast_build_get_ctx(build);
   5873  1.1  mrg 	space = isl_ast_build_get_space(build, 1);
   5874  1.1  mrg 	is_params = isl_space_is_params(space);
   5875  1.1  mrg 	isl_space_free(space);
   5876  1.1  mrg 	if (is_params < 0)
   5877  1.1  mrg 		goto error;
   5878  1.1  mrg 	if (!is_params)
   5879  1.1  mrg 		isl_die(ctx, isl_error_unsupported,
   5880  1.1  mrg 			"expecting parametric initial context", goto error);
   5881  1.1  mrg 
   5882  1.1  mrg 	domain = isl_schedule_node_domain_get_domain(node);
   5883  1.1  mrg 	domain = isl_union_set_coalesce(domain);
   5884  1.1  mrg 
   5885  1.1  mrg 	space = isl_union_set_get_space(domain);
   5886  1.1  mrg 	space = isl_space_set_from_params(space);
   5887  1.1  mrg 	build = isl_ast_build_product(build, space);
   5888  1.1  mrg 
   5889  1.1  mrg 	set = isl_ast_build_get_domain(build);
   5890  1.1  mrg 	set = isl_set_from_basic_set(isl_set_simple_hull(set));
   5891  1.1  mrg 	schedule_domain = isl_union_set_from_set(set);
   5892  1.1  mrg 
   5893  1.1  mrg 	executed = isl_union_map_from_domain_and_range(schedule_domain, domain);
   5894  1.1  mrg 	list = build_ast_from_child(isl_ast_build_copy(build), node, executed);
   5895  1.1  mrg 	ast = isl_ast_node_from_graft_list(list, build);
   5896  1.1  mrg 	isl_ast_build_free(build);
   5897  1.1  mrg 
   5898  1.1  mrg 	return ast;
   5899  1.1  mrg error:
   5900  1.1  mrg 	isl_schedule_node_free(node);
   5901  1.1  mrg 	isl_ast_build_free(build);
   5902  1.1  mrg 	return NULL;
   5903  1.1  mrg }
   5904  1.1  mrg 
   5905  1.1  mrg /* Generate an AST that visits the elements in the domain of "schedule"
   5906  1.1  mrg  * in the relative order specified by the schedule tree.
   5907  1.1  mrg  *
   5908  1.1  mrg  * "build" is an isl_ast_build that has been created using
   5909  1.1  mrg  * isl_ast_build_alloc or isl_ast_build_from_context based
   5910  1.1  mrg  * on a parametric set.
   5911  1.1  mrg  *
   5912  1.1  mrg  * The construction starts at the root node of the schedule,
   5913  1.1  mrg  * which is assumed to be a domain node.
   5914  1.1  mrg  */
   5915  1.1  mrg __isl_give isl_ast_node *isl_ast_build_node_from_schedule(
   5916  1.1  mrg 	__isl_keep isl_ast_build *build, __isl_take isl_schedule *schedule)
   5917  1.1  mrg {
   5918  1.1  mrg 	isl_ctx *ctx;
   5919  1.1  mrg 	isl_schedule_node *node;
   5920  1.1  mrg 
   5921  1.1  mrg 	if (!build || !schedule)
   5922  1.1  mrg 		goto error;
   5923  1.1  mrg 
   5924  1.1  mrg 	ctx = isl_ast_build_get_ctx(build);
   5925  1.1  mrg 
   5926  1.1  mrg 	node = isl_schedule_get_root(schedule);
   5927  1.1  mrg 	if (!node)
   5928  1.1  mrg 		goto error;
   5929  1.1  mrg 	isl_schedule_free(schedule);
   5930  1.1  mrg 
   5931  1.1  mrg 	build = isl_ast_build_copy(build);
   5932  1.1  mrg 	build = isl_ast_build_set_single_valued(build, 0);
   5933  1.1  mrg 	if (isl_schedule_node_get_type(node) != isl_schedule_node_domain)
   5934  1.1  mrg 		isl_die(ctx, isl_error_unsupported,
   5935  1.1  mrg 			"expecting root domain node",
   5936  1.1  mrg 			build = isl_ast_build_free(build));
   5937  1.1  mrg 	return build_ast_from_domain(build, node);
   5938  1.1  mrg error:
   5939  1.1  mrg 	isl_schedule_free(schedule);
   5940  1.1  mrg 	return NULL;
   5941  1.1  mrg }
   5942