/*
 * Copyright 2012-2014 Ecole Normale Superieure
 * Copyright 2014 INRIA Rocquencourt
 *
 * Use of this software is governed by the MIT license
 *
 * Written by Sven Verdoolaege,
 * Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France
 * and Inria Paris - Rocquencourt, Domaine de Voluceau - Rocquencourt,
 * B.P. 105 - 78153 Le Chesnay, France
 */
12 1.1 mrg
#include <limits.h>
#include <isl/id.h>
#include <isl/val.h>
#include <isl/space.h>
#include <isl/aff.h>
#include <isl/constraint.h>
#include <isl/set.h>
#include <isl/ilp.h>
#include <isl/union_set.h>
#include <isl/union_map.h>
#include <isl/schedule_node.h>
#include <isl/options.h>
#include <isl_sort.h>
#include <isl_tarjan.h>
#include <isl_ast_private.h>
#include <isl_ast_build_expr.h>
#include <isl_ast_build_private.h>
#include <isl_ast_graft_private.h>
31 1.1 mrg
32 1.1 mrg /* Try and reduce the number of disjuncts in the representation of "set",
33 1.1 mrg * without dropping explicit representations of local variables.
34 1.1 mrg */
35 1.1 mrg static __isl_give isl_set *isl_set_coalesce_preserve(__isl_take isl_set *set)
36 1.1 mrg {
37 1.1 mrg isl_ctx *ctx;
38 1.1 mrg int save_preserve;
39 1.1 mrg
40 1.1 mrg if (!set)
41 1.1 mrg return NULL;
42 1.1 mrg
43 1.1 mrg ctx = isl_set_get_ctx(set);
44 1.1 mrg save_preserve = isl_options_get_coalesce_preserve_locals(ctx);
45 1.1 mrg isl_options_set_coalesce_preserve_locals(ctx, 1);
46 1.1 mrg set = isl_set_coalesce(set);
47 1.1 mrg isl_options_set_coalesce_preserve_locals(ctx, save_preserve);
48 1.1 mrg return set;
49 1.1 mrg }
50 1.1 mrg
51 1.1 mrg /* Data used in generate_domain.
52 1.1 mrg *
53 1.1 mrg * "build" is the input build.
54 1.1 mrg * "list" collects the results.
55 1.1 mrg */
56 1.1 mrg struct isl_generate_domain_data {
57 1.1 mrg isl_ast_build *build;
58 1.1 mrg
59 1.1 mrg isl_ast_graft_list *list;
60 1.1 mrg };
61 1.1 mrg
62 1.1 mrg static __isl_give isl_ast_graft_list *generate_next_level(
63 1.1 mrg __isl_take isl_union_map *executed,
64 1.1 mrg __isl_take isl_ast_build *build);
65 1.1 mrg static __isl_give isl_ast_graft_list *generate_code(
66 1.1 mrg __isl_take isl_union_map *executed, __isl_take isl_ast_build *build,
67 1.1 mrg int internal);
68 1.1 mrg
69 1.1 mrg /* Generate an AST for a single domain based on
70 1.1 mrg * the (non single valued) inverse schedule "executed".
71 1.1 mrg *
72 1.1 mrg * We extend the schedule with the iteration domain
73 1.1 mrg * and continue generating through a call to generate_code.
74 1.1 mrg *
75 1.1 mrg * In particular, if executed has the form
76 1.1 mrg *
77 1.1 mrg * S -> D
78 1.1 mrg *
79 1.1 mrg * then we continue generating code on
80 1.1 mrg *
81 1.1 mrg * [S -> D] -> D
82 1.1 mrg *
83 1.1 mrg * The extended inverse schedule is clearly single valued
84 1.1 mrg * ensuring that the nested generate_code will not reach this function,
85 1.1 mrg * but will instead create calls to all elements of D that need
86 1.1 mrg * to be executed from the current schedule domain.
87 1.1 mrg */
88 1.1 mrg static isl_stat generate_non_single_valued(__isl_take isl_map *executed,
89 1.1 mrg struct isl_generate_domain_data *data)
90 1.1 mrg {
91 1.1 mrg isl_map *identity;
92 1.1 mrg isl_ast_build *build;
93 1.1 mrg isl_ast_graft_list *list;
94 1.1 mrg
95 1.1 mrg build = isl_ast_build_copy(data->build);
96 1.1 mrg
97 1.1 mrg identity = isl_set_identity(isl_map_range(isl_map_copy(executed)));
98 1.1 mrg executed = isl_map_domain_product(executed, identity);
99 1.1 mrg build = isl_ast_build_set_single_valued(build, 1);
100 1.1 mrg
101 1.1 mrg list = generate_code(isl_union_map_from_map(executed), build, 1);
102 1.1 mrg
103 1.1 mrg data->list = isl_ast_graft_list_concat(data->list, list);
104 1.1 mrg
105 1.1 mrg return isl_stat_ok;
106 1.1 mrg }
107 1.1 mrg
108 1.1 mrg /* Call the at_each_domain callback, if requested by the user,
109 1.1 mrg * after recording the current inverse schedule in the build.
110 1.1 mrg */
111 1.1 mrg static __isl_give isl_ast_graft *at_each_domain(__isl_take isl_ast_graft *graft,
112 1.1 mrg __isl_keep isl_map *executed, __isl_keep isl_ast_build *build)
113 1.1 mrg {
114 1.1 mrg if (!graft || !build)
115 1.1 mrg return isl_ast_graft_free(graft);
116 1.1 mrg if (!build->at_each_domain)
117 1.1 mrg return graft;
118 1.1 mrg
119 1.1 mrg build = isl_ast_build_copy(build);
120 1.1 mrg build = isl_ast_build_set_executed(build,
121 1.1 mrg isl_union_map_from_map(isl_map_copy(executed)));
122 1.1 mrg if (!build)
123 1.1 mrg return isl_ast_graft_free(graft);
124 1.1 mrg
125 1.1 mrg graft->node = build->at_each_domain(graft->node,
126 1.1 mrg build, build->at_each_domain_user);
127 1.1 mrg isl_ast_build_free(build);
128 1.1 mrg
129 1.1 mrg if (!graft->node)
130 1.1 mrg graft = isl_ast_graft_free(graft);
131 1.1 mrg
132 1.1 mrg return graft;
133 1.1 mrg }
134 1.1 mrg
135 1.1 mrg /* Generate a call expression for the single executed
136 1.1 mrg * domain element "map" and put a guard around it based its (simplified)
137 1.1 mrg * domain. "executed" is the original inverse schedule from which "map"
138 1.1 mrg * has been derived. In particular, "map" is either identical to "executed"
139 1.1 mrg * or it is the result of gisting "executed" with respect to the build domain.
140 1.1 mrg * "executed" is only used if there is an at_each_domain callback.
141 1.1 mrg *
142 1.1 mrg * At this stage, any pending constraints in the build can no longer
143 1.1 mrg * be simplified with respect to any enforced constraints since
144 1.1 mrg * the call node does not have any enforced constraints.
145 1.1 mrg * Since all pending constraints not covered by any enforced constraints
146 1.1 mrg * will be added as a guard to the graft in create_node_scaled,
147 1.1 mrg * even in the eliminated case, the pending constraints
148 1.1 mrg * can be considered to have been generated by outer constructs.
149 1.1 mrg *
150 1.1 mrg * If the user has set an at_each_domain callback, it is called
151 1.1 mrg * on the constructed call expression node.
152 1.1 mrg */
153 1.1 mrg static isl_stat add_domain(__isl_take isl_map *executed,
154 1.1 mrg __isl_take isl_map *map, struct isl_generate_domain_data *data)
155 1.1 mrg {
156 1.1 mrg isl_ast_build *build;
157 1.1 mrg isl_ast_graft *graft;
158 1.1 mrg isl_ast_graft_list *list;
159 1.1 mrg isl_set *guard, *pending;
160 1.1 mrg
161 1.1 mrg build = isl_ast_build_copy(data->build);
162 1.1 mrg pending = isl_ast_build_get_pending(build);
163 1.1 mrg build = isl_ast_build_replace_pending_by_guard(build, pending);
164 1.1 mrg
165 1.1 mrg guard = isl_map_domain(isl_map_copy(map));
166 1.1 mrg guard = isl_set_compute_divs(guard);
167 1.1 mrg guard = isl_set_coalesce_preserve(guard);
168 1.1 mrg guard = isl_set_gist(guard, isl_ast_build_get_generated(build));
169 1.1 mrg guard = isl_ast_build_specialize(build, guard);
170 1.1 mrg
171 1.1 mrg graft = isl_ast_graft_alloc_domain(map, build);
172 1.1 mrg graft = at_each_domain(graft, executed, build);
173 1.1 mrg isl_ast_build_free(build);
174 1.1 mrg isl_map_free(executed);
175 1.1 mrg graft = isl_ast_graft_add_guard(graft, guard, data->build);
176 1.1 mrg
177 1.1 mrg list = isl_ast_graft_list_from_ast_graft(graft);
178 1.1 mrg data->list = isl_ast_graft_list_concat(data->list, list);
179 1.1 mrg
180 1.1 mrg return isl_stat_ok;
181 1.1 mrg }
182 1.1 mrg
183 1.1 mrg /* Generate an AST for a single domain based on
184 1.1 mrg * the inverse schedule "executed" and add it to data->list.
185 1.1 mrg *
186 1.1 mrg * If there is more than one domain element associated to the current
187 1.1 mrg * schedule "time", then we need to continue the generation process
188 1.1 mrg * in generate_non_single_valued.
189 1.1 mrg * Note that the inverse schedule being single-valued may depend
190 1.1 mrg * on constraints that are only available in the original context
191 1.1 mrg * domain specified by the user. We therefore first introduce
192 1.1 mrg * some of the constraints of data->build->domain. In particular,
193 1.1 mrg * we intersect with a single-disjunct approximation of this set.
194 1.1 mrg * We perform this approximation to avoid further splitting up
195 1.1 mrg * the executed relation, possibly introducing a disjunctive guard
196 1.1 mrg * on the statement.
197 1.1 mrg *
198 1.1 mrg * On the other hand, we only perform the test after having taken the gist
199 1.1 mrg * of the domain as the resulting map is the one from which the call
200 1.1 mrg * expression is constructed. Using this map to construct the call
201 1.1 mrg * expression usually yields simpler results in cases where the original
202 1.1 mrg * map is not obviously single-valued.
203 1.1 mrg * If the original map is obviously single-valued, then the gist
204 1.1 mrg * operation is skipped.
205 1.1 mrg *
206 1.1 mrg * Because we perform the single-valuedness test on the gisted map,
207 1.1 mrg * we may in rare cases fail to recognize that the inverse schedule
208 1.1 mrg * is single-valued. This becomes problematic if this happens
209 1.1 mrg * from the recursive call through generate_non_single_valued
210 1.1 mrg * as we would then end up in an infinite recursion.
211 1.1 mrg * We therefore check if we are inside a call to generate_non_single_valued
212 1.1 mrg * and revert to the ungisted map if the gisted map turns out not to be
213 1.1 mrg * single-valued.
214 1.1 mrg *
215 1.1 mrg * Otherwise, call add_domain to generate a call expression (with guard) and
216 1.1 mrg * to call the at_each_domain callback, if any.
217 1.1 mrg */
218 1.1 mrg static isl_stat generate_domain(__isl_take isl_map *executed, void *user)
219 1.1 mrg {
220 1.1 mrg struct isl_generate_domain_data *data = user;
221 1.1 mrg isl_set *domain;
222 1.1 mrg isl_map *map = NULL;
223 1.1 mrg int empty, sv;
224 1.1 mrg
225 1.1 mrg domain = isl_ast_build_get_domain(data->build);
226 1.1 mrg domain = isl_set_from_basic_set(isl_set_simple_hull(domain));
227 1.1 mrg executed = isl_map_intersect_domain(executed, domain);
228 1.1 mrg empty = isl_map_is_empty(executed);
229 1.1 mrg if (empty < 0)
230 1.1 mrg goto error;
231 1.1 mrg if (empty) {
232 1.1 mrg isl_map_free(executed);
233 1.1 mrg return isl_stat_ok;
234 1.1 mrg }
235 1.1 mrg
236 1.1 mrg sv = isl_map_plain_is_single_valued(executed);
237 1.1 mrg if (sv < 0)
238 1.1 mrg goto error;
239 1.1 mrg if (sv)
240 1.1 mrg return add_domain(executed, isl_map_copy(executed), data);
241 1.1 mrg
242 1.1 mrg executed = isl_map_coalesce(executed);
243 1.1 mrg map = isl_map_copy(executed);
244 1.1 mrg map = isl_ast_build_compute_gist_map_domain(data->build, map);
245 1.1 mrg sv = isl_map_is_single_valued(map);
246 1.1 mrg if (sv < 0)
247 1.1 mrg goto error;
248 1.1 mrg if (!sv) {
249 1.1 mrg isl_map_free(map);
250 1.1 mrg if (data->build->single_valued)
251 1.1 mrg map = isl_map_copy(executed);
252 1.1 mrg else
253 1.1 mrg return generate_non_single_valued(executed, data);
254 1.1 mrg }
255 1.1 mrg
256 1.1 mrg return add_domain(executed, map, data);
257 1.1 mrg error:
258 1.1 mrg isl_map_free(map);
259 1.1 mrg isl_map_free(executed);
260 1.1 mrg return isl_stat_error;
261 1.1 mrg }
262 1.1 mrg
263 1.1 mrg /* Call build->create_leaf to a create "leaf" node in the AST,
264 1.1 mrg * encapsulate the result in an isl_ast_graft and return the result
265 1.1 mrg * as a 1-element list.
266 1.1 mrg *
267 1.1 mrg * Note that the node returned by the user may be an entire tree.
268 1.1 mrg *
269 1.1 mrg * Since the node itself cannot enforce any constraints, we turn
270 1.1 mrg * all pending constraints into guards and add them to the resulting
271 1.1 mrg * graft to ensure that they will be generated.
272 1.1 mrg *
273 1.1 mrg * Before we pass control to the user, we first clear some information
274 1.1 mrg * from the build that is (presumbably) only meaningful
275 1.1 mrg * for the current code generation.
276 1.1 mrg * This includes the create_leaf callback itself, so we make a copy
277 1.1 mrg * of the build first.
278 1.1 mrg */
279 1.1 mrg static __isl_give isl_ast_graft_list *call_create_leaf(
280 1.1 mrg __isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
281 1.1 mrg {
282 1.1 mrg isl_set *guard;
283 1.1 mrg isl_ast_node *node;
284 1.1 mrg isl_ast_graft *graft;
285 1.1 mrg isl_ast_build *user_build;
286 1.1 mrg
287 1.1 mrg guard = isl_ast_build_get_pending(build);
288 1.1 mrg user_build = isl_ast_build_copy(build);
289 1.1 mrg user_build = isl_ast_build_replace_pending_by_guard(user_build,
290 1.1 mrg isl_set_copy(guard));
291 1.1 mrg user_build = isl_ast_build_set_executed(user_build, executed);
292 1.1 mrg user_build = isl_ast_build_clear_local_info(user_build);
293 1.1 mrg if (!user_build)
294 1.1 mrg node = NULL;
295 1.1 mrg else
296 1.1 mrg node = build->create_leaf(user_build, build->create_leaf_user);
297 1.1 mrg graft = isl_ast_graft_alloc(node, build);
298 1.1 mrg graft = isl_ast_graft_add_guard(graft, guard, build);
299 1.1 mrg isl_ast_build_free(build);
300 1.1 mrg return isl_ast_graft_list_from_ast_graft(graft);
301 1.1 mrg }
302 1.1 mrg
303 1.1 mrg static __isl_give isl_ast_graft_list *build_ast_from_child(
304 1.1 mrg __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
305 1.1 mrg __isl_take isl_union_map *executed);
306 1.1 mrg
307 1.1 mrg /* Generate an AST after having handled the complete schedule
308 1.1 mrg * of this call to the code generator or the complete band
309 1.1 mrg * if we are generating an AST from a schedule tree.
310 1.1 mrg *
311 1.1 mrg * If we are inside a band node, then move on to the child of the band.
312 1.1 mrg *
313 1.1 mrg * If the user has specified a create_leaf callback, control
314 1.1 mrg * is passed to the user in call_create_leaf.
315 1.1 mrg *
316 1.1 mrg * Otherwise, we generate one or more calls for each individual
317 1.1 mrg * domain in generate_domain.
318 1.1 mrg */
319 1.1 mrg static __isl_give isl_ast_graft_list *generate_inner_level(
320 1.1 mrg __isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
321 1.1 mrg {
322 1.1 mrg isl_ctx *ctx;
323 1.1 mrg struct isl_generate_domain_data data = { build };
324 1.1 mrg
325 1.1 mrg if (!build || !executed)
326 1.1 mrg goto error;
327 1.1 mrg
328 1.1 mrg if (isl_ast_build_has_schedule_node(build)) {
329 1.1 mrg isl_schedule_node *node;
330 1.1 mrg node = isl_ast_build_get_schedule_node(build);
331 1.1 mrg build = isl_ast_build_reset_schedule_node(build);
332 1.1 mrg return build_ast_from_child(build, node, executed);
333 1.1 mrg }
334 1.1 mrg
335 1.1 mrg if (build->create_leaf)
336 1.1 mrg return call_create_leaf(executed, build);
337 1.1 mrg
338 1.1 mrg ctx = isl_union_map_get_ctx(executed);
339 1.1 mrg data.list = isl_ast_graft_list_alloc(ctx, 0);
340 1.1 mrg if (isl_union_map_foreach_map(executed, &generate_domain, &data) < 0)
341 1.1 mrg data.list = isl_ast_graft_list_free(data.list);
342 1.1 mrg
343 1.1 mrg if (0)
344 1.1 mrg error: data.list = NULL;
345 1.1 mrg isl_ast_build_free(build);
346 1.1 mrg isl_union_map_free(executed);
347 1.1 mrg return data.list;
348 1.1 mrg }
349 1.1 mrg
350 1.1 mrg /* Call the before_each_for callback, if requested by the user.
351 1.1 mrg */
352 1.1 mrg static __isl_give isl_ast_node *before_each_for(__isl_take isl_ast_node *node,
353 1.1 mrg __isl_keep isl_ast_build *build)
354 1.1 mrg {
355 1.1 mrg isl_id *id;
356 1.1 mrg
357 1.1 mrg if (!node || !build)
358 1.1 mrg return isl_ast_node_free(node);
359 1.1 mrg if (!build->before_each_for)
360 1.1 mrg return node;
361 1.1 mrg id = build->before_each_for(build, build->before_each_for_user);
362 1.1 mrg node = isl_ast_node_set_annotation(node, id);
363 1.1 mrg return node;
364 1.1 mrg }
365 1.1 mrg
366 1.1 mrg /* Call the after_each_for callback, if requested by the user.
367 1.1 mrg */
368 1.1 mrg static __isl_give isl_ast_graft *after_each_for(__isl_take isl_ast_graft *graft,
369 1.1 mrg __isl_keep isl_ast_build *build)
370 1.1 mrg {
371 1.1 mrg if (!graft || !build)
372 1.1 mrg return isl_ast_graft_free(graft);
373 1.1 mrg if (!build->after_each_for)
374 1.1 mrg return graft;
375 1.1 mrg graft->node = build->after_each_for(graft->node, build,
376 1.1 mrg build->after_each_for_user);
377 1.1 mrg if (!graft->node)
378 1.1 mrg return isl_ast_graft_free(graft);
379 1.1 mrg return graft;
380 1.1 mrg }
381 1.1 mrg
382 1.1 mrg /* Plug in all the know values of the current and outer dimensions
383 1.1 mrg * in the domain of "executed". In principle, we only need to plug
384 1.1 mrg * in the known value of the current dimension since the values of
385 1.1 mrg * outer dimensions have been plugged in already.
386 1.1 mrg * However, it turns out to be easier to just plug in all known values.
387 1.1 mrg */
388 1.1 mrg static __isl_give isl_union_map *plug_in_values(
389 1.1 mrg __isl_take isl_union_map *executed, __isl_keep isl_ast_build *build)
390 1.1 mrg {
391 1.1 mrg return isl_ast_build_substitute_values_union_map_domain(build,
392 1.1 mrg executed);
393 1.1 mrg }
394 1.1 mrg
395 1.1 mrg /* Check if the constraint "c" is a lower bound on dimension "pos",
396 1.1 mrg * an upper bound, or independent of dimension "pos".
397 1.1 mrg */
398 1.1 mrg static int constraint_type(isl_constraint *c, int pos)
399 1.1 mrg {
400 1.1 mrg if (isl_constraint_is_lower_bound(c, isl_dim_set, pos))
401 1.1 mrg return 1;
402 1.1 mrg if (isl_constraint_is_upper_bound(c, isl_dim_set, pos))
403 1.1 mrg return 2;
404 1.1 mrg return 0;
405 1.1 mrg }
406 1.1 mrg
407 1.1 mrg /* Compare the types of the constraints "a" and "b",
408 1.1 mrg * resulting in constraints that are independent of "depth"
409 1.1 mrg * to be sorted before the lower bounds on "depth", which in
410 1.1 mrg * turn are sorted before the upper bounds on "depth".
411 1.1 mrg */
412 1.1 mrg static int cmp_constraint(__isl_keep isl_constraint *a,
413 1.1 mrg __isl_keep isl_constraint *b, void *user)
414 1.1 mrg {
415 1.1 mrg int *depth = user;
416 1.1 mrg int t1 = constraint_type(a, *depth);
417 1.1 mrg int t2 = constraint_type(b, *depth);
418 1.1 mrg
419 1.1 mrg return t1 - t2;
420 1.1 mrg }
421 1.1 mrg
422 1.1 mrg /* Extract a lower bound on dimension "pos" from constraint "c".
423 1.1 mrg *
424 1.1 mrg * If the constraint is of the form
425 1.1 mrg *
426 1.1 mrg * a x + f(...) >= 0
427 1.1 mrg *
428 1.1 mrg * then we essentially return
429 1.1 mrg *
430 1.1 mrg * l = ceil(-f(...)/a)
431 1.1 mrg *
432 1.1 mrg * However, if the current dimension is strided, then we need to make
433 1.1 mrg * sure that the lower bound we construct is of the form
434 1.1 mrg *
435 1.1 mrg * f + s a
436 1.1 mrg *
437 1.1 mrg * with f the offset and s the stride.
438 1.1 mrg * We therefore compute
439 1.1 mrg *
440 1.1 mrg * f + s * ceil((l - f)/s)
441 1.1 mrg */
442 1.1 mrg static __isl_give isl_aff *lower_bound(__isl_keep isl_constraint *c,
443 1.1 mrg int pos, __isl_keep isl_ast_build *build)
444 1.1 mrg {
445 1.1 mrg isl_aff *aff;
446 1.1 mrg
447 1.1 mrg aff = isl_constraint_get_bound(c, isl_dim_set, pos);
448 1.1 mrg aff = isl_aff_ceil(aff);
449 1.1 mrg
450 1.1 mrg if (isl_ast_build_has_stride(build, pos)) {
451 1.1 mrg isl_aff *offset;
452 1.1 mrg isl_val *stride;
453 1.1 mrg
454 1.1 mrg offset = isl_ast_build_get_offset(build, pos);
455 1.1 mrg stride = isl_ast_build_get_stride(build, pos);
456 1.1 mrg
457 1.1 mrg aff = isl_aff_sub(aff, isl_aff_copy(offset));
458 1.1 mrg aff = isl_aff_scale_down_val(aff, isl_val_copy(stride));
459 1.1 mrg aff = isl_aff_ceil(aff);
460 1.1 mrg aff = isl_aff_scale_val(aff, stride);
461 1.1 mrg aff = isl_aff_add(aff, offset);
462 1.1 mrg }
463 1.1 mrg
464 1.1 mrg aff = isl_ast_build_compute_gist_aff(build, aff);
465 1.1 mrg
466 1.1 mrg return aff;
467 1.1 mrg }
468 1.1 mrg
469 1.1 mrg /* Return the exact lower bound (or upper bound if "upper" is set)
470 1.1 mrg * of "domain" as a piecewise affine expression.
471 1.1 mrg *
472 1.1 mrg * If we are computing a lower bound (of a strided dimension), then
473 1.1 mrg * we need to make sure it is of the form
474 1.1 mrg *
475 1.1 mrg * f + s a
476 1.1 mrg *
477 1.1 mrg * where f is the offset and s is the stride.
478 1.1 mrg * We therefore need to include the stride constraint before computing
479 1.1 mrg * the minimum.
480 1.1 mrg */
481 1.1 mrg static __isl_give isl_pw_aff *exact_bound(__isl_keep isl_set *domain,
482 1.1 mrg __isl_keep isl_ast_build *build, int upper)
483 1.1 mrg {
484 1.1 mrg isl_set *stride;
485 1.1 mrg isl_map *it_map;
486 1.1 mrg isl_pw_aff *pa;
487 1.1 mrg isl_pw_multi_aff *pma;
488 1.1 mrg
489 1.1 mrg domain = isl_set_copy(domain);
490 1.1 mrg if (!upper) {
491 1.1 mrg stride = isl_ast_build_get_stride_constraint(build);
492 1.1 mrg domain = isl_set_intersect(domain, stride);
493 1.1 mrg }
494 1.1 mrg it_map = isl_ast_build_map_to_iterator(build, domain);
495 1.1 mrg if (upper)
496 1.1 mrg pma = isl_map_lexmax_pw_multi_aff(it_map);
497 1.1 mrg else
498 1.1 mrg pma = isl_map_lexmin_pw_multi_aff(it_map);
499 1.1 mrg pa = isl_pw_multi_aff_get_pw_aff(pma, 0);
500 1.1 mrg isl_pw_multi_aff_free(pma);
501 1.1 mrg pa = isl_ast_build_compute_gist_pw_aff(build, pa);
502 1.1 mrg pa = isl_pw_aff_coalesce(pa);
503 1.1 mrg
504 1.1 mrg return pa;
505 1.1 mrg }
506 1.1 mrg
507 1.1 mrg /* Callback for sorting the isl_pw_aff_list passed to reduce_list and
508 1.1 mrg * remove_redundant_lower_bounds.
509 1.1 mrg */
510 1.1 mrg static int reduce_list_cmp(__isl_keep isl_pw_aff *a, __isl_keep isl_pw_aff *b,
511 1.1 mrg void *user)
512 1.1 mrg {
513 1.1 mrg return isl_pw_aff_plain_cmp(a, b);
514 1.1 mrg }
515 1.1 mrg
516 1.1 mrg /* Given a list of lower bounds "list", remove those that are redundant
517 1.1 mrg * with respect to the other bounds in "list" and the domain of "build".
518 1.1 mrg *
519 1.1 mrg * We first sort the bounds in the same way as they would be sorted
520 1.1 mrg * by set_for_node_expressions so that we can try and remove the last
521 1.1 mrg * bounds first.
522 1.1 mrg *
523 1.1 mrg * For a lower bound to be effective, there needs to be at least
524 1.1 mrg * one domain element for which it is larger than all other lower bounds.
525 1.1 mrg * For each lower bound we therefore intersect the domain with
526 1.1 mrg * the conditions that it is larger than all other bounds and
527 1.1 mrg * check whether the result is empty. If so, the bound can be removed.
528 1.1 mrg */
529 1.1 mrg static __isl_give isl_pw_aff_list *remove_redundant_lower_bounds(
530 1.1 mrg __isl_take isl_pw_aff_list *list, __isl_keep isl_ast_build *build)
531 1.1 mrg {
532 1.1 mrg int i, j;
533 1.1 mrg isl_size n;
534 1.1 mrg isl_set *domain;
535 1.1 mrg
536 1.1 mrg list = isl_pw_aff_list_sort(list, &reduce_list_cmp, NULL);
537 1.1 mrg
538 1.1 mrg n = isl_pw_aff_list_n_pw_aff(list);
539 1.1 mrg if (n < 0)
540 1.1 mrg return isl_pw_aff_list_free(list);
541 1.1 mrg if (n <= 1)
542 1.1 mrg return list;
543 1.1 mrg
544 1.1 mrg domain = isl_ast_build_get_domain(build);
545 1.1 mrg
546 1.1 mrg for (i = n - 1; i >= 0; --i) {
547 1.1 mrg isl_pw_aff *pa_i;
548 1.1 mrg isl_set *domain_i;
549 1.1 mrg int empty;
550 1.1 mrg
551 1.1 mrg domain_i = isl_set_copy(domain);
552 1.1 mrg pa_i = isl_pw_aff_list_get_pw_aff(list, i);
553 1.1 mrg
554 1.1 mrg for (j = 0; j < n; ++j) {
555 1.1 mrg isl_pw_aff *pa_j;
556 1.1 mrg isl_set *better;
557 1.1 mrg
558 1.1 mrg if (j == i)
559 1.1 mrg continue;
560 1.1 mrg
561 1.1 mrg pa_j = isl_pw_aff_list_get_pw_aff(list, j);
562 1.1 mrg better = isl_pw_aff_gt_set(isl_pw_aff_copy(pa_i), pa_j);
563 1.1 mrg domain_i = isl_set_intersect(domain_i, better);
564 1.1 mrg }
565 1.1 mrg
566 1.1 mrg empty = isl_set_is_empty(domain_i);
567 1.1 mrg
568 1.1 mrg isl_set_free(domain_i);
569 1.1 mrg isl_pw_aff_free(pa_i);
570 1.1 mrg
571 1.1 mrg if (empty < 0)
572 1.1 mrg goto error;
573 1.1 mrg if (!empty)
574 1.1 mrg continue;
575 1.1 mrg list = isl_pw_aff_list_drop(list, i, 1);
576 1.1 mrg n--;
577 1.1 mrg }
578 1.1 mrg
579 1.1 mrg isl_set_free(domain);
580 1.1 mrg
581 1.1 mrg return list;
582 1.1 mrg error:
583 1.1 mrg isl_set_free(domain);
584 1.1 mrg return isl_pw_aff_list_free(list);
585 1.1 mrg }
586 1.1 mrg
587 1.1 mrg /* Extract a lower bound on dimension "pos" from each constraint
588 1.1 mrg * in "constraints" and return the list of lower bounds.
589 1.1 mrg * If "constraints" has zero elements, then we extract a lower bound
590 1.1 mrg * from "domain" instead.
591 1.1 mrg *
592 1.1 mrg * If the current dimension is strided, then the lower bound
593 1.1 mrg * is adjusted by lower_bound to match the stride information.
594 1.1 mrg * This modification may make one or more lower bounds redundant
595 1.1 mrg * with respect to the other lower bounds. We therefore check
596 1.1 mrg * for this condition and remove the redundant lower bounds.
597 1.1 mrg */
598 1.1 mrg static __isl_give isl_pw_aff_list *lower_bounds(
599 1.1 mrg __isl_keep isl_constraint_list *constraints, int pos,
600 1.1 mrg __isl_keep isl_set *domain, __isl_keep isl_ast_build *build)
601 1.1 mrg {
602 1.1 mrg isl_ctx *ctx;
603 1.1 mrg isl_pw_aff_list *list;
604 1.1 mrg int i;
605 1.1 mrg isl_size n;
606 1.1 mrg
607 1.1 mrg if (!build)
608 1.1 mrg return NULL;
609 1.1 mrg
610 1.1 mrg n = isl_constraint_list_n_constraint(constraints);
611 1.1 mrg if (n < 0)
612 1.1 mrg return NULL;
613 1.1 mrg if (n == 0) {
614 1.1 mrg isl_pw_aff *pa;
615 1.1 mrg pa = exact_bound(domain, build, 0);
616 1.1 mrg return isl_pw_aff_list_from_pw_aff(pa);
617 1.1 mrg }
618 1.1 mrg
619 1.1 mrg ctx = isl_ast_build_get_ctx(build);
620 1.1 mrg list = isl_pw_aff_list_alloc(ctx,n);
621 1.1 mrg
622 1.1 mrg for (i = 0; i < n; ++i) {
623 1.1 mrg isl_aff *aff;
624 1.1 mrg isl_constraint *c;
625 1.1 mrg
626 1.1 mrg c = isl_constraint_list_get_constraint(constraints, i);
627 1.1 mrg aff = lower_bound(c, pos, build);
628 1.1 mrg isl_constraint_free(c);
629 1.1 mrg list = isl_pw_aff_list_add(list, isl_pw_aff_from_aff(aff));
630 1.1 mrg }
631 1.1 mrg
632 1.1 mrg if (isl_ast_build_has_stride(build, pos))
633 1.1 mrg list = remove_redundant_lower_bounds(list, build);
634 1.1 mrg
635 1.1 mrg return list;
636 1.1 mrg }
637 1.1 mrg
638 1.1 mrg /* Extract an upper bound on dimension "pos" from each constraint
639 1.1 mrg * in "constraints" and return the list of upper bounds.
640 1.1 mrg * If "constraints" has zero elements, then we extract an upper bound
641 1.1 mrg * from "domain" instead.
642 1.1 mrg */
643 1.1 mrg static __isl_give isl_pw_aff_list *upper_bounds(
644 1.1 mrg __isl_keep isl_constraint_list *constraints, int pos,
645 1.1 mrg __isl_keep isl_set *domain, __isl_keep isl_ast_build *build)
646 1.1 mrg {
647 1.1 mrg isl_ctx *ctx;
648 1.1 mrg isl_pw_aff_list *list;
649 1.1 mrg int i;
650 1.1 mrg isl_size n;
651 1.1 mrg
652 1.1 mrg n = isl_constraint_list_n_constraint(constraints);
653 1.1 mrg if (n < 0)
654 1.1 mrg return NULL;
655 1.1 mrg if (n == 0) {
656 1.1 mrg isl_pw_aff *pa;
657 1.1 mrg pa = exact_bound(domain, build, 1);
658 1.1 mrg return isl_pw_aff_list_from_pw_aff(pa);
659 1.1 mrg }
660 1.1 mrg
661 1.1 mrg ctx = isl_ast_build_get_ctx(build);
662 1.1 mrg list = isl_pw_aff_list_alloc(ctx,n);
663 1.1 mrg
664 1.1 mrg for (i = 0; i < n; ++i) {
665 1.1 mrg isl_aff *aff;
666 1.1 mrg isl_constraint *c;
667 1.1 mrg
668 1.1 mrg c = isl_constraint_list_get_constraint(constraints, i);
669 1.1 mrg aff = isl_constraint_get_bound(c, isl_dim_set, pos);
670 1.1 mrg isl_constraint_free(c);
671 1.1 mrg aff = isl_aff_floor(aff);
672 1.1 mrg list = isl_pw_aff_list_add(list, isl_pw_aff_from_aff(aff));
673 1.1 mrg }
674 1.1 mrg
675 1.1 mrg return list;
676 1.1 mrg }
677 1.1 mrg
678 1.1 mrg /* Return an isl_ast_expr that performs the reduction of type "type"
679 1.1 mrg * on AST expressions corresponding to the elements in "list".
680 1.1 mrg *
681 1.1 mrg * The list is assumed to contain at least one element.
682 1.1 mrg * If the list contains exactly one element, then the returned isl_ast_expr
683 1.1 mrg * simply computes that affine expression.
684 1.1 mrg * If the list contains more than one element, then we sort it
685 1.1 mrg * using a fairly arbitrary but hopefully reasonably stable order.
686 1.1 mrg */
687 1.1 mrg static __isl_give isl_ast_expr *reduce_list(enum isl_ast_expr_op_type type,
688 1.1 mrg __isl_keep isl_pw_aff_list *list, __isl_keep isl_ast_build *build)
689 1.1 mrg {
690 1.1 mrg int i;
691 1.1 mrg isl_size n;
692 1.1 mrg isl_ctx *ctx;
693 1.1 mrg isl_ast_expr *expr;
694 1.1 mrg
695 1.1 mrg n = isl_pw_aff_list_n_pw_aff(list);
696 1.1 mrg if (n < 0)
697 1.1 mrg return NULL;
698 1.1 mrg
699 1.1 mrg if (n == 1)
700 1.1 mrg return isl_ast_build_expr_from_pw_aff_internal(build,
701 1.1 mrg isl_pw_aff_list_get_pw_aff(list, 0));
702 1.1 mrg
703 1.1 mrg ctx = isl_pw_aff_list_get_ctx(list);
704 1.1 mrg expr = isl_ast_expr_alloc_op(ctx, type, n);
705 1.1 mrg
706 1.1 mrg list = isl_pw_aff_list_copy(list);
707 1.1 mrg list = isl_pw_aff_list_sort(list, &reduce_list_cmp, NULL);
708 1.1 mrg if (!list)
709 1.1 mrg return isl_ast_expr_free(expr);
710 1.1 mrg
711 1.1 mrg for (i = 0; i < n; ++i) {
712 1.1 mrg isl_ast_expr *expr_i;
713 1.1 mrg
714 1.1 mrg expr_i = isl_ast_build_expr_from_pw_aff_internal(build,
715 1.1 mrg isl_pw_aff_list_get_pw_aff(list, i));
716 1.1 mrg expr = isl_ast_expr_op_add_arg(expr, expr_i);
717 1.1 mrg }
718 1.1 mrg
719 1.1 mrg isl_pw_aff_list_free(list);
720 1.1 mrg return expr;
721 1.1 mrg }
722 1.1 mrg
723 1.1 mrg /* Add guards implied by the "generated constraints",
724 1.1 mrg * but not (necessarily) enforced by the generated AST to "guard".
725 1.1 mrg * In particular, if there is any stride constraints,
726 1.1 mrg * then add the guard implied by those constraints.
727 1.1 mrg * If we have generated a degenerate loop, then add the guard
728 1.1 mrg * implied by "bounds" on the outer dimensions, i.e., the guard
729 1.1 mrg * that ensures that the single value actually exists.
730 1.1 mrg * Since there may also be guards implied by a combination
731 1.1 mrg * of these constraints, we first combine them before
732 1.1 mrg * deriving the implied constraints.
733 1.1 mrg */
734 1.1 mrg static __isl_give isl_set *add_implied_guards(__isl_take isl_set *guard,
735 1.1 mrg int degenerate, __isl_keep isl_basic_set *bounds,
736 1.1 mrg __isl_keep isl_ast_build *build)
737 1.1 mrg {
738 1.1 mrg isl_size depth;
739 1.1 mrg isl_bool has_stride;
740 1.1 mrg isl_space *space;
741 1.1 mrg isl_set *dom, *set;
742 1.1 mrg
743 1.1 mrg depth = isl_ast_build_get_depth(build);
744 1.1 mrg has_stride = isl_ast_build_has_stride(build, depth);
745 1.1 mrg if (depth < 0 || has_stride < 0)
746 1.1 mrg return isl_set_free(guard);
747 1.1 mrg if (!has_stride && !degenerate)
748 1.1 mrg return guard;
749 1.1 mrg
750 1.1 mrg space = isl_basic_set_get_space(bounds);
751 1.1 mrg dom = isl_set_universe(space);
752 1.1 mrg
753 1.1 mrg if (degenerate) {
754 1.1 mrg bounds = isl_basic_set_copy(bounds);
755 1.1 mrg bounds = isl_basic_set_drop_constraints_not_involving_dims(
756 1.1 mrg bounds, isl_dim_set, depth, 1);
757 1.1 mrg set = isl_set_from_basic_set(bounds);
758 1.1 mrg dom = isl_set_intersect(dom, set);
759 1.1 mrg }
760 1.1 mrg
761 1.1 mrg if (has_stride) {
762 1.1 mrg set = isl_ast_build_get_stride_constraint(build);
763 1.1 mrg dom = isl_set_intersect(dom, set);
764 1.1 mrg }
765 1.1 mrg
766 1.1 mrg dom = isl_set_eliminate(dom, isl_dim_set, depth, 1);
767 1.1 mrg dom = isl_ast_build_compute_gist(build, dom);
768 1.1 mrg guard = isl_set_intersect(guard, dom);
769 1.1 mrg
770 1.1 mrg return guard;
771 1.1 mrg }
772 1.1 mrg
773 1.1 mrg /* Update "graft" based on "sub_build" for the degenerate case.
774 1.1 mrg *
775 1.1 mrg * "build" is the build in which graft->node was created
776 1.1 mrg * "sub_build" contains information about the current level itself,
777 1.1 mrg * including the single value attained.
778 1.1 mrg *
779 1.1 mrg * We set the initialization part of the for loop to the single
780 1.1 mrg * value attained by the current dimension.
781 1.1 mrg * The increment and condition are not strictly needed as they are known
782 1.1 mrg * to be "1" and "iterator <= value" respectively.
783 1.1 mrg */
784 1.1 mrg static __isl_give isl_ast_graft *refine_degenerate(
785 1.1 mrg __isl_take isl_ast_graft *graft, __isl_keep isl_ast_build *build,
786 1.1 mrg __isl_keep isl_ast_build *sub_build)
787 1.1 mrg {
788 1.1 mrg isl_pw_aff *value;
789 1.1 mrg isl_ast_expr *init;
790 1.1 mrg
791 1.1 mrg if (!graft || !sub_build)
792 1.1 mrg return isl_ast_graft_free(graft);
793 1.1 mrg
794 1.1 mrg value = isl_pw_aff_copy(sub_build->value);
795 1.1 mrg
796 1.1 mrg init = isl_ast_build_expr_from_pw_aff_internal(build, value);
797 1.1 mrg graft->node = isl_ast_node_for_set_init(graft->node, init);
798 1.1 mrg if (!graft->node)
799 1.1 mrg return isl_ast_graft_free(graft);
800 1.1 mrg
801 1.1 mrg return graft;
802 1.1 mrg }
803 1.1 mrg
804 1.1 mrg /* Return the intersection of constraints in "list" as a set.
805 1.1 mrg */
806 1.1 mrg static __isl_give isl_set *intersect_constraints(
807 1.1 mrg __isl_keep isl_constraint_list *list)
808 1.1 mrg {
809 1.1 mrg int i;
810 1.1 mrg isl_size n;
811 1.1 mrg isl_basic_set *bset;
812 1.1 mrg
813 1.1 mrg n = isl_constraint_list_n_constraint(list);
814 1.1 mrg if (n < 0)
815 1.1 mrg return NULL;
816 1.1 mrg if (n < 1)
817 1.1 mrg isl_die(isl_constraint_list_get_ctx(list), isl_error_internal,
818 1.1 mrg "expecting at least one constraint", return NULL);
819 1.1 mrg
820 1.1 mrg bset = isl_basic_set_from_constraint(
821 1.1 mrg isl_constraint_list_get_constraint(list, 0));
822 1.1 mrg for (i = 1; i < n; ++i) {
823 1.1 mrg isl_basic_set *bset_i;
824 1.1 mrg
825 1.1 mrg bset_i = isl_basic_set_from_constraint(
826 1.1 mrg isl_constraint_list_get_constraint(list, i));
827 1.1 mrg bset = isl_basic_set_intersect(bset, bset_i);
828 1.1 mrg }
829 1.1 mrg
830 1.1 mrg return isl_set_from_basic_set(bset);
831 1.1 mrg }
832 1.1 mrg
833 1.1 mrg /* Compute the constraints on the outer dimensions enforced by
834 1.1 mrg * graft->node and add those constraints to graft->enforced,
835 1.1 mrg * in case the upper bound is expressed as a set "upper".
836 1.1 mrg *
837 1.1 mrg * In particular, if l(...) is a lower bound in "lower", and
838 1.1 mrg *
839 1.1 mrg * -a i + f(...) >= 0 or a i <= f(...)
840 1.1 mrg *
 * is an upper bound constraint on the current dimension i,
842 1.1 mrg * then the for loop enforces the constraint
843 1.1 mrg *
844 1.1 mrg * -a l(...) + f(...) >= 0 or a l(...) <= f(...)
845 1.1 mrg *
846 1.1 mrg * We therefore simply take each lower bound in turn, plug it into
847 1.1 mrg * the upper bounds and compute the intersection over all lower bounds.
848 1.1 mrg *
849 1.1 mrg * If a lower bound is a rational expression, then
850 1.1 mrg * isl_basic_set_preimage_multi_aff will force this rational
851 1.1 mrg * expression to have only integer values. However, the loop
852 1.1 mrg * itself does not enforce this integrality constraint. We therefore
853 1.1 mrg * use the ceil of the lower bounds instead of the lower bounds themselves.
854 1.1 mrg * Other constraints will make sure that the for loop is only executed
855 1.1 mrg * when each of the lower bounds attains an integral value.
856 1.1 mrg * In particular, potentially rational values only occur in
857 1.1 mrg * lower_bound if the offset is a (seemingly) rational expression,
858 1.1 mrg * but then outer conditions will make sure that this rational expression
859 1.1 mrg * only attains integer values.
860 1.1 mrg */
861 1.1 mrg static __isl_give isl_ast_graft *set_enforced_from_set(
862 1.1 mrg __isl_take isl_ast_graft *graft,
863 1.1 mrg __isl_keep isl_pw_aff_list *lower, int pos, __isl_keep isl_set *upper)
864 1.1 mrg {
865 1.1 mrg isl_space *space;
866 1.1 mrg isl_basic_set *enforced;
867 1.1 mrg isl_pw_multi_aff *pma;
868 1.1 mrg int i;
869 1.1 mrg isl_size n;
870 1.1 mrg
871 1.1 mrg n = isl_pw_aff_list_n_pw_aff(lower);
872 1.1 mrg if (!graft || n < 0)
873 1.1 mrg return isl_ast_graft_free(graft);
874 1.1 mrg
875 1.1 mrg space = isl_set_get_space(upper);
876 1.1 mrg enforced = isl_basic_set_universe(isl_space_copy(space));
877 1.1 mrg
878 1.1 mrg space = isl_space_map_from_set(space);
879 1.1 mrg pma = isl_pw_multi_aff_identity(space);
880 1.1 mrg
881 1.1 mrg for (i = 0; i < n; ++i) {
882 1.1 mrg isl_pw_aff *pa;
883 1.1 mrg isl_set *enforced_i;
884 1.1 mrg isl_basic_set *hull;
885 1.1 mrg isl_pw_multi_aff *pma_i;
886 1.1 mrg
887 1.1 mrg pa = isl_pw_aff_list_get_pw_aff(lower, i);
888 1.1 mrg pa = isl_pw_aff_ceil(pa);
889 1.1 mrg pma_i = isl_pw_multi_aff_copy(pma);
890 1.1 mrg pma_i = isl_pw_multi_aff_set_pw_aff(pma_i, pos, pa);
891 1.1 mrg enforced_i = isl_set_copy(upper);
892 1.1 mrg enforced_i = isl_set_preimage_pw_multi_aff(enforced_i, pma_i);
893 1.1 mrg hull = isl_set_simple_hull(enforced_i);
894 1.1 mrg enforced = isl_basic_set_intersect(enforced, hull);
895 1.1 mrg }
896 1.1 mrg
897 1.1 mrg isl_pw_multi_aff_free(pma);
898 1.1 mrg
899 1.1 mrg graft = isl_ast_graft_enforce(graft, enforced);
900 1.1 mrg
901 1.1 mrg return graft;
902 1.1 mrg }
903 1.1 mrg
904 1.1 mrg /* Compute the constraints on the outer dimensions enforced by
905 1.1 mrg * graft->node and add those constraints to graft->enforced,
906 1.1 mrg * in case the upper bound is expressed as
907 1.1 mrg * a list of affine expressions "upper".
908 1.1 mrg *
909 1.1 mrg * The enforced condition is that each lower bound expression is less
910 1.1 mrg * than or equal to each upper bound expression.
911 1.1 mrg */
912 1.1 mrg static __isl_give isl_ast_graft *set_enforced_from_list(
913 1.1 mrg __isl_take isl_ast_graft *graft,
914 1.1 mrg __isl_keep isl_pw_aff_list *lower, __isl_keep isl_pw_aff_list *upper)
915 1.1 mrg {
916 1.1 mrg isl_set *cond;
917 1.1 mrg isl_basic_set *enforced;
918 1.1 mrg
919 1.1 mrg lower = isl_pw_aff_list_copy(lower);
920 1.1 mrg upper = isl_pw_aff_list_copy(upper);
921 1.1 mrg cond = isl_pw_aff_list_le_set(lower, upper);
922 1.1 mrg enforced = isl_set_simple_hull(cond);
923 1.1 mrg graft = isl_ast_graft_enforce(graft, enforced);
924 1.1 mrg
925 1.1 mrg return graft;
926 1.1 mrg }
927 1.1 mrg
928 1.1 mrg /* Does "aff" have a negative constant term?
929 1.1 mrg */
930 1.1 mrg static isl_bool aff_constant_is_negative(__isl_keep isl_set *set,
931 1.1 mrg __isl_keep isl_aff *aff, void *user)
932 1.1 mrg {
933 1.1 mrg isl_bool is_neg;
934 1.1 mrg isl_val *v;
935 1.1 mrg
936 1.1 mrg v = isl_aff_get_constant_val(aff);
937 1.1 mrg is_neg = isl_val_is_neg(v);
938 1.1 mrg isl_val_free(v);
939 1.1 mrg
940 1.1 mrg return is_neg;
941 1.1 mrg }
942 1.1 mrg
943 1.1 mrg /* Does "pa" have a negative constant term over its entire domain?
944 1.1 mrg */
945 1.1 mrg static isl_bool pw_aff_constant_is_negative(__isl_keep isl_pw_aff *pa,
946 1.1 mrg void *user)
947 1.1 mrg {
948 1.1 mrg return isl_pw_aff_every_piece(pa, &aff_constant_is_negative, NULL);
949 1.1 mrg }
950 1.1 mrg
951 1.1 mrg /* Does each element in "list" have a negative constant term?
952 1.1 mrg */
953 1.1 mrg static int list_constant_is_negative(__isl_keep isl_pw_aff_list *list)
954 1.1 mrg {
955 1.1 mrg return isl_pw_aff_list_every(list, &pw_aff_constant_is_negative, NULL);
956 1.1 mrg }
957 1.1 mrg
958 1.1 mrg /* Add 1 to each of the elements in "list", where each of these elements
959 1.1 mrg * is defined over the internal schedule space of "build".
960 1.1 mrg */
961 1.1 mrg static __isl_give isl_pw_aff_list *list_add_one(
962 1.1 mrg __isl_take isl_pw_aff_list *list, __isl_keep isl_ast_build *build)
963 1.1 mrg {
964 1.1 mrg int i;
965 1.1 mrg isl_size n;
966 1.1 mrg isl_space *space;
967 1.1 mrg isl_aff *aff;
968 1.1 mrg isl_pw_aff *one;
969 1.1 mrg
970 1.1 mrg n = isl_pw_aff_list_n_pw_aff(list);
971 1.1 mrg if (n < 0)
972 1.1 mrg return isl_pw_aff_list_free(list);
973 1.1 mrg
974 1.1 mrg space = isl_ast_build_get_space(build, 1);
975 1.1 mrg aff = isl_aff_zero_on_domain(isl_local_space_from_space(space));
976 1.1 mrg aff = isl_aff_add_constant_si(aff, 1);
977 1.1 mrg one = isl_pw_aff_from_aff(aff);
978 1.1 mrg
979 1.1 mrg for (i = 0; i < n; ++i) {
980 1.1 mrg isl_pw_aff *pa;
981 1.1 mrg pa = isl_pw_aff_list_get_pw_aff(list, i);
982 1.1 mrg pa = isl_pw_aff_add(pa, isl_pw_aff_copy(one));
983 1.1 mrg list = isl_pw_aff_list_set_pw_aff(list, i, pa);
984 1.1 mrg }
985 1.1 mrg
986 1.1 mrg isl_pw_aff_free(one);
987 1.1 mrg
988 1.1 mrg return list;
989 1.1 mrg }
990 1.1 mrg
991 1.1 mrg /* Set the condition part of the for node graft->node in case
992 1.1 mrg * the upper bound is represented as a list of piecewise affine expressions.
993 1.1 mrg *
994 1.1 mrg * In particular, set the condition to
995 1.1 mrg *
996 1.1 mrg * iterator <= min(list of upper bounds)
997 1.1 mrg *
998 1.1 mrg * If each of the upper bounds has a negative constant term, then
999 1.1 mrg * set the condition to
1000 1.1 mrg *
1001 1.1 mrg * iterator < min(list of (upper bound + 1)s)
1002 1.1 mrg *
1003 1.1 mrg */
1004 1.1 mrg static __isl_give isl_ast_graft *set_for_cond_from_list(
1005 1.1 mrg __isl_take isl_ast_graft *graft, __isl_keep isl_pw_aff_list *list,
1006 1.1 mrg __isl_keep isl_ast_build *build)
1007 1.1 mrg {
1008 1.1 mrg int neg;
1009 1.1 mrg isl_ast_expr *bound, *iterator, *cond;
1010 1.1 mrg enum isl_ast_expr_op_type type = isl_ast_expr_op_le;
1011 1.1 mrg
1012 1.1 mrg if (!graft || !list)
1013 1.1 mrg return isl_ast_graft_free(graft);
1014 1.1 mrg
1015 1.1 mrg neg = list_constant_is_negative(list);
1016 1.1 mrg if (neg < 0)
1017 1.1 mrg return isl_ast_graft_free(graft);
1018 1.1 mrg list = isl_pw_aff_list_copy(list);
1019 1.1 mrg if (neg) {
1020 1.1 mrg list = list_add_one(list, build);
1021 1.1 mrg type = isl_ast_expr_op_lt;
1022 1.1 mrg }
1023 1.1 mrg
1024 1.1 mrg bound = reduce_list(isl_ast_expr_op_min, list, build);
1025 1.1 mrg iterator = isl_ast_expr_copy(graft->node->u.f.iterator);
1026 1.1 mrg cond = isl_ast_expr_alloc_binary(type, iterator, bound);
1027 1.1 mrg graft->node = isl_ast_node_for_set_cond(graft->node, cond);
1028 1.1 mrg
1029 1.1 mrg isl_pw_aff_list_free(list);
1030 1.1 mrg if (!graft->node)
1031 1.1 mrg return isl_ast_graft_free(graft);
1032 1.1 mrg return graft;
1033 1.1 mrg }
1034 1.1 mrg
1035 1.1 mrg /* Set the condition part of the for node graft->node in case
1036 1.1 mrg * the upper bound is represented as a set.
1037 1.1 mrg */
1038 1.1 mrg static __isl_give isl_ast_graft *set_for_cond_from_set(
1039 1.1 mrg __isl_take isl_ast_graft *graft, __isl_keep isl_set *set,
1040 1.1 mrg __isl_keep isl_ast_build *build)
1041 1.1 mrg {
1042 1.1 mrg isl_ast_expr *cond;
1043 1.1 mrg
1044 1.1 mrg if (!graft)
1045 1.1 mrg return NULL;
1046 1.1 mrg
1047 1.1 mrg cond = isl_ast_build_expr_from_set_internal(build, isl_set_copy(set));
1048 1.1 mrg graft->node = isl_ast_node_for_set_cond(graft->node, cond);
1049 1.1 mrg if (!graft->node)
1050 1.1 mrg return isl_ast_graft_free(graft);
1051 1.1 mrg return graft;
1052 1.1 mrg }
1053 1.1 mrg
1054 1.1 mrg /* Construct an isl_ast_expr for the increment (i.e., stride) of
1055 1.1 mrg * the current dimension.
1056 1.1 mrg */
1057 1.1 mrg static __isl_give isl_ast_expr *for_inc(__isl_keep isl_ast_build *build)
1058 1.1 mrg {
1059 1.1 mrg isl_size depth;
1060 1.1 mrg isl_val *v;
1061 1.1 mrg isl_ctx *ctx;
1062 1.1 mrg
1063 1.1 mrg depth = isl_ast_build_get_depth(build);
1064 1.1 mrg if (depth < 0)
1065 1.1 mrg return NULL;
1066 1.1 mrg ctx = isl_ast_build_get_ctx(build);
1067 1.1 mrg
1068 1.1 mrg if (!isl_ast_build_has_stride(build, depth))
1069 1.1 mrg return isl_ast_expr_alloc_int_si(ctx, 1);
1070 1.1 mrg
1071 1.1 mrg v = isl_ast_build_get_stride(build, depth);
1072 1.1 mrg return isl_ast_expr_from_val(v);
1073 1.1 mrg }
1074 1.1 mrg
1075 1.1 mrg /* Should we express the loop condition as
1076 1.1 mrg *
1077 1.1 mrg * iterator <= min(list of upper bounds)
1078 1.1 mrg *
1079 1.1 mrg * or as a conjunction of constraints?
1080 1.1 mrg *
1081 1.1 mrg * The first is constructed from a list of upper bounds.
1082 1.1 mrg * The second is constructed from a set.
1083 1.1 mrg *
1084 1.1 mrg * If there are no upper bounds in "constraints", then this could mean
1085 1.1 mrg * that "domain" simply doesn't have an upper bound or that we didn't
1086 1.1 mrg * pick any upper bound. In the first case, we want to generate the
 * loop condition as a(n empty) conjunction of constraints.
1088 1.1 mrg * In the second case, we will compute
1089 1.1 mrg * a single upper bound from "domain" and so we use the list form.
1090 1.1 mrg *
1091 1.1 mrg * If there are upper bounds in "constraints",
1092 1.1 mrg * then we use the list form iff the atomic_upper_bound option is set.
1093 1.1 mrg */
1094 1.1 mrg static int use_upper_bound_list(isl_ctx *ctx, int n_upper,
1095 1.1 mrg __isl_keep isl_set *domain, int depth)
1096 1.1 mrg {
1097 1.1 mrg if (n_upper > 0)
1098 1.1 mrg return isl_options_get_ast_build_atomic_upper_bound(ctx);
1099 1.1 mrg else
1100 1.1 mrg return isl_set_dim_has_upper_bound(domain, isl_dim_set, depth);
1101 1.1 mrg }
1102 1.1 mrg
1103 1.1 mrg /* Fill in the expressions of the for node in graft->node.
1104 1.1 mrg *
1105 1.1 mrg * In particular,
1106 1.1 mrg * - set the initialization part of the loop to the maximum of the lower bounds
1107 1.1 mrg * - extract the increment from the stride of the current dimension
1108 1.1 mrg * - construct the for condition either based on a list of upper bounds
1109 1.1 mrg * or on a set of upper bound constraints.
1110 1.1 mrg */
1111 1.1 mrg static __isl_give isl_ast_graft *set_for_node_expressions(
1112 1.1 mrg __isl_take isl_ast_graft *graft, __isl_keep isl_pw_aff_list *lower,
1113 1.1 mrg int use_list, __isl_keep isl_pw_aff_list *upper_list,
1114 1.1 mrg __isl_keep isl_set *upper_set, __isl_keep isl_ast_build *build)
1115 1.1 mrg {
1116 1.1 mrg isl_ast_expr *init;
1117 1.1 mrg
1118 1.1 mrg if (!graft)
1119 1.1 mrg return NULL;
1120 1.1 mrg
1121 1.1 mrg init = reduce_list(isl_ast_expr_op_max, lower, build);
1122 1.1 mrg graft->node = isl_ast_node_for_set_init(graft->node, init);
1123 1.1 mrg graft->node = isl_ast_node_for_set_inc(graft->node, for_inc(build));
1124 1.1 mrg
1125 1.1 mrg if (!graft->node)
1126 1.1 mrg graft = isl_ast_graft_free(graft);
1127 1.1 mrg
1128 1.1 mrg if (use_list)
1129 1.1 mrg graft = set_for_cond_from_list(graft, upper_list, build);
1130 1.1 mrg else
1131 1.1 mrg graft = set_for_cond_from_set(graft, upper_set, build);
1132 1.1 mrg
1133 1.1 mrg return graft;
1134 1.1 mrg }
1135 1.1 mrg
1136 1.1 mrg /* Update "graft" based on "bounds" and "domain" for the generic,
1137 1.1 mrg * non-degenerate, case.
1138 1.1 mrg *
1139 1.1 mrg * "c_lower" and "c_upper" contain the lower and upper bounds
1140 1.1 mrg * that the loop node should express.
1141 1.1 mrg * "domain" is the subset of the intersection of the constraints
1142 1.1 mrg * for which some code is executed.
1143 1.1 mrg *
1144 1.1 mrg * There may be zero lower bounds or zero upper bounds in "constraints"
1145 1.1 mrg * in case the list of constraints was created
1146 1.1 mrg * based on the atomic option or based on separation with explicit bounds.
1147 1.1 mrg * In that case, we use "domain" to derive lower and/or upper bounds.
1148 1.1 mrg *
1149 1.1 mrg * We first compute a list of one or more lower bounds.
1150 1.1 mrg *
1151 1.1 mrg * Then we decide if we want to express the condition as
1152 1.1 mrg *
1153 1.1 mrg * iterator <= min(list of upper bounds)
1154 1.1 mrg *
1155 1.1 mrg * or as a conjunction of constraints.
1156 1.1 mrg *
1157 1.1 mrg * The set of enforced constraints is then computed either based on
1158 1.1 mrg * a list of upper bounds or on a set of upper bound constraints.
1159 1.1 mrg * We do not compute any enforced constraints if we were forced
1160 1.1 mrg * to compute a lower or upper bound using exact_bound. The domains
1161 1.1 mrg * of the resulting expressions may imply some bounds on outer dimensions
1162 1.1 mrg * that we do not want to appear in the enforced constraints since
1163 1.1 mrg * they are not actually enforced by the corresponding code.
1164 1.1 mrg *
1165 1.1 mrg * Finally, we fill in the expressions of the for node.
1166 1.1 mrg */
static __isl_give isl_ast_graft *refine_generic_bounds(
	__isl_take isl_ast_graft *graft,
	__isl_take isl_constraint_list *c_lower,
	__isl_take isl_constraint_list *c_upper,
	__isl_keep isl_set *domain, __isl_keep isl_ast_build *build)
{
	isl_size depth;
	isl_ctx *ctx;
	isl_pw_aff_list *lower;
	int use_list;
	/* Exactly one of upper_set/upper_list is filled in below,
	 * depending on "use_list"; both are freed unconditionally.
	 */
	isl_set *upper_set = NULL;
	isl_pw_aff_list *upper_list = NULL;
	isl_size n_lower, n_upper;

	depth = isl_ast_build_get_depth(build);
	if (!graft || !c_lower || !c_upper || depth < 0)
		goto error;

	ctx = isl_ast_graft_get_ctx(graft);

	n_lower = isl_constraint_list_n_constraint(c_lower);
	n_upper = isl_constraint_list_n_constraint(c_upper);
	if (n_lower < 0 || n_upper < 0)
		goto error;

	/* Choose between the "min of upper bounds" (list) form and
	 * the conjunction-of-constraints (set) form of the condition.
	 */
	use_list = use_upper_bound_list(ctx, n_upper, domain, depth);

	lower = lower_bounds(c_lower, depth, domain, build);

	if (use_list)
		upper_list = upper_bounds(c_upper, depth, domain, build);
	else if (n_upper > 0)
		upper_set = intersect_constraints(c_upper);
	else
		upper_set = isl_set_universe(isl_set_get_space(domain));

	/* Do not compute enforced constraints if a lower or upper bound
	 * had to be derived from "domain" (see comment above).
	 */
	if (n_lower == 0 || n_upper == 0)
		;
	else if (use_list)
		graft = set_enforced_from_list(graft, lower, upper_list);
	else
		graft = set_enforced_from_set(graft, lower, depth, upper_set);

	graft = set_for_node_expressions(graft, lower, use_list, upper_list,
					upper_set, build);

	isl_pw_aff_list_free(lower);
	isl_pw_aff_list_free(upper_list);
	isl_set_free(upper_set);
	isl_constraint_list_free(c_lower);
	isl_constraint_list_free(c_upper);

	return graft;
error:
	isl_constraint_list_free(c_lower);
	isl_constraint_list_free(c_upper);
	return isl_ast_graft_free(graft);
}
1225 1.1 mrg
1226 1.1 mrg /* Internal data structure used inside count_constraints to keep
1227 1.1 mrg * track of the number of constraints that are independent of dimension "pos",
1228 1.1 mrg * the lower bounds in "pos" and the upper bounds in "pos".
1229 1.1 mrg */
struct isl_ast_count_constraints_data {
	int pos;	/* dimension against which constraints are classified */

	int n_indep;	/* constraints independent of dimension "pos" */
	int n_lower;	/* lower bounds on dimension "pos" */
	int n_upper;	/* upper bounds on dimension "pos" */
};
1237 1.1 mrg
/* Increment data->n_indep, data->n_lower or data->n_upper depending
 * on whether "c" is independent of dimension data->pos,
1240 1.1 mrg * a lower bound or an upper bound.
1241 1.1 mrg */
1242 1.1 mrg static isl_stat count_constraints(__isl_take isl_constraint *c, void *user)
1243 1.1 mrg {
1244 1.1 mrg struct isl_ast_count_constraints_data *data = user;
1245 1.1 mrg
1246 1.1 mrg if (isl_constraint_is_lower_bound(c, isl_dim_set, data->pos))
1247 1.1 mrg data->n_lower++;
1248 1.1 mrg else if (isl_constraint_is_upper_bound(c, isl_dim_set, data->pos))
1249 1.1 mrg data->n_upper++;
1250 1.1 mrg else
1251 1.1 mrg data->n_indep++;
1252 1.1 mrg
1253 1.1 mrg isl_constraint_free(c);
1254 1.1 mrg
1255 1.1 mrg return isl_stat_ok;
1256 1.1 mrg }
1257 1.1 mrg
1258 1.1 mrg /* Update "graft" based on "bounds" and "domain" for the generic,
1259 1.1 mrg * non-degenerate, case.
1260 1.1 mrg *
 * "list" represents the list of bounds that need to be encoded by
1262 1.1 mrg * the for loop. Only the constraints that involve the iterator
1263 1.1 mrg * are relevant here. The other constraints are taken care of by
1264 1.1 mrg * the caller and are included in the generated constraints of "build".
1265 1.1 mrg * "domain" is the subset of the intersection of the constraints
1266 1.1 mrg * for which some code is executed.
1267 1.1 mrg * "build" is the build in which graft->node was created.
1268 1.1 mrg *
1269 1.1 mrg * We separate lower bounds, upper bounds and constraints that
1270 1.1 mrg * are independent of the loop iterator.
1271 1.1 mrg *
1272 1.1 mrg * The actual for loop bounds are generated in refine_generic_bounds.
1273 1.1 mrg */
static __isl_give isl_ast_graft *refine_generic_split(
	__isl_take isl_ast_graft *graft, __isl_take isl_constraint_list *list,
	__isl_keep isl_set *domain, __isl_keep isl_ast_build *build)
{
	struct isl_ast_count_constraints_data data;
	isl_size depth;
	isl_constraint_list *lower;
	isl_constraint_list *upper;

	depth = isl_ast_build_get_depth(build);
	if (depth < 0)
		list = isl_constraint_list_free(list);
	if (!list)
		return isl_ast_graft_free(graft);

	data.pos = depth;

	/* Sort the constraints; the drop logic below relies on the
	 * resulting order being: independent constraints first, then
	 * lower bounds, then upper bounds (NOTE(review): assumed to be
	 * the order imposed by cmp_constraint, defined elsewhere).
	 */
	list = isl_constraint_list_sort(list, &cmp_constraint, &data.pos);
	if (!list)
		return isl_ast_graft_free(graft);

	data.n_indep = data.n_lower = data.n_upper = 0;
	if (isl_constraint_list_foreach(list, &count_constraints, &data) < 0) {
		isl_constraint_list_free(list);
		return isl_ast_graft_free(graft);
	}

	/* Split the sorted list: drop the independent prefix, then keep
	 * the first n_lower entries in "lower" and the remaining
	 * n_upper entries in "upper".
	 */
	lower = isl_constraint_list_drop(list, 0, data.n_indep);
	upper = isl_constraint_list_copy(lower);
	lower = isl_constraint_list_drop(lower, data.n_lower, data.n_upper);
	upper = isl_constraint_list_drop(upper, 0, data.n_lower);

	return refine_generic_bounds(graft, lower, upper, domain, build);
}
1308 1.1 mrg
1309 1.1 mrg /* Update "graft" based on "bounds" and "domain" for the generic,
1310 1.1 mrg * non-degenerate, case.
1311 1.1 mrg *
 * "bounds" represents the bounds that need to be encoded by
1313 1.1 mrg * the for loop (or a guard around the for loop).
1314 1.1 mrg * "domain" is the subset of "bounds" for which some code is executed.
1315 1.1 mrg * "build" is the build in which graft->node was created.
1316 1.1 mrg *
1317 1.1 mrg * We break up "bounds" into a list of constraints and continue with
1318 1.1 mrg * refine_generic_split.
1319 1.1 mrg */
1320 1.1 mrg static __isl_give isl_ast_graft *refine_generic(
1321 1.1 mrg __isl_take isl_ast_graft *graft,
1322 1.1 mrg __isl_keep isl_basic_set *bounds, __isl_keep isl_set *domain,
1323 1.1 mrg __isl_keep isl_ast_build *build)
1324 1.1 mrg {
1325 1.1 mrg isl_constraint_list *list;
1326 1.1 mrg
1327 1.1 mrg if (!build || !graft)
1328 1.1 mrg return isl_ast_graft_free(graft);
1329 1.1 mrg
1330 1.1 mrg list = isl_basic_set_get_constraint_list(bounds);
1331 1.1 mrg
1332 1.1 mrg graft = refine_generic_split(graft, list, domain, build);
1333 1.1 mrg
1334 1.1 mrg return graft;
1335 1.1 mrg }
1336 1.1 mrg
1337 1.1 mrg /* Create a for node for the current level.
1338 1.1 mrg *
1339 1.1 mrg * Mark the for node degenerate if "degenerate" is set.
1340 1.1 mrg */
1341 1.1 mrg static __isl_give isl_ast_node *create_for(__isl_keep isl_ast_build *build,
1342 1.1 mrg int degenerate)
1343 1.1 mrg {
1344 1.1 mrg isl_size depth;
1345 1.1 mrg isl_id *id;
1346 1.1 mrg isl_ast_node *node;
1347 1.1 mrg
1348 1.1 mrg depth = isl_ast_build_get_depth(build);
1349 1.1 mrg if (depth < 0)
1350 1.1 mrg return NULL;
1351 1.1 mrg
1352 1.1 mrg id = isl_ast_build_get_iterator_id(build, depth);
1353 1.1 mrg node = isl_ast_node_alloc_for(id);
1354 1.1 mrg if (degenerate)
1355 1.1 mrg node = isl_ast_node_for_mark_degenerate(node);
1356 1.1 mrg
1357 1.1 mrg return node;
1358 1.1 mrg }
1359 1.1 mrg
1360 1.1 mrg /* If the ast_build_exploit_nested_bounds option is set, then return
1361 1.1 mrg * the constraints enforced by all elements in "list".
1362 1.1 mrg * Otherwise, return the universe.
1363 1.1 mrg */
1364 1.1 mrg static __isl_give isl_basic_set *extract_shared_enforced(
1365 1.1 mrg __isl_keep isl_ast_graft_list *list, __isl_keep isl_ast_build *build)
1366 1.1 mrg {
1367 1.1 mrg isl_ctx *ctx;
1368 1.1 mrg isl_space *space;
1369 1.1 mrg
1370 1.1 mrg if (!list)
1371 1.1 mrg return NULL;
1372 1.1 mrg
1373 1.1 mrg ctx = isl_ast_graft_list_get_ctx(list);
1374 1.1 mrg if (isl_options_get_ast_build_exploit_nested_bounds(ctx))
1375 1.1 mrg return isl_ast_graft_list_extract_shared_enforced(list, build);
1376 1.1 mrg
1377 1.1 mrg space = isl_ast_build_get_space(build, 1);
1378 1.1 mrg return isl_basic_set_universe(space);
1379 1.1 mrg }
1380 1.1 mrg
1381 1.1 mrg /* Return the pending constraints of "build" that are not already taken
1382 1.1 mrg * care of (by a combination of "enforced" and the generated constraints
1383 1.1 mrg * of "build").
1384 1.1 mrg */
1385 1.1 mrg static __isl_give isl_set *extract_pending(__isl_keep isl_ast_build *build,
1386 1.1 mrg __isl_keep isl_basic_set *enforced)
1387 1.1 mrg {
1388 1.1 mrg isl_set *guard, *context;
1389 1.1 mrg
1390 1.1 mrg guard = isl_ast_build_get_pending(build);
1391 1.1 mrg context = isl_set_from_basic_set(isl_basic_set_copy(enforced));
1392 1.1 mrg context = isl_set_intersect(context,
1393 1.1 mrg isl_ast_build_get_generated(build));
1394 1.1 mrg return isl_set_gist(guard, context);
1395 1.1 mrg }
1396 1.1 mrg
1397 1.1 mrg /* Create an AST node for the current dimension based on
1398 1.1 mrg * the schedule domain "bounds" and return the node encapsulated
1399 1.1 mrg * in an isl_ast_graft.
1400 1.1 mrg *
1401 1.1 mrg * "executed" is the current inverse schedule, taking into account
1402 1.1 mrg * the bounds in "bounds"
1403 1.1 mrg * "domain" is the domain of "executed", with inner dimensions projected out.
1404 1.1 mrg * It may be a strict subset of "bounds" in case "bounds" was created
1405 1.1 mrg * based on the atomic option or based on separation with explicit bounds.
1406 1.1 mrg *
1407 1.1 mrg * "domain" may satisfy additional equalities that result
1408 1.1 mrg * from intersecting "executed" with "bounds" in add_node.
1409 1.1 mrg * It may also satisfy some global constraints that were dropped out because
1410 1.1 mrg * we performed separation with explicit bounds.
1411 1.1 mrg * The very first step is then to copy these constraints to "bounds".
1412 1.1 mrg *
1413 1.1 mrg * Since we may be calling before_each_for and after_each_for
1414 1.1 mrg * callbacks, we record the current inverse schedule in the build.
1415 1.1 mrg *
1416 1.1 mrg * We consider three builds,
1417 1.1 mrg * "build" is the one in which the current level is created,
1418 1.1 mrg * "body_build" is the build in which the next level is created,
1419 1.1 mrg * "sub_build" is essentially the same as "body_build", except that
1420 1.1 mrg * the depth has not been increased yet.
1421 1.1 mrg *
1422 1.1 mrg * "build" already contains information (in strides and offsets)
1423 1.1 mrg * about the strides at the current level, but this information is not
1424 1.1 mrg * reflected in the build->domain.
1425 1.1 mrg * We first add this information and the "bounds" to the sub_build->domain.
1426 1.1 mrg * isl_ast_build_set_loop_bounds adds the stride information and
1427 1.1 mrg * checks whether the current dimension attains
1428 1.1 mrg * only a single value and whether this single value can be represented using
1429 1.1 mrg * a single affine expression.
1430 1.1 mrg * In the first case, the current level is considered "degenerate".
1431 1.1 mrg * In the second, sub-case, the current level is considered "eliminated".
1432 1.1 mrg * Eliminated levels don't need to be reflected in the AST since we can
1433 1.1 mrg * simply plug in the affine expression. For degenerate, but non-eliminated,
 * levels, we do introduce a for node, but mark it as degenerate so that
1435 1.1 mrg * it can be printed as an assignment of the single value to the loop
1436 1.1 mrg * "iterator".
1437 1.1 mrg *
1438 1.1 mrg * If the current level is eliminated, we explicitly plug in the value
1439 1.1 mrg * for the current level found by isl_ast_build_set_loop_bounds in the
1440 1.1 mrg * inverse schedule. This ensures that if we are working on a slice
1441 1.1 mrg * of the domain based on information available in the inverse schedule
1442 1.1 mrg * and the build domain, that then this information is also reflected
1443 1.1 mrg * in the inverse schedule. This operation also eliminates the current
1444 1.1 mrg * dimension from the inverse schedule making sure no inner dimensions depend
1445 1.1 mrg * on the current dimension. Otherwise, we create a for node, marking
1446 1.1 mrg * it degenerate if appropriate. The initial for node is still incomplete
1447 1.1 mrg * and will be completed in either refine_degenerate or refine_generic.
1448 1.1 mrg *
1449 1.1 mrg * We then generate a sequence of grafts for the next level,
1450 1.1 mrg * create a surrounding graft for the current level and insert
1451 1.1 mrg * the for node we created (if the current level is not eliminated).
1452 1.1 mrg * Before creating a graft for the current level, we first extract
1453 1.1 mrg * hoistable constraints from the child guards and combine them
1454 1.1 mrg * with the pending constraints in the build. These constraints
1455 1.1 mrg * are used to simplify the child guards and then added to the guard
1456 1.1 mrg * of the current graft to ensure that they will be generated.
1457 1.1 mrg * If the hoisted guard is a disjunction, then we use it directly
1458 1.1 mrg * to gist the guards on the children before intersect it with the
1459 1.1 mrg * pending constraints. We do so because this disjunction is typically
1460 1.1 mrg * identical to the guards on the children such that these guards
1461 1.1 mrg * can be effectively removed completely. After the intersection,
1462 1.1 mrg * the gist operation would have a harder time figuring this out.
1463 1.1 mrg *
1464 1.1 mrg * Finally, we set the bounds of the for loop in either
1465 1.1 mrg * refine_degenerate or refine_generic.
1466 1.1 mrg * We do so in a context where the pending constraints of the build
1467 1.1 mrg * have been replaced by the guard of the current graft.
1468 1.1 mrg */
1469 1.1 mrg static __isl_give isl_ast_graft *create_node_scaled(
1470 1.1 mrg __isl_take isl_union_map *executed,
1471 1.1 mrg __isl_take isl_basic_set *bounds, __isl_take isl_set *domain,
1472 1.1 mrg __isl_take isl_ast_build *build)
1473 1.1 mrg {
1474 1.1 mrg isl_size depth;
1475 1.1 mrg int degenerate;
1476 1.1 mrg isl_bool eliminated;
1477 1.1 mrg isl_size n;
1478 1.1 mrg isl_basic_set *hull;
1479 1.1 mrg isl_basic_set *enforced;
1480 1.1 mrg isl_set *guard, *hoisted;
1481 1.1 mrg isl_ast_node *node = NULL;
1482 1.1 mrg isl_ast_graft *graft;
1483 1.1 mrg isl_ast_graft_list *children;
1484 1.1 mrg isl_ast_build *sub_build;
1485 1.1 mrg isl_ast_build *body_build;
1486 1.1 mrg
1487 1.1 mrg domain = isl_ast_build_eliminate_divs(build, domain);
1488 1.1 mrg domain = isl_set_detect_equalities(domain);
1489 1.1 mrg hull = isl_set_unshifted_simple_hull(isl_set_copy(domain));
1490 1.1 mrg bounds = isl_basic_set_intersect(bounds, hull);
1491 1.1 mrg build = isl_ast_build_set_executed(build, isl_union_map_copy(executed));
1492 1.1 mrg
1493 1.1 mrg depth = isl_ast_build_get_depth(build);
1494 1.1 mrg if (depth < 0)
1495 1.1 mrg build = isl_ast_build_free(build);
1496 1.1 mrg sub_build = isl_ast_build_copy(build);
1497 1.1 mrg bounds = isl_basic_set_remove_redundancies(bounds);
1498 1.1 mrg bounds = isl_ast_build_specialize_basic_set(sub_build, bounds);
1499 1.1 mrg sub_build = isl_ast_build_set_loop_bounds(sub_build,
1500 1.1 mrg isl_basic_set_copy(bounds));
1501 1.1 mrg degenerate = isl_ast_build_has_value(sub_build);
1502 1.1 mrg eliminated = isl_ast_build_has_affine_value(sub_build, depth);
1503 1.1 mrg if (degenerate < 0 || eliminated < 0)
1504 1.1 mrg executed = isl_union_map_free(executed);
1505 1.1 mrg if (!degenerate)
1506 1.1 mrg bounds = isl_ast_build_compute_gist_basic_set(build, bounds);
1507 1.1 mrg sub_build = isl_ast_build_set_pending_generated(sub_build,
1508 1.1 mrg isl_basic_set_copy(bounds));
1509 1.1 mrg if (eliminated)
1510 1.1 mrg executed = plug_in_values(executed, sub_build);
1511 1.1 mrg else
1512 1.1 mrg node = create_for(build, degenerate);
1513 1.1 mrg
1514 1.1 mrg body_build = isl_ast_build_copy(sub_build);
1515 1.1 mrg body_build = isl_ast_build_increase_depth(body_build);
1516 1.1 mrg if (!eliminated)
1517 1.1 mrg node = before_each_for(node, body_build);
1518 1.1 mrg children = generate_next_level(executed,
1519 1.1 mrg isl_ast_build_copy(body_build));
1520 1.1 mrg
1521 1.1 mrg enforced = extract_shared_enforced(children, build);
1522 1.1 mrg guard = extract_pending(sub_build, enforced);
1523 1.1 mrg hoisted = isl_ast_graft_list_extract_hoistable_guard(children, build);
1524 1.1 mrg n = isl_set_n_basic_set(hoisted);
1525 1.1 mrg if (n < 0)
1526 1.1 mrg children = isl_ast_graft_list_free(children);
1527 1.1 mrg if (n > 1)
1528 1.1 mrg children = isl_ast_graft_list_gist_guards(children,
1529 1.1 mrg isl_set_copy(hoisted));
1530 1.1 mrg guard = isl_set_intersect(guard, hoisted);
1531 1.1 mrg if (!eliminated)
1532 1.1 mrg guard = add_implied_guards(guard, degenerate, bounds, build);
1533 1.1 mrg
1534 1.1 mrg graft = isl_ast_graft_alloc_from_children(children,
1535 1.1 mrg isl_set_copy(guard), enforced, build, sub_build);
1536 1.1 mrg
1537 1.1 mrg if (!eliminated) {
1538 1.1 mrg isl_ast_build *for_build;
1539 1.1 mrg
1540 1.1 mrg graft = isl_ast_graft_insert_for(graft, node);
1541 1.1 mrg for_build = isl_ast_build_copy(build);
1542 1.1 mrg for_build = isl_ast_build_replace_pending_by_guard(for_build,
1543 1.1 mrg isl_set_copy(guard));
1544 1.1 mrg if (degenerate)
1545 1.1 mrg graft = refine_degenerate(graft, for_build, sub_build);
1546 1.1 mrg else
1547 1.1 mrg graft = refine_generic(graft, bounds,
1548 1.1 mrg domain, for_build);
1549 1.1 mrg isl_ast_build_free(for_build);
1550 1.1 mrg }
1551 1.1 mrg isl_set_free(guard);
1552 1.1 mrg if (!eliminated)
1553 1.1 mrg graft = after_each_for(graft, body_build);
1554 1.1 mrg
1555 1.1 mrg isl_ast_build_free(body_build);
1556 1.1 mrg isl_ast_build_free(sub_build);
1557 1.1 mrg isl_ast_build_free(build);
1558 1.1 mrg isl_basic_set_free(bounds);
1559 1.1 mrg isl_set_free(domain);
1560 1.1 mrg
1561 1.1 mrg return graft;
1562 1.1 mrg }
1563 1.1 mrg
1564 1.1 mrg /* Internal data structure for checking if all constraints involving
1565 1.1 mrg * the input dimension "depth" are such that the other coefficients
1566 1.1 mrg * are multiples of "m", reducing "m" if they are not.
1567 1.1 mrg * If "m" is reduced all the way down to "1", then the check has failed
1568 1.1 mrg * and we break out of the iteration.
1569 1.1 mrg */
1570 1.1 mrg struct isl_check_scaled_data {
1571 1.1 mrg int depth;
1572 1.1 mrg isl_val *m;
1573 1.1 mrg };
1574 1.1 mrg
1575 1.1 mrg /* If constraint "c" involves the input dimension data->depth,
1576 1.1 mrg * then make sure that all the other coefficients are multiples of data->m,
1577 1.1 mrg * reducing data->m if needed.
1578 1.1 mrg * Break out of the iteration if data->m has become equal to "1".
1579 1.1 mrg */
1580 1.1 mrg static isl_stat constraint_check_scaled(__isl_take isl_constraint *c,
1581 1.1 mrg void *user)
1582 1.1 mrg {
1583 1.1 mrg struct isl_check_scaled_data *data = user;
1584 1.1 mrg int i, j;
1585 1.1 mrg isl_size n;
1586 1.1 mrg enum isl_dim_type t[] = { isl_dim_param, isl_dim_in, isl_dim_out,
1587 1.1 mrg isl_dim_div };
1588 1.1 mrg
1589 1.1 mrg if (!isl_constraint_involves_dims(c, isl_dim_in, data->depth, 1)) {
1590 1.1 mrg isl_constraint_free(c);
1591 1.1 mrg return isl_stat_ok;
1592 1.1 mrg }
1593 1.1 mrg
1594 1.1 mrg for (i = 0; i < 4; ++i) {
1595 1.1 mrg n = isl_constraint_dim(c, t[i]);
1596 1.1 mrg if (n < 0)
1597 1.1 mrg break;
1598 1.1 mrg for (j = 0; j < n; ++j) {
1599 1.1 mrg isl_val *d;
1600 1.1 mrg
1601 1.1 mrg if (t[i] == isl_dim_in && j == data->depth)
1602 1.1 mrg continue;
1603 1.1 mrg if (!isl_constraint_involves_dims(c, t[i], j, 1))
1604 1.1 mrg continue;
1605 1.1 mrg d = isl_constraint_get_coefficient_val(c, t[i], j);
1606 1.1 mrg data->m = isl_val_gcd(data->m, d);
1607 1.1 mrg if (isl_val_is_one(data->m))
1608 1.1 mrg break;
1609 1.1 mrg }
1610 1.1 mrg if (j < n)
1611 1.1 mrg break;
1612 1.1 mrg }
1613 1.1 mrg
1614 1.1 mrg isl_constraint_free(c);
1615 1.1 mrg
1616 1.1 mrg return i < 4 ? isl_stat_error : isl_stat_ok;
1617 1.1 mrg }
1618 1.1 mrg
1619 1.1 mrg /* For each constraint of "bmap" that involves the input dimension data->depth,
1620 1.1 mrg * make sure that all the other coefficients are multiples of data->m,
1621 1.1 mrg * reducing data->m if needed.
1622 1.1 mrg * Break out of the iteration if data->m has become equal to "1".
1623 1.1 mrg */
1624 1.1 mrg static isl_stat basic_map_check_scaled(__isl_take isl_basic_map *bmap,
1625 1.1 mrg void *user)
1626 1.1 mrg {
1627 1.1 mrg isl_stat r;
1628 1.1 mrg
1629 1.1 mrg r = isl_basic_map_foreach_constraint(bmap,
1630 1.1 mrg &constraint_check_scaled, user);
1631 1.1 mrg isl_basic_map_free(bmap);
1632 1.1 mrg
1633 1.1 mrg return r;
1634 1.1 mrg }
1635 1.1 mrg
1636 1.1 mrg /* For each constraint of "map" that involves the input dimension data->depth,
1637 1.1 mrg * make sure that all the other coefficients are multiples of data->m,
1638 1.1 mrg * reducing data->m if needed.
1639 1.1 mrg * Break out of the iteration if data->m has become equal to "1".
1640 1.1 mrg */
1641 1.1 mrg static isl_stat map_check_scaled(__isl_take isl_map *map, void *user)
1642 1.1 mrg {
1643 1.1 mrg isl_stat r;
1644 1.1 mrg
1645 1.1 mrg r = isl_map_foreach_basic_map(map, &basic_map_check_scaled, user);
1646 1.1 mrg isl_map_free(map);
1647 1.1 mrg
1648 1.1 mrg return r;
1649 1.1 mrg }
1650 1.1 mrg
1651 1.1 mrg /* Create an AST node for the current dimension based on
1652 1.1 mrg * the schedule domain "bounds" and return the node encapsulated
1653 1.1 mrg * in an isl_ast_graft.
1654 1.1 mrg *
1655 1.1 mrg * "executed" is the current inverse schedule, taking into account
1656 1.1 mrg * the bounds in "bounds"
1657 1.1 mrg * "domain" is the domain of "executed", with inner dimensions projected out.
1658 1.1 mrg *
1659 1.1 mrg *
1660 1.1 mrg * Before moving on to the actual AST node construction in create_node_scaled,
1661 1.1 mrg * we first check if the current dimension is strided and if we can scale
1662 1.1 mrg * down this stride. Note that we only do this if the ast_build_scale_strides
1663 1.1 mrg * option is set.
1664 1.1 mrg *
1665 1.1 mrg * In particular, let the current dimension take on values
1666 1.1 mrg *
1667 1.1 mrg * f + s a
1668 1.1 mrg *
1669 1.1 mrg * with a an integer. We check if we can find an integer m that (obviously)
1670 1.1 mrg * divides both f and s.
1671 1.1 mrg *
1672 1.1 mrg * If so, we check if the current dimension only appears in constraints
1673 1.1 mrg * where the coefficients of the other variables are multiples of m.
1674 1.1 mrg * We perform this extra check to avoid the risk of introducing
1675 1.1 mrg * divisions by scaling down the current dimension.
1676 1.1 mrg *
1677 1.1 mrg * If so, we scale the current dimension down by a factor of m.
1678 1.1 mrg * That is, we plug in
1679 1.1 mrg *
1680 1.1 mrg * i = m i' (1)
1681 1.1 mrg *
1682 1.1 mrg * Note that in principle we could always scale down strided loops
1683 1.1 mrg * by plugging in
1684 1.1 mrg *
1685 1.1 mrg * i = f + s i'
1686 1.1 mrg *
1687 1.1 mrg * but this may result in i' taking on larger values than the original i,
1688 1.1 mrg * due to the shift by "f".
1689 1.1 mrg * By constrast, the scaling in (1) can only reduce the (absolute) value "i".
1690 1.1 mrg */
1691 1.1 mrg static __isl_give isl_ast_graft *create_node(__isl_take isl_union_map *executed,
1692 1.1 mrg __isl_take isl_basic_set *bounds, __isl_take isl_set *domain,
1693 1.1 mrg __isl_take isl_ast_build *build)
1694 1.1 mrg {
1695 1.1 mrg struct isl_check_scaled_data data;
1696 1.1 mrg isl_size depth;
1697 1.1 mrg isl_ctx *ctx;
1698 1.1 mrg isl_aff *offset;
1699 1.1 mrg isl_val *d;
1700 1.1 mrg
1701 1.1 mrg ctx = isl_ast_build_get_ctx(build);
1702 1.1 mrg if (!isl_options_get_ast_build_scale_strides(ctx))
1703 1.1 mrg return create_node_scaled(executed, bounds, domain, build);
1704 1.1 mrg
1705 1.1 mrg depth = isl_ast_build_get_depth(build);
1706 1.1 mrg if (depth < 0)
1707 1.1 mrg build = isl_ast_build_free(build);
1708 1.1 mrg data.depth = depth;
1709 1.1 mrg if (!isl_ast_build_has_stride(build, data.depth))
1710 1.1 mrg return create_node_scaled(executed, bounds, domain, build);
1711 1.1 mrg
1712 1.1 mrg offset = isl_ast_build_get_offset(build, data.depth);
1713 1.1 mrg data.m = isl_ast_build_get_stride(build, data.depth);
1714 1.1 mrg if (!data.m)
1715 1.1 mrg offset = isl_aff_free(offset);
1716 1.1 mrg offset = isl_aff_scale_down_val(offset, isl_val_copy(data.m));
1717 1.1 mrg d = isl_aff_get_denominator_val(offset);
1718 1.1 mrg if (!d)
1719 1.1 mrg executed = isl_union_map_free(executed);
1720 1.1 mrg
1721 1.1 mrg if (executed && isl_val_is_divisible_by(data.m, d))
1722 1.1 mrg data.m = isl_val_div(data.m, d);
1723 1.1 mrg else {
1724 1.1 mrg data.m = isl_val_set_si(data.m, 1);
1725 1.1 mrg isl_val_free(d);
1726 1.1 mrg }
1727 1.1 mrg
1728 1.1 mrg if (!isl_val_is_one(data.m)) {
1729 1.1 mrg if (isl_union_map_foreach_map(executed, &map_check_scaled,
1730 1.1 mrg &data) < 0 &&
1731 1.1 mrg !isl_val_is_one(data.m))
1732 1.1 mrg executed = isl_union_map_free(executed);
1733 1.1 mrg }
1734 1.1 mrg
1735 1.1 mrg if (!isl_val_is_one(data.m)) {
1736 1.1 mrg isl_space *space;
1737 1.1 mrg isl_multi_aff *ma;
1738 1.1 mrg isl_aff *aff;
1739 1.1 mrg isl_map *map;
1740 1.1 mrg isl_union_map *umap;
1741 1.1 mrg
1742 1.1 mrg space = isl_ast_build_get_space(build, 1);
1743 1.1 mrg space = isl_space_map_from_set(space);
1744 1.1 mrg ma = isl_multi_aff_identity(space);
1745 1.1 mrg aff = isl_multi_aff_get_aff(ma, data.depth);
1746 1.1 mrg aff = isl_aff_scale_val(aff, isl_val_copy(data.m));
1747 1.1 mrg ma = isl_multi_aff_set_aff(ma, data.depth, aff);
1748 1.1 mrg
1749 1.1 mrg bounds = isl_basic_set_preimage_multi_aff(bounds,
1750 1.1 mrg isl_multi_aff_copy(ma));
1751 1.1 mrg domain = isl_set_preimage_multi_aff(domain,
1752 1.1 mrg isl_multi_aff_copy(ma));
1753 1.1 mrg map = isl_map_reverse(isl_map_from_multi_aff(ma));
1754 1.1 mrg umap = isl_union_map_from_map(map);
1755 1.1 mrg executed = isl_union_map_apply_domain(executed,
1756 1.1 mrg isl_union_map_copy(umap));
1757 1.1 mrg build = isl_ast_build_scale_down(build, isl_val_copy(data.m),
1758 1.1 mrg umap);
1759 1.1 mrg }
1760 1.1 mrg isl_aff_free(offset);
1761 1.1 mrg isl_val_free(data.m);
1762 1.1 mrg
1763 1.1 mrg return create_node_scaled(executed, bounds, domain, build);
1764 1.1 mrg }
1765 1.1 mrg
1766 1.1 mrg /* Add the basic set to the list that "user" points to.
1767 1.1 mrg */
1768 1.1 mrg static isl_stat collect_basic_set(__isl_take isl_basic_set *bset, void *user)
1769 1.1 mrg {
1770 1.1 mrg isl_basic_set_list **list = user;
1771 1.1 mrg
1772 1.1 mrg *list = isl_basic_set_list_add(*list, bset);
1773 1.1 mrg
1774 1.1 mrg return isl_stat_ok;
1775 1.1 mrg }
1776 1.1 mrg
1777 1.1 mrg /* Extract the basic sets of "set" and collect them in an isl_basic_set_list.
1778 1.1 mrg */
1779 1.1 mrg static __isl_give isl_basic_set_list *isl_basic_set_list_from_set(
1780 1.1 mrg __isl_take isl_set *set)
1781 1.1 mrg {
1782 1.1 mrg isl_size n;
1783 1.1 mrg isl_ctx *ctx;
1784 1.1 mrg isl_basic_set_list *list;
1785 1.1 mrg
1786 1.1 mrg n = isl_set_n_basic_set(set);
1787 1.1 mrg if (n < 0)
1788 1.1 mrg set = isl_set_free(set);
1789 1.1 mrg if (!set)
1790 1.1 mrg return NULL;
1791 1.1 mrg
1792 1.1 mrg ctx = isl_set_get_ctx(set);
1793 1.1 mrg
1794 1.1 mrg list = isl_basic_set_list_alloc(ctx, n);
1795 1.1 mrg if (isl_set_foreach_basic_set(set, &collect_basic_set, &list) < 0)
1796 1.1 mrg list = isl_basic_set_list_free(list);
1797 1.1 mrg
1798 1.1 mrg isl_set_free(set);
1799 1.1 mrg return list;
1800 1.1 mrg }
1801 1.1 mrg
1802 1.1 mrg /* Generate code for the schedule domain "bounds"
1803 1.1 mrg * and add the result to "list".
1804 1.1 mrg *
1805 1.1 mrg * We mainly detect strides here and check if the bounds do not
1806 1.1 mrg * conflict with the current build domain
1807 1.1 mrg * and then pass over control to create_node.
1808 1.1 mrg *
1809 1.1 mrg * "bounds" reflects the bounds on the current dimension and possibly
1810 1.1 mrg * some extra conditions on outer dimensions.
1811 1.1 mrg * It does not, however, include any divs involving the current dimension,
1812 1.1 mrg * so it does not capture any stride constraints.
1813 1.1 mrg * We therefore need to compute that part of the schedule domain that
1814 1.1 mrg * intersects with "bounds" and derive the strides from the result.
1815 1.1 mrg */
1816 1.1 mrg static __isl_give isl_ast_graft_list *add_node(
1817 1.1 mrg __isl_take isl_ast_graft_list *list, __isl_take isl_union_map *executed,
1818 1.1 mrg __isl_take isl_basic_set *bounds, __isl_take isl_ast_build *build)
1819 1.1 mrg {
1820 1.1 mrg isl_ast_graft *graft;
1821 1.1 mrg isl_set *domain = NULL;
1822 1.1 mrg isl_union_set *uset;
1823 1.1 mrg int empty, disjoint;
1824 1.1 mrg
1825 1.1 mrg uset = isl_union_set_from_basic_set(isl_basic_set_copy(bounds));
1826 1.1 mrg executed = isl_union_map_intersect_domain(executed, uset);
1827 1.1 mrg empty = isl_union_map_is_empty(executed);
1828 1.1 mrg if (empty < 0)
1829 1.1 mrg goto error;
1830 1.1 mrg if (empty)
1831 1.1 mrg goto done;
1832 1.1 mrg
1833 1.1 mrg uset = isl_union_map_domain(isl_union_map_copy(executed));
1834 1.1 mrg domain = isl_set_from_union_set(uset);
1835 1.1 mrg domain = isl_ast_build_specialize(build, domain);
1836 1.1 mrg
1837 1.1 mrg domain = isl_set_compute_divs(domain);
1838 1.1 mrg domain = isl_ast_build_eliminate_inner(build, domain);
1839 1.1 mrg disjoint = isl_set_is_disjoint(domain, build->domain);
1840 1.1 mrg if (disjoint < 0)
1841 1.1 mrg goto error;
1842 1.1 mrg if (disjoint)
1843 1.1 mrg goto done;
1844 1.1 mrg
1845 1.1 mrg build = isl_ast_build_detect_strides(build, isl_set_copy(domain));
1846 1.1 mrg
1847 1.1 mrg graft = create_node(executed, bounds, domain,
1848 1.1 mrg isl_ast_build_copy(build));
1849 1.1 mrg list = isl_ast_graft_list_add(list, graft);
1850 1.1 mrg isl_ast_build_free(build);
1851 1.1 mrg return list;
1852 1.1 mrg error:
1853 1.1 mrg list = isl_ast_graft_list_free(list);
1854 1.1 mrg done:
1855 1.1 mrg isl_set_free(domain);
1856 1.1 mrg isl_basic_set_free(bounds);
1857 1.1 mrg isl_union_map_free(executed);
1858 1.1 mrg isl_ast_build_free(build);
1859 1.1 mrg return list;
1860 1.1 mrg }
1861 1.1 mrg
1862 1.1 mrg /* Does any element of i follow or coincide with any element of j
1863 1.1 mrg * at the current depth for equal values of the outer dimensions?
1864 1.1 mrg */
1865 1.1 mrg static isl_bool domain_follows_at_depth(__isl_keep isl_basic_set *i,
1866 1.1 mrg __isl_keep isl_basic_set *j, void *user)
1867 1.1 mrg {
1868 1.1 mrg int depth = *(int *) user;
1869 1.1 mrg isl_basic_map *test;
1870 1.1 mrg isl_bool empty;
1871 1.1 mrg int l;
1872 1.1 mrg
1873 1.1 mrg test = isl_basic_map_from_domain_and_range(isl_basic_set_copy(i),
1874 1.1 mrg isl_basic_set_copy(j));
1875 1.1 mrg for (l = 0; l < depth; ++l)
1876 1.1 mrg test = isl_basic_map_equate(test, isl_dim_in, l,
1877 1.1 mrg isl_dim_out, l);
1878 1.1 mrg test = isl_basic_map_order_ge(test, isl_dim_in, depth,
1879 1.1 mrg isl_dim_out, depth);
1880 1.1 mrg empty = isl_basic_map_is_empty(test);
1881 1.1 mrg isl_basic_map_free(test);
1882 1.1 mrg
1883 1.1 mrg return isl_bool_not(empty);
1884 1.1 mrg }
1885 1.1 mrg
1886 1.1 mrg /* Split up each element of "list" into a part that is related to "bset"
1887 1.1 mrg * according to "gt" and a part that is not.
1888 1.1 mrg * Return a list that consist of "bset" and all the pieces.
1889 1.1 mrg */
1890 1.1 mrg static __isl_give isl_basic_set_list *add_split_on(
1891 1.1 mrg __isl_take isl_basic_set_list *list, __isl_take isl_basic_set *bset,
1892 1.1 mrg __isl_keep isl_basic_map *gt)
1893 1.1 mrg {
1894 1.1 mrg int i;
1895 1.1 mrg isl_size n;
1896 1.1 mrg isl_basic_set_list *res;
1897 1.1 mrg
1898 1.1 mrg n = isl_basic_set_list_n_basic_set(list);
1899 1.1 mrg if (n < 0)
1900 1.1 mrg bset = isl_basic_set_free(bset);
1901 1.1 mrg
1902 1.1 mrg gt = isl_basic_map_copy(gt);
1903 1.1 mrg gt = isl_basic_map_intersect_domain(gt, isl_basic_set_copy(bset));
1904 1.1 mrg res = isl_basic_set_list_from_basic_set(bset);
1905 1.1 mrg for (i = 0; res && i < n; ++i) {
1906 1.1 mrg isl_basic_set *bset;
1907 1.1 mrg isl_set *set1, *set2;
1908 1.1 mrg isl_basic_map *bmap;
1909 1.1 mrg int empty;
1910 1.1 mrg
1911 1.1 mrg bset = isl_basic_set_list_get_basic_set(list, i);
1912 1.1 mrg bmap = isl_basic_map_copy(gt);
1913 1.1 mrg bmap = isl_basic_map_intersect_range(bmap, bset);
1914 1.1 mrg bset = isl_basic_map_range(bmap);
1915 1.1 mrg empty = isl_basic_set_is_empty(bset);
1916 1.1 mrg if (empty < 0)
1917 1.1 mrg res = isl_basic_set_list_free(res);
1918 1.1 mrg if (empty) {
1919 1.1 mrg isl_basic_set_free(bset);
1920 1.1 mrg bset = isl_basic_set_list_get_basic_set(list, i);
1921 1.1 mrg res = isl_basic_set_list_add(res, bset);
1922 1.1 mrg continue;
1923 1.1 mrg }
1924 1.1 mrg
1925 1.1 mrg res = isl_basic_set_list_add(res, isl_basic_set_copy(bset));
1926 1.1 mrg set1 = isl_set_from_basic_set(bset);
1927 1.1 mrg bset = isl_basic_set_list_get_basic_set(list, i);
1928 1.1 mrg set2 = isl_set_from_basic_set(bset);
1929 1.1 mrg set1 = isl_set_subtract(set2, set1);
1930 1.1 mrg set1 = isl_set_make_disjoint(set1);
1931 1.1 mrg
1932 1.1 mrg res = isl_basic_set_list_concat(res,
1933 1.1 mrg isl_basic_set_list_from_set(set1));
1934 1.1 mrg }
1935 1.1 mrg isl_basic_map_free(gt);
1936 1.1 mrg isl_basic_set_list_free(list);
1937 1.1 mrg return res;
1938 1.1 mrg }
1939 1.1 mrg
1940 1.1 mrg static __isl_give isl_ast_graft_list *generate_sorted_domains(
1941 1.1 mrg __isl_keep isl_basic_set_list *domain_list,
1942 1.1 mrg __isl_keep isl_union_map *executed,
1943 1.1 mrg __isl_keep isl_ast_build *build);
1944 1.1 mrg
1945 1.1 mrg /* Internal data structure for add_nodes.
1946 1.1 mrg *
1947 1.1 mrg * "executed" and "build" are extra arguments to be passed to add_node.
1948 1.1 mrg * "list" collects the results.
1949 1.1 mrg */
1950 1.1 mrg struct isl_add_nodes_data {
1951 1.1 mrg isl_union_map *executed;
1952 1.1 mrg isl_ast_build *build;
1953 1.1 mrg
1954 1.1 mrg isl_ast_graft_list *list;
1955 1.1 mrg };
1956 1.1 mrg
1957 1.1 mrg /* Generate code for the schedule domains in "scc"
1958 1.1 mrg * and add the results to "list".
1959 1.1 mrg *
1960 1.1 mrg * The domains in "scc" form a strongly connected component in the ordering.
1961 1.1 mrg * If the number of domains in "scc" is larger than 1, then this means
1962 1.1 mrg * that we cannot determine a valid ordering for the domains in the component.
1963 1.1 mrg * This should be fairly rare because the individual domains
1964 1.1 mrg * have been made disjoint first.
1965 1.1 mrg * The problem is that the domains may be integrally disjoint but not
1966 1.1 mrg * rationally disjoint. For example, we may have domains
1967 1.1 mrg *
1968 1.1 mrg * { [i,i] : 0 <= i <= 1 } and { [i,1-i] : 0 <= i <= 1 }
1969 1.1 mrg *
1970 1.1 mrg * These two domains have an empty intersection, but their rational
1971 1.1 mrg * relaxations do intersect. It is impossible to order these domains
1972 1.1 mrg * in the second dimension because the first should be ordered before
1973 1.1 mrg * the second for outer dimension equal to 0, while it should be ordered
1974 1.1 mrg * after for outer dimension equal to 1.
1975 1.1 mrg *
1976 1.1 mrg * This may happen in particular in case of unrolling since the domain
1977 1.1 mrg * of each slice is replaced by its simple hull.
1978 1.1 mrg *
1979 1.1 mrg * For each basic set i in "scc" and for each of the following basic sets j,
1980 1.1 mrg * we split off that part of the basic set i that shares the outer dimensions
1981 1.1 mrg * with j and lies before j in the current dimension.
1982 1.1 mrg * We collect all the pieces in a new list that replaces "scc".
1983 1.1 mrg *
1984 1.1 mrg * While the elements in "scc" should be disjoint, we double-check
1985 1.1 mrg * this property to avoid running into an infinite recursion in case
1986 1.1 mrg * they intersect due to some internal error.
1987 1.1 mrg */
1988 1.1 mrg static isl_stat add_nodes(__isl_take isl_basic_set_list *scc, void *user)
1989 1.1 mrg {
1990 1.1 mrg struct isl_add_nodes_data *data = user;
1991 1.1 mrg int i;
1992 1.1 mrg isl_size depth;
1993 1.1 mrg isl_size n;
1994 1.1 mrg isl_basic_set *bset, *first;
1995 1.1 mrg isl_basic_set_list *list;
1996 1.1 mrg isl_space *space;
1997 1.1 mrg isl_basic_map *gt;
1998 1.1 mrg
1999 1.1 mrg n = isl_basic_set_list_n_basic_set(scc);
2000 1.1 mrg if (n < 0)
2001 1.1 mrg goto error;
2002 1.1 mrg bset = isl_basic_set_list_get_basic_set(scc, 0);
2003 1.1 mrg if (n == 1) {
2004 1.1 mrg isl_basic_set_list_free(scc);
2005 1.1 mrg data->list = add_node(data->list,
2006 1.1 mrg isl_union_map_copy(data->executed), bset,
2007 1.1 mrg isl_ast_build_copy(data->build));
2008 1.1 mrg return data->list ? isl_stat_ok : isl_stat_error;
2009 1.1 mrg }
2010 1.1 mrg
2011 1.1 mrg depth = isl_ast_build_get_depth(data->build);
2012 1.1 mrg if (depth < 0)
2013 1.1 mrg bset = isl_basic_set_free(bset);
2014 1.1 mrg space = isl_basic_set_get_space(bset);
2015 1.1 mrg space = isl_space_map_from_set(space);
2016 1.1 mrg gt = isl_basic_map_universe(space);
2017 1.1 mrg for (i = 0; i < depth; ++i)
2018 1.1 mrg gt = isl_basic_map_equate(gt, isl_dim_in, i, isl_dim_out, i);
2019 1.1 mrg gt = isl_basic_map_order_gt(gt, isl_dim_in, depth, isl_dim_out, depth);
2020 1.1 mrg
2021 1.1 mrg first = isl_basic_set_copy(bset);
2022 1.1 mrg list = isl_basic_set_list_from_basic_set(bset);
2023 1.1 mrg for (i = 1; i < n; ++i) {
2024 1.1 mrg int disjoint;
2025 1.1 mrg
2026 1.1 mrg bset = isl_basic_set_list_get_basic_set(scc, i);
2027 1.1 mrg
2028 1.1 mrg disjoint = isl_basic_set_is_disjoint(bset, first);
2029 1.1 mrg if (disjoint < 0)
2030 1.1 mrg list = isl_basic_set_list_free(list);
2031 1.1 mrg else if (!disjoint)
2032 1.1 mrg isl_die(isl_basic_set_list_get_ctx(scc),
2033 1.1 mrg isl_error_internal,
2034 1.1 mrg "basic sets in scc are assumed to be disjoint",
2035 1.1 mrg list = isl_basic_set_list_free(list));
2036 1.1 mrg
2037 1.1 mrg list = add_split_on(list, bset, gt);
2038 1.1 mrg }
2039 1.1 mrg isl_basic_set_free(first);
2040 1.1 mrg isl_basic_map_free(gt);
2041 1.1 mrg isl_basic_set_list_free(scc);
2042 1.1 mrg scc = list;
2043 1.1 mrg data->list = isl_ast_graft_list_concat(data->list,
2044 1.1 mrg generate_sorted_domains(scc, data->executed, data->build));
2045 1.1 mrg isl_basic_set_list_free(scc);
2046 1.1 mrg
2047 1.1 mrg return data->list ? isl_stat_ok : isl_stat_error;
2048 1.1 mrg error:
2049 1.1 mrg isl_basic_set_list_free(scc);
2050 1.1 mrg return isl_stat_error;
2051 1.1 mrg }
2052 1.1 mrg
2053 1.1 mrg /* Sort the domains in "domain_list" according to the execution order
2054 1.1 mrg * at the current depth (for equal values of the outer dimensions),
2055 1.1 mrg * generate code for each of them, collecting the results in a list.
2056 1.1 mrg * If no code is generated (because the intersection of the inverse schedule
2057 1.1 mrg * with the domains turns out to be empty), then an empty list is returned.
2058 1.1 mrg *
2059 1.1 mrg * The caller is responsible for ensuring that the basic sets in "domain_list"
2060 1.1 mrg * are pair-wise disjoint. It can, however, in principle happen that
2061 1.1 mrg * two basic sets should be ordered one way for one value of the outer
2062 1.1 mrg * dimensions and the other way for some other value of the outer dimensions.
2063 1.1 mrg * We therefore play safe and look for strongly connected components.
2064 1.1 mrg * The function add_nodes takes care of handling non-trivial components.
2065 1.1 mrg */
2066 1.1 mrg static __isl_give isl_ast_graft_list *generate_sorted_domains(
2067 1.1 mrg __isl_keep isl_basic_set_list *domain_list,
2068 1.1 mrg __isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build)
2069 1.1 mrg {
2070 1.1 mrg isl_ctx *ctx;
2071 1.1 mrg struct isl_add_nodes_data data;
2072 1.1 mrg isl_size depth;
2073 1.1 mrg isl_size n;
2074 1.1 mrg
2075 1.1 mrg n = isl_basic_set_list_n_basic_set(domain_list);
2076 1.1 mrg if (n < 0)
2077 1.1 mrg return NULL;
2078 1.1 mrg
2079 1.1 mrg ctx = isl_basic_set_list_get_ctx(domain_list);
2080 1.1 mrg data.list = isl_ast_graft_list_alloc(ctx, n);
2081 1.1 mrg if (n == 0)
2082 1.1 mrg return data.list;
2083 1.1 mrg if (n == 1)
2084 1.1 mrg return add_node(data.list, isl_union_map_copy(executed),
2085 1.1 mrg isl_basic_set_list_get_basic_set(domain_list, 0),
2086 1.1 mrg isl_ast_build_copy(build));
2087 1.1 mrg
2088 1.1 mrg depth = isl_ast_build_get_depth(build);
2089 1.1 mrg data.executed = executed;
2090 1.1 mrg data.build = build;
2091 1.1 mrg if (depth < 0 || isl_basic_set_list_foreach_scc(domain_list,
2092 1.1 mrg &domain_follows_at_depth, &depth,
2093 1.1 mrg &add_nodes, &data) < 0)
2094 1.1 mrg data.list = isl_ast_graft_list_free(data.list);
2095 1.1 mrg
2096 1.1 mrg return data.list;
2097 1.1 mrg }
2098 1.1 mrg
2099 1.1 mrg /* Do i and j share any values for the outer dimensions?
2100 1.1 mrg */
2101 1.1 mrg static isl_bool shared_outer(__isl_keep isl_basic_set *i,
2102 1.1 mrg __isl_keep isl_basic_set *j, void *user)
2103 1.1 mrg {
2104 1.1 mrg int depth = *(int *) user;
2105 1.1 mrg isl_basic_map *test;
2106 1.1 mrg isl_bool empty;
2107 1.1 mrg int l;
2108 1.1 mrg
2109 1.1 mrg test = isl_basic_map_from_domain_and_range(isl_basic_set_copy(i),
2110 1.1 mrg isl_basic_set_copy(j));
2111 1.1 mrg for (l = 0; l < depth; ++l)
2112 1.1 mrg test = isl_basic_map_equate(test, isl_dim_in, l,
2113 1.1 mrg isl_dim_out, l);
2114 1.1 mrg empty = isl_basic_map_is_empty(test);
2115 1.1 mrg isl_basic_map_free(test);
2116 1.1 mrg
2117 1.1 mrg return isl_bool_not(empty);
2118 1.1 mrg }
2119 1.1 mrg
2120 1.1 mrg /* Internal data structure for generate_sorted_domains_wrap.
2121 1.1 mrg *
2122 1.1 mrg * "n" is the total number of basic sets
2123 1.1 mrg * "executed" and "build" are extra arguments to be passed
2124 1.1 mrg * to generate_sorted_domains.
2125 1.1 mrg *
2126 1.1 mrg * "single" is set to 1 by generate_sorted_domains_wrap if there
2127 1.1 mrg * is only a single component.
2128 1.1 mrg * "list" collects the results.
2129 1.1 mrg */
2130 1.1 mrg struct isl_ast_generate_parallel_domains_data {
2131 1.1 mrg isl_size n;
2132 1.1 mrg isl_union_map *executed;
2133 1.1 mrg isl_ast_build *build;
2134 1.1 mrg
2135 1.1 mrg int single;
2136 1.1 mrg isl_ast_graft_list *list;
2137 1.1 mrg };
2138 1.1 mrg
2139 1.1 mrg /* Call generate_sorted_domains on "scc", fuse the result into a list
2140 1.1 mrg * with either zero or one graft and collect the these single element
2141 1.1 mrg * lists into data->list.
2142 1.1 mrg *
2143 1.1 mrg * If there is only one component, i.e., if the number of basic sets
2144 1.1 mrg * in the current component is equal to the total number of basic sets,
2145 1.1 mrg * then data->single is set to 1 and the result of generate_sorted_domains
2146 1.1 mrg * is not fused.
2147 1.1 mrg */
2148 1.1 mrg static isl_stat generate_sorted_domains_wrap(__isl_take isl_basic_set_list *scc,
2149 1.1 mrg void *user)
2150 1.1 mrg {
2151 1.1 mrg struct isl_ast_generate_parallel_domains_data *data = user;
2152 1.1 mrg isl_ast_graft_list *list;
2153 1.1 mrg isl_size n;
2154 1.1 mrg
2155 1.1 mrg n = isl_basic_set_list_n_basic_set(scc);
2156 1.1 mrg if (n < 0)
2157 1.1 mrg scc = isl_basic_set_list_free(scc);
2158 1.1 mrg list = generate_sorted_domains(scc, data->executed, data->build);
2159 1.1 mrg data->single = n == data->n;
2160 1.1 mrg if (!data->single)
2161 1.1 mrg list = isl_ast_graft_list_fuse(list, data->build);
2162 1.1 mrg if (!data->list)
2163 1.1 mrg data->list = list;
2164 1.1 mrg else
2165 1.1 mrg data->list = isl_ast_graft_list_concat(data->list, list);
2166 1.1 mrg
2167 1.1 mrg isl_basic_set_list_free(scc);
2168 1.1 mrg if (!data->list)
2169 1.1 mrg return isl_stat_error;
2170 1.1 mrg
2171 1.1 mrg return isl_stat_ok;
2172 1.1 mrg }
2173 1.1 mrg
2174 1.1 mrg /* Look for any (weakly connected) components in the "domain_list"
2175 1.1 mrg * of domains that share some values of the outer dimensions.
2176 1.1 mrg * That is, domains in different components do not share any values
2177 1.1 mrg * of the outer dimensions. This means that these components
2178 1.1 mrg * can be freely reordered.
2179 1.1 mrg * Within each of the components, we sort the domains according
2180 1.1 mrg * to the execution order at the current depth.
2181 1.1 mrg *
2182 1.1 mrg * If there is more than one component, then generate_sorted_domains_wrap
2183 1.1 mrg * fuses the result of each call to generate_sorted_domains
2184 1.1 mrg * into a list with either zero or one graft and collects these (at most)
2185 1.1 mrg * single element lists into a bigger list. This means that the elements of the
2186 1.1 mrg * final list can be freely reordered. In particular, we sort them
2187 1.1 mrg * according to an arbitrary but fixed ordering to ease merging of
2188 1.1 mrg * graft lists from different components.
2189 1.1 mrg */
2190 1.1 mrg static __isl_give isl_ast_graft_list *generate_parallel_domains(
2191 1.1 mrg __isl_keep isl_basic_set_list *domain_list,
2192 1.1 mrg __isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build)
2193 1.1 mrg {
2194 1.1 mrg isl_size depth;
2195 1.1 mrg struct isl_ast_generate_parallel_domains_data data;
2196 1.1 mrg
2197 1.1 mrg data.n = isl_basic_set_list_n_basic_set(domain_list);
2198 1.1 mrg if (data.n < 0)
2199 1.1 mrg return NULL;
2200 1.1 mrg
2201 1.1 mrg if (data.n <= 1)
2202 1.1 mrg return generate_sorted_domains(domain_list, executed, build);
2203 1.1 mrg
2204 1.1 mrg depth = isl_ast_build_get_depth(build);
2205 1.1 mrg if (depth < 0)
2206 1.1 mrg return NULL;
2207 1.1 mrg data.list = NULL;
2208 1.1 mrg data.executed = executed;
2209 1.1 mrg data.build = build;
2210 1.1 mrg data.single = 0;
2211 1.1 mrg if (isl_basic_set_list_foreach_scc(domain_list, &shared_outer, &depth,
2212 1.1 mrg &generate_sorted_domains_wrap,
2213 1.1 mrg &data) < 0)
2214 1.1 mrg data.list = isl_ast_graft_list_free(data.list);
2215 1.1 mrg
2216 1.1 mrg if (!data.single)
2217 1.1 mrg data.list = isl_ast_graft_list_sort_guard(data.list);
2218 1.1 mrg
2219 1.1 mrg return data.list;
2220 1.1 mrg }
2221 1.1 mrg
2222 1.1 mrg /* Internal data for separate_domain.
2223 1.1 mrg *
2224 1.1 mrg * "explicit" is set if we only want to use explicit bounds.
2225 1.1 mrg *
2226 1.1 mrg * "domain" collects the separated domains.
2227 1.1 mrg */
2228 1.1 mrg struct isl_separate_domain_data {
2229 1.1 mrg isl_ast_build *build;
2230 1.1 mrg int explicit;
2231 1.1 mrg isl_set *domain;
2232 1.1 mrg };
2233 1.1 mrg
2234 1.1 mrg /* Extract implicit bounds on the current dimension for the executed "map".
2235 1.1 mrg *
2236 1.1 mrg * The domain of "map" may involve inner dimensions, so we
2237 1.1 mrg * need to eliminate them.
2238 1.1 mrg */
2239 1.1 mrg static __isl_give isl_set *implicit_bounds(__isl_take isl_map *map,
2240 1.1 mrg __isl_keep isl_ast_build *build)
2241 1.1 mrg {
2242 1.1 mrg isl_set *domain;
2243 1.1 mrg
2244 1.1 mrg domain = isl_map_domain(map);
2245 1.1 mrg domain = isl_ast_build_eliminate(build, domain);
2246 1.1 mrg
2247 1.1 mrg return domain;
2248 1.1 mrg }
2249 1.1 mrg
2250 1.1 mrg /* Extract explicit bounds on the current dimension for the executed "map".
2251 1.1 mrg *
2252 1.1 mrg * Rather than eliminating the inner dimensions as in implicit_bounds,
2253 1.1 mrg * we simply drop any constraints involving those inner dimensions.
2254 1.1 mrg * The idea is that most bounds that are implied by constraints on the
2255 1.1 mrg * inner dimensions will be enforced by for loops and not by explicit guards.
2256 1.1 mrg * There is then no need to separate along those bounds.
2257 1.1 mrg */
2258 1.1 mrg static __isl_give isl_set *explicit_bounds(__isl_take isl_map *map,
2259 1.1 mrg __isl_keep isl_ast_build *build)
2260 1.1 mrg {
2261 1.1 mrg isl_set *domain;
2262 1.1 mrg isl_size depth;
2263 1.1 mrg isl_size dim;
2264 1.1 mrg
2265 1.1 mrg depth = isl_ast_build_get_depth(build);
2266 1.1 mrg dim = isl_map_dim(map, isl_dim_out);
2267 1.1 mrg if (depth < 0 || dim < 0)
2268 1.1 mrg return isl_map_domain(isl_map_free(map));
2269 1.1 mrg map = isl_map_drop_constraints_involving_dims(map, isl_dim_out, 0, dim);
2270 1.1 mrg
2271 1.1 mrg domain = isl_map_domain(map);
2272 1.1 mrg dim = isl_set_dim(domain, isl_dim_set);
2273 1.1 mrg domain = isl_set_detect_equalities(domain);
2274 1.1 mrg domain = isl_set_drop_constraints_involving_dims(domain,
2275 1.1 mrg isl_dim_set, depth + 1, dim - (depth + 1));
2276 1.1 mrg domain = isl_set_remove_divs_involving_dims(domain,
2277 1.1 mrg isl_dim_set, depth, 1);
2278 1.1 mrg domain = isl_set_remove_unknown_divs(domain);
2279 1.1 mrg
2280 1.1 mrg return domain;
2281 1.1 mrg }
2282 1.1 mrg
2283 1.1 mrg /* Split data->domain into pieces that intersect with the range of "map"
2284 1.1 mrg * and pieces that do not intersect with the range of "map"
2285 1.1 mrg * and then add that part of the range of "map" that does not intersect
2286 1.1 mrg * with data->domain.
2287 1.1 mrg */
2288 1.1 mrg static isl_stat separate_domain(__isl_take isl_map *map, void *user)
2289 1.1 mrg {
2290 1.1 mrg struct isl_separate_domain_data *data = user;
2291 1.1 mrg isl_set *domain;
2292 1.1 mrg isl_set *d1, *d2;
2293 1.1 mrg
2294 1.1 mrg if (data->explicit)
2295 1.1 mrg domain = explicit_bounds(map, data->build);
2296 1.1 mrg else
2297 1.1 mrg domain = implicit_bounds(map, data->build);
2298 1.1 mrg
2299 1.1 mrg domain = isl_set_coalesce(domain);
2300 1.1 mrg domain = isl_set_make_disjoint(domain);
2301 1.1 mrg d1 = isl_set_subtract(isl_set_copy(domain), isl_set_copy(data->domain));
2302 1.1 mrg d2 = isl_set_subtract(isl_set_copy(data->domain), isl_set_copy(domain));
2303 1.1 mrg data->domain = isl_set_intersect(data->domain, domain);
2304 1.1 mrg data->domain = isl_set_union(data->domain, d1);
2305 1.1 mrg data->domain = isl_set_union(data->domain, d2);
2306 1.1 mrg
2307 1.1 mrg return isl_stat_ok;
2308 1.1 mrg }
2309 1.1 mrg
2310 1.1 mrg /* Separate the schedule domains of "executed".
2311 1.1 mrg *
2312 1.1 mrg * That is, break up the domain of "executed" into basic sets,
2313 1.1 mrg * such that for each basic set S, every element in S is associated with
2314 1.1 mrg * the same domain spaces.
2315 1.1 mrg *
2316 1.1 mrg * "space" is the (single) domain space of "executed".
2317 1.1 mrg */
2318 1.1 mrg static __isl_give isl_set *separate_schedule_domains(
2319 1.1 mrg __isl_take isl_space *space, __isl_take isl_union_map *executed,
2320 1.1 mrg __isl_keep isl_ast_build *build)
2321 1.1 mrg {
2322 1.1 mrg struct isl_separate_domain_data data = { build };
2323 1.1 mrg isl_ctx *ctx;
2324 1.1 mrg
2325 1.1 mrg ctx = isl_ast_build_get_ctx(build);
2326 1.1 mrg data.explicit = isl_options_get_ast_build_separation_bounds(ctx) ==
2327 1.1 mrg ISL_AST_BUILD_SEPARATION_BOUNDS_EXPLICIT;
2328 1.1 mrg data.domain = isl_set_empty(space);
2329 1.1 mrg if (isl_union_map_foreach_map(executed, &separate_domain, &data) < 0)
2330 1.1 mrg data.domain = isl_set_free(data.domain);
2331 1.1 mrg
2332 1.1 mrg isl_union_map_free(executed);
2333 1.1 mrg return data.domain;
2334 1.1 mrg }
2335 1.1 mrg
2336 1.1 mrg /* Temporary data used during the search for a lower bound for unrolling.
2337 1.1 mrg *
2338 1.1 mrg * "build" is the build in which the unrolling will be performed
2339 1.1 mrg * "domain" is the original set for which to find a lower bound
2340 1.1 mrg * "depth" is the dimension for which to find a lower boudn
2341 1.1 mrg * "expansion" is the expansion that needs to be applied to "domain"
2342 1.1 mrg * in the unrolling that will be performed
2343 1.1 mrg *
2344 1.1 mrg * "lower" is the best lower bound found so far. It is NULL if we have not
2345 1.1 mrg * found any yet.
2346 1.1 mrg * "n" is the corresponding size. If lower is NULL, then the value of n
2347 1.1 mrg * is undefined.
2348 1.1 mrg * "n_div" is the maximal number of integer divisions in the first
2349 1.1 mrg * unrolled iteration (after expansion). It is set to -1 if it hasn't
2350 1.1 mrg * been computed yet.
2351 1.1 mrg */
2352 1.1 mrg struct isl_find_unroll_data {
2353 1.1 mrg isl_ast_build *build;
2354 1.1 mrg isl_set *domain;
2355 1.1 mrg int depth;
2356 1.1 mrg isl_basic_map *expansion;
2357 1.1 mrg
2358 1.1 mrg isl_aff *lower;
2359 1.1 mrg int *n;
2360 1.1 mrg int n_div;
2361 1.1 mrg };
2362 1.1 mrg
2363 1.1 mrg /* Return the constraint
2364 1.1 mrg *
2365 1.1 mrg * i_"depth" = aff + offset
2366 1.1 mrg */
2367 1.1 mrg static __isl_give isl_constraint *at_offset(int depth, __isl_keep isl_aff *aff,
2368 1.1 mrg int offset)
2369 1.1 mrg {
2370 1.1 mrg aff = isl_aff_copy(aff);
2371 1.1 mrg aff = isl_aff_add_coefficient_si(aff, isl_dim_in, depth, -1);
2372 1.1 mrg aff = isl_aff_add_constant_si(aff, offset);
2373 1.1 mrg return isl_equality_from_aff(aff);
2374 1.1 mrg }
2375 1.1 mrg
2376 1.1 mrg /* Update *user to the number of integer divisions in the first element
2377 1.1 mrg * of "ma", if it is larger than the current value.
2378 1.1 mrg */
2379 1.1 mrg static isl_stat update_n_div(__isl_take isl_set *set,
2380 1.1 mrg __isl_take isl_multi_aff *ma, void *user)
2381 1.1 mrg {
2382 1.1 mrg isl_aff *aff;
2383 1.1 mrg int *n = user;
2384 1.1 mrg isl_size n_div;
2385 1.1 mrg
2386 1.1 mrg aff = isl_multi_aff_get_aff(ma, 0);
2387 1.1 mrg n_div = isl_aff_dim(aff, isl_dim_div);
2388 1.1 mrg isl_aff_free(aff);
2389 1.1 mrg isl_multi_aff_free(ma);
2390 1.1 mrg isl_set_free(set);
2391 1.1 mrg
2392 1.1 mrg if (n_div > *n)
2393 1.1 mrg *n = n_div;
2394 1.1 mrg
2395 1.1 mrg return n_div >= 0 ? isl_stat_ok : isl_stat_error;
2396 1.1 mrg }
2397 1.1 mrg
2398 1.1 mrg /* Get the number of integer divisions in the expression for the iterator
2399 1.1 mrg * value at the first slice in the unrolling based on lower bound "lower",
2400 1.1 mrg * taking into account the expansion that needs to be performed on this slice.
2401 1.1 mrg */
2402 1.1 mrg static int get_expanded_n_div(struct isl_find_unroll_data *data,
2403 1.1 mrg __isl_keep isl_aff *lower)
2404 1.1 mrg {
2405 1.1 mrg isl_constraint *c;
2406 1.1 mrg isl_set *set;
2407 1.1 mrg isl_map *it_map, *expansion;
2408 1.1 mrg isl_pw_multi_aff *pma;
2409 1.1 mrg int n;
2410 1.1 mrg
2411 1.1 mrg c = at_offset(data->depth, lower, 0);
2412 1.1 mrg set = isl_set_copy(data->domain);
2413 1.1 mrg set = isl_set_add_constraint(set, c);
2414 1.1 mrg expansion = isl_map_from_basic_map(isl_basic_map_copy(data->expansion));
2415 1.1 mrg set = isl_set_apply(set, expansion);
2416 1.1 mrg it_map = isl_ast_build_map_to_iterator(data->build, set);
2417 1.1 mrg pma = isl_pw_multi_aff_from_map(it_map);
2418 1.1 mrg n = 0;
2419 1.1 mrg if (isl_pw_multi_aff_foreach_piece(pma, &update_n_div, &n) < 0)
2420 1.1 mrg n = -1;
2421 1.1 mrg isl_pw_multi_aff_free(pma);
2422 1.1 mrg
2423 1.1 mrg return n;
2424 1.1 mrg }
2425 1.1 mrg
2426 1.1 mrg /* Is the lower bound "lower" with corresponding iteration count "n"
2427 1.1 mrg * better than the one stored in "data"?
2428 1.1 mrg * If there is no upper bound on the iteration count ("n" is infinity) or
2429 1.1 mrg * if the count is too large, then we cannot use this lower bound.
2430 1.1 mrg * Otherwise, if there was no previous lower bound or
2431 1.1 mrg * if the iteration count of the new lower bound is smaller than
2432 1.1 mrg * the iteration count of the previous lower bound, then we consider
2433 1.1 mrg * the new lower bound to be better.
2434 1.1 mrg * If the iteration count is the same, then compare the number
2435 1.1 mrg * of integer divisions that would be needed to express
2436 1.1 mrg * the iterator value at the first slice in the unrolling
2437 1.1 mrg * according to the lower bound. If we end up computing this
2438 1.1 mrg * number, then store the lowest value in data->n_div.
2439 1.1 mrg */
2440 1.1 mrg static int is_better_lower_bound(struct isl_find_unroll_data *data,
2441 1.1 mrg __isl_keep isl_aff *lower, __isl_keep isl_val *n)
2442 1.1 mrg {
2443 1.1 mrg int cmp;
2444 1.1 mrg int n_div;
2445 1.1 mrg
2446 1.1 mrg if (!n)
2447 1.1 mrg return -1;
2448 1.1 mrg if (isl_val_is_infty(n))
2449 1.1 mrg return 0;
2450 1.1 mrg if (isl_val_cmp_si(n, INT_MAX) > 0)
2451 1.1 mrg return 0;
2452 1.1 mrg if (!data->lower)
2453 1.1 mrg return 1;
2454 1.1 mrg cmp = isl_val_cmp_si(n, *data->n);
2455 1.1 mrg if (cmp < 0)
2456 1.1 mrg return 1;
2457 1.1 mrg if (cmp > 0)
2458 1.1 mrg return 0;
2459 1.1 mrg if (data->n_div < 0)
2460 1.1 mrg data->n_div = get_expanded_n_div(data, data->lower);
2461 1.1 mrg if (data->n_div < 0)
2462 1.1 mrg return -1;
2463 1.1 mrg if (data->n_div == 0)
2464 1.1 mrg return 0;
2465 1.1 mrg n_div = get_expanded_n_div(data, lower);
2466 1.1 mrg if (n_div < 0)
2467 1.1 mrg return -1;
2468 1.1 mrg if (n_div >= data->n_div)
2469 1.1 mrg return 0;
2470 1.1 mrg data->n_div = n_div;
2471 1.1 mrg
2472 1.1 mrg return 1;
2473 1.1 mrg }
2474 1.1 mrg
2475 1.1 mrg /* Check if we can use "c" as a lower bound and if it is better than
2476 1.1 mrg * any previously found lower bound.
2477 1.1 mrg *
2478 1.1 mrg * If "c" does not involve the dimension at the current depth,
2479 1.1 mrg * then we cannot use it.
2480 1.1 mrg * Otherwise, let "c" be of the form
2481 1.1 mrg *
2482 1.1 mrg * i >= f(j)/a
2483 1.1 mrg *
2484 1.1 mrg * We compute the maximal value of
2485 1.1 mrg *
2486 1.1 mrg * -ceil(f(j)/a)) + i + 1
2487 1.1 mrg *
2488 1.1 mrg * over the domain. If there is such a value "n", then we know
2489 1.1 mrg *
2490 1.1 mrg * -ceil(f(j)/a)) + i + 1 <= n
2491 1.1 mrg *
2492 1.1 mrg * or
2493 1.1 mrg *
2494 1.1 mrg * i < ceil(f(j)/a)) + n
2495 1.1 mrg *
2496 1.1 mrg * meaning that we can use ceil(f(j)/a)) as a lower bound for unrolling.
2497 1.1 mrg * We just need to check if we have found any lower bound before and
2498 1.1 mrg * if the new lower bound is better (smaller n or fewer integer divisions)
2499 1.1 mrg * than the previously found lower bounds.
2500 1.1 mrg */
2501 1.1 mrg static isl_stat update_unrolling_lower_bound(struct isl_find_unroll_data *data,
2502 1.1 mrg __isl_keep isl_constraint *c)
2503 1.1 mrg {
2504 1.1 mrg isl_aff *aff, *lower;
2505 1.1 mrg isl_val *max;
2506 1.1 mrg int better;
2507 1.1 mrg
2508 1.1 mrg if (!isl_constraint_is_lower_bound(c, isl_dim_set, data->depth))
2509 1.1 mrg return isl_stat_ok;
2510 1.1 mrg
2511 1.1 mrg lower = isl_constraint_get_bound(c, isl_dim_set, data->depth);
2512 1.1 mrg lower = isl_aff_ceil(lower);
2513 1.1 mrg aff = isl_aff_copy(lower);
2514 1.1 mrg aff = isl_aff_neg(aff);
2515 1.1 mrg aff = isl_aff_add_coefficient_si(aff, isl_dim_in, data->depth, 1);
2516 1.1 mrg aff = isl_aff_add_constant_si(aff, 1);
2517 1.1 mrg max = isl_set_max_val(data->domain, aff);
2518 1.1 mrg isl_aff_free(aff);
2519 1.1 mrg
2520 1.1 mrg better = is_better_lower_bound(data, lower, max);
2521 1.1 mrg if (better < 0 || !better) {
2522 1.1 mrg isl_val_free(max);
2523 1.1 mrg isl_aff_free(lower);
2524 1.1 mrg return better < 0 ? isl_stat_error : isl_stat_ok;
2525 1.1 mrg }
2526 1.1 mrg
2527 1.1 mrg isl_aff_free(data->lower);
2528 1.1 mrg data->lower = lower;
2529 1.1 mrg *data->n = isl_val_get_num_si(max);
2530 1.1 mrg isl_val_free(max);
2531 1.1 mrg
2532 1.1 mrg return isl_stat_ok;
2533 1.1 mrg }
2534 1.1 mrg
2535 1.1 mrg /* Check if we can use "c" as a lower bound and if it is better than
2536 1.1 mrg * any previously found lower bound.
2537 1.1 mrg */
2538 1.1 mrg static isl_stat constraint_find_unroll(__isl_take isl_constraint *c, void *user)
2539 1.1 mrg {
2540 1.1 mrg struct isl_find_unroll_data *data;
2541 1.1 mrg isl_stat r;
2542 1.1 mrg
2543 1.1 mrg data = (struct isl_find_unroll_data *) user;
2544 1.1 mrg r = update_unrolling_lower_bound(data, c);
2545 1.1 mrg isl_constraint_free(c);
2546 1.1 mrg
2547 1.1 mrg return r;
2548 1.1 mrg }
2549 1.1 mrg
2550 1.1 mrg /* Look for a lower bound l(i) on the dimension at "depth"
2551 1.1 mrg * and a size n such that "domain" is a subset of
2552 1.1 mrg *
2553 1.1 mrg * { [i] : l(i) <= i_d < l(i) + n }
2554 1.1 mrg *
2555 1.1 mrg * where d is "depth" and l(i) depends only on earlier dimensions.
2556 1.1 mrg * Furthermore, try and find a lower bound such that n is as small as possible.
2557 1.1 mrg * In particular, "n" needs to be finite.
2558 1.1 mrg * "build" is the build in which the unrolling will be performed.
2559 1.1 mrg * "expansion" is the expansion that needs to be applied to "domain"
2560 1.1 mrg * in the unrolling that will be performed.
2561 1.1 mrg *
2562 1.1 mrg * Inner dimensions have been eliminated from "domain" by the caller.
2563 1.1 mrg *
2564 1.1 mrg * We first construct a collection of lower bounds on the input set
2565 1.1 mrg * by computing its simple hull. We then iterate through them,
2566 1.1 mrg * discarding those that we cannot use (either because they do not
2567 1.1 mrg * involve the dimension at "depth" or because they have no corresponding
2568 1.1 mrg * upper bound, meaning that "n" would be unbounded) and pick out the
2569 1.1 mrg * best from the remaining ones.
2570 1.1 mrg *
2571 1.1 mrg * If we cannot find a suitable lower bound, then we consider that
2572 1.1 mrg * to be an error.
2573 1.1 mrg */
2574 1.1 mrg static __isl_give isl_aff *find_unroll_lower_bound(
2575 1.1 mrg __isl_keep isl_ast_build *build, __isl_keep isl_set *domain,
2576 1.1 mrg int depth, __isl_keep isl_basic_map *expansion, int *n)
2577 1.1 mrg {
2578 1.1 mrg struct isl_find_unroll_data data =
2579 1.1 mrg { build, domain, depth, expansion, NULL, n, -1 };
2580 1.1 mrg isl_basic_set *hull;
2581 1.1 mrg
2582 1.1 mrg hull = isl_set_simple_hull(isl_set_copy(domain));
2583 1.1 mrg
2584 1.1 mrg if (isl_basic_set_foreach_constraint(hull,
2585 1.1 mrg &constraint_find_unroll, &data) < 0)
2586 1.1 mrg goto error;
2587 1.1 mrg
2588 1.1 mrg isl_basic_set_free(hull);
2589 1.1 mrg
2590 1.1 mrg if (!data.lower)
2591 1.1 mrg isl_die(isl_set_get_ctx(domain), isl_error_invalid,
2592 1.1 mrg "cannot find lower bound for unrolling", return NULL);
2593 1.1 mrg
2594 1.1 mrg return data.lower;
2595 1.1 mrg error:
2596 1.1 mrg isl_basic_set_free(hull);
2597 1.1 mrg return isl_aff_free(data.lower);
2598 1.1 mrg }
2599 1.1 mrg
2600 1.1 mrg /* Call "fn" on each iteration of the current dimension of "domain".
2601 1.1 mrg * If "init" is not NULL, then it is called with the number of
2602 1.1 mrg * iterations before any call to "fn".
2603 1.1 mrg * Return -1 on failure.
2604 1.1 mrg *
2605 1.1 mrg * Since we are going to be iterating over the individual values,
2606 1.1 mrg * we first check if there are any strides on the current dimension.
2607 1.1 mrg * If there is, we rewrite the current dimension i as
2608 1.1 mrg *
2609 1.1 mrg * i = stride i' + offset
2610 1.1 mrg *
2611 1.1 mrg * and then iterate over individual values of i' instead.
2612 1.1 mrg *
2613 1.1 mrg * We then look for a lower bound on i' and a size such that the domain
2614 1.1 mrg * is a subset of
2615 1.1 mrg *
2616 1.1 mrg * { [j,i'] : l(j) <= i' < l(j) + n }
2617 1.1 mrg *
2618 1.1 mrg * and then take slices of the domain at values of i'
2619 1.1 mrg * between l(j) and l(j) + n - 1.
2620 1.1 mrg *
2621 1.1 mrg * We compute the unshifted simple hull of each slice to ensure that
2622 1.1 mrg * we have a single basic set per offset. The slicing constraint
2623 1.1 mrg * may get simplified away before the unshifted simple hull is taken
2624 1.1 mrg * and may therefore in some rare cases disappear from the result.
2625 1.1 mrg * We therefore explicitly add the constraint back after computing
2626 1.1 mrg * the unshifted simple hull to ensure that the basic sets
2627 1.1 mrg * remain disjoint. The constraints that are dropped by taking the hull
2628 1.1 mrg * will be taken into account at the next level, as in the case of the
2629 1.1 mrg * atomic option.
2630 1.1 mrg *
2631 1.1 mrg * Finally, we map i' back to i and call "fn".
2632 1.1 mrg */
2633 1.1 mrg static int foreach_iteration(__isl_take isl_set *domain,
2634 1.1 mrg __isl_keep isl_ast_build *build, int (*init)(int n, void *user),
2635 1.1 mrg int (*fn)(__isl_take isl_basic_set *bset, void *user), void *user)
2636 1.1 mrg {
2637 1.1 mrg int i, n;
2638 1.1 mrg isl_bool empty;
2639 1.1 mrg isl_size depth;
2640 1.1 mrg isl_multi_aff *expansion;
2641 1.1 mrg isl_basic_map *bmap;
2642 1.1 mrg isl_aff *lower = NULL;
2643 1.1 mrg isl_ast_build *stride_build;
2644 1.1 mrg
2645 1.1 mrg depth = isl_ast_build_get_depth(build);
2646 1.1 mrg if (depth < 0)
2647 1.1 mrg domain = isl_set_free(domain);
2648 1.1 mrg
2649 1.1 mrg domain = isl_ast_build_eliminate_inner(build, domain);
2650 1.1 mrg domain = isl_set_intersect(domain, isl_ast_build_get_domain(build));
2651 1.1 mrg stride_build = isl_ast_build_copy(build);
2652 1.1 mrg stride_build = isl_ast_build_detect_strides(stride_build,
2653 1.1 mrg isl_set_copy(domain));
2654 1.1 mrg expansion = isl_ast_build_get_stride_expansion(stride_build);
2655 1.1 mrg
2656 1.1 mrg domain = isl_set_preimage_multi_aff(domain,
2657 1.1 mrg isl_multi_aff_copy(expansion));
2658 1.1 mrg domain = isl_ast_build_eliminate_divs(stride_build, domain);
2659 1.1 mrg isl_ast_build_free(stride_build);
2660 1.1 mrg
2661 1.1 mrg bmap = isl_basic_map_from_multi_aff(expansion);
2662 1.1 mrg
2663 1.1 mrg empty = isl_set_is_empty(domain);
2664 1.1 mrg if (empty < 0) {
2665 1.1 mrg n = -1;
2666 1.1 mrg } else if (empty) {
2667 1.1 mrg n = 0;
2668 1.1 mrg } else {
2669 1.1 mrg lower = find_unroll_lower_bound(build, domain, depth, bmap, &n);
2670 1.1 mrg if (!lower)
2671 1.1 mrg n = -1;
2672 1.1 mrg }
2673 1.1 mrg if (n >= 0 && init && init(n, user) < 0)
2674 1.1 mrg n = -1;
2675 1.1 mrg for (i = 0; i < n; ++i) {
2676 1.1 mrg isl_set *set;
2677 1.1 mrg isl_basic_set *bset;
2678 1.1 mrg isl_constraint *slice;
2679 1.1 mrg
2680 1.1 mrg slice = at_offset(depth, lower, i);
2681 1.1 mrg set = isl_set_copy(domain);
2682 1.1 mrg set = isl_set_add_constraint(set, isl_constraint_copy(slice));
2683 1.1 mrg bset = isl_set_unshifted_simple_hull(set);
2684 1.1 mrg bset = isl_basic_set_add_constraint(bset, slice);
2685 1.1 mrg bset = isl_basic_set_apply(bset, isl_basic_map_copy(bmap));
2686 1.1 mrg
2687 1.1 mrg if (fn(bset, user) < 0)
2688 1.1 mrg break;
2689 1.1 mrg }
2690 1.1 mrg
2691 1.1 mrg isl_aff_free(lower);
2692 1.1 mrg isl_set_free(domain);
2693 1.1 mrg isl_basic_map_free(bmap);
2694 1.1 mrg
2695 1.1 mrg return n < 0 || i < n ? -1 : 0;
2696 1.1 mrg }
2697 1.1 mrg
2698 1.1 mrg /* Data structure for storing the results and the intermediate objects
2699 1.1 mrg * of compute_domains.
2700 1.1 mrg *
2701 1.1 mrg * "list" is the main result of the function and contains a list
2702 1.1 mrg * of disjoint basic sets for which code should be generated.
2703 1.1 mrg *
2704 1.1 mrg * "executed" and "build" are inputs to compute_domains.
2705 1.1 mrg * "schedule_domain" is the domain of "executed".
2706 1.1 mrg *
2707 1.1 mrg * "option" contains the domains at the current depth that should by
2708 1.1 mrg * atomic, separated or unrolled. These domains are as specified by
2709 1.1 mrg * the user, except that inner dimensions have been eliminated and
2710 1.1 mrg * that they have been made pair-wise disjoint.
2711 1.1 mrg *
2712 1.1 mrg * "sep_class" contains the user-specified split into separation classes
2713 1.1 mrg * specialized to the current depth.
2714 1.1 mrg * "done" contains the union of the separation domains that have already
2715 1.1 mrg * been handled.
2716 1.1 mrg */
2717 1.1 mrg struct isl_codegen_domains {
2718 1.1 mrg isl_basic_set_list *list;
2719 1.1 mrg
2720 1.1 mrg isl_union_map *executed;
2721 1.1 mrg isl_ast_build *build;
2722 1.1 mrg isl_set *schedule_domain;
2723 1.1 mrg
2724 1.1 mrg isl_set *option[4];
2725 1.1 mrg
2726 1.1 mrg isl_map *sep_class;
2727 1.1 mrg isl_set *done;
2728 1.1 mrg };
2729 1.1 mrg
2730 1.1 mrg /* Internal data structure for do_unroll.
2731 1.1 mrg *
2732 1.1 mrg * "domains" stores the results of compute_domains.
2733 1.1 mrg * "class_domain" is the original class domain passed to do_unroll.
2734 1.1 mrg * "unroll_domain" collects the unrolled iterations.
2735 1.1 mrg */
2736 1.1 mrg struct isl_ast_unroll_data {
2737 1.1 mrg struct isl_codegen_domains *domains;
2738 1.1 mrg isl_set *class_domain;
2739 1.1 mrg isl_set *unroll_domain;
2740 1.1 mrg };
2741 1.1 mrg
2742 1.1 mrg /* Given an iteration of an unrolled domain represented by "bset",
2743 1.1 mrg * add it to data->domains->list.
2744 1.1 mrg * Since we may have dropped some constraints, we intersect with
2745 1.1 mrg * the class domain again to ensure that each element in the list
2746 1.1 mrg * is disjoint from the other class domains.
2747 1.1 mrg */
2748 1.1 mrg static int do_unroll_iteration(__isl_take isl_basic_set *bset, void *user)
2749 1.1 mrg {
2750 1.1 mrg struct isl_ast_unroll_data *data = user;
2751 1.1 mrg isl_set *set;
2752 1.1 mrg isl_basic_set_list *list;
2753 1.1 mrg
2754 1.1 mrg set = isl_set_from_basic_set(bset);
2755 1.1 mrg data->unroll_domain = isl_set_union(data->unroll_domain,
2756 1.1 mrg isl_set_copy(set));
2757 1.1 mrg set = isl_set_intersect(set, isl_set_copy(data->class_domain));
2758 1.1 mrg set = isl_set_make_disjoint(set);
2759 1.1 mrg list = isl_basic_set_list_from_set(set);
2760 1.1 mrg data->domains->list = isl_basic_set_list_concat(data->domains->list,
2761 1.1 mrg list);
2762 1.1 mrg
2763 1.1 mrg return 0;
2764 1.1 mrg }
2765 1.1 mrg
2766 1.1 mrg /* Extend domains->list with a list of basic sets, one for each value
2767 1.1 mrg * of the current dimension in "domain" and remove the corresponding
2768 1.1 mrg * sets from the class domain. Return the updated class domain.
2769 1.1 mrg * The divs that involve the current dimension have not been projected out
2770 1.1 mrg * from this domain.
2771 1.1 mrg *
2772 1.1 mrg * We call foreach_iteration to iterate over the individual values and
2773 1.1 mrg * in do_unroll_iteration we collect the individual basic sets in
2774 1.1 mrg * domains->list and their union in data->unroll_domain, which is then
2775 1.1 mrg * used to update the class domain.
2776 1.1 mrg */
2777 1.1 mrg static __isl_give isl_set *do_unroll(struct isl_codegen_domains *domains,
2778 1.1 mrg __isl_take isl_set *domain, __isl_take isl_set *class_domain)
2779 1.1 mrg {
2780 1.1 mrg struct isl_ast_unroll_data data;
2781 1.1 mrg
2782 1.1 mrg if (!domain)
2783 1.1 mrg return isl_set_free(class_domain);
2784 1.1 mrg if (!class_domain)
2785 1.1 mrg return isl_set_free(domain);
2786 1.1 mrg
2787 1.1 mrg data.domains = domains;
2788 1.1 mrg data.class_domain = class_domain;
2789 1.1 mrg data.unroll_domain = isl_set_empty(isl_set_get_space(domain));
2790 1.1 mrg
2791 1.1 mrg if (foreach_iteration(domain, domains->build, NULL,
2792 1.1 mrg &do_unroll_iteration, &data) < 0)
2793 1.1 mrg data.unroll_domain = isl_set_free(data.unroll_domain);
2794 1.1 mrg
2795 1.1 mrg class_domain = isl_set_subtract(class_domain, data.unroll_domain);
2796 1.1 mrg
2797 1.1 mrg return class_domain;
2798 1.1 mrg }
2799 1.1 mrg
2800 1.1 mrg /* Add domains to domains->list for each individual value of the current
2801 1.1 mrg * dimension, for that part of the schedule domain that lies in the
2802 1.1 mrg * intersection of the option domain and the class domain.
2803 1.1 mrg * Remove the corresponding sets from the class domain and
2804 1.1 mrg * return the updated class domain.
2805 1.1 mrg *
2806 1.1 mrg * We first break up the unroll option domain into individual pieces
2807 1.1 mrg * and then handle each of them separately. The unroll option domain
2808 1.1 mrg * has been made disjoint in compute_domains_init_options,
2809 1.1 mrg *
2810 1.1 mrg * Note that we actively want to combine different pieces of the
2811 1.1 mrg * schedule domain that have the same value at the current dimension.
2812 1.1 mrg * We therefore need to break up the unroll option domain before
2813 1.1 mrg * intersecting with class and schedule domain, hoping that the
2814 1.1 mrg * unroll option domain specified by the user is relatively simple.
2815 1.1 mrg */
2816 1.1 mrg static __isl_give isl_set *compute_unroll_domains(
2817 1.1 mrg struct isl_codegen_domains *domains, __isl_take isl_set *class_domain)
2818 1.1 mrg {
2819 1.1 mrg isl_set *unroll_domain;
2820 1.1 mrg isl_basic_set_list *unroll_list;
2821 1.1 mrg int i;
2822 1.1 mrg isl_size n;
2823 1.1 mrg isl_bool empty;
2824 1.1 mrg
2825 1.1 mrg empty = isl_set_is_empty(domains->option[isl_ast_loop_unroll]);
2826 1.1 mrg if (empty < 0)
2827 1.1 mrg return isl_set_free(class_domain);
2828 1.1 mrg if (empty)
2829 1.1 mrg return class_domain;
2830 1.1 mrg
2831 1.1 mrg unroll_domain = isl_set_copy(domains->option[isl_ast_loop_unroll]);
2832 1.1 mrg unroll_list = isl_basic_set_list_from_set(unroll_domain);
2833 1.1 mrg
2834 1.1 mrg n = isl_basic_set_list_n_basic_set(unroll_list);
2835 1.1 mrg if (n < 0)
2836 1.1 mrg class_domain = isl_set_free(class_domain);
2837 1.1 mrg for (i = 0; i < n; ++i) {
2838 1.1 mrg isl_basic_set *bset;
2839 1.1 mrg
2840 1.1 mrg bset = isl_basic_set_list_get_basic_set(unroll_list, i);
2841 1.1 mrg unroll_domain = isl_set_from_basic_set(bset);
2842 1.1 mrg unroll_domain = isl_set_intersect(unroll_domain,
2843 1.1 mrg isl_set_copy(class_domain));
2844 1.1 mrg unroll_domain = isl_set_intersect(unroll_domain,
2845 1.1 mrg isl_set_copy(domains->schedule_domain));
2846 1.1 mrg
2847 1.1 mrg empty = isl_set_is_empty(unroll_domain);
2848 1.1 mrg if (empty >= 0 && empty) {
2849 1.1 mrg isl_set_free(unroll_domain);
2850 1.1 mrg continue;
2851 1.1 mrg }
2852 1.1 mrg
2853 1.1 mrg class_domain = do_unroll(domains, unroll_domain, class_domain);
2854 1.1 mrg }
2855 1.1 mrg
2856 1.1 mrg isl_basic_set_list_free(unroll_list);
2857 1.1 mrg
2858 1.1 mrg return class_domain;
2859 1.1 mrg }
2860 1.1 mrg
2861 1.1 mrg /* Try and construct a single basic set that includes the intersection of
2862 1.1 mrg * the schedule domain, the atomic option domain and the class domain.
2863 1.1 mrg * Add the resulting basic set(s) to domains->list and remove them
2864 1.1 mrg * from class_domain. Return the updated class domain.
2865 1.1 mrg *
2866 1.1 mrg * We construct a single domain rather than trying to combine
2867 1.1 mrg * the schedule domains of individual domains because we are working
2868 1.1 mrg * within a single component so that non-overlapping schedule domains
2869 1.1 mrg * should already have been separated.
2870 1.1 mrg * We do however need to make sure that this single domains is a subset
2871 1.1 mrg * of the class domain so that it would not intersect with any other
2872 1.1 mrg * class domains. This means that we may end up splitting up the atomic
2873 1.1 mrg * domain in case separation classes are being used.
2874 1.1 mrg *
2875 1.1 mrg * "domain" is the intersection of the schedule domain and the class domain,
2876 1.1 mrg * with inner dimensions projected out.
2877 1.1 mrg */
2878 1.1 mrg static __isl_give isl_set *compute_atomic_domain(
2879 1.1 mrg struct isl_codegen_domains *domains, __isl_take isl_set *class_domain)
2880 1.1 mrg {
2881 1.1 mrg isl_basic_set *bset;
2882 1.1 mrg isl_basic_set_list *list;
2883 1.1 mrg isl_set *domain, *atomic_domain;
2884 1.1 mrg int empty;
2885 1.1 mrg
2886 1.1 mrg domain = isl_set_copy(domains->option[isl_ast_loop_atomic]);
2887 1.1 mrg domain = isl_set_intersect(domain, isl_set_copy(class_domain));
2888 1.1 mrg domain = isl_set_intersect(domain,
2889 1.1 mrg isl_set_copy(domains->schedule_domain));
2890 1.1 mrg empty = isl_set_is_empty(domain);
2891 1.1 mrg if (empty < 0)
2892 1.1 mrg class_domain = isl_set_free(class_domain);
2893 1.1 mrg if (empty) {
2894 1.1 mrg isl_set_free(domain);
2895 1.1 mrg return class_domain;
2896 1.1 mrg }
2897 1.1 mrg
2898 1.1 mrg domain = isl_ast_build_eliminate(domains->build, domain);
2899 1.1 mrg domain = isl_set_coalesce_preserve(domain);
2900 1.1 mrg bset = isl_set_unshifted_simple_hull(domain);
2901 1.1 mrg domain = isl_set_from_basic_set(bset);
2902 1.1 mrg atomic_domain = isl_set_copy(domain);
2903 1.1 mrg domain = isl_set_intersect(domain, isl_set_copy(class_domain));
2904 1.1 mrg class_domain = isl_set_subtract(class_domain, atomic_domain);
2905 1.1 mrg domain = isl_set_make_disjoint(domain);
2906 1.1 mrg list = isl_basic_set_list_from_set(domain);
2907 1.1 mrg domains->list = isl_basic_set_list_concat(domains->list, list);
2908 1.1 mrg
2909 1.1 mrg return class_domain;
2910 1.1 mrg }
2911 1.1 mrg
2912 1.1 mrg /* Split up the schedule domain into uniform basic sets,
2913 1.1 mrg * in the sense that each element in a basic set is associated to
2914 1.1 mrg * elements of the same domains, and add the result to domains->list.
2915 1.1 mrg * Do this for that part of the schedule domain that lies in the
2916 1.1 mrg * intersection of "class_domain" and the separate option domain.
2917 1.1 mrg *
2918 1.1 mrg * "class_domain" may or may not include the constraints
2919 1.1 mrg * of the schedule domain, but this does not make a difference
2920 1.1 mrg * since we are going to intersect it with the domain of the inverse schedule.
2921 1.1 mrg * If it includes schedule domain constraints, then they may involve
2922 1.1 mrg * inner dimensions, but we will eliminate them in separation_domain.
2923 1.1 mrg */
2924 1.1 mrg static int compute_separate_domain(struct isl_codegen_domains *domains,
2925 1.1 mrg __isl_keep isl_set *class_domain)
2926 1.1 mrg {
2927 1.1 mrg isl_space *space;
2928 1.1 mrg isl_set *domain;
2929 1.1 mrg isl_union_map *executed;
2930 1.1 mrg isl_basic_set_list *list;
2931 1.1 mrg int empty;
2932 1.1 mrg
2933 1.1 mrg domain = isl_set_copy(domains->option[isl_ast_loop_separate]);
2934 1.1 mrg domain = isl_set_intersect(domain, isl_set_copy(class_domain));
2935 1.1 mrg executed = isl_union_map_copy(domains->executed);
2936 1.1 mrg executed = isl_union_map_intersect_domain(executed,
2937 1.1 mrg isl_union_set_from_set(domain));
2938 1.1 mrg empty = isl_union_map_is_empty(executed);
2939 1.1 mrg if (empty < 0 || empty) {
2940 1.1 mrg isl_union_map_free(executed);
2941 1.1 mrg return empty < 0 ? -1 : 0;
2942 1.1 mrg }
2943 1.1 mrg
2944 1.1 mrg space = isl_set_get_space(class_domain);
2945 1.1 mrg domain = separate_schedule_domains(space, executed, domains->build);
2946 1.1 mrg
2947 1.1 mrg list = isl_basic_set_list_from_set(domain);
2948 1.1 mrg domains->list = isl_basic_set_list_concat(domains->list, list);
2949 1.1 mrg
2950 1.1 mrg return 0;
2951 1.1 mrg }
2952 1.1 mrg
/* Split up the domain at the current depth into disjoint
 * basic sets for which code should be generated separately
 * for the given separation class domain.
 *
 * If any separation classes have been defined, then "class_domain"
 * is the domain of the current class and does not refer to inner dimensions.
 * Otherwise, "class_domain" is the universe domain.
 *
 * We first make sure that the class domain is disjoint from
 * previously considered class domains.
 *
 * The separate domains can be computed directly from the "class_domain".
 *
 * The unroll, atomic and remainder domains need the constraints
 * from the schedule domain.
 *
 * For unrolling, the actual schedule domain is needed (with divs that
 * may refer to the current dimension) so that stride detection can be
 * performed.
 *
 * For atomic and remainder domains, inner dimensions and divs involving
 * the current dimensions should be eliminated.
 * In case we are working within a separation class, we need to intersect
 * the result with the current "class_domain" to ensure that the domains
 * are disjoint from those generated from other class domains.
 *
 * The domain that has been made atomic may be larger than specified
 * by the user since it needs to be representable as a single basic set.
 * This possibly larger domain is removed from class_domain by
 * compute_atomic_domain.  It is computed first so that the extended domain
 * would not overlap with any domains computed before.
 * Similarly, the unrolled domains may have some constraints removed and
 * may therefore also be larger than specified by the user.
 *
 * If anything is left after handling separate, unroll and atomic,
 * we split it up into basic sets and append the basic sets to domains->list.
 *
 * Return isl_stat_ok on success and isl_stat_error on failure;
 * "class_domain" is consumed in both cases.
 */
static isl_stat compute_partial_domains(struct isl_codegen_domains *domains,
	__isl_take isl_set *class_domain)
{
	isl_basic_set_list *list;
	isl_set *domain;

	/* Restrict to the part not covered by previously handled classes
	 * and record the current class as handled. */
	class_domain = isl_set_subtract(class_domain,
					isl_set_copy(domains->done));
	domains->done = isl_set_union(domains->done,
				isl_set_copy(class_domain));

	/* Each of these carves its own pieces out of "class_domain". */
	class_domain = compute_atomic_domain(domains, class_domain);
	class_domain = compute_unroll_domains(domains, class_domain);

	domain = isl_set_copy(class_domain);

	if (compute_separate_domain(domains, domain) < 0)
		goto error;
	domain = isl_set_subtract(domain,
			isl_set_copy(domains->option[isl_ast_loop_separate]));

	/* The remainder needs the schedule domain constraints,
	 * with inner dimensions and divs involving the current
	 * dimension eliminated. */
	domain = isl_set_intersect(domain,
				isl_set_copy(domains->schedule_domain));

	domain = isl_ast_build_eliminate(domains->build, domain);
	domain = isl_set_intersect(domain, isl_set_copy(class_domain));

	domain = isl_set_coalesce_preserve(domain);
	domain = isl_set_make_disjoint(domain);

	list = isl_basic_set_list_from_set(domain);
	domains->list = isl_basic_set_list_concat(domains->list, list);

	isl_set_free(class_domain);

	return isl_stat_ok;
error:
	isl_set_free(domain);
	isl_set_free(class_domain);
	return isl_stat_error;
}
3031 1.1 mrg
3032 1.1 mrg /* Split up the domain at the current depth into disjoint
3033 1.1 mrg * basic sets for which code should be generated separately
3034 1.1 mrg * for the separation class identified by "pnt".
3035 1.1 mrg *
3036 1.1 mrg * We extract the corresponding class domain from domains->sep_class,
3037 1.1 mrg * eliminate inner dimensions and pass control to compute_partial_domains.
3038 1.1 mrg */
3039 1.1 mrg static isl_stat compute_class_domains(__isl_take isl_point *pnt, void *user)
3040 1.1 mrg {
3041 1.1 mrg struct isl_codegen_domains *domains = user;
3042 1.1 mrg isl_set *class_set;
3043 1.1 mrg isl_set *domain;
3044 1.1 mrg int disjoint;
3045 1.1 mrg
3046 1.1 mrg class_set = isl_set_from_point(pnt);
3047 1.1 mrg domain = isl_map_domain(isl_map_intersect_range(
3048 1.1 mrg isl_map_copy(domains->sep_class), class_set));
3049 1.1 mrg domain = isl_ast_build_compute_gist(domains->build, domain);
3050 1.1 mrg domain = isl_ast_build_eliminate(domains->build, domain);
3051 1.1 mrg
3052 1.1 mrg disjoint = isl_set_plain_is_disjoint(domain, domains->schedule_domain);
3053 1.1 mrg if (disjoint < 0)
3054 1.1 mrg return isl_stat_error;
3055 1.1 mrg if (disjoint) {
3056 1.1 mrg isl_set_free(domain);
3057 1.1 mrg return isl_stat_ok;
3058 1.1 mrg }
3059 1.1 mrg
3060 1.1 mrg return compute_partial_domains(domains, domain);
3061 1.1 mrg }
3062 1.1 mrg
3063 1.1 mrg /* Extract the domains at the current depth that should be atomic,
3064 1.1 mrg * separated or unrolled and store them in option.
3065 1.1 mrg *
3066 1.1 mrg * The domains specified by the user might overlap, so we make
3067 1.1 mrg * them disjoint by subtracting earlier domains from later domains.
3068 1.1 mrg */
3069 1.1 mrg static void compute_domains_init_options(isl_set *option[4],
3070 1.1 mrg __isl_keep isl_ast_build *build)
3071 1.1 mrg {
3072 1.1 mrg enum isl_ast_loop_type type, type2;
3073 1.1 mrg isl_set *unroll;
3074 1.1 mrg
3075 1.1 mrg for (type = isl_ast_loop_atomic;
3076 1.1 mrg type <= isl_ast_loop_separate; ++type) {
3077 1.1 mrg option[type] = isl_ast_build_get_option_domain(build, type);
3078 1.1 mrg for (type2 = isl_ast_loop_atomic; type2 < type; ++type2)
3079 1.1 mrg option[type] = isl_set_subtract(option[type],
3080 1.1 mrg isl_set_copy(option[type2]));
3081 1.1 mrg }
3082 1.1 mrg
3083 1.1 mrg unroll = option[isl_ast_loop_unroll];
3084 1.1 mrg unroll = isl_set_coalesce(unroll);
3085 1.1 mrg unroll = isl_set_make_disjoint(unroll);
3086 1.1 mrg option[isl_ast_loop_unroll] = unroll;
3087 1.1 mrg }
3088 1.1 mrg
/* Split up the domain at the current depth into disjoint
 * basic sets for which code should be generated separately,
 * based on the user-specified options.
 * Return the list of disjoint basic sets.
 *
 * There are three kinds of domains that we need to keep track of.
 * - the "schedule domain" is the domain of "executed"
 * - the "class domain" is the domain corresponding to the current
 *	separation class
 * - the "option domain" is the domain corresponding to one of the options
 *	atomic, unroll or separate
 *
 * We first consider the individual values of the separation classes
 * and split up the domain for each of them separately.
 * Finally, we consider the remainder.  If no separation classes were
 * specified, then we call compute_partial_domains with the universe
 * "class_domain".  Otherwise, we take the "schedule_domain" as "class_domain",
 * with inner dimensions removed.  We do this because we want to
 * avoid computing the complement of the class domains (i.e., the difference
 * between the universe and domains->done).
 */
static __isl_give isl_basic_set_list *compute_domains(
	__isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build)
{
	struct isl_codegen_domains domains;
	isl_ctx *ctx;
	isl_set *domain;
	isl_union_set *schedule_domain;
	isl_set *classes;
	isl_space *space;
	int n_param;
	enum isl_ast_loop_type type;
	isl_bool empty;

	if (!executed)
		return NULL;

	ctx = isl_union_map_get_ctx(executed);
	domains.list = isl_basic_set_list_alloc(ctx, 0);

	schedule_domain = isl_union_map_domain(isl_union_map_copy(executed));
	domain = isl_set_from_union_set(schedule_domain);

	compute_domains_init_options(domains.option, build);

	/* The separation classes are identified by the values in the range
	 * of the sep_class map, with parameters projected out. */
	domains.sep_class = isl_ast_build_get_separation_class(build);
	classes = isl_map_range(isl_map_copy(domains.sep_class));
	n_param = isl_set_dim(classes, isl_dim_param);
	if (n_param < 0)
		classes = isl_set_free(classes);
	classes = isl_set_project_out(classes, isl_dim_param, 0, n_param);

	space = isl_set_get_space(domain);
	domains.build = build;
	domains.schedule_domain = isl_set_copy(domain);
	domains.executed = executed;
	domains.done = isl_set_empty(space);

	/* Handle each separation class in turn; each call extends
	 * domains.done and appends to domains.list. */
	if (isl_set_foreach_point(classes, &compute_class_domains, &domains) < 0)
		domains.list = isl_basic_set_list_free(domains.list);
	isl_set_free(classes);

	/* Finally, handle the remainder.  If no class domain was handled
	 * (domains.done is empty), use the universe as class domain so that
	 * compute_partial_domains need not subtract domains.done. */
	empty = isl_set_is_empty(domains.done);
	if (empty < 0) {
		domains.list = isl_basic_set_list_free(domains.list);
		domain = isl_set_free(domain);
	} else if (empty) {
		isl_set_free(domain);
		domain = isl_set_universe(isl_set_get_space(domains.done));
	} else {
		domain = isl_ast_build_eliminate(build, domain);
	}
	if (compute_partial_domains(&domains, domain) < 0)
		domains.list = isl_basic_set_list_free(domains.list);

	isl_set_free(domains.schedule_domain);
	isl_set_free(domains.done);
	isl_map_free(domains.sep_class);
	for (type = isl_ast_loop_atomic; type <= isl_ast_loop_separate; ++type)
		isl_set_free(domains.option[type]);

	return domains.list;
}
3172 1.1 mrg
3173 1.1 mrg /* Generate code for a single component, after shifting (if any)
3174 1.1 mrg * has been applied, in case the schedule was specified as a union map.
3175 1.1 mrg *
3176 1.1 mrg * We first split up the domain at the current depth into disjoint
3177 1.1 mrg * basic sets based on the user-specified options.
3178 1.1 mrg * Then we generated code for each of them and concatenate the results.
3179 1.1 mrg */
3180 1.1 mrg static __isl_give isl_ast_graft_list *generate_shifted_component_flat(
3181 1.1 mrg __isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
3182 1.1 mrg {
3183 1.1 mrg isl_basic_set_list *domain_list;
3184 1.1 mrg isl_ast_graft_list *list = NULL;
3185 1.1 mrg
3186 1.1 mrg domain_list = compute_domains(executed, build);
3187 1.1 mrg list = generate_parallel_domains(domain_list, executed, build);
3188 1.1 mrg
3189 1.1 mrg isl_basic_set_list_free(domain_list);
3190 1.1 mrg isl_union_map_free(executed);
3191 1.1 mrg isl_ast_build_free(build);
3192 1.1 mrg
3193 1.1 mrg return list;
3194 1.1 mrg }
3195 1.1 mrg
3196 1.1 mrg /* Generate code for a single component, after shifting (if any)
3197 1.1 mrg * has been applied, in case the schedule was specified as a schedule tree
3198 1.1 mrg * and the separate option was specified.
3199 1.1 mrg *
3200 1.1 mrg * We perform separation on the domain of "executed" and then generate
3201 1.1 mrg * an AST for each of the resulting disjoint basic sets.
3202 1.1 mrg */
3203 1.1 mrg static __isl_give isl_ast_graft_list *generate_shifted_component_tree_separate(
3204 1.1 mrg __isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
3205 1.1 mrg {
3206 1.1 mrg isl_space *space;
3207 1.1 mrg isl_set *domain;
3208 1.1 mrg isl_basic_set_list *domain_list;
3209 1.1 mrg isl_ast_graft_list *list;
3210 1.1 mrg
3211 1.1 mrg space = isl_ast_build_get_space(build, 1);
3212 1.1 mrg domain = separate_schedule_domains(space,
3213 1.1 mrg isl_union_map_copy(executed), build);
3214 1.1 mrg domain_list = isl_basic_set_list_from_set(domain);
3215 1.1 mrg
3216 1.1 mrg list = generate_parallel_domains(domain_list, executed, build);
3217 1.1 mrg
3218 1.1 mrg isl_basic_set_list_free(domain_list);
3219 1.1 mrg isl_union_map_free(executed);
3220 1.1 mrg isl_ast_build_free(build);
3221 1.1 mrg
3222 1.1 mrg return list;
3223 1.1 mrg }
3224 1.1 mrg
/* Internal data structure for generate_shifted_component_tree_unroll.
 *
 * "executed" and "build" are inputs to generate_shifted_component_tree_unroll.
 * "list" collects the constructed grafts.
 */
struct isl_ast_unroll_tree_data {
	isl_union_map *executed;
	isl_ast_build *build;
	isl_ast_graft_list *list;
};
3235 1.1 mrg
3236 1.1 mrg /* Initialize data->list to a list of "n" elements.
3237 1.1 mrg */
3238 1.1 mrg static int init_unroll_tree(int n, void *user)
3239 1.1 mrg {
3240 1.1 mrg struct isl_ast_unroll_tree_data *data = user;
3241 1.1 mrg isl_ctx *ctx;
3242 1.1 mrg
3243 1.1 mrg ctx = isl_ast_build_get_ctx(data->build);
3244 1.1 mrg data->list = isl_ast_graft_list_alloc(ctx, n);
3245 1.1 mrg
3246 1.1 mrg return 0;
3247 1.1 mrg }
3248 1.1 mrg
3249 1.1 mrg /* Given an iteration of an unrolled domain represented by "bset",
3250 1.1 mrg * generate the corresponding AST and add the result to data->list.
3251 1.1 mrg */
3252 1.1 mrg static int do_unroll_tree_iteration(__isl_take isl_basic_set *bset, void *user)
3253 1.1 mrg {
3254 1.1 mrg struct isl_ast_unroll_tree_data *data = user;
3255 1.1 mrg
3256 1.1 mrg data->list = add_node(data->list, isl_union_map_copy(data->executed),
3257 1.1 mrg bset, isl_ast_build_copy(data->build));
3258 1.1 mrg
3259 1.1 mrg return 0;
3260 1.1 mrg }
3261 1.1 mrg
3262 1.1 mrg /* Generate code for a single component, after shifting (if any)
3263 1.1 mrg * has been applied, in case the schedule was specified as a schedule tree
3264 1.1 mrg * and the unroll option was specified.
3265 1.1 mrg *
3266 1.1 mrg * We call foreach_iteration to iterate over the individual values and
3267 1.1 mrg * construct and collect the corresponding grafts in do_unroll_tree_iteration.
3268 1.1 mrg */
3269 1.1 mrg static __isl_give isl_ast_graft_list *generate_shifted_component_tree_unroll(
3270 1.1 mrg __isl_take isl_union_map *executed, __isl_take isl_set *domain,
3271 1.1 mrg __isl_take isl_ast_build *build)
3272 1.1 mrg {
3273 1.1 mrg struct isl_ast_unroll_tree_data data = { executed, build, NULL };
3274 1.1 mrg
3275 1.1 mrg if (foreach_iteration(domain, build, &init_unroll_tree,
3276 1.1 mrg &do_unroll_tree_iteration, &data) < 0)
3277 1.1 mrg data.list = isl_ast_graft_list_free(data.list);
3278 1.1 mrg
3279 1.1 mrg isl_union_map_free(executed);
3280 1.1 mrg isl_ast_build_free(build);
3281 1.1 mrg
3282 1.1 mrg return data.list;
3283 1.1 mrg }
3284 1.1 mrg
3285 1.1 mrg /* Does "domain" involve a disjunction that is purely based on
3286 1.1 mrg * constraints involving only outer dimension?
3287 1.1 mrg *
3288 1.1 mrg * In particular, is there a disjunction such that the constraints
3289 1.1 mrg * involving the current and later dimensions are the same over
3290 1.1 mrg * all the disjuncts?
3291 1.1 mrg */
3292 1.1 mrg static isl_bool has_pure_outer_disjunction(__isl_keep isl_set *domain,
3293 1.1 mrg __isl_keep isl_ast_build *build)
3294 1.1 mrg {
3295 1.1 mrg isl_basic_set *hull;
3296 1.1 mrg isl_set *shared, *inner;
3297 1.1 mrg isl_bool equal;
3298 1.1 mrg isl_size depth;
3299 1.1 mrg isl_size n;
3300 1.1 mrg isl_size dim;
3301 1.1 mrg
3302 1.1 mrg n = isl_set_n_basic_set(domain);
3303 1.1 mrg if (n < 0)
3304 1.1 mrg return isl_bool_error;
3305 1.1 mrg if (n <= 1)
3306 1.1 mrg return isl_bool_false;
3307 1.1 mrg dim = isl_set_dim(domain, isl_dim_set);
3308 1.1 mrg depth = isl_ast_build_get_depth(build);
3309 1.1 mrg if (dim < 0 || depth < 0)
3310 1.1 mrg return isl_bool_error;
3311 1.1 mrg
3312 1.1 mrg inner = isl_set_copy(domain);
3313 1.1 mrg inner = isl_set_drop_constraints_not_involving_dims(inner,
3314 1.1 mrg isl_dim_set, depth, dim - depth);
3315 1.1 mrg hull = isl_set_plain_unshifted_simple_hull(isl_set_copy(inner));
3316 1.1 mrg shared = isl_set_from_basic_set(hull);
3317 1.1 mrg equal = isl_set_plain_is_equal(inner, shared);
3318 1.1 mrg isl_set_free(inner);
3319 1.1 mrg isl_set_free(shared);
3320 1.1 mrg
3321 1.1 mrg return equal;
3322 1.1 mrg }
3323 1.1 mrg
/* Generate code for a single component, after shifting (if any)
 * has been applied, in case the schedule was specified as a schedule tree.
 * In particular, handle the base case where there is either no isolated
 * set or we are within the isolated set (in which case "isolated" is set)
 * or the iterations that precede or follow the isolated set.
 *
 * The schedule domain is broken up or combined into basic sets
 * according to the AST generation option specified in the current
 * schedule node, which may be either atomic, separate, unroll or
 * unspecified.  If the option is unspecified, then we currently simply
 * split the schedule domain into disjoint basic sets.
 *
 * In case the separate option is specified, the AST generation is
 * handled by generate_shifted_component_tree_separate.
 * In the other cases, we need the global schedule domain.
 * In the unroll case, the AST generation is then handled by
 * generate_shifted_component_tree_unroll which needs the actual
 * schedule domain (with divs that may refer to the current dimension)
 * so that stride detection can be performed.
 * In the atomic or unspecified case, inner dimensions and divs involving
 * the current dimensions should be eliminated.
 * The result is then either combined into a single basic set or
 * split up into disjoint basic sets.
 * Finally an AST is generated for each basic set and the results are
 * concatenated.
 *
 * If the schedule domain involves a disjunction that is purely based on
 * constraints involving only outer dimensions, then it is treated as
 * if atomic was specified.  This ensures that only a single loop
 * is generated instead of a sequence of identical loops with
 * different guards.
 */
static __isl_give isl_ast_graft_list *generate_shifted_component_tree_base(
	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build,
	int isolated)
{
	isl_bool outer_disjunction;
	isl_union_set *schedule_domain;
	isl_set *domain;
	isl_basic_set_list *domain_list;
	isl_ast_graft_list *list;
	enum isl_ast_loop_type type;

	type = isl_ast_build_get_loop_type(build, isolated);
	if (type < 0)
		goto error;

	/* The separate case does not need the global schedule domain;
	 * ownership of "executed" and "build" is transferred here. */
	if (type == isl_ast_loop_separate)
		return generate_shifted_component_tree_separate(executed,
								build);

	schedule_domain = isl_union_map_domain(isl_union_map_copy(executed));
	domain = isl_set_from_union_set(schedule_domain);

	/* Unrolling needs the actual schedule domain, including divs
	 * that may refer to the current dimension. */
	if (type == isl_ast_loop_unroll)
		return generate_shifted_component_tree_unroll(executed, domain,
								build);

	/* Atomic or unspecified: eliminate inner dimensions and divs
	 * involving the current dimension. */
	domain = isl_ast_build_eliminate(build, domain);
	domain = isl_set_coalesce_preserve(domain);

	outer_disjunction = has_pure_outer_disjunction(domain, build);
	if (outer_disjunction < 0)
		domain = isl_set_free(domain);

	if (outer_disjunction || type == isl_ast_loop_atomic) {
		/* Combine everything into a single basic set. */
		isl_basic_set *hull;
		hull = isl_set_unshifted_simple_hull(domain);
		domain_list = isl_basic_set_list_from_basic_set(hull);
	} else {
		/* Split up into disjoint basic sets. */
		domain = isl_set_make_disjoint(domain);
		domain_list = isl_basic_set_list_from_set(domain);
	}

	list = generate_parallel_domains(domain_list, executed, build);

	isl_basic_set_list_free(domain_list);
	isl_union_map_free(executed);
	isl_ast_build_free(build);

	return list;
error:
	isl_union_map_free(executed);
	isl_ast_build_free(build);
	return NULL;
}
3410 1.1 mrg
3411 1.1 mrg /* Extract out the disjunction imposed by "domain" on the outer
3412 1.1 mrg * schedule dimensions.
3413 1.1 mrg *
3414 1.1 mrg * In particular, remove all inner dimensions from "domain" (including
3415 1.1 mrg * the current dimension) and then remove the constraints that are shared
3416 1.1 mrg * by all disjuncts in the result.
3417 1.1 mrg */
3418 1.1 mrg static __isl_give isl_set *extract_disjunction(__isl_take isl_set *domain,
3419 1.1 mrg __isl_keep isl_ast_build *build)
3420 1.1 mrg {
3421 1.1 mrg isl_set *hull;
3422 1.1 mrg isl_size depth;
3423 1.1 mrg isl_size dim;
3424 1.1 mrg
3425 1.1 mrg domain = isl_ast_build_specialize(build, domain);
3426 1.1 mrg depth = isl_ast_build_get_depth(build);
3427 1.1 mrg dim = isl_set_dim(domain, isl_dim_set);
3428 1.1 mrg if (depth < 0 || dim < 0)
3429 1.1 mrg return isl_set_free(domain);
3430 1.1 mrg domain = isl_set_eliminate(domain, isl_dim_set, depth, dim - depth);
3431 1.1 mrg domain = isl_set_remove_unknown_divs(domain);
3432 1.1 mrg hull = isl_set_copy(domain);
3433 1.1 mrg hull = isl_set_from_basic_set(isl_set_unshifted_simple_hull(hull));
3434 1.1 mrg domain = isl_set_gist(domain, hull);
3435 1.1 mrg
3436 1.1 mrg return domain;
3437 1.1 mrg }
3438 1.1 mrg
3439 1.1 mrg /* Add "guard" to the grafts in "list".
3440 1.1 mrg * "build" is the outer AST build, while "sub_build" includes "guard"
3441 1.1 mrg * in its generated domain.
3442 1.1 mrg *
3443 1.1 mrg * First combine the grafts into a single graft and then add the guard.
3444 1.1 mrg * If the list is empty, or if some error occurred, then simply return
3445 1.1 mrg * the list.
3446 1.1 mrg */
3447 1.1 mrg static __isl_give isl_ast_graft_list *list_add_guard(
3448 1.1 mrg __isl_take isl_ast_graft_list *list, __isl_keep isl_set *guard,
3449 1.1 mrg __isl_keep isl_ast_build *build, __isl_keep isl_ast_build *sub_build)
3450 1.1 mrg {
3451 1.1 mrg isl_ast_graft *graft;
3452 1.1 mrg isl_size n;
3453 1.1 mrg
3454 1.1 mrg list = isl_ast_graft_list_fuse(list, sub_build);
3455 1.1 mrg
3456 1.1 mrg n = isl_ast_graft_list_n_ast_graft(list);
3457 1.1 mrg if (n < 0)
3458 1.1 mrg return isl_ast_graft_list_free(list);
3459 1.1 mrg if (n != 1)
3460 1.1 mrg return list;
3461 1.1 mrg
3462 1.1 mrg graft = isl_ast_graft_list_get_ast_graft(list, 0);
3463 1.1 mrg graft = isl_ast_graft_add_guard(graft, isl_set_copy(guard), build);
3464 1.1 mrg list = isl_ast_graft_list_set_ast_graft(list, 0, graft);
3465 1.1 mrg
3466 1.1 mrg return list;
3467 1.1 mrg }
3468 1.1 mrg
/* Generate code for a single component, after shifting (if any)
 * has been applied, in case the schedule was specified as a schedule tree.
 * In particular, do so for the specified subset of the schedule domain.
 *
 * If we are outside of the isolated part, then "domain" may include
 * a disjunction.  Explicitly generate this disjunction at this point
 * instead of relying on the disjunction getting hoisted back up
 * to this level.
 */
static __isl_give isl_ast_graft_list *generate_shifted_component_tree_part(
	__isl_keep isl_union_map *executed, __isl_take isl_set *domain,
	__isl_keep isl_ast_build *build, int isolated)
{
	isl_union_set *uset;
	isl_ast_graft_list *list;
	isl_ast_build *sub_build;
	int empty;

	/* Restrict the inverse schedule to the given part of the domain. */
	uset = isl_union_set_from_set(isl_set_copy(domain));
	executed = isl_union_map_copy(executed);
	executed = isl_union_map_intersect_domain(executed, uset);
	empty = isl_union_map_is_empty(executed);
	if (empty < 0)
		goto error;
	if (empty) {
		/* Nothing to generate for this part. */
		isl_ctx *ctx;
		isl_union_map_free(executed);
		isl_set_free(domain);
		ctx = isl_ast_build_get_ctx(build);
		return isl_ast_graft_list_alloc(ctx, 0);
	}

	sub_build = isl_ast_build_copy(build);
	if (!isolated) {
		/* Extract the outer disjunction and record it in the
		 * build used for generating the subtree; it is added
		 * back as an explicit guard below. */
		domain = extract_disjunction(domain, build);
		sub_build = isl_ast_build_restrict_generated(sub_build,
					isl_set_copy(domain));
	}
	list = generate_shifted_component_tree_base(executed,
				isl_ast_build_copy(sub_build), isolated);
	if (!isolated)
		list = list_add_guard(list, domain, build, sub_build);
	isl_ast_build_free(sub_build);
	isl_set_free(domain);
	return list;
error:
	isl_union_map_free(executed);
	isl_set_free(domain);
	return NULL;
}
3519 1.1 mrg
3520 1.1 mrg /* Generate code for a single component, after shifting (if any)
3521 1.1 mrg * has been applied, in case the schedule was specified as a schedule tree.
3522 1.1 mrg * In particular, do so for the specified sequence of subsets
3523 1.1 mrg * of the schedule domain, "before", "isolated", "after" and "other",
3524 1.1 mrg * where only the "isolated" part is considered to be isolated.
3525 1.1 mrg */
3526 1.1 mrg static __isl_give isl_ast_graft_list *generate_shifted_component_parts(
3527 1.1 mrg __isl_take isl_union_map *executed, __isl_take isl_set *before,
3528 1.1 mrg __isl_take isl_set *isolated, __isl_take isl_set *after,
3529 1.1 mrg __isl_take isl_set *other, __isl_take isl_ast_build *build)
3530 1.1 mrg {
3531 1.1 mrg isl_ast_graft_list *list, *res;
3532 1.1 mrg
3533 1.1 mrg res = generate_shifted_component_tree_part(executed, before, build, 0);
3534 1.1 mrg list = generate_shifted_component_tree_part(executed, isolated,
3535 1.1 mrg build, 1);
3536 1.1 mrg res = isl_ast_graft_list_concat(res, list);
3537 1.1 mrg list = generate_shifted_component_tree_part(executed, after, build, 0);
3538 1.1 mrg res = isl_ast_graft_list_concat(res, list);
3539 1.1 mrg list = generate_shifted_component_tree_part(executed, other, build, 0);
3540 1.1 mrg res = isl_ast_graft_list_concat(res, list);
3541 1.1 mrg
3542 1.1 mrg isl_union_map_free(executed);
3543 1.1 mrg isl_ast_build_free(build);
3544 1.1 mrg
3545 1.1 mrg return res;
3546 1.1 mrg }
3547 1.1 mrg
3548 1.1 mrg /* Does "set" intersect "first", but not "second"?
3549 1.1 mrg */
3550 1.1 mrg static isl_bool only_intersects_first(__isl_keep isl_set *set,
3551 1.1 mrg __isl_keep isl_set *first, __isl_keep isl_set *second)
3552 1.1 mrg {
3553 1.1 mrg isl_bool disjoint;
3554 1.1 mrg
3555 1.1 mrg disjoint = isl_set_is_disjoint(set, first);
3556 1.1 mrg if (disjoint < 0)
3557 1.1 mrg return isl_bool_error;
3558 1.1 mrg if (disjoint)
3559 1.1 mrg return isl_bool_false;
3560 1.1 mrg
3561 1.1 mrg return isl_set_is_disjoint(set, second);
3562 1.1 mrg }
3563 1.1 mrg
3564 1.1 mrg /* Generate code for a single component, after shifting (if any)
3565 1.1 mrg * has been applied, in case the schedule was specified as a schedule tree.
3566 1.1 mrg * In particular, do so in case of isolation where there is
3567 1.1 mrg * only an "isolated" part and an "after" part.
3568 1.1 mrg * "dead1" and "dead2" are freed by this function in order to simplify
3569 1.1 mrg * the caller.
3570 1.1 mrg *
3571 1.1 mrg * The "before" and "other" parts are set to empty sets.
3572 1.1 mrg */
3573 1.1 mrg static __isl_give isl_ast_graft_list *generate_shifted_component_only_after(
3574 1.1 mrg __isl_take isl_union_map *executed, __isl_take isl_set *isolated,
3575 1.1 mrg __isl_take isl_set *after, __isl_take isl_ast_build *build,
3576 1.1 mrg __isl_take isl_set *dead1, __isl_take isl_set *dead2)
3577 1.1 mrg {
3578 1.1 mrg isl_set *empty;
3579 1.1 mrg
3580 1.1 mrg empty = isl_set_empty(isl_set_get_space(after));
3581 1.1 mrg isl_set_free(dead1);
3582 1.1 mrg isl_set_free(dead2);
3583 1.1 mrg return generate_shifted_component_parts(executed, isl_set_copy(empty),
3584 1.1 mrg isolated, after, empty, build);
3585 1.1 mrg }
3586 1.1 mrg
/* Generate code for a single component, after shifting (if any)
 * has been applied, in case the schedule was specified as a schedule tree.
 *
 * We first check if the user has specified an isolated schedule domain
 * and that we are not already outside of this isolated schedule domain.
 * If so, we break up the schedule domain into iterations that
 * precede the isolated domain, the isolated domain itself,
 * the iterations that follow the isolated domain and
 * the remaining iterations (those that are incomparable
 * to the isolated domain).
 * We generate an AST for each piece and concatenate the results.
 *
 * If the isolated domain is not convex, then it is replaced
 * by a convex superset to ensure that the sets of preceding and
 * following iterations are properly defined and, in particular,
 * that there are no intermediate iterations that do not belong
 * to the isolated domain.
 *
 * In the special case where at least one element of the schedule
 * domain that does not belong to the isolated domain needs
 * to be scheduled after this isolated domain, but none of those
 * elements need to be scheduled before, break up the schedule domain
 * in only two parts, the isolated domain, and a part that will be
 * scheduled after the isolated domain.
 *
 * If no isolated set has been specified, then we generate an
 * AST for the entire inverse schedule.
 */
static __isl_give isl_ast_graft_list *generate_shifted_component_tree(
	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
{
	int i;
	isl_size depth;
	int empty, has_isolate;
	isl_space *space;
	isl_union_set *schedule_domain;
	isl_set *domain;
	isl_basic_set *hull;
	isl_set *isolated, *before, *after, *test;
	isl_map *gt, *lt;
	isl_bool pure;

	build = isl_ast_build_extract_isolated(build);
	has_isolate = isl_ast_build_has_isolated(build);
	if (has_isolate < 0)
		executed = isl_union_map_free(executed);
	else if (!has_isolate)
		return generate_shifted_component_tree_base(executed, build, 0);

	schedule_domain = isl_union_map_domain(isl_union_map_copy(executed));
	domain = isl_set_from_union_set(schedule_domain);

	/* Restrict the isolated set to the schedule domain and check
	 * whether anything of it remains after plugging in the values
	 * of the outer dimensions known to the build.  If not, fall back
	 * to the non-isolated base case. */
	isolated = isl_ast_build_get_isolated(build);
	isolated = isl_set_intersect(isolated, isl_set_copy(domain));
	test = isl_ast_build_specialize(build, isl_set_copy(isolated));
	empty = isl_set_is_empty(test);
	isl_set_free(test);
	if (empty < 0)
		goto error;
	if (empty) {
		isl_set_free(isolated);
		isl_set_free(domain);
		return generate_shifted_component_tree_base(executed, build, 0);
	}
	depth = isl_ast_build_get_depth(build);
	if (depth < 0)
		goto error;

	/* Replace a non-convex isolated set by a convex superset. */
	isolated = isl_ast_build_eliminate(build, isolated);
	hull = isl_set_unshifted_simple_hull(isolated);
	isolated = isl_set_from_basic_set(hull);

	/* "gt" relates each iteration to the iterations that precede it
	 * at the current depth (equal outer dimensions, strictly greater
	 * value at "depth"); "lt" is its inverse.  Applying them to the
	 * isolated set yields the preceding and following iterations. */
	space = isl_space_map_from_set(isl_set_get_space(isolated));
	gt = isl_map_universe(space);
	for (i = 0; i < depth; ++i)
		gt = isl_map_equate(gt, isl_dim_in, i, isl_dim_out, i);
	gt = isl_map_order_gt(gt, isl_dim_in, depth, isl_dim_out, depth);
	lt = isl_map_reverse(isl_map_copy(gt));
	before = isl_set_apply(isl_set_copy(isolated), gt);
	after = isl_set_apply(isl_set_copy(isolated), lt);

	/* Special case: everything outside the isolated set comes after it.
	 * On error, "executed" is set to NULL and the error propagates
	 * through the calls below. */
	domain = isl_set_subtract(domain, isl_set_copy(isolated));
	pure = only_intersects_first(domain, after, before);
	if (pure < 0)
		executed = isl_union_map_free(executed);
	else if (pure)
		return generate_shifted_component_only_after(executed, isolated,
						domain, build, before, after);
	/* Make the four parts pairwise disjoint. */
	domain = isl_set_subtract(domain, isl_set_copy(before));
	domain = isl_set_subtract(domain, isl_set_copy(after));
	after = isl_set_subtract(after, isl_set_copy(isolated));
	after = isl_set_subtract(after, isl_set_copy(before));
	before = isl_set_subtract(before, isl_set_copy(isolated));

	return generate_shifted_component_parts(executed, before, isolated,
						after, domain, build);
error:
	isl_set_free(domain);
	isl_set_free(isolated);
	isl_union_map_free(executed);
	isl_ast_build_free(build);
	return NULL;
}
3690 1.1 mrg
3691 1.1 mrg /* Generate code for a single component, after shifting (if any)
3692 1.1 mrg * has been applied.
3693 1.1 mrg *
3694 1.1 mrg * Call generate_shifted_component_tree or generate_shifted_component_flat
3695 1.1 mrg * depending on whether the schedule was specified as a schedule tree.
3696 1.1 mrg */
3697 1.1 mrg static __isl_give isl_ast_graft_list *generate_shifted_component(
3698 1.1 mrg __isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
3699 1.1 mrg {
3700 1.1 mrg if (isl_ast_build_has_schedule_node(build))
3701 1.1 mrg return generate_shifted_component_tree(executed, build);
3702 1.1 mrg else
3703 1.1 mrg return generate_shifted_component_flat(executed, build);
3704 1.1 mrg }
3705 1.1 mrg
/* A pair of an inverse schedule map and an associated set.
 *
 * "map" is (a part of) an inverse schedule.
 * "set" holds the domain of "map" (see extract_domain),
 * possibly simplified later on (see eliminate_non_fixed).
 */
struct isl_set_map_pair {
	isl_set *set;
	isl_map *map;
};
3710 1.1 mrg
3711 1.1 mrg /* Given an array "domain" of isl_set_map_pairs and an array "order"
3712 1.1 mrg * of indices into the "domain" array,
3713 1.1 mrg * return the union of the "map" fields of the elements
3714 1.1 mrg * indexed by the first "n" elements of "order".
3715 1.1 mrg */
3716 1.1 mrg static __isl_give isl_union_map *construct_component_executed(
3717 1.1 mrg struct isl_set_map_pair *domain, int *order, int n)
3718 1.1 mrg {
3719 1.1 mrg int i;
3720 1.1 mrg isl_map *map;
3721 1.1 mrg isl_union_map *executed;
3722 1.1 mrg
3723 1.1 mrg map = isl_map_copy(domain[order[0]].map);
3724 1.1 mrg executed = isl_union_map_from_map(map);
3725 1.1 mrg for (i = 1; i < n; ++i) {
3726 1.1 mrg map = isl_map_copy(domain[order[i]].map);
3727 1.1 mrg executed = isl_union_map_add_map(executed, map);
3728 1.1 mrg }
3729 1.1 mrg
3730 1.1 mrg return executed;
3731 1.1 mrg }
3732 1.1 mrg
3733 1.1 mrg /* Generate code for a single component, after shifting (if any)
3734 1.1 mrg * has been applied.
3735 1.1 mrg *
3736 1.1 mrg * The component inverse schedule is specified as the "map" fields
3737 1.1 mrg * of the elements of "domain" indexed by the first "n" elements of "order".
3738 1.1 mrg */
3739 1.1 mrg static __isl_give isl_ast_graft_list *generate_shifted_component_from_list(
3740 1.1 mrg struct isl_set_map_pair *domain, int *order, int n,
3741 1.1 mrg __isl_take isl_ast_build *build)
3742 1.1 mrg {
3743 1.1 mrg isl_union_map *executed;
3744 1.1 mrg
3745 1.1 mrg executed = construct_component_executed(domain, order, n);
3746 1.1 mrg return generate_shifted_component(executed, build);
3747 1.1 mrg }
3748 1.1 mrg
3749 1.1 mrg /* Does set dimension "pos" of "set" have an obviously fixed value?
3750 1.1 mrg */
3751 1.1 mrg static int dim_is_fixed(__isl_keep isl_set *set, int pos)
3752 1.1 mrg {
3753 1.1 mrg int fixed;
3754 1.1 mrg isl_val *v;
3755 1.1 mrg
3756 1.1 mrg v = isl_set_plain_get_val_if_fixed(set, isl_dim_set, pos);
3757 1.1 mrg if (!v)
3758 1.1 mrg return -1;
3759 1.1 mrg fixed = !isl_val_is_nan(v);
3760 1.1 mrg isl_val_free(v);
3761 1.1 mrg
3762 1.1 mrg return fixed;
3763 1.1 mrg }
3764 1.1 mrg
3765 1.1 mrg /* Given an array "domain" of isl_set_map_pairs and an array "order"
3766 1.1 mrg * of indices into the "domain" array,
3767 1.1 mrg * do all (except for at most one) of the "set" field of the elements
3768 1.1 mrg * indexed by the first "n" elements of "order" have a fixed value
3769 1.1 mrg * at position "depth"?
3770 1.1 mrg */
3771 1.1 mrg static int at_most_one_non_fixed(struct isl_set_map_pair *domain,
3772 1.1 mrg int *order, int n, int depth)
3773 1.1 mrg {
3774 1.1 mrg int i;
3775 1.1 mrg int non_fixed = -1;
3776 1.1 mrg
3777 1.1 mrg for (i = 0; i < n; ++i) {
3778 1.1 mrg int f;
3779 1.1 mrg
3780 1.1 mrg f = dim_is_fixed(domain[order[i]].set, depth);
3781 1.1 mrg if (f < 0)
3782 1.1 mrg return -1;
3783 1.1 mrg if (f)
3784 1.1 mrg continue;
3785 1.1 mrg if (non_fixed >= 0)
3786 1.1 mrg return 0;
3787 1.1 mrg non_fixed = i;
3788 1.1 mrg }
3789 1.1 mrg
3790 1.1 mrg return 1;
3791 1.1 mrg }
3792 1.1 mrg
3793 1.1 mrg /* Given an array "domain" of isl_set_map_pairs and an array "order"
3794 1.1 mrg * of indices into the "domain" array,
3795 1.1 mrg * eliminate the inner dimensions from the "set" field of the elements
3796 1.1 mrg * indexed by the first "n" elements of "order", provided the current
3797 1.1 mrg * dimension does not have a fixed value.
3798 1.1 mrg *
3799 1.1 mrg * Return the index of the first element in "order" with a corresponding
3800 1.1 mrg * "set" field that does not have an (obviously) fixed value.
3801 1.1 mrg */
3802 1.1 mrg static int eliminate_non_fixed(struct isl_set_map_pair *domain,
3803 1.1 mrg int *order, int n, int depth, __isl_keep isl_ast_build *build)
3804 1.1 mrg {
3805 1.1 mrg int i;
3806 1.1 mrg int base = -1;
3807 1.1 mrg
3808 1.1 mrg for (i = n - 1; i >= 0; --i) {
3809 1.1 mrg int f;
3810 1.1 mrg f = dim_is_fixed(domain[order[i]].set, depth);
3811 1.1 mrg if (f < 0)
3812 1.1 mrg return -1;
3813 1.1 mrg if (f)
3814 1.1 mrg continue;
3815 1.1 mrg domain[order[i]].set = isl_ast_build_eliminate_inner(build,
3816 1.1 mrg domain[order[i]].set);
3817 1.1 mrg base = i;
3818 1.1 mrg }
3819 1.1 mrg
3820 1.1 mrg return base;
3821 1.1 mrg }
3822 1.1 mrg
3823 1.1 mrg /* Given an array "domain" of isl_set_map_pairs and an array "order"
3824 1.1 mrg * of indices into the "domain" array,
3825 1.1 mrg * find the element of "domain" (amongst those indexed by the first "n"
3826 1.1 mrg * elements of "order") with the "set" field that has the smallest
3827 1.1 mrg * value for the current iterator.
3828 1.1 mrg *
3829 1.1 mrg * Note that the domain with the smallest value may depend on the parameters
3830 1.1 mrg * and/or outer loop dimension. Since the result of this function is only
3831 1.1 mrg * used as heuristic, we only make a reasonable attempt at finding the best
3832 1.1 mrg * domain, one that should work in case a single domain provides the smallest
3833 1.1 mrg * value for the current dimension over all values of the parameters
3834 1.1 mrg * and outer dimensions.
3835 1.1 mrg *
3836 1.1 mrg * In particular, we compute the smallest value of the first domain
3837 1.1 mrg * and replace it by that of any later domain if that later domain
3838 1.1 mrg * has a smallest value that is smaller for at least some value
3839 1.1 mrg * of the parameters and outer dimensions.
3840 1.1 mrg */
static int first_offset(struct isl_set_map_pair *domain, int *order, int n,
	__isl_keep isl_ast_build *build)
{
	int i;
	isl_map *min_first;
	int first = 0;

	/* Smallest value of the current iterator for the current
	 * candidate domain (initially the first domain),
	 * as a function of the parameters and outer dimensions.
	 */
	min_first = isl_ast_build_map_to_iterator(build,
		isl_set_copy(domain[order[0]].set));
	min_first = isl_map_lexmin(min_first);

	for (i = 1; i < n; ++i) {
		isl_map *min, *test;
		int empty;

		min = isl_ast_build_map_to_iterator(build,
			isl_set_copy(domain[order[i]].set));
		min = isl_map_lexmin(min);
		/* Is the smallest value of domain "i" smaller than that
		 * of the current candidate for at least some values of
		 * the parameters and outer dimensions?
		 */
		test = isl_map_copy(min);
		test = isl_map_apply_domain(isl_map_copy(min_first), test);
		test = isl_map_order_lt(test, isl_dim_in, 0, isl_dim_out, 0);
		empty = isl_map_is_empty(test);
		isl_map_free(test);
		if (empty >= 0 && !empty) {
			/* Domain "i" becomes the new candidate. */
			isl_map_free(min_first);
			first = i;
			min_first = min;
		} else
			isl_map_free(min);

		/* A negative "empty" signals an error; bail out. */
		if (empty < 0)
			break;
	}

	isl_map_free(min_first);

	/* An early exit from the loop means an error occurred. */
	return i < n ? -1 : first;
}
3879 1.1 mrg
3880 1.1 mrg /* Construct a shifted inverse schedule based on the original inverse schedule,
3881 1.1 mrg * the stride and the offset.
3882 1.1 mrg *
3883 1.1 mrg * The original inverse schedule is specified as the "map" fields
3884 1.1 mrg * of the elements of "domain" indexed by the first "n" elements of "order".
3885 1.1 mrg *
3886 1.1 mrg * "stride" and "offset" are such that the difference
3887 1.1 mrg * between the values of the current dimension of domain "i"
3888 1.1 mrg * and the values of the current dimension for some reference domain are
3889 1.1 mrg * equal to
3890 1.1 mrg *
3891 1.1 mrg * stride * integer + offset[i]
3892 1.1 mrg *
3893 1.1 mrg * Moreover, 0 <= offset[i] < stride.
3894 1.1 mrg *
3895 1.1 mrg * For each domain, we create a map
3896 1.1 mrg *
3897 1.1 mrg * { [..., j, ...] -> [..., j - offset[i], offset[i], ....] }
3898 1.1 mrg *
3899 1.1 mrg * where j refers to the current dimension and the other dimensions are
3900 1.1 mrg * unchanged, and apply this map to the original schedule domain.
3901 1.1 mrg *
3902 1.1 mrg * For example, for the original schedule
3903 1.1 mrg *
3904 1.1 mrg * { A[i] -> [2i]: 0 <= i < 10; B[i] -> [2i+1] : 0 <= i < 10 }
3905 1.1 mrg *
3906 1.1 mrg * and assuming the offset is 0 for the A domain and 1 for the B domain,
3907 1.1 mrg * we apply the mapping
3908 1.1 mrg *
3909 1.1 mrg * { [j] -> [j, 0] }
3910 1.1 mrg *
3911 1.1 mrg * to the schedule of the "A" domain and the mapping
3912 1.1 mrg *
3913 1.1 mrg * { [j - 1] -> [j, 1] }
3914 1.1 mrg *
3915 1.1 mrg * to the schedule of the "B" domain.
3916 1.1 mrg *
3917 1.1 mrg *
3918 1.1 mrg * Note that after the transformation, the differences between pairs
3919 1.1 mrg * of values of the current dimension over all domains are multiples
3920 1.1 mrg * of stride and that we have therefore exposed the stride.
3921 1.1 mrg *
3922 1.1 mrg *
3923 1.1 mrg * To see that the mapping preserves the lexicographic order,
3924 1.1 mrg * first note that each of the individual maps above preserves the order.
3925 1.1 mrg * If the value of the current iterator is j1 in one domain and j2 in another,
3926 1.1 mrg * then if j1 = j2, we know that the same map is applied to both domains
3927 1.1 mrg * and the order is preserved.
3928 1.1 mrg * Otherwise, let us assume, without loss of generality, that j1 < j2.
3929 1.1 mrg * If c1 >= c2 (with c1 and c2 the corresponding offsets), then
3930 1.1 mrg *
3931 1.1 mrg * j1 - c1 < j2 - c2
3932 1.1 mrg *
3933 1.1 mrg * and the order is preserved.
3934 1.1 mrg * If c1 < c2, then we know
3935 1.1 mrg *
3936 1.1 mrg * 0 <= c2 - c1 < s
3937 1.1 mrg *
3938 1.1 mrg * We also have
3939 1.1 mrg *
3940 1.1 mrg * j2 - j1 = n * s + r
3941 1.1 mrg *
3942 1.1 mrg * with n >= 0 and 0 <= r < s.
3943 1.1 mrg * In other words, r = c2 - c1.
3944 1.1 mrg * If n > 0, then
3945 1.1 mrg *
3946 1.1 mrg * j1 - c1 < j2 - c2
3947 1.1 mrg *
3948 1.1 mrg * If n = 0, then
3949 1.1 mrg *
3950 1.1 mrg * j1 - c1 = j2 - c2
3951 1.1 mrg *
3952 1.1 mrg * and so
3953 1.1 mrg *
3954 1.1 mrg * (j1 - c1, c1) << (j2 - c2, c2)
3955 1.1 mrg *
3956 1.1 mrg * with "<<" the lexicographic order, proving that the order is preserved
3957 1.1 mrg * in all cases.
3958 1.1 mrg */
static __isl_give isl_union_map *construct_shifted_executed(
	struct isl_set_map_pair *domain, int *order, int n,
	__isl_keep isl_val *stride, __isl_keep isl_multi_val *offset,
	__isl_keep isl_ast_build *build)
{
	int i;
	isl_union_map *executed;
	isl_space *space;
	isl_map *map;
	isl_size depth;
	isl_constraint *c;

	depth = isl_ast_build_get_depth(build);
	if (depth < 0)
		return NULL;
	space = isl_ast_build_get_space(build, 1);
	executed = isl_union_map_empty(isl_space_copy(space));
	space = isl_space_map_from_set(space);
	/* Template for the per-domain shift maps:
	 * identity on all dimensions except the current one,
	 * with an extra output dimension inserted right after it.
	 */
	map = isl_map_identity(isl_space_copy(space));
	map = isl_map_eliminate(map, isl_dim_out, depth, 1);
	map = isl_map_insert_dims(map, isl_dim_out, depth + 1, 1);
	space = isl_space_insert_dims(space, isl_dim_out, depth + 1, 1);

	/* Template for the equality j_out = j_in - offset[i];
	 * only the constant term (the offset) varies per domain.
	 */
	c = isl_constraint_alloc_equality(isl_local_space_from_space(space));
	c = isl_constraint_set_coefficient_si(c, isl_dim_in, depth, 1);
	c = isl_constraint_set_coefficient_si(c, isl_dim_out, depth, -1);

	for (i = 0; i < n; ++i) {
		isl_map *map_i;
		isl_val *v;

		v = isl_multi_val_get_val(offset, i);
		if (!v)
			break;
		map_i = isl_map_copy(map);
		/* Pin the newly inserted dimension to offset[i] ... */
		map_i = isl_map_fix_val(map_i, isl_dim_out, depth + 1,
					isl_val_copy(v));
		/* ... and shift the current dimension by -offset[i]. */
		v = isl_val_neg(v);
		c = isl_constraint_set_constant_val(c, v);
		map_i = isl_map_add_constraint(map_i, isl_constraint_copy(c));

		map_i = isl_map_apply_domain(isl_map_copy(domain[order[i]].map),
						map_i);
		executed = isl_union_map_add_map(executed, map_i);
	}

	isl_constraint_free(c);
	isl_map_free(map);

	/* An early exit from the loop means an error occurred. */
	if (i < n)
		executed = isl_union_map_free(executed);

	return executed;
}
4013 1.1 mrg
4014 1.1 mrg /* Generate code for a single component, after exposing the stride,
4015 1.1 mrg * given that the schedule domain is "shifted strided".
4016 1.1 mrg *
4017 1.1 mrg * The component inverse schedule is specified as the "map" fields
4018 1.1 mrg * of the elements of "domain" indexed by the first "n" elements of "order".
4019 1.1 mrg *
4020 1.1 mrg * The schedule domain being "shifted strided" means that the differences
4021 1.1 mrg * between the values of the current dimension of domain "i"
4022 1.1 mrg * and the values of the current dimension for some reference domain are
4023 1.1 mrg * equal to
4024 1.1 mrg *
4025 1.1 mrg * stride * integer + offset[i]
4026 1.1 mrg *
4027 1.1 mrg * We first look for the domain with the "smallest" value for the current
4028 1.1 mrg * dimension and adjust the offsets such that the offset of the "smallest"
4029 1.1 mrg * domain is equal to zero. The other offsets are reduced modulo stride.
4030 1.1 mrg *
4031 1.1 mrg * Based on this information, we construct a new inverse schedule in
4032 1.1 mrg * construct_shifted_executed that exposes the stride.
4033 1.1 mrg * Since this involves the introduction of a new schedule dimension,
4034 1.1 mrg * the build needs to be changed accordingly.
4035 1.1 mrg * After computing the AST, the newly introduced dimension needs
4036 1.1 mrg * to be removed again from the list of grafts. We do this by plugging
4037 1.1 mrg * in a mapping that represents the new schedule domain in terms of the
4038 1.1 mrg * old schedule domain.
4039 1.1 mrg */
static __isl_give isl_ast_graft_list *generate_shift_component(
	struct isl_set_map_pair *domain, int *order, int n,
	__isl_keep isl_val *stride, __isl_keep isl_multi_val *offset,
	__isl_take isl_ast_build *build)
{
	isl_ast_graft_list *list;
	int first;
	isl_size depth;
	isl_val *val;
	isl_multi_val *mv;
	isl_space *space;
	isl_multi_aff *ma, *zero;
	isl_union_map *executed;

	depth = isl_ast_build_get_depth(build);

	/* Index of the domain with the "smallest" value
	 * for the current dimension.
	 */
	first = first_offset(domain, order, n, build);
	if (depth < 0 || first < 0)
		goto error;

	/* Normalize the offsets such that offset[first] becomes zero,
	 * reducing the others modulo the stride.
	 */
	mv = isl_multi_val_copy(offset);
	val = isl_multi_val_get_val(offset, first);
	val = isl_val_neg(val);
	mv = isl_multi_val_add_val(mv, val);
	mv = isl_multi_val_mod_val(mv, isl_val_copy(stride));

	executed = construct_shifted_executed(domain, order, n, stride, mv,
						build);
	/* "ma" maps the old schedule domain to the new one (with the
	 * extra dimension inserted at position depth + 1 and set to zero),
	 * for plugging the old domain back into the computed grafts.
	 */
	space = isl_ast_build_get_space(build, 1);
	space = isl_space_map_from_set(space);
	ma = isl_multi_aff_identity(isl_space_copy(space));
	space = isl_space_from_domain(isl_space_domain(space));
	space = isl_space_add_dims(space, isl_dim_out, 1);
	zero = isl_multi_aff_zero(space);
	ma = isl_multi_aff_range_splice(ma, depth + 1, zero);
	build = isl_ast_build_insert_dim(build, depth + 1);
	list = generate_shifted_component(executed, build);

	/* Remove the newly introduced dimension from the grafts again. */
	list = isl_ast_graft_list_preimage_multi_aff(list, ma);

	isl_multi_val_free(mv);

	return list;
error:
	isl_ast_build_free(build);
	return NULL;
}
4087 1.1 mrg
4088 1.1 mrg /* Does any node in the schedule tree rooted at the current schedule node
4089 1.1 mrg * of "build" depend on outer schedule nodes?
4090 1.1 mrg */
4091 1.1 mrg static int has_anchored_subtree(__isl_keep isl_ast_build *build)
4092 1.1 mrg {
4093 1.1 mrg isl_schedule_node *node;
4094 1.1 mrg int dependent = 0;
4095 1.1 mrg
4096 1.1 mrg node = isl_ast_build_get_schedule_node(build);
4097 1.1 mrg dependent = isl_schedule_node_is_subtree_anchored(node);
4098 1.1 mrg isl_schedule_node_free(node);
4099 1.1 mrg
4100 1.1 mrg return dependent;
4101 1.1 mrg }
4102 1.1 mrg
4103 1.1 mrg /* Generate code for a single component.
4104 1.1 mrg *
4105 1.1 mrg * The component inverse schedule is specified as the "map" fields
4106 1.1 mrg * of the elements of "domain" indexed by the first "n" elements of "order".
4107 1.1 mrg *
4108 1.1 mrg * This function may modify the "set" fields of "domain".
4109 1.1 mrg *
4110 1.1 mrg * Before proceeding with the actual code generation for the component,
4111 1.1 mrg * we first check if there are any "shifted" strides, meaning that
4112 1.1 mrg * the schedule domains of the individual domains are all strided,
4113 1.1 mrg * but that they have different offsets, resulting in the union
4114 1.1 mrg * of schedule domains not being strided anymore.
4115 1.1 mrg *
4116 1.1 mrg * The simplest example is the schedule
4117 1.1 mrg *
4118 1.1 mrg * { A[i] -> [2i]: 0 <= i < 10; B[i] -> [2i+1] : 0 <= i < 10 }
4119 1.1 mrg *
4120 1.1 mrg * Both schedule domains are strided, but their union is not.
4121 1.1 mrg * This function detects such cases and then rewrites the schedule to
4122 1.1 mrg *
4123 1.1 mrg * { A[i] -> [2i, 0]: 0 <= i < 10; B[i] -> [2i, 1] : 0 <= i < 10 }
4124 1.1 mrg *
4125 1.1 mrg * In the new schedule, the schedule domains have the same offset (modulo
4126 1.1 mrg * the stride), ensuring that the union of schedule domains is also strided.
4127 1.1 mrg *
4128 1.1 mrg *
4129 1.1 mrg * If there is only a single domain in the component, then there is
4130 1.1 mrg * nothing to do. Similarly, if the current schedule dimension has
4131 1.1 mrg * a fixed value for almost all domains then there is nothing to be done.
4132 1.1 mrg * In particular, we need at least two domains where the current schedule
4133 1.1 mrg * dimension does not have a fixed value.
4134 1.1 mrg * Finally, in case of a schedule map input,
4135 1.1 mrg * if any of the options refer to the current schedule dimension,
4136 1.1 mrg * then we bail out as well. It would be possible to reformulate the options
4137 1.1 mrg * in terms of the new schedule domain, but that would introduce constraints
4138 1.1 mrg * that separate the domains in the options and that is something we would
4139 1.1 mrg * like to avoid.
4140 1.1 mrg * In the case of a schedule tree input, we bail out if any of
4141 1.1 mrg * the descendants of the current schedule node refer to outer
4142 1.1 mrg * schedule nodes in any way.
4143 1.1 mrg *
4144 1.1 mrg *
4145 1.1 mrg * To see if there is any shifted stride, we look at the differences
4146 1.1 mrg * between the values of the current dimension in pairs of domains
4147 1.1 mrg * for equal values of outer dimensions. These differences should be
4148 1.1 mrg * of the form
4149 1.1 mrg *
4150 1.1 mrg * m x + r
4151 1.1 mrg *
4152 1.1 mrg * with "m" the stride and "r" a constant. Note that we cannot perform
4153 1.1 mrg * this analysis on individual domains as the lower bound in each domain
4154 1.1 mrg * may depend on parameters or outer dimensions and so the current dimension
4155 1.1 mrg * itself may not have a fixed remainder on division by the stride.
4156 1.1 mrg *
4157 1.1 mrg * In particular, we compare the first domain that does not have an
4158 1.1 mrg * obviously fixed value for the current dimension to itself and all
4159 1.1 mrg * other domains and collect the offsets and the gcd of the strides.
4160 1.1 mrg * If the gcd becomes one, then we failed to find shifted strides.
4161 1.1 mrg * If the gcd is zero, then the differences were all fixed, meaning
4162 1.1 mrg * that some domains had non-obviously fixed values for the current dimension.
4163 1.1 mrg * If all the offsets are the same (for those domains that do not have
4164 1.1 mrg * an obviously fixed value for the current dimension), then we do not
4165 1.1 mrg * apply the transformation.
4166 1.1 mrg * If none of the domains were skipped, then there is nothing to do.
4167 1.1 mrg * If some of them were skipped, then if we apply separation, the schedule
4168 1.1 mrg * domain should get split in pieces with a (non-shifted) stride.
4169 1.1 mrg *
4170 1.1 mrg * Otherwise, we apply a shift to expose the stride in
4171 1.1 mrg * generate_shift_component.
4172 1.1 mrg */
static __isl_give isl_ast_graft_list *generate_component(
	struct isl_set_map_pair *domain, int *order, int n,
	__isl_take isl_ast_build *build)
{
	int i, d;
	isl_size depth;
	isl_ctx *ctx;
	isl_map *map;
	isl_set *deltas;
	isl_val *gcd = NULL;
	isl_multi_val *mv;
	int fixed, skip;
	int base;
	isl_ast_graft_list *list;
	int res = 0;

	depth = isl_ast_build_get_depth(build);
	if (depth < 0)
		goto error;

	/* Check the cases where no shifted-stride detection is needed:
	 * a single domain, at most one domain without a fixed value
	 * for the current dimension, or options/descendants that refer
	 * to the current schedule dimension.
	 */
	skip = n == 1;
	if (skip >= 0 && !skip)
		skip = at_most_one_non_fixed(domain, order, n, depth);
	if (skip >= 0 && !skip) {
		if (isl_ast_build_has_schedule_node(build))
			skip = has_anchored_subtree(build);
		else
			skip = isl_ast_build_options_involve_depth(build);
	}
	if (skip < 0)
		goto error;
	if (skip)
		return generate_shifted_component_from_list(domain,
							order, n, build);

	/* "base" is the first domain without an obviously fixed value
	 * for the current dimension; it serves as the reference domain.
	 */
	base = eliminate_non_fixed(domain, order, n, depth, build);
	if (base < 0)
		goto error;

	ctx = isl_ast_build_get_ctx(build);

	mv = isl_multi_val_zero(isl_space_set_alloc(ctx, 0, n));

	fixed = 1;
	for (i = 0; i < n; ++i) {
		isl_val *r, *m;

		/* Compute the differences between the values of the
		 * current dimension of domain "i" and those of the
		 * reference domain, for equal values of the outer
		 * dimensions, and derive their residue class:
		 * difference = m x + r.
		 */
		map = isl_map_from_domain_and_range(
				isl_set_copy(domain[order[base]].set),
				isl_set_copy(domain[order[i]].set));
		for (d = 0; d < depth; ++d)
			map = isl_map_equate(map, isl_dim_in, d,
						isl_dim_out, d);
		deltas = isl_map_deltas(map);
		res = isl_set_dim_residue_class_val(deltas, depth, &m, &r);
		isl_set_free(deltas);
		if (res < 0)
			break;

		/* Accumulate the gcd of the strides over all domains;
		 * a gcd of one means no shifted stride was found.
		 */
		if (i == 0)
			gcd = m;
		else
			gcd = isl_val_gcd(gcd, m);
		if (isl_val_is_one(gcd)) {
			isl_val_free(r);
			break;
		}
		mv = isl_multi_val_set_val(mv, i, r);

		res = dim_is_fixed(domain[order[i]].set, depth);
		if (res < 0)
			break;
		if (res)
			continue;

		/* Track whether all (non-fixed) domains have the same
		 * offset; if so, there is no shift to expose.
		 */
		if (fixed && i > base) {
			isl_val *a, *b;
			a = isl_multi_val_get_val(mv, i);
			b = isl_multi_val_get_val(mv, base);
			if (isl_val_ne(a, b))
				fixed = 0;
			isl_val_free(a);
			isl_val_free(b);
		}
	}

	if (res < 0 || !gcd) {
		isl_ast_build_free(build);
		list = NULL;
	} else if (i < n || fixed || isl_val_is_zero(gcd)) {
		/* Bailed out early, equal offsets or fixed differences:
		 * generate code without exposing any stride.
		 */
		list = generate_shifted_component_from_list(domain,
							order, n, build);
	} else {
		list = generate_shift_component(domain, order, n, gcd, mv,
						build);
	}

	isl_val_free(gcd);
	isl_multi_val_free(mv);

	return list;
error:
	isl_ast_build_free(build);
	return NULL;
}
4278 1.1 mrg
4279 1.1 mrg /* Store both "map" itself and its domain in the
4280 1.1 mrg * structure pointed to by *next and advance to the next array element.
4281 1.1 mrg */
4282 1.1 mrg static isl_stat extract_domain(__isl_take isl_map *map, void *user)
4283 1.1 mrg {
4284 1.1 mrg struct isl_set_map_pair **next = user;
4285 1.1 mrg
4286 1.1 mrg (*next)->map = isl_map_copy(map);
4287 1.1 mrg (*next)->set = isl_map_domain(map);
4288 1.1 mrg (*next)++;
4289 1.1 mrg
4290 1.1 mrg return isl_stat_ok;
4291 1.1 mrg }
4292 1.1 mrg
4293 1.1 mrg static isl_bool after_in_tree(__isl_keep isl_union_map *umap,
4294 1.1 mrg __isl_keep isl_schedule_node *node);
4295 1.1 mrg
4296 1.1 mrg /* Is any domain element of "umap" scheduled after any of
4297 1.1 mrg * the corresponding image elements by the tree rooted at
4298 1.1 mrg * the child of "node"?
4299 1.1 mrg */
4300 1.1 mrg static isl_bool after_in_child(__isl_keep isl_union_map *umap,
4301 1.1 mrg __isl_keep isl_schedule_node *node)
4302 1.1 mrg {
4303 1.1 mrg isl_schedule_node *child;
4304 1.1 mrg isl_bool after;
4305 1.1 mrg
4306 1.1 mrg child = isl_schedule_node_get_child(node, 0);
4307 1.1 mrg after = after_in_tree(umap, child);
4308 1.1 mrg isl_schedule_node_free(child);
4309 1.1 mrg
4310 1.1 mrg return after;
4311 1.1 mrg }
4312 1.1 mrg
4313 1.1 mrg /* Is any domain element of "umap" scheduled after any of
4314 1.1 mrg * the corresponding image elements by the tree rooted at
4315 1.1 mrg * the band node "node"?
4316 1.1 mrg *
4317 1.1 mrg * We first check if any domain element is scheduled after any
4318 1.1 mrg * of the corresponding image elements by the band node itself.
4319 1.1 mrg * If not, we restrict "map" to those pairs of element that
4320 1.1 mrg * are scheduled together by the band node and continue with
4321 1.1 mrg * the child of the band node.
4322 1.1 mrg * If there are no such pairs then the map passed to after_in_child
4323 1.1 mrg * will be empty causing it to return 0.
4324 1.1 mrg */
static isl_bool after_in_band(__isl_keep isl_union_map *umap,
	__isl_keep isl_schedule_node *node)
{
	isl_multi_union_pw_aff *mupa;
	isl_union_map *partial, *test, *gt, *universe, *umap1, *umap2;
	isl_union_set *domain, *range;
	isl_space *space;
	isl_bool empty;
	isl_bool after;
	isl_size n;

	n = isl_schedule_node_band_n_member(node);
	if (n < 0)
		return isl_bool_error;
	/* A zero-member band does not impose any order itself. */
	if (n == 0)
		return after_in_child(umap, node);

	/* Check whether the band schedules any domain element of "umap"
	 * after the corresponding image element.
	 */
	mupa = isl_schedule_node_band_get_partial_schedule(node);
	space = isl_multi_union_pw_aff_get_space(mupa);
	partial = isl_union_map_from_multi_union_pw_aff(mupa);
	test = isl_union_map_copy(umap);
	test = isl_union_map_apply_domain(test, isl_union_map_copy(partial));
	test = isl_union_map_apply_range(test, isl_union_map_copy(partial));
	gt = isl_union_map_from_map(isl_map_lex_gt(space));
	test = isl_union_map_intersect(test, gt);
	empty = isl_union_map_is_empty(test);
	isl_union_map_free(test);

	if (empty < 0 || !empty) {
		isl_union_map_free(partial);
		return isl_bool_not(empty);
	}

	/* Restrict "umap" to the pairs of elements scheduled together
	 * by the band node and continue with its child.
	 */
	universe = isl_union_map_universe(isl_union_map_copy(umap));
	domain = isl_union_map_domain(isl_union_map_copy(universe));
	range = isl_union_map_range(universe);
	umap1 = isl_union_map_copy(partial);
	umap1 = isl_union_map_intersect_domain(umap1, domain);
	umap2 = isl_union_map_intersect_domain(partial, range);
	test = isl_union_map_apply_range(umap1, isl_union_map_reverse(umap2));
	test = isl_union_map_intersect(test, isl_union_map_copy(umap));
	after = after_in_child(test, node);
	isl_union_map_free(test);
	return after;
}
4370 1.1 mrg
4371 1.1 mrg /* Is any domain element of "umap" scheduled after any of
4372 1.1 mrg * the corresponding image elements by the tree rooted at
4373 1.1 mrg * the context node "node"?
4374 1.1 mrg *
4375 1.1 mrg * The context constraints apply to the schedule domain,
4376 1.1 mrg * so we cannot apply them directly to "umap", which contains
4377 1.1 mrg * pairs of statement instances. Instead, we add them
4378 1.1 mrg * to the range of the prefix schedule for both domain and
4379 1.1 mrg * range of "umap".
4380 1.1 mrg */
static isl_bool after_in_context(__isl_keep isl_union_map *umap,
	__isl_keep isl_schedule_node *node)
{
	isl_union_map *prefix, *universe, *umap1, *umap2;
	isl_union_set *domain, *range;
	isl_set *context;
	isl_bool after;

	umap = isl_union_map_copy(umap);
	context = isl_schedule_node_context_get_context(node);
	prefix = isl_schedule_node_get_prefix_schedule_union_map(node);
	universe = isl_union_map_universe(isl_union_map_copy(umap));
	domain = isl_union_map_domain(isl_union_map_copy(universe));
	range = isl_union_map_range(universe);
	/* Map both domain and range of "umap" to the prefix schedule
	 * and keep only the pairs whose schedule values satisfy
	 * the context constraints.
	 */
	umap1 = isl_union_map_copy(prefix);
	umap1 = isl_union_map_intersect_domain(umap1, domain);
	umap2 = isl_union_map_intersect_domain(prefix, range);
	umap1 = isl_union_map_intersect_range(umap1,
					isl_union_set_from_set(context));
	umap1 = isl_union_map_apply_range(umap1, isl_union_map_reverse(umap2));
	umap = isl_union_map_intersect(umap, umap1);

	after = after_in_child(umap, node);

	isl_union_map_free(umap);

	return after;
}
4409 1.1 mrg
4410 1.1 mrg /* Is any domain element of "umap" scheduled after any of
4411 1.1 mrg * the corresponding image elements by the tree rooted at
4412 1.1 mrg * the expansion node "node"?
4413 1.1 mrg *
4414 1.1 mrg * We apply the expansion to domain and range of "umap" and
4415 1.1 mrg * continue with its child.
4416 1.1 mrg */
4417 1.1 mrg static isl_bool after_in_expansion(__isl_keep isl_union_map *umap,
4418 1.1 mrg __isl_keep isl_schedule_node *node)
4419 1.1 mrg {
4420 1.1 mrg isl_union_map *expansion;
4421 1.1 mrg isl_bool after;
4422 1.1 mrg
4423 1.1 mrg expansion = isl_schedule_node_expansion_get_expansion(node);
4424 1.1 mrg umap = isl_union_map_copy(umap);
4425 1.1 mrg umap = isl_union_map_apply_domain(umap, isl_union_map_copy(expansion));
4426 1.1 mrg umap = isl_union_map_apply_range(umap, expansion);
4427 1.1 mrg
4428 1.1 mrg after = after_in_child(umap, node);
4429 1.1 mrg
4430 1.1 mrg isl_union_map_free(umap);
4431 1.1 mrg
4432 1.1 mrg return after;
4433 1.1 mrg }
4434 1.1 mrg
4435 1.1 mrg /* Is any domain element of "umap" scheduled after any of
4436 1.1 mrg * the corresponding image elements by the tree rooted at
4437 1.1 mrg * the extension node "node"?
4438 1.1 mrg *
4439 1.1 mrg * Since the extension node may add statement instances before or
4440 1.1 mrg * after the pairs of statement instances in "umap", we return isl_bool_true
4441 1.1 mrg * to ensure that these pairs are not broken up.
4442 1.1 mrg */
4443 1.1 mrg static isl_bool after_in_extension(__isl_keep isl_union_map *umap,
4444 1.1 mrg __isl_keep isl_schedule_node *node)
4445 1.1 mrg {
4446 1.1 mrg return isl_bool_true;
4447 1.1 mrg }
4448 1.1 mrg
4449 1.1 mrg /* Is any domain element of "umap" scheduled after any of
4450 1.1 mrg * the corresponding image elements by the tree rooted at
4451 1.1 mrg * the filter node "node"?
4452 1.1 mrg *
4453 1.1 mrg * We intersect domain and range of "umap" with the filter and
4454 1.1 mrg * continue with its child.
4455 1.1 mrg */
4456 1.1 mrg static isl_bool after_in_filter(__isl_keep isl_union_map *umap,
4457 1.1 mrg __isl_keep isl_schedule_node *node)
4458 1.1 mrg {
4459 1.1 mrg isl_union_set *filter;
4460 1.1 mrg isl_bool after;
4461 1.1 mrg
4462 1.1 mrg umap = isl_union_map_copy(umap);
4463 1.1 mrg filter = isl_schedule_node_filter_get_filter(node);
4464 1.1 mrg umap = isl_union_map_intersect_domain(umap, isl_union_set_copy(filter));
4465 1.1 mrg umap = isl_union_map_intersect_range(umap, filter);
4466 1.1 mrg
4467 1.1 mrg after = after_in_child(umap, node);
4468 1.1 mrg
4469 1.1 mrg isl_union_map_free(umap);
4470 1.1 mrg
4471 1.1 mrg return after;
4472 1.1 mrg }
4473 1.1 mrg
4474 1.1 mrg /* Is any domain element of "umap" scheduled after any of
4475 1.1 mrg * the corresponding image elements by the tree rooted at
4476 1.1 mrg * the set node "node"?
4477 1.1 mrg *
4478 1.1 mrg * This is only the case if this condition holds in any
4479 1.1 mrg * of the (filter) children of the set node.
4480 1.1 mrg * In particular, if the domain and the range of "umap"
4481 1.1 mrg * are contained in different children, then the condition
4482 1.1 mrg * does not hold.
4483 1.1 mrg */
4484 1.1 mrg static isl_bool after_in_set(__isl_keep isl_union_map *umap,
4485 1.1 mrg __isl_keep isl_schedule_node *node)
4486 1.1 mrg {
4487 1.1 mrg int i;
4488 1.1 mrg isl_size n;
4489 1.1 mrg
4490 1.1 mrg n = isl_schedule_node_n_children(node);
4491 1.1 mrg if (n < 0)
4492 1.1 mrg return isl_bool_error;
4493 1.1 mrg for (i = 0; i < n; ++i) {
4494 1.1 mrg isl_schedule_node *child;
4495 1.1 mrg isl_bool after;
4496 1.1 mrg
4497 1.1 mrg child = isl_schedule_node_get_child(node, i);
4498 1.1 mrg after = after_in_tree(umap, child);
4499 1.1 mrg isl_schedule_node_free(child);
4500 1.1 mrg
4501 1.1 mrg if (after < 0 || after)
4502 1.1 mrg return after;
4503 1.1 mrg }
4504 1.1 mrg
4505 1.1 mrg return isl_bool_false;
4506 1.1 mrg }
4507 1.1 mrg
4508 1.1 mrg /* Return the filter of child "i" of "node".
4509 1.1 mrg */
4510 1.1 mrg static __isl_give isl_union_set *child_filter(
4511 1.1 mrg __isl_keep isl_schedule_node *node, int i)
4512 1.1 mrg {
4513 1.1 mrg isl_schedule_node *child;
4514 1.1 mrg isl_union_set *filter;
4515 1.1 mrg
4516 1.1 mrg child = isl_schedule_node_get_child(node, i);
4517 1.1 mrg filter = isl_schedule_node_filter_get_filter(child);
4518 1.1 mrg isl_schedule_node_free(child);
4519 1.1 mrg
4520 1.1 mrg return filter;
4521 1.1 mrg }
4522 1.1 mrg
4523 1.1 mrg /* Is any domain element of "umap" scheduled after any of
4524 1.1 mrg * the corresponding image elements by the tree rooted at
4525 1.1 mrg * the sequence node "node"?
4526 1.1 mrg *
4527 1.1 mrg * This happens in particular if any domain element is
4528 1.1 mrg * contained in a later child than one containing a range element or
4529 1.1 mrg * if the condition holds within a given child in the sequence.
4530 1.1 mrg * The later part of the condition is checked by after_in_set.
4531 1.1 mrg */
4532 1.1 mrg static isl_bool after_in_sequence(__isl_keep isl_union_map *umap,
4533 1.1 mrg __isl_keep isl_schedule_node *node)
4534 1.1 mrg {
4535 1.1 mrg int i, j;
4536 1.1 mrg isl_size n;
4537 1.1 mrg isl_union_map *umap_i;
4538 1.1 mrg isl_bool empty;
4539 1.1 mrg isl_bool after = isl_bool_false;
4540 1.1 mrg
4541 1.1 mrg n = isl_schedule_node_n_children(node);
4542 1.1 mrg if (n < 0)
4543 1.1 mrg return isl_bool_error;
4544 1.1 mrg for (i = 1; i < n; ++i) {
4545 1.1 mrg isl_union_set *filter_i;
4546 1.1 mrg
4547 1.1 mrg umap_i = isl_union_map_copy(umap);
4548 1.1 mrg filter_i = child_filter(node, i);
4549 1.1 mrg umap_i = isl_union_map_intersect_domain(umap_i, filter_i);
4550 1.1 mrg empty = isl_union_map_is_empty(umap_i);
4551 1.1 mrg if (empty < 0)
4552 1.1 mrg goto error;
4553 1.1 mrg if (empty) {
4554 1.1 mrg isl_union_map_free(umap_i);
4555 1.1 mrg continue;
4556 1.1 mrg }
4557 1.1 mrg
4558 1.1 mrg for (j = 0; j < i; ++j) {
4559 1.1 mrg isl_union_set *filter_j;
4560 1.1 mrg isl_union_map *umap_ij;
4561 1.1 mrg
4562 1.1 mrg umap_ij = isl_union_map_copy(umap_i);
4563 1.1 mrg filter_j = child_filter(node, j);
4564 1.1 mrg umap_ij = isl_union_map_intersect_range(umap_ij,
4565 1.1 mrg filter_j);
4566 1.1 mrg empty = isl_union_map_is_empty(umap_ij);
4567 1.1 mrg isl_union_map_free(umap_ij);
4568 1.1 mrg
4569 1.1 mrg if (empty < 0)
4570 1.1 mrg goto error;
4571 1.1 mrg if (!empty)
4572 1.1 mrg after = isl_bool_true;
4573 1.1 mrg if (after)
4574 1.1 mrg break;
4575 1.1 mrg }
4576 1.1 mrg
4577 1.1 mrg isl_union_map_free(umap_i);
4578 1.1 mrg if (after)
4579 1.1 mrg break;
4580 1.1 mrg }
4581 1.1 mrg
4582 1.1 mrg if (after < 0 || after)
4583 1.1 mrg return after;
4584 1.1 mrg
4585 1.1 mrg return after_in_set(umap, node);
4586 1.1 mrg error:
4587 1.1 mrg isl_union_map_free(umap_i);
4588 1.1 mrg return isl_bool_error;
4589 1.1 mrg }
4590 1.1 mrg
4591 1.1 mrg /* Is any domain element of "umap" scheduled after any of
4592 1.1 mrg * the corresponding image elements by the tree rooted at "node"?
4593 1.1 mrg *
4594 1.1 mrg * If "umap" is empty, then clearly there is no such element.
4595 1.1 mrg * Otherwise, consider the different types of nodes separately.
4596 1.1 mrg */
4597 1.1 mrg static isl_bool after_in_tree(__isl_keep isl_union_map *umap,
4598 1.1 mrg __isl_keep isl_schedule_node *node)
4599 1.1 mrg {
4600 1.1 mrg isl_bool empty;
4601 1.1 mrg enum isl_schedule_node_type type;
4602 1.1 mrg
4603 1.1 mrg empty = isl_union_map_is_empty(umap);
4604 1.1 mrg if (empty < 0)
4605 1.1 mrg return isl_bool_error;
4606 1.1 mrg if (empty)
4607 1.1 mrg return isl_bool_false;
4608 1.1 mrg if (!node)
4609 1.1 mrg return isl_bool_error;
4610 1.1 mrg
4611 1.1 mrg type = isl_schedule_node_get_type(node);
4612 1.1 mrg switch (type) {
4613 1.1 mrg case isl_schedule_node_error:
4614 1.1 mrg return isl_bool_error;
4615 1.1 mrg case isl_schedule_node_leaf:
4616 1.1 mrg return isl_bool_false;
4617 1.1 mrg case isl_schedule_node_band:
4618 1.1 mrg return after_in_band(umap, node);
4619 1.1 mrg case isl_schedule_node_domain:
4620 1.1 mrg isl_die(isl_schedule_node_get_ctx(node), isl_error_internal,
4621 1.1 mrg "unexpected internal domain node",
4622 1.1 mrg return isl_bool_error);
4623 1.1 mrg case isl_schedule_node_context:
4624 1.1 mrg return after_in_context(umap, node);
4625 1.1 mrg case isl_schedule_node_expansion:
4626 1.1 mrg return after_in_expansion(umap, node);
4627 1.1 mrg case isl_schedule_node_extension:
4628 1.1 mrg return after_in_extension(umap, node);
4629 1.1 mrg case isl_schedule_node_filter:
4630 1.1 mrg return after_in_filter(umap, node);
4631 1.1 mrg case isl_schedule_node_guard:
4632 1.1 mrg case isl_schedule_node_mark:
4633 1.1 mrg return after_in_child(umap, node);
4634 1.1 mrg case isl_schedule_node_set:
4635 1.1 mrg return after_in_set(umap, node);
4636 1.1 mrg case isl_schedule_node_sequence:
4637 1.1 mrg return after_in_sequence(umap, node);
4638 1.1 mrg }
4639 1.1 mrg
4640 1.1 mrg return isl_bool_true;
4641 1.1 mrg }
4642 1.1 mrg
4643 1.1 mrg /* Is any domain element of "map1" scheduled after any domain
4644 1.1 mrg * element of "map2" by the subtree underneath the current band node,
4645 1.1 mrg * while at the same time being scheduled together by the current
4646 1.1 mrg * band node, i.e., by "map1" and "map2?
4647 1.1 mrg *
4648 1.1 mrg * If the child of the current band node is a leaf, then
4649 1.1 mrg * no element can be scheduled after any other element.
4650 1.1 mrg *
4651 1.1 mrg * Otherwise, we construct a relation between domain elements
4652 1.1 mrg * of "map1" and domain elements of "map2" that are scheduled
4653 1.1 mrg * together and then check if the subtree underneath the current
4654 1.1 mrg * band node determines their relative order.
4655 1.1 mrg */
4656 1.1 mrg static isl_bool after_in_subtree(__isl_keep isl_ast_build *build,
4657 1.1 mrg __isl_keep isl_map *map1, __isl_keep isl_map *map2)
4658 1.1 mrg {
4659 1.1 mrg isl_schedule_node *node;
4660 1.1 mrg isl_map *map;
4661 1.1 mrg isl_union_map *umap;
4662 1.1 mrg isl_bool after;
4663 1.1 mrg
4664 1.1 mrg node = isl_ast_build_get_schedule_node(build);
4665 1.1 mrg if (!node)
4666 1.1 mrg return isl_bool_error;
4667 1.1 mrg node = isl_schedule_node_child(node, 0);
4668 1.1 mrg if (isl_schedule_node_get_type(node) == isl_schedule_node_leaf) {
4669 1.1 mrg isl_schedule_node_free(node);
4670 1.1 mrg return isl_bool_false;
4671 1.1 mrg }
4672 1.1 mrg map = isl_map_copy(map2);
4673 1.1 mrg map = isl_map_apply_domain(map, isl_map_copy(map1));
4674 1.1 mrg umap = isl_union_map_from_map(map);
4675 1.1 mrg after = after_in_tree(umap, node);
4676 1.1 mrg isl_union_map_free(umap);
4677 1.1 mrg isl_schedule_node_free(node);
4678 1.1 mrg return after;
4679 1.1 mrg }
4680 1.1 mrg
4681 1.1 mrg /* Internal data for any_scheduled_after.
4682 1.1 mrg *
4683 1.1 mrg * "build" is the build in which the AST is constructed.
4684 1.1 mrg * "depth" is the number of loops that have already been generated
4685 1.1 mrg * "group_coscheduled" is a local copy of options->ast_build_group_coscheduled
4686 1.1 mrg * "domain" is an array of set-map pairs corresponding to the different
4687 1.1 mrg * iteration domains. The set is the schedule domain, i.e., the domain
4688 1.1 mrg * of the inverse schedule, while the map is the inverse schedule itself.
4689 1.1 mrg */
4690 1.1 mrg struct isl_any_scheduled_after_data {
4691 1.1 mrg isl_ast_build *build;
4692 1.1 mrg int depth;
4693 1.1 mrg int group_coscheduled;
4694 1.1 mrg struct isl_set_map_pair *domain;
4695 1.1 mrg };
4696 1.1 mrg
4697 1.1 mrg /* Is any element of domain "i" scheduled after any element of domain "j"
4698 1.1 mrg * (for a common iteration of the first data->depth loops)?
4699 1.1 mrg *
4700 1.1 mrg * data->domain[i].set contains the domain of the inverse schedule
4701 1.1 mrg * for domain "i", i.e., elements in the schedule domain.
4702 1.1 mrg *
4703 1.1 mrg * If we are inside a band of a schedule tree and there is a pair
4704 1.1 mrg * of elements in the two domains that is schedule together by
4705 1.1 mrg * the current band, then we check if any element of "i" may be schedule
4706 1.1 mrg * after element of "j" by the descendants of the band node.
4707 1.1 mrg *
4708 1.1 mrg * If data->group_coscheduled is set, then we also return 1 if there
4709 1.1 mrg * is any pair of elements in the two domains that are scheduled together.
4710 1.1 mrg */
4711 1.1 mrg static isl_bool any_scheduled_after(int i, int j, void *user)
4712 1.1 mrg {
4713 1.1 mrg struct isl_any_scheduled_after_data *data = user;
4714 1.1 mrg isl_size dim = isl_set_dim(data->domain[i].set, isl_dim_set);
4715 1.1 mrg int pos;
4716 1.1 mrg
4717 1.1 mrg if (dim < 0)
4718 1.1 mrg return isl_bool_error;
4719 1.1 mrg
4720 1.1 mrg for (pos = data->depth; pos < dim; ++pos) {
4721 1.1 mrg int follows;
4722 1.1 mrg
4723 1.1 mrg follows = isl_set_follows_at(data->domain[i].set,
4724 1.1 mrg data->domain[j].set, pos);
4725 1.1 mrg
4726 1.1 mrg if (follows < -1)
4727 1.1 mrg return isl_bool_error;
4728 1.1 mrg if (follows > 0)
4729 1.1 mrg return isl_bool_true;
4730 1.1 mrg if (follows < 0)
4731 1.1 mrg return isl_bool_false;
4732 1.1 mrg }
4733 1.1 mrg
4734 1.1 mrg if (isl_ast_build_has_schedule_node(data->build)) {
4735 1.1 mrg isl_bool after;
4736 1.1 mrg
4737 1.1 mrg after = after_in_subtree(data->build, data->domain[i].map,
4738 1.1 mrg data->domain[j].map);
4739 1.1 mrg if (after < 0 || after)
4740 1.1 mrg return after;
4741 1.1 mrg }
4742 1.1 mrg
4743 1.1 mrg return isl_bool_ok(data->group_coscheduled);
4744 1.1 mrg }
4745 1.1 mrg
4746 1.1 mrg /* Look for independent components at the current depth and generate code
4747 1.1 mrg * for each component separately. The resulting lists of grafts are
4748 1.1 mrg * merged in an attempt to combine grafts with identical guards.
4749 1.1 mrg *
4750 1.1 mrg * Code for two domains can be generated separately if all the elements
4751 1.1 mrg * of one domain are scheduled before (or together with) all the elements
4752 1.1 mrg * of the other domain. We therefore consider the graph with as nodes
4753 1.1 mrg * the domains and an edge between two nodes if any element of the first
4754 1.1 mrg * node is scheduled after any element of the second node.
4755 1.1 mrg * If the ast_build_group_coscheduled is set, then we also add an edge if
4756 1.1 mrg * there is any pair of elements in the two domains that are scheduled
4757 1.1 mrg * together.
4758 1.1 mrg * Code is then generated (by generate_component)
4759 1.1 mrg * for each of the strongly connected components in this graph
4760 1.1 mrg * in their topological order.
4761 1.1 mrg *
4762 1.1 mrg * Since the test is performed on the domain of the inverse schedules of
4763 1.1 mrg * the different domains, we precompute these domains and store
4764 1.1 mrg * them in data.domain.
4765 1.1 mrg */
4766 1.1 mrg static __isl_give isl_ast_graft_list *generate_components(
4767 1.1 mrg __isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
4768 1.1 mrg {
4769 1.1 mrg int i;
4770 1.1 mrg isl_ctx *ctx = isl_ast_build_get_ctx(build);
4771 1.1 mrg isl_size n = isl_union_map_n_map(executed);
4772 1.1 mrg isl_size depth;
4773 1.1 mrg struct isl_any_scheduled_after_data data;
4774 1.1 mrg struct isl_set_map_pair *next;
4775 1.1 mrg struct isl_tarjan_graph *g = NULL;
4776 1.1 mrg isl_ast_graft_list *list = NULL;
4777 1.1 mrg int n_domain = 0;
4778 1.1 mrg
4779 1.1 mrg data.domain = NULL;
4780 1.1 mrg if (n < 0)
4781 1.1 mrg goto error;
4782 1.1 mrg data.domain = isl_calloc_array(ctx, struct isl_set_map_pair, n);
4783 1.1 mrg if (!data.domain)
4784 1.1 mrg goto error;
4785 1.1 mrg n_domain = n;
4786 1.1 mrg
4787 1.1 mrg next = data.domain;
4788 1.1 mrg if (isl_union_map_foreach_map(executed, &extract_domain, &next) < 0)
4789 1.1 mrg goto error;
4790 1.1 mrg
4791 1.1 mrg depth = isl_ast_build_get_depth(build);
4792 1.1 mrg if (depth < 0)
4793 1.1 mrg goto error;
4794 1.1 mrg data.build = build;
4795 1.1 mrg data.depth = depth;
4796 1.1 mrg data.group_coscheduled = isl_options_get_ast_build_group_coscheduled(ctx);
4797 1.1 mrg g = isl_tarjan_graph_init(ctx, n, &any_scheduled_after, &data);
4798 1.1 mrg if (!g)
4799 1.1 mrg goto error;
4800 1.1 mrg
4801 1.1 mrg list = isl_ast_graft_list_alloc(ctx, 0);
4802 1.1 mrg
4803 1.1 mrg i = 0;
4804 1.1 mrg while (list && n) {
4805 1.1 mrg isl_ast_graft_list *list_c;
4806 1.1 mrg int first = i;
4807 1.1 mrg
4808 1.1 mrg if (g->order[i] == -1)
4809 1.1 mrg isl_die(ctx, isl_error_internal, "cannot happen",
4810 1.1 mrg goto error);
4811 1.1 mrg ++i; --n;
4812 1.1 mrg while (g->order[i] != -1) {
4813 1.1 mrg ++i; --n;
4814 1.1 mrg }
4815 1.1 mrg
4816 1.1 mrg list_c = generate_component(data.domain,
4817 1.1 mrg g->order + first, i - first,
4818 1.1 mrg isl_ast_build_copy(build));
4819 1.1 mrg list = isl_ast_graft_list_merge(list, list_c, build);
4820 1.1 mrg
4821 1.1 mrg ++i;
4822 1.1 mrg }
4823 1.1 mrg
4824 1.1 mrg if (0)
4825 1.1 mrg error: list = isl_ast_graft_list_free(list);
4826 1.1 mrg isl_tarjan_graph_free(g);
4827 1.1 mrg for (i = 0; i < n_domain; ++i) {
4828 1.1 mrg isl_map_free(data.domain[i].map);
4829 1.1 mrg isl_set_free(data.domain[i].set);
4830 1.1 mrg }
4831 1.1 mrg free(data.domain);
4832 1.1 mrg isl_union_map_free(executed);
4833 1.1 mrg isl_ast_build_free(build);
4834 1.1 mrg
4835 1.1 mrg return list;
4836 1.1 mrg }
4837 1.1 mrg
4838 1.1 mrg /* Generate code for the next level (and all inner levels).
4839 1.1 mrg *
4840 1.1 mrg * If "executed" is empty, i.e., no code needs to be generated,
4841 1.1 mrg * then we return an empty list.
4842 1.1 mrg *
4843 1.1 mrg * If we have already generated code for all loop levels, then we pass
4844 1.1 mrg * control to generate_inner_level.
4845 1.1 mrg *
4846 1.1 mrg * If "executed" lives in a single space, i.e., if code needs to be
4847 1.1 mrg * generated for a single domain, then there can only be a single
4848 1.1 mrg * component and we go directly to generate_shifted_component.
4849 1.1 mrg * Otherwise, we call generate_components to detect the components
4850 1.1 mrg * and to call generate_component on each of them separately.
4851 1.1 mrg */
4852 1.1 mrg static __isl_give isl_ast_graft_list *generate_next_level(
4853 1.1 mrg __isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
4854 1.1 mrg {
4855 1.1 mrg isl_size depth;
4856 1.1 mrg isl_size dim;
4857 1.1 mrg isl_size n;
4858 1.1 mrg
4859 1.1 mrg if (!build || !executed)
4860 1.1 mrg goto error;
4861 1.1 mrg
4862 1.1 mrg if (isl_union_map_is_empty(executed)) {
4863 1.1 mrg isl_ctx *ctx = isl_ast_build_get_ctx(build);
4864 1.1 mrg isl_union_map_free(executed);
4865 1.1 mrg isl_ast_build_free(build);
4866 1.1 mrg return isl_ast_graft_list_alloc(ctx, 0);
4867 1.1 mrg }
4868 1.1 mrg
4869 1.1 mrg depth = isl_ast_build_get_depth(build);
4870 1.1 mrg dim = isl_ast_build_dim(build, isl_dim_set);
4871 1.1 mrg if (depth < 0 || dim < 0)
4872 1.1 mrg goto error;
4873 1.1 mrg if (depth >= dim)
4874 1.1 mrg return generate_inner_level(executed, build);
4875 1.1 mrg
4876 1.1 mrg n = isl_union_map_n_map(executed);
4877 1.1 mrg if (n < 0)
4878 1.1 mrg goto error;
4879 1.1 mrg if (n == 1)
4880 1.1 mrg return generate_shifted_component(executed, build);
4881 1.1 mrg
4882 1.1 mrg return generate_components(executed, build);
4883 1.1 mrg error:
4884 1.1 mrg isl_union_map_free(executed);
4885 1.1 mrg isl_ast_build_free(build);
4886 1.1 mrg return NULL;
4887 1.1 mrg }
4888 1.1 mrg
4889 1.1 mrg /* Internal data structure used by isl_ast_build_node_from_schedule_map.
4890 1.1 mrg * internal, executed and build are the inputs to generate_code.
4891 1.1 mrg * list collects the output.
4892 1.1 mrg */
4893 1.1 mrg struct isl_generate_code_data {
4894 1.1 mrg int internal;
4895 1.1 mrg isl_union_map *executed;
4896 1.1 mrg isl_ast_build *build;
4897 1.1 mrg
4898 1.1 mrg isl_ast_graft_list *list;
4899 1.1 mrg };
4900 1.1 mrg
4901 1.1 mrg /* Given an inverse schedule in terms of the external build schedule, i.e.,
4902 1.1 mrg *
4903 1.1 mrg * [E -> S] -> D
4904 1.1 mrg *
4905 1.1 mrg * with E the external build schedule and S the additional schedule "space",
4906 1.1 mrg * reformulate the inverse schedule in terms of the internal schedule domain,
4907 1.1 mrg * i.e., return
4908 1.1 mrg *
4909 1.1 mrg * [I -> S] -> D
4910 1.1 mrg *
4911 1.1 mrg * We first obtain a mapping
4912 1.1 mrg *
4913 1.1 mrg * I -> E
4914 1.1 mrg *
4915 1.1 mrg * take the inverse and the product with S -> S, resulting in
4916 1.1 mrg *
4917 1.1 mrg * [I -> S] -> [E -> S]
4918 1.1 mrg *
4919 1.1 mrg * Applying the map to the input produces the desired result.
4920 1.1 mrg */
4921 1.1 mrg static __isl_give isl_union_map *internal_executed(
4922 1.1 mrg __isl_take isl_union_map *executed, __isl_keep isl_space *space,
4923 1.1 mrg __isl_keep isl_ast_build *build)
4924 1.1 mrg {
4925 1.1 mrg isl_map *id, *proj;
4926 1.1 mrg
4927 1.1 mrg proj = isl_ast_build_get_schedule_map(build);
4928 1.1 mrg proj = isl_map_reverse(proj);
4929 1.1 mrg space = isl_space_map_from_set(isl_space_copy(space));
4930 1.1 mrg id = isl_map_identity(space);
4931 1.1 mrg proj = isl_map_product(proj, id);
4932 1.1 mrg executed = isl_union_map_apply_domain(executed,
4933 1.1 mrg isl_union_map_from_map(proj));
4934 1.1 mrg return executed;
4935 1.1 mrg }
4936 1.1 mrg
4937 1.1 mrg /* Generate an AST that visits the elements in the range of data->executed
4938 1.1 mrg * in the relative order specified by the corresponding domain element(s)
4939 1.1 mrg * for those domain elements that belong to "set".
4940 1.1 mrg * Add the result to data->list.
4941 1.1 mrg *
4942 1.1 mrg * The caller ensures that "set" is a universe domain.
4943 1.1 mrg * "space" is the space of the additional part of the schedule.
4944 1.1 mrg * It is equal to the space of "set" if build->domain is parametric.
4945 1.1 mrg * Otherwise, it is equal to the range of the wrapped space of "set".
4946 1.1 mrg *
4947 1.1 mrg * If the build space is not parametric and
4948 1.1 mrg * if isl_ast_build_node_from_schedule_map
4949 1.1 mrg * was called from an outside user (data->internal not set), then
4950 1.1 mrg * the (inverse) schedule refers to the external build domain and needs to
4951 1.1 mrg * be transformed to refer to the internal build domain.
4952 1.1 mrg *
4953 1.1 mrg * If the build space is parametric, then we add some of the parameter
4954 1.1 mrg * constraints to the executed relation. Adding these constraints
4955 1.1 mrg * allows for an earlier detection of conflicts in some cases.
4956 1.1 mrg * However, we do not want to divide the executed relation into
4957 1.1 mrg * more disjuncts than necessary. We therefore approximate
4958 1.1 mrg * the constraints on the parameters by a single disjunct set.
4959 1.1 mrg *
4960 1.1 mrg * The build is extended to include the additional part of the schedule.
4961 1.1 mrg * If the original build space was not parametric, then the options
4962 1.1 mrg * in data->build refer only to the additional part of the schedule
4963 1.1 mrg * and they need to be adjusted to refer to the complete AST build
4964 1.1 mrg * domain.
4965 1.1 mrg *
4966 1.1 mrg * After having adjusted inverse schedule and build, we start generating
4967 1.1 mrg * code with the outer loop of the current code generation
4968 1.1 mrg * in generate_next_level.
4969 1.1 mrg *
4970 1.1 mrg * If the original build space was not parametric, we undo the embedding
4971 1.1 mrg * on the resulting isl_ast_node_list so that it can be used within
4972 1.1 mrg * the outer AST build.
4973 1.1 mrg */
4974 1.1 mrg static isl_stat generate_code_in_space(struct isl_generate_code_data *data,
4975 1.1 mrg __isl_take isl_set *set, __isl_take isl_space *space)
4976 1.1 mrg {
4977 1.1 mrg isl_union_map *executed;
4978 1.1 mrg isl_ast_build *build;
4979 1.1 mrg isl_ast_graft_list *list;
4980 1.1 mrg int embed;
4981 1.1 mrg
4982 1.1 mrg executed = isl_union_map_copy(data->executed);
4983 1.1 mrg executed = isl_union_map_intersect_domain(executed,
4984 1.1 mrg isl_union_set_from_set(set));
4985 1.1 mrg
4986 1.1 mrg embed = !isl_set_is_params(data->build->domain);
4987 1.1 mrg if (embed && !data->internal)
4988 1.1 mrg executed = internal_executed(executed, space, data->build);
4989 1.1 mrg if (!embed) {
4990 1.1 mrg isl_set *domain;
4991 1.1 mrg domain = isl_ast_build_get_domain(data->build);
4992 1.1 mrg domain = isl_set_from_basic_set(isl_set_simple_hull(domain));
4993 1.1 mrg executed = isl_union_map_intersect_params(executed, domain);
4994 1.1 mrg }
4995 1.1 mrg
4996 1.1 mrg build = isl_ast_build_copy(data->build);
4997 1.1 mrg build = isl_ast_build_product(build, space);
4998 1.1 mrg
4999 1.1 mrg list = generate_next_level(executed, build);
5000 1.1 mrg
5001 1.1 mrg list = isl_ast_graft_list_unembed(list, embed);
5002 1.1 mrg
5003 1.1 mrg data->list = isl_ast_graft_list_concat(data->list, list);
5004 1.1 mrg
5005 1.1 mrg return isl_stat_ok;
5006 1.1 mrg }
5007 1.1 mrg
5008 1.1 mrg /* Generate an AST that visits the elements in the range of data->executed
5009 1.1 mrg * in the relative order specified by the corresponding domain element(s)
5010 1.1 mrg * for those domain elements that belong to "set".
5011 1.1 mrg * Add the result to data->list.
5012 1.1 mrg *
5013 1.1 mrg * The caller ensures that "set" is a universe domain.
5014 1.1 mrg *
5015 1.1 mrg * If the build space S is not parametric, then the space of "set"
5016 1.1 mrg * need to be a wrapped relation with S as domain. That is, it needs
5017 1.1 mrg * to be of the form
5018 1.1 mrg *
5019 1.1 mrg * [S -> T]
5020 1.1 mrg *
5021 1.1 mrg * Check this property and pass control to generate_code_in_space
5022 1.1 mrg * passing along T.
5023 1.1 mrg * If the build space is not parametric, then T is the space of "set".
5024 1.1 mrg */
5025 1.1 mrg static isl_stat generate_code_set(__isl_take isl_set *set, void *user)
5026 1.1 mrg {
5027 1.1 mrg struct isl_generate_code_data *data = user;
5028 1.1 mrg isl_space *space, *build_space;
5029 1.1 mrg int is_domain;
5030 1.1 mrg
5031 1.1 mrg space = isl_set_get_space(set);
5032 1.1 mrg
5033 1.1 mrg if (isl_set_is_params(data->build->domain))
5034 1.1 mrg return generate_code_in_space(data, set, space);
5035 1.1 mrg
5036 1.1 mrg build_space = isl_ast_build_get_space(data->build, data->internal);
5037 1.1 mrg space = isl_space_unwrap(space);
5038 1.1 mrg is_domain = isl_space_is_domain(build_space, space);
5039 1.1 mrg isl_space_free(build_space);
5040 1.1 mrg space = isl_space_range(space);
5041 1.1 mrg
5042 1.1 mrg if (is_domain < 0)
5043 1.1 mrg goto error;
5044 1.1 mrg if (!is_domain)
5045 1.1 mrg isl_die(isl_set_get_ctx(set), isl_error_invalid,
5046 1.1 mrg "invalid nested schedule space", goto error);
5047 1.1 mrg
5048 1.1 mrg return generate_code_in_space(data, set, space);
5049 1.1 mrg error:
5050 1.1 mrg isl_set_free(set);
5051 1.1 mrg isl_space_free(space);
5052 1.1 mrg return isl_stat_error;
5053 1.1 mrg }
5054 1.1 mrg
5055 1.1 mrg /* Generate an AST that visits the elements in the range of "executed"
5056 1.1 mrg * in the relative order specified by the corresponding domain element(s).
5057 1.1 mrg *
5058 1.1 mrg * "build" is an isl_ast_build that has either been constructed by
5059 1.1 mrg * isl_ast_build_from_context or passed to a callback set by
5060 1.1 mrg * isl_ast_build_set_create_leaf.
5061 1.1 mrg * In the first case, the space of the isl_ast_build is typically
5062 1.1 mrg * a parametric space, although this is currently not enforced.
5063 1.1 mrg * In the second case, the space is never a parametric space.
5064 1.1 mrg * If the space S is not parametric, then the domain space(s) of "executed"
5065 1.1 mrg * need to be wrapped relations with S as domain.
5066 1.1 mrg *
5067 1.1 mrg * If the domain of "executed" consists of several spaces, then an AST
5068 1.1 mrg * is generated for each of them (in arbitrary order) and the results
5069 1.1 mrg * are concatenated.
5070 1.1 mrg *
5071 1.1 mrg * If "internal" is set, then the domain "S" above refers to the internal
5072 1.1 mrg * schedule domain representation. Otherwise, it refers to the external
5073 1.1 mrg * representation, as returned by isl_ast_build_get_schedule_space.
5074 1.1 mrg *
5075 1.1 mrg * We essentially run over all the spaces in the domain of "executed"
5076 1.1 mrg * and call generate_code_set on each of them.
5077 1.1 mrg */
5078 1.1 mrg static __isl_give isl_ast_graft_list *generate_code(
5079 1.1 mrg __isl_take isl_union_map *executed, __isl_take isl_ast_build *build,
5080 1.1 mrg int internal)
5081 1.1 mrg {
5082 1.1 mrg isl_ctx *ctx;
5083 1.1 mrg struct isl_generate_code_data data = { 0 };
5084 1.1 mrg isl_space *space;
5085 1.1 mrg isl_union_set *schedule_domain;
5086 1.1 mrg isl_union_map *universe;
5087 1.1 mrg
5088 1.1 mrg if (!build)
5089 1.1 mrg goto error;
5090 1.1 mrg space = isl_ast_build_get_space(build, 1);
5091 1.1 mrg space = isl_space_align_params(space,
5092 1.1 mrg isl_union_map_get_space(executed));
5093 1.1 mrg space = isl_space_align_params(space,
5094 1.1 mrg isl_union_map_get_space(build->options));
5095 1.1 mrg build = isl_ast_build_align_params(build, isl_space_copy(space));
5096 1.1 mrg executed = isl_union_map_align_params(executed, space);
5097 1.1 mrg if (!executed || !build)
5098 1.1 mrg goto error;
5099 1.1 mrg
5100 1.1 mrg ctx = isl_ast_build_get_ctx(build);
5101 1.1 mrg
5102 1.1 mrg data.internal = internal;
5103 1.1 mrg data.executed = executed;
5104 1.1 mrg data.build = build;
5105 1.1 mrg data.list = isl_ast_graft_list_alloc(ctx, 0);
5106 1.1 mrg
5107 1.1 mrg universe = isl_union_map_universe(isl_union_map_copy(executed));
5108 1.1 mrg schedule_domain = isl_union_map_domain(universe);
5109 1.1 mrg if (isl_union_set_foreach_set(schedule_domain, &generate_code_set,
5110 1.1 mrg &data) < 0)
5111 1.1 mrg data.list = isl_ast_graft_list_free(data.list);
5112 1.1 mrg
5113 1.1 mrg isl_union_set_free(schedule_domain);
5114 1.1 mrg isl_union_map_free(executed);
5115 1.1 mrg
5116 1.1 mrg isl_ast_build_free(build);
5117 1.1 mrg return data.list;
5118 1.1 mrg error:
5119 1.1 mrg isl_union_map_free(executed);
5120 1.1 mrg isl_ast_build_free(build);
5121 1.1 mrg return NULL;
5122 1.1 mrg }
5123 1.1 mrg
5124 1.1 mrg /* Generate an AST that visits the elements in the domain of "schedule"
5125 1.1 mrg * in the relative order specified by the corresponding image element(s).
5126 1.1 mrg *
5127 1.1 mrg * "build" is an isl_ast_build that has either been constructed by
5128 1.1 mrg * isl_ast_build_from_context or passed to a callback set by
5129 1.1 mrg * isl_ast_build_set_create_leaf.
5130 1.1 mrg * In the first case, the space of the isl_ast_build is typically
5131 1.1 mrg * a parametric space, although this is currently not enforced.
5132 1.1 mrg * In the second case, the space is never a parametric space.
5133 1.1 mrg * If the space S is not parametric, then the range space(s) of "schedule"
5134 1.1 mrg * need to be wrapped relations with S as domain.
5135 1.1 mrg *
5136 1.1 mrg * If the range of "schedule" consists of several spaces, then an AST
5137 1.1 mrg * is generated for each of them (in arbitrary order) and the results
5138 1.1 mrg * are concatenated.
5139 1.1 mrg *
5140 1.1 mrg * We first initialize the local copies of the relevant options.
5141 1.1 mrg * We do this here rather than when the isl_ast_build is created
5142 1.1 mrg * because the options may have changed between the construction
5143 1.1 mrg * of the isl_ast_build and the call to isl_generate_code.
5144 1.1 mrg *
5145 1.1 mrg * The main computation is performed on an inverse schedule (with
5146 1.1 mrg * the schedule domain in the domain and the elements to be executed
5147 1.1 mrg * in the range) called "executed".
5148 1.1 mrg */
5149 1.1 mrg __isl_give isl_ast_node *isl_ast_build_node_from_schedule_map(
5150 1.1 mrg __isl_keep isl_ast_build *build, __isl_take isl_union_map *schedule)
5151 1.1 mrg {
5152 1.1 mrg isl_ast_graft_list *list;
5153 1.1 mrg isl_ast_node *node;
5154 1.1 mrg isl_union_map *executed;
5155 1.1 mrg
5156 1.1 mrg build = isl_ast_build_copy(build);
5157 1.1 mrg build = isl_ast_build_set_single_valued(build, 0);
5158 1.1 mrg schedule = isl_union_map_coalesce(schedule);
5159 1.1 mrg schedule = isl_union_map_remove_redundancies(schedule);
5160 1.1 mrg executed = isl_union_map_reverse(schedule);
5161 1.1 mrg list = generate_code(executed, isl_ast_build_copy(build), 0);
5162 1.1 mrg node = isl_ast_node_from_graft_list(list, build);
5163 1.1 mrg isl_ast_build_free(build);
5164 1.1 mrg
5165 1.1 mrg return node;
5166 1.1 mrg }
5167 1.1 mrg
5168 1.1 mrg /* The old name for isl_ast_build_node_from_schedule_map.
5169 1.1 mrg * It is being kept for backward compatibility, but
5170 1.1 mrg * it will be removed in the future.
5171 1.1 mrg */
5172 1.1 mrg __isl_give isl_ast_node *isl_ast_build_ast_from_schedule(
5173 1.1 mrg __isl_keep isl_ast_build *build, __isl_take isl_union_map *schedule)
5174 1.1 mrg {
5175 1.1 mrg return isl_ast_build_node_from_schedule_map(build, schedule);
5176 1.1 mrg }
5177 1.1 mrg
5178 1.1 mrg /* Generate an AST that visits the elements in the domain of "executed"
5179 1.1 mrg * in the relative order specified by the leaf node "node".
5180 1.1 mrg *
5181 1.1 mrg * The relation "executed" maps the outer generated loop iterators
5182 1.1 mrg * to the domain elements executed by those iterations.
5183 1.1 mrg *
5184 1.1 mrg * Simply pass control to generate_inner_level.
5185 1.1 mrg * Note that the current build does not refer to any band node, so
5186 1.1 mrg * that generate_inner_level will not try to visit the child of
5187 1.1 mrg * the leaf node.
5188 1.1 mrg *
5189 1.1 mrg * If multiple statement instances reach a leaf,
5190 1.1 mrg * then they can be executed in any order.
5191 1.1 mrg * Group the list of grafts based on shared guards
5192 1.1 mrg * such that identical guards are only generated once
5193 1.1 mrg * when the list is eventually passed on to isl_ast_graft_list_fuse.
5194 1.1 mrg */
5195 1.1 mrg static __isl_give isl_ast_graft_list *build_ast_from_leaf(
5196 1.1 mrg __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5197 1.1 mrg __isl_take isl_union_map *executed)
5198 1.1 mrg {
5199 1.1 mrg isl_ast_graft_list *list;
5200 1.1 mrg
5201 1.1 mrg isl_schedule_node_free(node);
5202 1.1 mrg list = generate_inner_level(executed, isl_ast_build_copy(build));
5203 1.1 mrg list = isl_ast_graft_list_group_on_guard(list, build);
5204 1.1 mrg isl_ast_build_free(build);
5205 1.1 mrg
5206 1.1 mrg return list;
5207 1.1 mrg }
5208 1.1 mrg
5209 1.1 mrg /* Check that the band partial schedule "partial" does not filter out
5210 1.1 mrg * any statement instances, as specified by the range of "executed".
5211 1.1 mrg */
5212 1.1 mrg static isl_stat check_band_schedule_total_on_instances(
5213 1.1 mrg __isl_keep isl_multi_union_pw_aff *partial,
5214 1.1 mrg __isl_keep isl_union_map *executed)
5215 1.1 mrg {
5216 1.1 mrg isl_bool subset;
5217 1.1 mrg isl_union_set *domain, *instances;
5218 1.1 mrg
5219 1.1 mrg instances = isl_union_map_range(isl_union_map_copy(executed));
5220 1.1 mrg partial = isl_multi_union_pw_aff_copy(partial);
5221 1.1 mrg domain = isl_multi_union_pw_aff_domain(partial);
5222 1.1 mrg subset = isl_union_set_is_subset(instances, domain);
5223 1.1 mrg isl_union_set_free(domain);
5224 1.1 mrg isl_union_set_free(instances);
5225 1.1 mrg
5226 1.1 mrg if (subset < 0)
5227 1.1 mrg return isl_stat_error;
5228 1.1 mrg if (!subset)
5229 1.1 mrg isl_die(isl_union_map_get_ctx(executed), isl_error_invalid,
5230 1.1 mrg "band node is not allowed to drop statement instances",
5231 1.1 mrg return isl_stat_error);
5232 1.1 mrg return isl_stat_ok;
5233 1.1 mrg }
5234 1.1 mrg
5235 1.1 mrg /* Generate an AST that visits the elements in the domain of "executed"
5236 1.1 mrg * in the relative order specified by the band node "node" and its descendants.
5237 1.1 mrg *
5238 1.1 mrg * The relation "executed" maps the outer generated loop iterators
5239 1.1 mrg * to the domain elements executed by those iterations.
5240 1.1 mrg *
5241 1.1 mrg * If the band is empty, we continue with its descendants.
5242 1.1 mrg * Otherwise, we extend the build and the inverse schedule with
5243 1.1 mrg * the additional space/partial schedule and continue generating
5244 1.1 mrg * an AST in generate_next_level.
5245 1.1 mrg * As soon as we have extended the inverse schedule with the additional
5246 1.1 mrg * partial schedule, we look for equalities that may exists between
5247 1.1 mrg * the old and the new part.
5248 1.1 mrg */
5249 1.1 mrg static __isl_give isl_ast_graft_list *build_ast_from_band(
5250 1.1 mrg __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5251 1.1 mrg __isl_take isl_union_map *executed)
5252 1.1 mrg {
5253 1.1 mrg isl_space *space;
5254 1.1 mrg isl_multi_union_pw_aff *extra;
5255 1.1 mrg isl_union_map *extra_umap;
5256 1.1 mrg isl_ast_graft_list *list;
5257 1.1 mrg isl_size n1, n2;
5258 1.1 mrg isl_size n;
5259 1.1 mrg
5260 1.1 mrg n = isl_schedule_node_band_n_member(node);
5261 1.1 mrg if (!build || n < 0 || !executed)
5262 1.1 mrg goto error;
5263 1.1 mrg
5264 1.1 mrg if (n == 0)
5265 1.1 mrg return build_ast_from_child(build, node, executed);
5266 1.1 mrg
5267 1.1 mrg extra = isl_schedule_node_band_get_partial_schedule(node);
5268 1.1 mrg extra = isl_multi_union_pw_aff_align_params(extra,
5269 1.1 mrg isl_ast_build_get_space(build, 1));
5270 1.1 mrg space = isl_multi_union_pw_aff_get_space(extra);
5271 1.1 mrg
5272 1.1 mrg if (check_band_schedule_total_on_instances(extra, executed) < 0)
5273 1.1 mrg executed = isl_union_map_free(executed);
5274 1.1 mrg
5275 1.1 mrg extra_umap = isl_union_map_from_multi_union_pw_aff(extra);
5276 1.1 mrg extra_umap = isl_union_map_reverse(extra_umap);
5277 1.1 mrg
5278 1.1 mrg executed = isl_union_map_domain_product(executed, extra_umap);
5279 1.1 mrg executed = isl_union_map_detect_equalities(executed);
5280 1.1 mrg
5281 1.1 mrg n1 = isl_ast_build_dim(build, isl_dim_param);
5282 1.1 mrg build = isl_ast_build_product(build, space);
5283 1.1 mrg n2 = isl_ast_build_dim(build, isl_dim_param);
5284 1.1 mrg if (n1 < 0 || n2 < 0)
5285 1.1 mrg build = isl_ast_build_free(build);
5286 1.1 mrg else if (n2 > n1)
5287 1.1 mrg isl_die(isl_ast_build_get_ctx(build), isl_error_invalid,
5288 1.1 mrg "band node is not allowed to introduce new parameters",
5289 1.1 mrg build = isl_ast_build_free(build));
5290 1.1 mrg build = isl_ast_build_set_schedule_node(build, node);
5291 1.1 mrg
5292 1.1 mrg list = generate_next_level(executed, build);
5293 1.1 mrg
5294 1.1 mrg list = isl_ast_graft_list_unembed(list, 1);
5295 1.1 mrg
5296 1.1 mrg return list;
5297 1.1 mrg error:
5298 1.1 mrg isl_schedule_node_free(node);
5299 1.1 mrg isl_union_map_free(executed);
5300 1.1 mrg isl_ast_build_free(build);
5301 1.1 mrg return NULL;
5302 1.1 mrg }
5303 1.1 mrg
5304 1.1 mrg /* Hoist a list of grafts (in practice containing a single graft)
5305 1.1 mrg * from "sub_build" (which includes extra context information)
5306 1.1 mrg * to "build".
5307 1.1 mrg *
5308 1.1 mrg * In particular, project out all additional parameters introduced
5309 1.1 mrg * by the context node from the enforced constraints and the guard
5310 1.1 mrg * of the single graft.
5311 1.1 mrg */
5312 1.1 mrg static __isl_give isl_ast_graft_list *hoist_out_of_context(
5313 1.1 mrg __isl_take isl_ast_graft_list *list, __isl_keep isl_ast_build *build,
5314 1.1 mrg __isl_keep isl_ast_build *sub_build)
5315 1.1 mrg {
5316 1.1 mrg isl_ast_graft *graft;
5317 1.1 mrg isl_basic_set *enforced;
5318 1.1 mrg isl_set *guard;
5319 1.1 mrg isl_size n_param, extra_param;
5320 1.1 mrg
5321 1.1 mrg n_param = isl_ast_build_dim(build, isl_dim_param);
5322 1.1 mrg extra_param = isl_ast_build_dim(sub_build, isl_dim_param);
5323 1.1 mrg if (n_param < 0 || extra_param < 0)
5324 1.1 mrg return isl_ast_graft_list_free(list);
5325 1.1 mrg
5326 1.1 mrg if (extra_param == n_param)
5327 1.1 mrg return list;
5328 1.1 mrg
5329 1.1 mrg extra_param -= n_param;
5330 1.1 mrg enforced = isl_ast_graft_list_extract_shared_enforced(list, sub_build);
5331 1.1 mrg enforced = isl_basic_set_project_out(enforced, isl_dim_param,
5332 1.1 mrg n_param, extra_param);
5333 1.1 mrg enforced = isl_basic_set_remove_unknown_divs(enforced);
5334 1.1 mrg guard = isl_ast_graft_list_extract_hoistable_guard(list, sub_build);
5335 1.1 mrg guard = isl_set_remove_divs_involving_dims(guard, isl_dim_param,
5336 1.1 mrg n_param, extra_param);
5337 1.1 mrg guard = isl_set_project_out(guard, isl_dim_param, n_param, extra_param);
5338 1.1 mrg guard = isl_set_compute_divs(guard);
5339 1.1 mrg graft = isl_ast_graft_alloc_from_children(list, guard, enforced,
5340 1.1 mrg build, sub_build);
5341 1.1 mrg list = isl_ast_graft_list_from_ast_graft(graft);
5342 1.1 mrg
5343 1.1 mrg return list;
5344 1.1 mrg }
5345 1.1 mrg
5346 1.1 mrg /* Generate an AST that visits the elements in the domain of "executed"
5347 1.1 mrg * in the relative order specified by the context node "node"
5348 1.1 mrg * and its descendants.
5349 1.1 mrg *
5350 1.1 mrg * The relation "executed" maps the outer generated loop iterators
5351 1.1 mrg * to the domain elements executed by those iterations.
5352 1.1 mrg *
5353 1.1 mrg * The context node may introduce additional parameters as well as
5354 1.1 mrg * constraints on the outer schedule dimensions or original parameters.
5355 1.1 mrg *
5356 1.1 mrg * We add the extra parameters to a new build and the context
5357 1.1 mrg * constraints to both the build and (as a single disjunct)
5358 1.1 mrg * to the domain of "executed". Since the context constraints
5359 1.1 mrg * are specified in terms of the input schedule, we first need
5360 1.1 mrg * to map them to the internal schedule domain.
5361 1.1 mrg *
5362 1.1 mrg * After constructing the AST from the descendants of "node",
5363 1.1 mrg * we combine the list of grafts into a single graft within
5364 1.1 mrg * the new build, in order to be able to exploit the additional
5365 1.1 mrg * context constraints during this combination.
5366 1.1 mrg *
5367 1.1 mrg * Additionally, if the current node is the outermost node in
5368 1.1 mrg * the schedule tree (apart from the root domain node), we generate
5369 1.1 mrg * all pending guards, again to be able to exploit the additional
5370 1.1 mrg * context constraints. We currently do not do this for internal
5371 1.1 mrg * context nodes since we may still want to hoist conditions
5372 1.1 mrg * to outer AST nodes.
5373 1.1 mrg *
5374 1.1 mrg * If the context node introduced any new parameters, then they
5375 1.1 mrg * are removed from the set of enforced constraints and guard
5376 1.1 mrg * in hoist_out_of_context.
5377 1.1 mrg */
5378 1.1 mrg static __isl_give isl_ast_graft_list *build_ast_from_context(
5379 1.1 mrg __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5380 1.1 mrg __isl_take isl_union_map *executed)
5381 1.1 mrg {
5382 1.1 mrg isl_set *context;
5383 1.1 mrg isl_space *space;
5384 1.1 mrg isl_multi_aff *internal2input;
5385 1.1 mrg isl_ast_build *sub_build;
5386 1.1 mrg isl_ast_graft_list *list;
5387 1.1 mrg isl_size n;
5388 1.1 mrg isl_size depth;
5389 1.1 mrg
5390 1.1 mrg depth = isl_schedule_node_get_tree_depth(node);
5391 1.1 mrg if (depth < 0)
5392 1.1 mrg build = isl_ast_build_free(build);
5393 1.1 mrg space = isl_ast_build_get_space(build, 1);
5394 1.1 mrg context = isl_schedule_node_context_get_context(node);
5395 1.1 mrg context = isl_set_align_params(context, space);
5396 1.1 mrg sub_build = isl_ast_build_copy(build);
5397 1.1 mrg space = isl_set_get_space(context);
5398 1.1 mrg sub_build = isl_ast_build_align_params(sub_build, space);
5399 1.1 mrg internal2input = isl_ast_build_get_internal2input(sub_build);
5400 1.1 mrg context = isl_set_preimage_multi_aff(context, internal2input);
5401 1.1 mrg sub_build = isl_ast_build_restrict_generated(sub_build,
5402 1.1 mrg isl_set_copy(context));
5403 1.1 mrg context = isl_set_from_basic_set(isl_set_simple_hull(context));
5404 1.1 mrg executed = isl_union_map_intersect_domain(executed,
5405 1.1 mrg isl_union_set_from_set(context));
5406 1.1 mrg
5407 1.1 mrg list = build_ast_from_child(isl_ast_build_copy(sub_build),
5408 1.1 mrg node, executed);
5409 1.1 mrg n = isl_ast_graft_list_n_ast_graft(list);
5410 1.1 mrg if (n < 0)
5411 1.1 mrg list = isl_ast_graft_list_free(list);
5412 1.1 mrg
5413 1.1 mrg list = isl_ast_graft_list_fuse(list, sub_build);
5414 1.1 mrg if (depth == 1)
5415 1.1 mrg list = isl_ast_graft_list_insert_pending_guard_nodes(list,
5416 1.1 mrg sub_build);
5417 1.1 mrg if (n >= 1)
5418 1.1 mrg list = hoist_out_of_context(list, build, sub_build);
5419 1.1 mrg
5420 1.1 mrg isl_ast_build_free(build);
5421 1.1 mrg isl_ast_build_free(sub_build);
5422 1.1 mrg
5423 1.1 mrg return list;
5424 1.1 mrg }
5425 1.1 mrg
5426 1.1 mrg /* Generate an AST that visits the elements in the domain of "executed"
5427 1.1 mrg * in the relative order specified by the expansion node "node" and
5428 1.1 mrg * its descendants.
5429 1.1 mrg *
5430 1.1 mrg * The relation "executed" maps the outer generated loop iterators
5431 1.1 mrg * to the domain elements executed by those iterations.
5432 1.1 mrg *
5433 1.1 mrg * We expand the domain elements by the expansion and
5434 1.1 mrg * continue with the descendants of the node.
5435 1.1 mrg */
5436 1.1 mrg static __isl_give isl_ast_graft_list *build_ast_from_expansion(
5437 1.1 mrg __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5438 1.1 mrg __isl_take isl_union_map *executed)
5439 1.1 mrg {
5440 1.1 mrg isl_union_map *expansion;
5441 1.1 mrg isl_size n1, n2;
5442 1.1 mrg
5443 1.1 mrg expansion = isl_schedule_node_expansion_get_expansion(node);
5444 1.1 mrg expansion = isl_union_map_align_params(expansion,
5445 1.1 mrg isl_union_map_get_space(executed));
5446 1.1 mrg
5447 1.1 mrg n1 = isl_union_map_dim(executed, isl_dim_param);
5448 1.1 mrg executed = isl_union_map_apply_range(executed, expansion);
5449 1.1 mrg n2 = isl_union_map_dim(executed, isl_dim_param);
5450 1.1 mrg if (n1 < 0 || n2 < 0)
5451 1.1 mrg goto error;
5452 1.1 mrg if (n2 > n1)
5453 1.1 mrg isl_die(isl_ast_build_get_ctx(build), isl_error_invalid,
5454 1.1 mrg "expansion node is not allowed to introduce "
5455 1.1 mrg "new parameters", goto error);
5456 1.1 mrg
5457 1.1 mrg return build_ast_from_child(build, node, executed);
5458 1.1 mrg error:
5459 1.1 mrg isl_ast_build_free(build);
5460 1.1 mrg isl_schedule_node_free(node);
5461 1.1 mrg isl_union_map_free(executed);
5462 1.1 mrg return NULL;
5463 1.1 mrg }
5464 1.1 mrg
5465 1.1 mrg /* Generate an AST that visits the elements in the domain of "executed"
5466 1.1 mrg * in the relative order specified by the extension node "node" and
5467 1.1 mrg * its descendants.
5468 1.1 mrg *
5469 1.1 mrg * The relation "executed" maps the outer generated loop iterators
5470 1.1 mrg * to the domain elements executed by those iterations.
5471 1.1 mrg *
5472 1.1 mrg * Extend the inverse schedule with the extension applied to current
5473 1.1 mrg * set of generated constraints. Since the extension if formulated
5474 1.1 mrg * in terms of the input schedule, it first needs to be transformed
5475 1.1 mrg * to refer to the internal schedule.
5476 1.1 mrg */
5477 1.1 mrg static __isl_give isl_ast_graft_list *build_ast_from_extension(
5478 1.1 mrg __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5479 1.1 mrg __isl_take isl_union_map *executed)
5480 1.1 mrg {
5481 1.1 mrg isl_union_set *schedule_domain;
5482 1.1 mrg isl_union_map *extension;
5483 1.1 mrg isl_set *set;
5484 1.1 mrg
5485 1.1 mrg set = isl_ast_build_get_generated(build);
5486 1.1 mrg set = isl_set_from_basic_set(isl_set_simple_hull(set));
5487 1.1 mrg schedule_domain = isl_union_set_from_set(set);
5488 1.1 mrg
5489 1.1 mrg extension = isl_schedule_node_extension_get_extension(node);
5490 1.1 mrg
5491 1.1 mrg extension = isl_union_map_preimage_domain_multi_aff(extension,
5492 1.1 mrg isl_multi_aff_copy(build->internal2input));
5493 1.1 mrg extension = isl_union_map_intersect_domain(extension, schedule_domain);
5494 1.1 mrg extension = isl_ast_build_substitute_values_union_map_domain(build,
5495 1.1 mrg extension);
5496 1.1 mrg executed = isl_union_map_union(executed, extension);
5497 1.1 mrg
5498 1.1 mrg return build_ast_from_child(build, node, executed);
5499 1.1 mrg }
5500 1.1 mrg
5501 1.1 mrg /* Generate an AST that visits the elements in the domain of "executed"
5502 1.1 mrg * in the relative order specified by the filter node "node" and
5503 1.1 mrg * its descendants.
5504 1.1 mrg *
5505 1.1 mrg * The relation "executed" maps the outer generated loop iterators
5506 1.1 mrg * to the domain elements executed by those iterations.
5507 1.1 mrg *
5508 1.1 mrg * We simply intersect the iteration domain (i.e., the range of "executed")
5509 1.1 mrg * with the filter and continue with the descendants of the node,
5510 1.1 mrg * unless the resulting inverse schedule is empty, in which
5511 1.1 mrg * case we return an empty list.
5512 1.1 mrg *
5513 1.1 mrg * If the result of the intersection is equal to the original "executed"
5514 1.1 mrg * relation, then keep the original representation since the intersection
5515 1.1 mrg * may have unnecessarily broken up the relation into a greater number
5516 1.1 mrg * of disjuncts.
5517 1.1 mrg */
5518 1.1 mrg static __isl_give isl_ast_graft_list *build_ast_from_filter(
5519 1.1 mrg __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5520 1.1 mrg __isl_take isl_union_map *executed)
5521 1.1 mrg {
5522 1.1 mrg isl_ctx *ctx;
5523 1.1 mrg isl_union_set *filter;
5524 1.1 mrg isl_union_map *orig;
5525 1.1 mrg isl_ast_graft_list *list;
5526 1.1 mrg int empty;
5527 1.1 mrg isl_bool unchanged;
5528 1.1 mrg isl_size n1, n2;
5529 1.1 mrg
5530 1.1 mrg orig = isl_union_map_copy(executed);
5531 1.1 mrg if (!build || !node || !executed)
5532 1.1 mrg goto error;
5533 1.1 mrg
5534 1.1 mrg filter = isl_schedule_node_filter_get_filter(node);
5535 1.1 mrg filter = isl_union_set_align_params(filter,
5536 1.1 mrg isl_union_map_get_space(executed));
5537 1.1 mrg n1 = isl_union_map_dim(executed, isl_dim_param);
5538 1.1 mrg executed = isl_union_map_intersect_range(executed, filter);
5539 1.1 mrg n2 = isl_union_map_dim(executed, isl_dim_param);
5540 1.1 mrg if (n1 < 0 || n2 < 0)
5541 1.1 mrg goto error;
5542 1.1 mrg if (n2 > n1)
5543 1.1 mrg isl_die(isl_ast_build_get_ctx(build), isl_error_invalid,
5544 1.1 mrg "filter node is not allowed to introduce "
5545 1.1 mrg "new parameters", goto error);
5546 1.1 mrg
5547 1.1 mrg unchanged = isl_union_map_is_subset(orig, executed);
5548 1.1 mrg empty = isl_union_map_is_empty(executed);
5549 1.1 mrg if (unchanged < 0 || empty < 0)
5550 1.1 mrg goto error;
5551 1.1 mrg if (unchanged) {
5552 1.1 mrg isl_union_map_free(executed);
5553 1.1 mrg return build_ast_from_child(build, node, orig);
5554 1.1 mrg }
5555 1.1 mrg isl_union_map_free(orig);
5556 1.1 mrg if (!empty)
5557 1.1 mrg return build_ast_from_child(build, node, executed);
5558 1.1 mrg
5559 1.1 mrg ctx = isl_ast_build_get_ctx(build);
5560 1.1 mrg list = isl_ast_graft_list_alloc(ctx, 0);
5561 1.1 mrg isl_ast_build_free(build);
5562 1.1 mrg isl_schedule_node_free(node);
5563 1.1 mrg isl_union_map_free(executed);
5564 1.1 mrg return list;
5565 1.1 mrg error:
5566 1.1 mrg isl_ast_build_free(build);
5567 1.1 mrg isl_schedule_node_free(node);
5568 1.1 mrg isl_union_map_free(executed);
5569 1.1 mrg isl_union_map_free(orig);
5570 1.1 mrg return NULL;
5571 1.1 mrg }
5572 1.1 mrg
5573 1.1 mrg /* Generate an AST that visits the elements in the domain of "executed"
5574 1.1 mrg * in the relative order specified by the guard node "node" and
5575 1.1 mrg * its descendants.
5576 1.1 mrg *
5577 1.1 mrg * The relation "executed" maps the outer generated loop iterators
5578 1.1 mrg * to the domain elements executed by those iterations.
5579 1.1 mrg *
5580 1.1 mrg * Ensure that the associated guard is enforced by the outer AST
5581 1.1 mrg * constructs by adding it to the guard of the graft.
5582 1.1 mrg * Since we know that we will enforce the guard, we can also include it
5583 1.1 mrg * in the generated constraints used to construct an AST for
5584 1.1 mrg * the descendant nodes.
5585 1.1 mrg */
5586 1.1 mrg static __isl_give isl_ast_graft_list *build_ast_from_guard(
5587 1.1 mrg __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5588 1.1 mrg __isl_take isl_union_map *executed)
5589 1.1 mrg {
5590 1.1 mrg isl_space *space;
5591 1.1 mrg isl_set *guard, *hoisted;
5592 1.1 mrg isl_basic_set *enforced;
5593 1.1 mrg isl_ast_build *sub_build;
5594 1.1 mrg isl_ast_graft *graft;
5595 1.1 mrg isl_ast_graft_list *list;
5596 1.1 mrg isl_size n1, n2, n;
5597 1.1 mrg
5598 1.1 mrg space = isl_ast_build_get_space(build, 1);
5599 1.1 mrg guard = isl_schedule_node_guard_get_guard(node);
5600 1.1 mrg n1 = isl_space_dim(space, isl_dim_param);
5601 1.1 mrg guard = isl_set_align_params(guard, space);
5602 1.1 mrg n2 = isl_set_dim(guard, isl_dim_param);
5603 1.1 mrg if (n1 < 0 || n2 < 0)
5604 1.1 mrg guard = isl_set_free(guard);
5605 1.1 mrg else if (n2 > n1)
5606 1.1 mrg isl_die(isl_ast_build_get_ctx(build), isl_error_invalid,
5607 1.1 mrg "guard node is not allowed to introduce "
5608 1.1 mrg "new parameters", guard = isl_set_free(guard));
5609 1.1 mrg guard = isl_set_preimage_multi_aff(guard,
5610 1.1 mrg isl_multi_aff_copy(build->internal2input));
5611 1.1 mrg guard = isl_ast_build_specialize(build, guard);
5612 1.1 mrg guard = isl_set_gist(guard, isl_set_copy(build->generated));
5613 1.1 mrg
5614 1.1 mrg sub_build = isl_ast_build_copy(build);
5615 1.1 mrg sub_build = isl_ast_build_restrict_generated(sub_build,
5616 1.1 mrg isl_set_copy(guard));
5617 1.1 mrg
5618 1.1 mrg list = build_ast_from_child(isl_ast_build_copy(sub_build),
5619 1.1 mrg node, executed);
5620 1.1 mrg
5621 1.1 mrg hoisted = isl_ast_graft_list_extract_hoistable_guard(list, sub_build);
5622 1.1 mrg n = isl_set_n_basic_set(hoisted);
5623 1.1 mrg if (n < 0)
5624 1.1 mrg list = isl_ast_graft_list_free(list);
5625 1.1 mrg if (n > 1)
5626 1.1 mrg list = isl_ast_graft_list_gist_guards(list,
5627 1.1 mrg isl_set_copy(hoisted));
5628 1.1 mrg guard = isl_set_intersect(guard, hoisted);
5629 1.1 mrg enforced = extract_shared_enforced(list, build);
5630 1.1 mrg graft = isl_ast_graft_alloc_from_children(list, guard, enforced,
5631 1.1 mrg build, sub_build);
5632 1.1 mrg
5633 1.1 mrg isl_ast_build_free(sub_build);
5634 1.1 mrg isl_ast_build_free(build);
5635 1.1 mrg return isl_ast_graft_list_from_ast_graft(graft);
5636 1.1 mrg }
5637 1.1 mrg
5638 1.1 mrg /* Call the before_each_mark callback, if requested by the user.
5639 1.1 mrg *
5640 1.1 mrg * Return 0 on success and -1 on error.
5641 1.1 mrg *
5642 1.1 mrg * The caller is responsible for recording the current inverse schedule
5643 1.1 mrg * in "build".
5644 1.1 mrg */
5645 1.1 mrg static isl_stat before_each_mark(__isl_keep isl_id *mark,
5646 1.1 mrg __isl_keep isl_ast_build *build)
5647 1.1 mrg {
5648 1.1 mrg if (!build)
5649 1.1 mrg return isl_stat_error;
5650 1.1 mrg if (!build->before_each_mark)
5651 1.1 mrg return isl_stat_ok;
5652 1.1 mrg return build->before_each_mark(mark, build,
5653 1.1 mrg build->before_each_mark_user);
5654 1.1 mrg }
5655 1.1 mrg
5656 1.1 mrg /* Call the after_each_mark callback, if requested by the user.
5657 1.1 mrg *
5658 1.1 mrg * The caller is responsible for recording the current inverse schedule
5659 1.1 mrg * in "build".
5660 1.1 mrg */
5661 1.1 mrg static __isl_give isl_ast_graft *after_each_mark(
5662 1.1 mrg __isl_take isl_ast_graft *graft, __isl_keep isl_ast_build *build)
5663 1.1 mrg {
5664 1.1 mrg if (!graft || !build)
5665 1.1 mrg return isl_ast_graft_free(graft);
5666 1.1 mrg if (!build->after_each_mark)
5667 1.1 mrg return graft;
5668 1.1 mrg graft->node = build->after_each_mark(graft->node, build,
5669 1.1 mrg build->after_each_mark_user);
5670 1.1 mrg if (!graft->node)
5671 1.1 mrg return isl_ast_graft_free(graft);
5672 1.1 mrg return graft;
5673 1.1 mrg }
5674 1.1 mrg
5675 1.1 mrg
5676 1.1 mrg /* Generate an AST that visits the elements in the domain of "executed"
5677 1.1 mrg * in the relative order specified by the mark node "node" and
5678 1.1 mrg * its descendants.
5679 1.1 mrg *
5680 1.1 mrg * The relation "executed" maps the outer generated loop iterators
5681 1.1 mrg * to the domain elements executed by those iterations.
5682 1.1 mrg
5683 1.1 mrg * Since we may be calling before_each_mark and after_each_mark
5684 1.1 mrg * callbacks, we record the current inverse schedule in the build.
5685 1.1 mrg *
5686 1.1 mrg * We generate an AST for the child of the mark node, combine
5687 1.1 mrg * the graft list into a single graft and then insert the mark
5688 1.1 mrg * in the AST of that single graft.
5689 1.1 mrg */
5690 1.1 mrg static __isl_give isl_ast_graft_list *build_ast_from_mark(
5691 1.1 mrg __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5692 1.1 mrg __isl_take isl_union_map *executed)
5693 1.1 mrg {
5694 1.1 mrg isl_id *mark;
5695 1.1 mrg isl_ast_graft *graft;
5696 1.1 mrg isl_ast_graft_list *list;
5697 1.1 mrg isl_size n;
5698 1.1 mrg
5699 1.1 mrg build = isl_ast_build_set_executed(build, isl_union_map_copy(executed));
5700 1.1 mrg
5701 1.1 mrg mark = isl_schedule_node_mark_get_id(node);
5702 1.1 mrg if (before_each_mark(mark, build) < 0)
5703 1.1 mrg node = isl_schedule_node_free(node);
5704 1.1 mrg
5705 1.1 mrg list = build_ast_from_child(isl_ast_build_copy(build), node, executed);
5706 1.1 mrg list = isl_ast_graft_list_fuse(list, build);
5707 1.1 mrg n = isl_ast_graft_list_n_ast_graft(list);
5708 1.1 mrg if (n < 0)
5709 1.1 mrg list = isl_ast_graft_list_free(list);
5710 1.1 mrg if (n == 0) {
5711 1.1 mrg isl_id_free(mark);
5712 1.1 mrg } else {
5713 1.1 mrg graft = isl_ast_graft_list_get_ast_graft(list, 0);
5714 1.1 mrg graft = isl_ast_graft_insert_mark(graft, mark);
5715 1.1 mrg graft = after_each_mark(graft, build);
5716 1.1 mrg list = isl_ast_graft_list_set_ast_graft(list, 0, graft);
5717 1.1 mrg }
5718 1.1 mrg isl_ast_build_free(build);
5719 1.1 mrg
5720 1.1 mrg return list;
5721 1.1 mrg }
5722 1.1 mrg
5723 1.1 mrg static __isl_give isl_ast_graft_list *build_ast_from_schedule_node(
5724 1.1 mrg __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5725 1.1 mrg __isl_take isl_union_map *executed);
5726 1.1 mrg
5727 1.1 mrg /* Generate an AST that visits the elements in the domain of "executed"
5728 1.1 mrg * in the relative order specified by the sequence (or set) node "node" and
5729 1.1 mrg * its descendants.
5730 1.1 mrg *
5731 1.1 mrg * The relation "executed" maps the outer generated loop iterators
5732 1.1 mrg * to the domain elements executed by those iterations.
5733 1.1 mrg *
5734 1.1 mrg * We simply generate an AST for each of the children and concatenate
5735 1.1 mrg * the results.
5736 1.1 mrg */
5737 1.1 mrg static __isl_give isl_ast_graft_list *build_ast_from_sequence(
5738 1.1 mrg __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5739 1.1 mrg __isl_take isl_union_map *executed)
5740 1.1 mrg {
5741 1.1 mrg int i;
5742 1.1 mrg isl_size n;
5743 1.1 mrg isl_ctx *ctx;
5744 1.1 mrg isl_ast_graft_list *list;
5745 1.1 mrg
5746 1.1 mrg ctx = isl_ast_build_get_ctx(build);
5747 1.1 mrg list = isl_ast_graft_list_alloc(ctx, 0);
5748 1.1 mrg
5749 1.1 mrg n = isl_schedule_node_n_children(node);
5750 1.1 mrg if (n < 0)
5751 1.1 mrg list = isl_ast_graft_list_free(list);
5752 1.1 mrg for (i = 0; i < n; ++i) {
5753 1.1 mrg isl_schedule_node *child;
5754 1.1 mrg isl_ast_graft_list *list_i;
5755 1.1 mrg
5756 1.1 mrg child = isl_schedule_node_get_child(node, i);
5757 1.1 mrg list_i = build_ast_from_schedule_node(isl_ast_build_copy(build),
5758 1.1 mrg child, isl_union_map_copy(executed));
5759 1.1 mrg list = isl_ast_graft_list_concat(list, list_i);
5760 1.1 mrg }
5761 1.1 mrg isl_ast_build_free(build);
5762 1.1 mrg isl_schedule_node_free(node);
5763 1.1 mrg isl_union_map_free(executed);
5764 1.1 mrg
5765 1.1 mrg return list;
5766 1.1 mrg }
5767 1.1 mrg
5768 1.1 mrg /* Generate an AST that visits the elements in the domain of "executed"
5769 1.1 mrg * in the relative order specified by the node "node" and its descendants.
5770 1.1 mrg *
5771 1.1 mrg * The relation "executed" maps the outer generated loop iterators
5772 1.1 mrg * to the domain elements executed by those iterations.
5773 1.1 mrg *
5774 1.1 mrg * The node types are handled in separate functions.
5775 1.1 mrg * Set nodes are currently treated in the same way as sequence nodes.
5776 1.1 mrg * The children of a set node may be executed in any order,
5777 1.1 mrg * including the order of the children.
5778 1.1 mrg */
5779 1.1 mrg static __isl_give isl_ast_graft_list *build_ast_from_schedule_node(
5780 1.1 mrg __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5781 1.1 mrg __isl_take isl_union_map *executed)
5782 1.1 mrg {
5783 1.1 mrg enum isl_schedule_node_type type;
5784 1.1 mrg
5785 1.1 mrg type = isl_schedule_node_get_type(node);
5786 1.1 mrg
5787 1.1 mrg switch (type) {
5788 1.1 mrg case isl_schedule_node_error:
5789 1.1 mrg goto error;
5790 1.1 mrg case isl_schedule_node_leaf:
5791 1.1 mrg return build_ast_from_leaf(build, node, executed);
5792 1.1 mrg case isl_schedule_node_band:
5793 1.1 mrg return build_ast_from_band(build, node, executed);
5794 1.1 mrg case isl_schedule_node_context:
5795 1.1 mrg return build_ast_from_context(build, node, executed);
5796 1.1 mrg case isl_schedule_node_domain:
5797 1.1 mrg isl_die(isl_schedule_node_get_ctx(node), isl_error_unsupported,
5798 1.1 mrg "unexpected internal domain node", goto error);
5799 1.1 mrg case isl_schedule_node_expansion:
5800 1.1 mrg return build_ast_from_expansion(build, node, executed);
5801 1.1 mrg case isl_schedule_node_extension:
5802 1.1 mrg return build_ast_from_extension(build, node, executed);
5803 1.1 mrg case isl_schedule_node_filter:
5804 1.1 mrg return build_ast_from_filter(build, node, executed);
5805 1.1 mrg case isl_schedule_node_guard:
5806 1.1 mrg return build_ast_from_guard(build, node, executed);
5807 1.1 mrg case isl_schedule_node_mark:
5808 1.1 mrg return build_ast_from_mark(build, node, executed);
5809 1.1 mrg case isl_schedule_node_sequence:
5810 1.1 mrg case isl_schedule_node_set:
5811 1.1 mrg return build_ast_from_sequence(build, node, executed);
5812 1.1 mrg }
5813 1.1 mrg
5814 1.1 mrg isl_die(isl_ast_build_get_ctx(build), isl_error_internal,
5815 1.1 mrg "unhandled type", goto error);
5816 1.1 mrg error:
5817 1.1 mrg isl_union_map_free(executed);
5818 1.1 mrg isl_schedule_node_free(node);
5819 1.1 mrg isl_ast_build_free(build);
5820 1.1 mrg
5821 1.1 mrg return NULL;
5822 1.1 mrg }
5823 1.1 mrg
5824 1.1 mrg /* Generate an AST that visits the elements in the domain of "executed"
5825 1.1 mrg * in the relative order specified by the (single) child of "node" and
5826 1.1 mrg * its descendants.
5827 1.1 mrg *
5828 1.1 mrg * The relation "executed" maps the outer generated loop iterators
5829 1.1 mrg * to the domain elements executed by those iterations.
5830 1.1 mrg *
5831 1.1 mrg * This function is never called on a leaf, set or sequence node,
5832 1.1 mrg * so the node always has exactly one child.
5833 1.1 mrg */
5834 1.1 mrg static __isl_give isl_ast_graft_list *build_ast_from_child(
5835 1.1 mrg __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5836 1.1 mrg __isl_take isl_union_map *executed)
5837 1.1 mrg {
5838 1.1 mrg node = isl_schedule_node_child(node, 0);
5839 1.1 mrg return build_ast_from_schedule_node(build, node, executed);
5840 1.1 mrg }
5841 1.1 mrg
5842 1.1 mrg /* Generate an AST that visits the elements in the domain of the domain
5843 1.1 mrg * node "node" in the relative order specified by its descendants.
5844 1.1 mrg *
5845 1.1 mrg * An initial inverse schedule is created that maps a zero-dimensional
5846 1.1 mrg * schedule space to the node domain.
5847 1.1 mrg * The input "build" is assumed to have a parametric domain and
5848 1.1 mrg * is replaced by the same zero-dimensional schedule space.
5849 1.1 mrg *
5850 1.1 mrg * We also add some of the parameter constraints in the build domain
5851 1.1 mrg * to the executed relation. Adding these constraints
5852 1.1 mrg * allows for an earlier detection of conflicts in some cases.
5853 1.1 mrg * However, we do not want to divide the executed relation into
5854 1.1 mrg * more disjuncts than necessary. We therefore approximate
5855 1.1 mrg * the constraints on the parameters by a single disjunct set.
5856 1.1 mrg */
5857 1.1 mrg static __isl_give isl_ast_node *build_ast_from_domain(
5858 1.1 mrg __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node)
5859 1.1 mrg {
5860 1.1 mrg isl_ctx *ctx;
5861 1.1 mrg isl_union_set *domain, *schedule_domain;
5862 1.1 mrg isl_union_map *executed;
5863 1.1 mrg isl_space *space;
5864 1.1 mrg isl_set *set;
5865 1.1 mrg isl_ast_graft_list *list;
5866 1.1 mrg isl_ast_node *ast;
5867 1.1 mrg int is_params;
5868 1.1 mrg
5869 1.1 mrg if (!build)
5870 1.1 mrg goto error;
5871 1.1 mrg
5872 1.1 mrg ctx = isl_ast_build_get_ctx(build);
5873 1.1 mrg space = isl_ast_build_get_space(build, 1);
5874 1.1 mrg is_params = isl_space_is_params(space);
5875 1.1 mrg isl_space_free(space);
5876 1.1 mrg if (is_params < 0)
5877 1.1 mrg goto error;
5878 1.1 mrg if (!is_params)
5879 1.1 mrg isl_die(ctx, isl_error_unsupported,
5880 1.1 mrg "expecting parametric initial context", goto error);
5881 1.1 mrg
5882 1.1 mrg domain = isl_schedule_node_domain_get_domain(node);
5883 1.1 mrg domain = isl_union_set_coalesce(domain);
5884 1.1 mrg
5885 1.1 mrg space = isl_union_set_get_space(domain);
5886 1.1 mrg space = isl_space_set_from_params(space);
5887 1.1 mrg build = isl_ast_build_product(build, space);
5888 1.1 mrg
5889 1.1 mrg set = isl_ast_build_get_domain(build);
5890 1.1 mrg set = isl_set_from_basic_set(isl_set_simple_hull(set));
5891 1.1 mrg schedule_domain = isl_union_set_from_set(set);
5892 1.1 mrg
5893 1.1 mrg executed = isl_union_map_from_domain_and_range(schedule_domain, domain);
5894 1.1 mrg list = build_ast_from_child(isl_ast_build_copy(build), node, executed);
5895 1.1 mrg ast = isl_ast_node_from_graft_list(list, build);
5896 1.1 mrg isl_ast_build_free(build);
5897 1.1 mrg
5898 1.1 mrg return ast;
5899 1.1 mrg error:
5900 1.1 mrg isl_schedule_node_free(node);
5901 1.1 mrg isl_ast_build_free(build);
5902 1.1 mrg return NULL;
5903 1.1 mrg }
5904 1.1 mrg
5905 1.1 mrg /* Generate an AST that visits the elements in the domain of "schedule"
5906 1.1 mrg * in the relative order specified by the schedule tree.
5907 1.1 mrg *
5908 1.1 mrg * "build" is an isl_ast_build that has been created using
5909 1.1 mrg * isl_ast_build_alloc or isl_ast_build_from_context based
5910 1.1 mrg * on a parametric set.
5911 1.1 mrg *
5912 1.1 mrg * The construction starts at the root node of the schedule,
5913 1.1 mrg * which is assumed to be a domain node.
5914 1.1 mrg */
5915 1.1 mrg __isl_give isl_ast_node *isl_ast_build_node_from_schedule(
5916 1.1 mrg __isl_keep isl_ast_build *build, __isl_take isl_schedule *schedule)
5917 1.1 mrg {
5918 1.1 mrg isl_ctx *ctx;
5919 1.1 mrg isl_schedule_node *node;
5920 1.1 mrg
5921 1.1 mrg if (!build || !schedule)
5922 1.1 mrg goto error;
5923 1.1 mrg
5924 1.1 mrg ctx = isl_ast_build_get_ctx(build);
5925 1.1 mrg
5926 1.1 mrg node = isl_schedule_get_root(schedule);
5927 1.1 mrg if (!node)
5928 1.1 mrg goto error;
5929 1.1 mrg isl_schedule_free(schedule);
5930 1.1 mrg
5931 1.1 mrg build = isl_ast_build_copy(build);
5932 1.1 mrg build = isl_ast_build_set_single_valued(build, 0);
5933 1.1 mrg if (isl_schedule_node_get_type(node) != isl_schedule_node_domain)
5934 1.1 mrg isl_die(ctx, isl_error_unsupported,
5935 1.1 mrg "expecting root domain node",
5936 1.1 mrg build = isl_ast_build_free(build));
5937 1.1 mrg return build_ast_from_domain(build, node);
5938 1.1 mrg error:
5939 1.1 mrg isl_schedule_free(schedule);
5940 1.1 mrg return NULL;
5941 1.1 mrg }
5942