1 1.1 mrg /* 2 1.1 mrg * Copyright 2012-2014 Ecole Normale Superieure 3 1.1 mrg * Copyright 2014 INRIA Rocquencourt 4 1.1 mrg * 5 1.1 mrg * Use of this software is governed by the MIT license 6 1.1 mrg * 7 1.1 mrg * Written by Sven Verdoolaege, 8 1.1 mrg * Ecole Normale Superieure, 45 rue dUlm, 75230 Paris, France 9 1.1 mrg * and Inria Paris - Rocquencourt, Domaine de Voluceau - Rocquencourt, 10 1.1 mrg * B.P. 105 - 78153 Le Chesnay, France 11 1.1 mrg */ 12 1.1 mrg 13 1.1 mrg #include <limits.h> 14 1.1 mrg #include <isl/id.h> 15 1.1 mrg #include <isl/val.h> 16 1.1 mrg #include <isl/space.h> 17 1.1 mrg #include <isl/aff.h> 18 1.1 mrg #include <isl/constraint.h> 19 1.1 mrg #include <isl/set.h> 20 1.1 mrg #include <isl/ilp.h> 21 1.1 mrg #include <isl/union_set.h> 22 1.1 mrg #include <isl/union_map.h> 23 1.1 mrg #include <isl/schedule_node.h> 24 1.1 mrg #include <isl/options.h> 25 1.1 mrg #include <isl_sort.h> 26 1.1 mrg #include <isl_tarjan.h> 27 1.1 mrg #include <isl_ast_private.h> 28 1.1 mrg #include <isl_ast_build_expr.h> 29 1.1 mrg #include <isl_ast_build_private.h> 30 1.1 mrg #include <isl_ast_graft_private.h> 31 1.1 mrg 32 1.1 mrg /* Try and reduce the number of disjuncts in the representation of "set", 33 1.1 mrg * without dropping explicit representations of local variables. 34 1.1 mrg */ 35 1.1 mrg static __isl_give isl_set *isl_set_coalesce_preserve(__isl_take isl_set *set) 36 1.1 mrg { 37 1.1 mrg isl_ctx *ctx; 38 1.1 mrg int save_preserve; 39 1.1 mrg 40 1.1 mrg if (!set) 41 1.1 mrg return NULL; 42 1.1 mrg 43 1.1 mrg ctx = isl_set_get_ctx(set); 44 1.1 mrg save_preserve = isl_options_get_coalesce_preserve_locals(ctx); 45 1.1 mrg isl_options_set_coalesce_preserve_locals(ctx, 1); 46 1.1 mrg set = isl_set_coalesce(set); 47 1.1 mrg isl_options_set_coalesce_preserve_locals(ctx, save_preserve); 48 1.1 mrg return set; 49 1.1 mrg } 50 1.1 mrg 51 1.1 mrg /* Data used in generate_domain. 52 1.1 mrg * 53 1.1 mrg * "build" is the input build. 
54 1.1 mrg * "list" collects the results. 55 1.1 mrg */ 56 1.1 mrg struct isl_generate_domain_data { 57 1.1 mrg isl_ast_build *build; 58 1.1 mrg 59 1.1 mrg isl_ast_graft_list *list; 60 1.1 mrg }; 61 1.1 mrg 62 1.1 mrg static __isl_give isl_ast_graft_list *generate_next_level( 63 1.1 mrg __isl_take isl_union_map *executed, 64 1.1 mrg __isl_take isl_ast_build *build); 65 1.1 mrg static __isl_give isl_ast_graft_list *generate_code( 66 1.1 mrg __isl_take isl_union_map *executed, __isl_take isl_ast_build *build, 67 1.1 mrg int internal); 68 1.1 mrg 69 1.1 mrg /* Generate an AST for a single domain based on 70 1.1 mrg * the (non single valued) inverse schedule "executed". 71 1.1 mrg * 72 1.1 mrg * We extend the schedule with the iteration domain 73 1.1 mrg * and continue generating through a call to generate_code. 74 1.1 mrg * 75 1.1 mrg * In particular, if executed has the form 76 1.1 mrg * 77 1.1 mrg * S -> D 78 1.1 mrg * 79 1.1 mrg * then we continue generating code on 80 1.1 mrg * 81 1.1 mrg * [S -> D] -> D 82 1.1 mrg * 83 1.1 mrg * The extended inverse schedule is clearly single valued 84 1.1 mrg * ensuring that the nested generate_code will not reach this function, 85 1.1 mrg * but will instead create calls to all elements of D that need 86 1.1 mrg * to be executed from the current schedule domain. 
87 1.1 mrg */ 88 1.1 mrg static isl_stat generate_non_single_valued(__isl_take isl_map *executed, 89 1.1 mrg struct isl_generate_domain_data *data) 90 1.1 mrg { 91 1.1 mrg isl_map *identity; 92 1.1 mrg isl_ast_build *build; 93 1.1 mrg isl_ast_graft_list *list; 94 1.1 mrg 95 1.1 mrg build = isl_ast_build_copy(data->build); 96 1.1 mrg 97 1.1 mrg identity = isl_set_identity(isl_map_range(isl_map_copy(executed))); 98 1.1 mrg executed = isl_map_domain_product(executed, identity); 99 1.1 mrg build = isl_ast_build_set_single_valued(build, 1); 100 1.1 mrg 101 1.1 mrg list = generate_code(isl_union_map_from_map(executed), build, 1); 102 1.1 mrg 103 1.1 mrg data->list = isl_ast_graft_list_concat(data->list, list); 104 1.1 mrg 105 1.1 mrg return isl_stat_ok; 106 1.1 mrg } 107 1.1 mrg 108 1.1 mrg /* Call the at_each_domain callback, if requested by the user, 109 1.1 mrg * after recording the current inverse schedule in the build. 110 1.1 mrg */ 111 1.1 mrg static __isl_give isl_ast_graft *at_each_domain(__isl_take isl_ast_graft *graft, 112 1.1 mrg __isl_keep isl_map *executed, __isl_keep isl_ast_build *build) 113 1.1 mrg { 114 1.1 mrg if (!graft || !build) 115 1.1 mrg return isl_ast_graft_free(graft); 116 1.1 mrg if (!build->at_each_domain) 117 1.1 mrg return graft; 118 1.1 mrg 119 1.1 mrg build = isl_ast_build_copy(build); 120 1.1 mrg build = isl_ast_build_set_executed(build, 121 1.1 mrg isl_union_map_from_map(isl_map_copy(executed))); 122 1.1 mrg if (!build) 123 1.1 mrg return isl_ast_graft_free(graft); 124 1.1 mrg 125 1.1 mrg graft->node = build->at_each_domain(graft->node, 126 1.1 mrg build, build->at_each_domain_user); 127 1.1 mrg isl_ast_build_free(build); 128 1.1 mrg 129 1.1 mrg if (!graft->node) 130 1.1 mrg graft = isl_ast_graft_free(graft); 131 1.1 mrg 132 1.1 mrg return graft; 133 1.1 mrg } 134 1.1 mrg 135 1.1 mrg /* Generate a call expression for the single executed 136 1.1 mrg * domain element "map" and put a guard around it based its (simplified) 137 1.1 mrg * 
domain. "executed" is the original inverse schedule from which "map" 138 1.1 mrg * has been derived. In particular, "map" is either identical to "executed" 139 1.1 mrg * or it is the result of gisting "executed" with respect to the build domain. 140 1.1 mrg * "executed" is only used if there is an at_each_domain callback. 141 1.1 mrg * 142 1.1 mrg * At this stage, any pending constraints in the build can no longer 143 1.1 mrg * be simplified with respect to any enforced constraints since 144 1.1 mrg * the call node does not have any enforced constraints. 145 1.1 mrg * Since all pending constraints not covered by any enforced constraints 146 1.1 mrg * will be added as a guard to the graft in create_node_scaled, 147 1.1 mrg * even in the eliminated case, the pending constraints 148 1.1 mrg * can be considered to have been generated by outer constructs. 149 1.1 mrg * 150 1.1 mrg * If the user has set an at_each_domain callback, it is called 151 1.1 mrg * on the constructed call expression node. 
152 1.1 mrg */ 153 1.1 mrg static isl_stat add_domain(__isl_take isl_map *executed, 154 1.1 mrg __isl_take isl_map *map, struct isl_generate_domain_data *data) 155 1.1 mrg { 156 1.1 mrg isl_ast_build *build; 157 1.1 mrg isl_ast_graft *graft; 158 1.1 mrg isl_ast_graft_list *list; 159 1.1 mrg isl_set *guard, *pending; 160 1.1 mrg 161 1.1 mrg build = isl_ast_build_copy(data->build); 162 1.1 mrg pending = isl_ast_build_get_pending(build); 163 1.1 mrg build = isl_ast_build_replace_pending_by_guard(build, pending); 164 1.1 mrg 165 1.1 mrg guard = isl_map_domain(isl_map_copy(map)); 166 1.1 mrg guard = isl_set_compute_divs(guard); 167 1.1 mrg guard = isl_set_coalesce_preserve(guard); 168 1.1 mrg guard = isl_set_gist(guard, isl_ast_build_get_generated(build)); 169 1.1 mrg guard = isl_ast_build_specialize(build, guard); 170 1.1 mrg 171 1.1 mrg graft = isl_ast_graft_alloc_domain(map, build); 172 1.1 mrg graft = at_each_domain(graft, executed, build); 173 1.1 mrg isl_ast_build_free(build); 174 1.1 mrg isl_map_free(executed); 175 1.1 mrg graft = isl_ast_graft_add_guard(graft, guard, data->build); 176 1.1 mrg 177 1.1 mrg list = isl_ast_graft_list_from_ast_graft(graft); 178 1.1 mrg data->list = isl_ast_graft_list_concat(data->list, list); 179 1.1 mrg 180 1.1 mrg return isl_stat_ok; 181 1.1 mrg } 182 1.1 mrg 183 1.1 mrg /* Generate an AST for a single domain based on 184 1.1 mrg * the inverse schedule "executed" and add it to data->list. 185 1.1 mrg * 186 1.1 mrg * If there is more than one domain element associated to the current 187 1.1 mrg * schedule "time", then we need to continue the generation process 188 1.1 mrg * in generate_non_single_valued. 189 1.1 mrg * Note that the inverse schedule being single-valued may depend 190 1.1 mrg * on constraints that are only available in the original context 191 1.1 mrg * domain specified by the user. We therefore first introduce 192 1.1 mrg * some of the constraints of data->build->domain. 
In particular, 193 1.1 mrg * we intersect with a single-disjunct approximation of this set. 194 1.1 mrg * We perform this approximation to avoid further splitting up 195 1.1 mrg * the executed relation, possibly introducing a disjunctive guard 196 1.1 mrg * on the statement. 197 1.1 mrg * 198 1.1 mrg * On the other hand, we only perform the test after having taken the gist 199 1.1 mrg * of the domain as the resulting map is the one from which the call 200 1.1 mrg * expression is constructed. Using this map to construct the call 201 1.1 mrg * expression usually yields simpler results in cases where the original 202 1.1 mrg * map is not obviously single-valued. 203 1.1 mrg * If the original map is obviously single-valued, then the gist 204 1.1 mrg * operation is skipped. 205 1.1 mrg * 206 1.1 mrg * Because we perform the single-valuedness test on the gisted map, 207 1.1 mrg * we may in rare cases fail to recognize that the inverse schedule 208 1.1 mrg * is single-valued. This becomes problematic if this happens 209 1.1 mrg * from the recursive call through generate_non_single_valued 210 1.1 mrg * as we would then end up in an infinite recursion. 211 1.1 mrg * We therefore check if we are inside a call to generate_non_single_valued 212 1.1 mrg * and revert to the ungisted map if the gisted map turns out not to be 213 1.1 mrg * single-valued. 214 1.1 mrg * 215 1.1 mrg * Otherwise, call add_domain to generate a call expression (with guard) and 216 1.1 mrg * to call the at_each_domain callback, if any. 
217 1.1 mrg */ 218 1.1 mrg static isl_stat generate_domain(__isl_take isl_map *executed, void *user) 219 1.1 mrg { 220 1.1 mrg struct isl_generate_domain_data *data = user; 221 1.1 mrg isl_set *domain; 222 1.1 mrg isl_map *map = NULL; 223 1.1 mrg int empty, sv; 224 1.1 mrg 225 1.1 mrg domain = isl_ast_build_get_domain(data->build); 226 1.1 mrg domain = isl_set_from_basic_set(isl_set_simple_hull(domain)); 227 1.1 mrg executed = isl_map_intersect_domain(executed, domain); 228 1.1 mrg empty = isl_map_is_empty(executed); 229 1.1 mrg if (empty < 0) 230 1.1 mrg goto error; 231 1.1 mrg if (empty) { 232 1.1 mrg isl_map_free(executed); 233 1.1 mrg return isl_stat_ok; 234 1.1 mrg } 235 1.1 mrg 236 1.1 mrg sv = isl_map_plain_is_single_valued(executed); 237 1.1 mrg if (sv < 0) 238 1.1 mrg goto error; 239 1.1 mrg if (sv) 240 1.1 mrg return add_domain(executed, isl_map_copy(executed), data); 241 1.1 mrg 242 1.1 mrg executed = isl_map_coalesce(executed); 243 1.1 mrg map = isl_map_copy(executed); 244 1.1 mrg map = isl_ast_build_compute_gist_map_domain(data->build, map); 245 1.1 mrg sv = isl_map_is_single_valued(map); 246 1.1 mrg if (sv < 0) 247 1.1 mrg goto error; 248 1.1 mrg if (!sv) { 249 1.1 mrg isl_map_free(map); 250 1.1 mrg if (data->build->single_valued) 251 1.1 mrg map = isl_map_copy(executed); 252 1.1 mrg else 253 1.1 mrg return generate_non_single_valued(executed, data); 254 1.1 mrg } 255 1.1 mrg 256 1.1 mrg return add_domain(executed, map, data); 257 1.1 mrg error: 258 1.1 mrg isl_map_free(map); 259 1.1 mrg isl_map_free(executed); 260 1.1 mrg return isl_stat_error; 261 1.1 mrg } 262 1.1 mrg 263 1.1 mrg /* Call build->create_leaf to a create "leaf" node in the AST, 264 1.1 mrg * encapsulate the result in an isl_ast_graft and return the result 265 1.1 mrg * as a 1-element list. 266 1.1 mrg * 267 1.1 mrg * Note that the node returned by the user may be an entire tree. 
268 1.1 mrg * 269 1.1 mrg * Since the node itself cannot enforce any constraints, we turn 270 1.1 mrg * all pending constraints into guards and add them to the resulting 271 1.1 mrg * graft to ensure that they will be generated. 272 1.1 mrg * 273 1.1 mrg * Before we pass control to the user, we first clear some information 274 1.1 mrg * from the build that is (presumbably) only meaningful 275 1.1 mrg * for the current code generation. 276 1.1 mrg * This includes the create_leaf callback itself, so we make a copy 277 1.1 mrg * of the build first. 278 1.1 mrg */ 279 1.1 mrg static __isl_give isl_ast_graft_list *call_create_leaf( 280 1.1 mrg __isl_take isl_union_map *executed, __isl_take isl_ast_build *build) 281 1.1 mrg { 282 1.1 mrg isl_set *guard; 283 1.1 mrg isl_ast_node *node; 284 1.1 mrg isl_ast_graft *graft; 285 1.1 mrg isl_ast_build *user_build; 286 1.1 mrg 287 1.1 mrg guard = isl_ast_build_get_pending(build); 288 1.1 mrg user_build = isl_ast_build_copy(build); 289 1.1 mrg user_build = isl_ast_build_replace_pending_by_guard(user_build, 290 1.1 mrg isl_set_copy(guard)); 291 1.1 mrg user_build = isl_ast_build_set_executed(user_build, executed); 292 1.1 mrg user_build = isl_ast_build_clear_local_info(user_build); 293 1.1 mrg if (!user_build) 294 1.1 mrg node = NULL; 295 1.1 mrg else 296 1.1 mrg node = build->create_leaf(user_build, build->create_leaf_user); 297 1.1 mrg graft = isl_ast_graft_alloc(node, build); 298 1.1 mrg graft = isl_ast_graft_add_guard(graft, guard, build); 299 1.1 mrg isl_ast_build_free(build); 300 1.1 mrg return isl_ast_graft_list_from_ast_graft(graft); 301 1.1 mrg } 302 1.1 mrg 303 1.1 mrg static __isl_give isl_ast_graft_list *build_ast_from_child( 304 1.1 mrg __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node, 305 1.1 mrg __isl_take isl_union_map *executed); 306 1.1 mrg 307 1.1 mrg /* Generate an AST after having handled the complete schedule 308 1.1 mrg * of this call to the code generator or the complete band 309 1.1 mrg * 
if we are generating an AST from a schedule tree. 310 1.1 mrg * 311 1.1 mrg * If we are inside a band node, then move on to the child of the band. 312 1.1 mrg * 313 1.1 mrg * If the user has specified a create_leaf callback, control 314 1.1 mrg * is passed to the user in call_create_leaf. 315 1.1 mrg * 316 1.1 mrg * Otherwise, we generate one or more calls for each individual 317 1.1 mrg * domain in generate_domain. 318 1.1 mrg */ 319 1.1 mrg static __isl_give isl_ast_graft_list *generate_inner_level( 320 1.1 mrg __isl_take isl_union_map *executed, __isl_take isl_ast_build *build) 321 1.1 mrg { 322 1.1 mrg isl_ctx *ctx; 323 1.1 mrg struct isl_generate_domain_data data = { build }; 324 1.1 mrg 325 1.1 mrg if (!build || !executed) 326 1.1 mrg goto error; 327 1.1 mrg 328 1.1 mrg if (isl_ast_build_has_schedule_node(build)) { 329 1.1 mrg isl_schedule_node *node; 330 1.1 mrg node = isl_ast_build_get_schedule_node(build); 331 1.1 mrg build = isl_ast_build_reset_schedule_node(build); 332 1.1 mrg return build_ast_from_child(build, node, executed); 333 1.1 mrg } 334 1.1 mrg 335 1.1 mrg if (build->create_leaf) 336 1.1 mrg return call_create_leaf(executed, build); 337 1.1 mrg 338 1.1 mrg ctx = isl_union_map_get_ctx(executed); 339 1.1 mrg data.list = isl_ast_graft_list_alloc(ctx, 0); 340 1.1 mrg if (isl_union_map_foreach_map(executed, &generate_domain, &data) < 0) 341 1.1 mrg data.list = isl_ast_graft_list_free(data.list); 342 1.1 mrg 343 1.1 mrg if (0) 344 1.1 mrg error: data.list = NULL; 345 1.1 mrg isl_ast_build_free(build); 346 1.1 mrg isl_union_map_free(executed); 347 1.1 mrg return data.list; 348 1.1 mrg } 349 1.1 mrg 350 1.1 mrg /* Call the before_each_for callback, if requested by the user. 
351 1.1 mrg */ 352 1.1 mrg static __isl_give isl_ast_node *before_each_for(__isl_take isl_ast_node *node, 353 1.1 mrg __isl_keep isl_ast_build *build) 354 1.1 mrg { 355 1.1 mrg isl_id *id; 356 1.1 mrg 357 1.1 mrg if (!node || !build) 358 1.1 mrg return isl_ast_node_free(node); 359 1.1 mrg if (!build->before_each_for) 360 1.1 mrg return node; 361 1.1 mrg id = build->before_each_for(build, build->before_each_for_user); 362 1.1 mrg node = isl_ast_node_set_annotation(node, id); 363 1.1 mrg return node; 364 1.1 mrg } 365 1.1 mrg 366 1.1 mrg /* Call the after_each_for callback, if requested by the user. 367 1.1 mrg */ 368 1.1 mrg static __isl_give isl_ast_graft *after_each_for(__isl_take isl_ast_graft *graft, 369 1.1 mrg __isl_keep isl_ast_build *build) 370 1.1 mrg { 371 1.1 mrg if (!graft || !build) 372 1.1 mrg return isl_ast_graft_free(graft); 373 1.1 mrg if (!build->after_each_for) 374 1.1 mrg return graft; 375 1.1 mrg graft->node = build->after_each_for(graft->node, build, 376 1.1 mrg build->after_each_for_user); 377 1.1 mrg if (!graft->node) 378 1.1 mrg return isl_ast_graft_free(graft); 379 1.1 mrg return graft; 380 1.1 mrg } 381 1.1 mrg 382 1.1 mrg /* Plug in all the know values of the current and outer dimensions 383 1.1 mrg * in the domain of "executed". In principle, we only need to plug 384 1.1 mrg * in the known value of the current dimension since the values of 385 1.1 mrg * outer dimensions have been plugged in already. 386 1.1 mrg * However, it turns out to be easier to just plug in all known values. 387 1.1 mrg */ 388 1.1 mrg static __isl_give isl_union_map *plug_in_values( 389 1.1 mrg __isl_take isl_union_map *executed, __isl_keep isl_ast_build *build) 390 1.1 mrg { 391 1.1 mrg return isl_ast_build_substitute_values_union_map_domain(build, 392 1.1 mrg executed); 393 1.1 mrg } 394 1.1 mrg 395 1.1 mrg /* Check if the constraint "c" is a lower bound on dimension "pos", 396 1.1 mrg * an upper bound, or independent of dimension "pos". 
397 1.1 mrg */ 398 1.1 mrg static int constraint_type(isl_constraint *c, int pos) 399 1.1 mrg { 400 1.1 mrg if (isl_constraint_is_lower_bound(c, isl_dim_set, pos)) 401 1.1 mrg return 1; 402 1.1 mrg if (isl_constraint_is_upper_bound(c, isl_dim_set, pos)) 403 1.1 mrg return 2; 404 1.1 mrg return 0; 405 1.1 mrg } 406 1.1 mrg 407 1.1 mrg /* Compare the types of the constraints "a" and "b", 408 1.1 mrg * resulting in constraints that are independent of "depth" 409 1.1 mrg * to be sorted before the lower bounds on "depth", which in 410 1.1 mrg * turn are sorted before the upper bounds on "depth". 411 1.1 mrg */ 412 1.1 mrg static int cmp_constraint(__isl_keep isl_constraint *a, 413 1.1 mrg __isl_keep isl_constraint *b, void *user) 414 1.1 mrg { 415 1.1 mrg int *depth = user; 416 1.1 mrg int t1 = constraint_type(a, *depth); 417 1.1 mrg int t2 = constraint_type(b, *depth); 418 1.1 mrg 419 1.1 mrg return t1 - t2; 420 1.1 mrg } 421 1.1 mrg 422 1.1 mrg /* Extract a lower bound on dimension "pos" from constraint "c". 423 1.1 mrg * 424 1.1 mrg * If the constraint is of the form 425 1.1 mrg * 426 1.1 mrg * a x + f(...) >= 0 427 1.1 mrg * 428 1.1 mrg * then we essentially return 429 1.1 mrg * 430 1.1 mrg * l = ceil(-f(...)/a) 431 1.1 mrg * 432 1.1 mrg * However, if the current dimension is strided, then we need to make 433 1.1 mrg * sure that the lower bound we construct is of the form 434 1.1 mrg * 435 1.1 mrg * f + s a 436 1.1 mrg * 437 1.1 mrg * with f the offset and s the stride. 
438 1.1 mrg * We therefore compute 439 1.1 mrg * 440 1.1 mrg * f + s * ceil((l - f)/s) 441 1.1 mrg */ 442 1.1 mrg static __isl_give isl_aff *lower_bound(__isl_keep isl_constraint *c, 443 1.1 mrg int pos, __isl_keep isl_ast_build *build) 444 1.1 mrg { 445 1.1 mrg isl_aff *aff; 446 1.1 mrg 447 1.1 mrg aff = isl_constraint_get_bound(c, isl_dim_set, pos); 448 1.1 mrg aff = isl_aff_ceil(aff); 449 1.1 mrg 450 1.1 mrg if (isl_ast_build_has_stride(build, pos)) { 451 1.1 mrg isl_aff *offset; 452 1.1 mrg isl_val *stride; 453 1.1 mrg 454 1.1 mrg offset = isl_ast_build_get_offset(build, pos); 455 1.1 mrg stride = isl_ast_build_get_stride(build, pos); 456 1.1 mrg 457 1.1 mrg aff = isl_aff_sub(aff, isl_aff_copy(offset)); 458 1.1 mrg aff = isl_aff_scale_down_val(aff, isl_val_copy(stride)); 459 1.1 mrg aff = isl_aff_ceil(aff); 460 1.1 mrg aff = isl_aff_scale_val(aff, stride); 461 1.1 mrg aff = isl_aff_add(aff, offset); 462 1.1 mrg } 463 1.1 mrg 464 1.1 mrg aff = isl_ast_build_compute_gist_aff(build, aff); 465 1.1 mrg 466 1.1 mrg return aff; 467 1.1 mrg } 468 1.1 mrg 469 1.1 mrg /* Return the exact lower bound (or upper bound if "upper" is set) 470 1.1 mrg * of "domain" as a piecewise affine expression. 471 1.1 mrg * 472 1.1 mrg * If we are computing a lower bound (of a strided dimension), then 473 1.1 mrg * we need to make sure it is of the form 474 1.1 mrg * 475 1.1 mrg * f + s a 476 1.1 mrg * 477 1.1 mrg * where f is the offset and s is the stride. 478 1.1 mrg * We therefore need to include the stride constraint before computing 479 1.1 mrg * the minimum. 
480 1.1 mrg */ 481 1.1 mrg static __isl_give isl_pw_aff *exact_bound(__isl_keep isl_set *domain, 482 1.1 mrg __isl_keep isl_ast_build *build, int upper) 483 1.1 mrg { 484 1.1 mrg isl_set *stride; 485 1.1 mrg isl_map *it_map; 486 1.1 mrg isl_pw_aff *pa; 487 1.1 mrg isl_pw_multi_aff *pma; 488 1.1 mrg 489 1.1 mrg domain = isl_set_copy(domain); 490 1.1 mrg if (!upper) { 491 1.1 mrg stride = isl_ast_build_get_stride_constraint(build); 492 1.1 mrg domain = isl_set_intersect(domain, stride); 493 1.1 mrg } 494 1.1 mrg it_map = isl_ast_build_map_to_iterator(build, domain); 495 1.1 mrg if (upper) 496 1.1 mrg pma = isl_map_lexmax_pw_multi_aff(it_map); 497 1.1 mrg else 498 1.1 mrg pma = isl_map_lexmin_pw_multi_aff(it_map); 499 1.1 mrg pa = isl_pw_multi_aff_get_pw_aff(pma, 0); 500 1.1 mrg isl_pw_multi_aff_free(pma); 501 1.1 mrg pa = isl_ast_build_compute_gist_pw_aff(build, pa); 502 1.1 mrg pa = isl_pw_aff_coalesce(pa); 503 1.1 mrg 504 1.1 mrg return pa; 505 1.1 mrg } 506 1.1 mrg 507 1.1 mrg /* Callback for sorting the isl_pw_aff_list passed to reduce_list and 508 1.1 mrg * remove_redundant_lower_bounds. 509 1.1 mrg */ 510 1.1 mrg static int reduce_list_cmp(__isl_keep isl_pw_aff *a, __isl_keep isl_pw_aff *b, 511 1.1 mrg void *user) 512 1.1 mrg { 513 1.1 mrg return isl_pw_aff_plain_cmp(a, b); 514 1.1 mrg } 515 1.1 mrg 516 1.1 mrg /* Given a list of lower bounds "list", remove those that are redundant 517 1.1 mrg * with respect to the other bounds in "list" and the domain of "build". 518 1.1 mrg * 519 1.1 mrg * We first sort the bounds in the same way as they would be sorted 520 1.1 mrg * by set_for_node_expressions so that we can try and remove the last 521 1.1 mrg * bounds first. 522 1.1 mrg * 523 1.1 mrg * For a lower bound to be effective, there needs to be at least 524 1.1 mrg * one domain element for which it is larger than all other lower bounds. 
525 1.1 mrg * For each lower bound we therefore intersect the domain with 526 1.1 mrg * the conditions that it is larger than all other bounds and 527 1.1 mrg * check whether the result is empty. If so, the bound can be removed. 528 1.1 mrg */ 529 1.1 mrg static __isl_give isl_pw_aff_list *remove_redundant_lower_bounds( 530 1.1 mrg __isl_take isl_pw_aff_list *list, __isl_keep isl_ast_build *build) 531 1.1 mrg { 532 1.1 mrg int i, j; 533 1.1 mrg isl_size n; 534 1.1 mrg isl_set *domain; 535 1.1 mrg 536 1.1 mrg list = isl_pw_aff_list_sort(list, &reduce_list_cmp, NULL); 537 1.1 mrg 538 1.1 mrg n = isl_pw_aff_list_n_pw_aff(list); 539 1.1 mrg if (n < 0) 540 1.1 mrg return isl_pw_aff_list_free(list); 541 1.1 mrg if (n <= 1) 542 1.1 mrg return list; 543 1.1 mrg 544 1.1 mrg domain = isl_ast_build_get_domain(build); 545 1.1 mrg 546 1.1 mrg for (i = n - 1; i >= 0; --i) { 547 1.1 mrg isl_pw_aff *pa_i; 548 1.1 mrg isl_set *domain_i; 549 1.1 mrg int empty; 550 1.1 mrg 551 1.1 mrg domain_i = isl_set_copy(domain); 552 1.1 mrg pa_i = isl_pw_aff_list_get_pw_aff(list, i); 553 1.1 mrg 554 1.1 mrg for (j = 0; j < n; ++j) { 555 1.1 mrg isl_pw_aff *pa_j; 556 1.1 mrg isl_set *better; 557 1.1 mrg 558 1.1 mrg if (j == i) 559 1.1 mrg continue; 560 1.1 mrg 561 1.1 mrg pa_j = isl_pw_aff_list_get_pw_aff(list, j); 562 1.1 mrg better = isl_pw_aff_gt_set(isl_pw_aff_copy(pa_i), pa_j); 563 1.1 mrg domain_i = isl_set_intersect(domain_i, better); 564 1.1 mrg } 565 1.1 mrg 566 1.1 mrg empty = isl_set_is_empty(domain_i); 567 1.1 mrg 568 1.1 mrg isl_set_free(domain_i); 569 1.1 mrg isl_pw_aff_free(pa_i); 570 1.1 mrg 571 1.1 mrg if (empty < 0) 572 1.1 mrg goto error; 573 1.1 mrg if (!empty) 574 1.1 mrg continue; 575 1.1 mrg list = isl_pw_aff_list_drop(list, i, 1); 576 1.1 mrg n--; 577 1.1 mrg } 578 1.1 mrg 579 1.1 mrg isl_set_free(domain); 580 1.1 mrg 581 1.1 mrg return list; 582 1.1 mrg error: 583 1.1 mrg isl_set_free(domain); 584 1.1 mrg return isl_pw_aff_list_free(list); 585 1.1 mrg } 586 1.1 mrg 587 
1.1 mrg /* Extract a lower bound on dimension "pos" from each constraint 588 1.1 mrg * in "constraints" and return the list of lower bounds. 589 1.1 mrg * If "constraints" has zero elements, then we extract a lower bound 590 1.1 mrg * from "domain" instead. 591 1.1 mrg * 592 1.1 mrg * If the current dimension is strided, then the lower bound 593 1.1 mrg * is adjusted by lower_bound to match the stride information. 594 1.1 mrg * This modification may make one or more lower bounds redundant 595 1.1 mrg * with respect to the other lower bounds. We therefore check 596 1.1 mrg * for this condition and remove the redundant lower bounds. 597 1.1 mrg */ 598 1.1 mrg static __isl_give isl_pw_aff_list *lower_bounds( 599 1.1 mrg __isl_keep isl_constraint_list *constraints, int pos, 600 1.1 mrg __isl_keep isl_set *domain, __isl_keep isl_ast_build *build) 601 1.1 mrg { 602 1.1 mrg isl_ctx *ctx; 603 1.1 mrg isl_pw_aff_list *list; 604 1.1 mrg int i; 605 1.1 mrg isl_size n; 606 1.1 mrg 607 1.1 mrg if (!build) 608 1.1 mrg return NULL; 609 1.1 mrg 610 1.1 mrg n = isl_constraint_list_n_constraint(constraints); 611 1.1 mrg if (n < 0) 612 1.1 mrg return NULL; 613 1.1 mrg if (n == 0) { 614 1.1 mrg isl_pw_aff *pa; 615 1.1 mrg pa = exact_bound(domain, build, 0); 616 1.1 mrg return isl_pw_aff_list_from_pw_aff(pa); 617 1.1 mrg } 618 1.1 mrg 619 1.1 mrg ctx = isl_ast_build_get_ctx(build); 620 1.1 mrg list = isl_pw_aff_list_alloc(ctx,n); 621 1.1 mrg 622 1.1 mrg for (i = 0; i < n; ++i) { 623 1.1 mrg isl_aff *aff; 624 1.1 mrg isl_constraint *c; 625 1.1 mrg 626 1.1 mrg c = isl_constraint_list_get_constraint(constraints, i); 627 1.1 mrg aff = lower_bound(c, pos, build); 628 1.1 mrg isl_constraint_free(c); 629 1.1 mrg list = isl_pw_aff_list_add(list, isl_pw_aff_from_aff(aff)); 630 1.1 mrg } 631 1.1 mrg 632 1.1 mrg if (isl_ast_build_has_stride(build, pos)) 633 1.1 mrg list = remove_redundant_lower_bounds(list, build); 634 1.1 mrg 635 1.1 mrg return list; 636 1.1 mrg } 637 1.1 mrg 638 1.1 mrg /* 
Extract an upper bound on dimension "pos" from each constraint 639 1.1 mrg * in "constraints" and return the list of upper bounds. 640 1.1 mrg * If "constraints" has zero elements, then we extract an upper bound 641 1.1 mrg * from "domain" instead. 642 1.1 mrg */ 643 1.1 mrg static __isl_give isl_pw_aff_list *upper_bounds( 644 1.1 mrg __isl_keep isl_constraint_list *constraints, int pos, 645 1.1 mrg __isl_keep isl_set *domain, __isl_keep isl_ast_build *build) 646 1.1 mrg { 647 1.1 mrg isl_ctx *ctx; 648 1.1 mrg isl_pw_aff_list *list; 649 1.1 mrg int i; 650 1.1 mrg isl_size n; 651 1.1 mrg 652 1.1 mrg n = isl_constraint_list_n_constraint(constraints); 653 1.1 mrg if (n < 0) 654 1.1 mrg return NULL; 655 1.1 mrg if (n == 0) { 656 1.1 mrg isl_pw_aff *pa; 657 1.1 mrg pa = exact_bound(domain, build, 1); 658 1.1 mrg return isl_pw_aff_list_from_pw_aff(pa); 659 1.1 mrg } 660 1.1 mrg 661 1.1 mrg ctx = isl_ast_build_get_ctx(build); 662 1.1 mrg list = isl_pw_aff_list_alloc(ctx,n); 663 1.1 mrg 664 1.1 mrg for (i = 0; i < n; ++i) { 665 1.1 mrg isl_aff *aff; 666 1.1 mrg isl_constraint *c; 667 1.1 mrg 668 1.1 mrg c = isl_constraint_list_get_constraint(constraints, i); 669 1.1 mrg aff = isl_constraint_get_bound(c, isl_dim_set, pos); 670 1.1 mrg isl_constraint_free(c); 671 1.1 mrg aff = isl_aff_floor(aff); 672 1.1 mrg list = isl_pw_aff_list_add(list, isl_pw_aff_from_aff(aff)); 673 1.1 mrg } 674 1.1 mrg 675 1.1 mrg return list; 676 1.1 mrg } 677 1.1 mrg 678 1.1 mrg /* Return an isl_ast_expr that performs the reduction of type "type" 679 1.1 mrg * on AST expressions corresponding to the elements in "list". 680 1.1 mrg * 681 1.1 mrg * The list is assumed to contain at least one element. 682 1.1 mrg * If the list contains exactly one element, then the returned isl_ast_expr 683 1.1 mrg * simply computes that affine expression. 684 1.1 mrg * If the list contains more than one element, then we sort it 685 1.1 mrg * using a fairly arbitrary but hopefully reasonably stable order. 
686 1.1 mrg */ 687 1.1 mrg static __isl_give isl_ast_expr *reduce_list(enum isl_ast_expr_op_type type, 688 1.1 mrg __isl_keep isl_pw_aff_list *list, __isl_keep isl_ast_build *build) 689 1.1 mrg { 690 1.1 mrg int i; 691 1.1 mrg isl_size n; 692 1.1 mrg isl_ctx *ctx; 693 1.1 mrg isl_ast_expr *expr; 694 1.1 mrg 695 1.1 mrg n = isl_pw_aff_list_n_pw_aff(list); 696 1.1 mrg if (n < 0) 697 1.1 mrg return NULL; 698 1.1 mrg 699 1.1 mrg if (n == 1) 700 1.1 mrg return isl_ast_build_expr_from_pw_aff_internal(build, 701 1.1 mrg isl_pw_aff_list_get_pw_aff(list, 0)); 702 1.1 mrg 703 1.1 mrg ctx = isl_pw_aff_list_get_ctx(list); 704 1.1 mrg expr = isl_ast_expr_alloc_op(ctx, type, n); 705 1.1 mrg 706 1.1 mrg list = isl_pw_aff_list_copy(list); 707 1.1 mrg list = isl_pw_aff_list_sort(list, &reduce_list_cmp, NULL); 708 1.1 mrg if (!list) 709 1.1 mrg return isl_ast_expr_free(expr); 710 1.1 mrg 711 1.1 mrg for (i = 0; i < n; ++i) { 712 1.1 mrg isl_ast_expr *expr_i; 713 1.1 mrg 714 1.1 mrg expr_i = isl_ast_build_expr_from_pw_aff_internal(build, 715 1.1 mrg isl_pw_aff_list_get_pw_aff(list, i)); 716 1.1 mrg expr = isl_ast_expr_op_add_arg(expr, expr_i); 717 1.1 mrg } 718 1.1 mrg 719 1.1 mrg isl_pw_aff_list_free(list); 720 1.1 mrg return expr; 721 1.1 mrg } 722 1.1 mrg 723 1.1 mrg /* Add guards implied by the "generated constraints", 724 1.1 mrg * but not (necessarily) enforced by the generated AST to "guard". 725 1.1 mrg * In particular, if there is any stride constraints, 726 1.1 mrg * then add the guard implied by those constraints. 727 1.1 mrg * If we have generated a degenerate loop, then add the guard 728 1.1 mrg * implied by "bounds" on the outer dimensions, i.e., the guard 729 1.1 mrg * that ensures that the single value actually exists. 730 1.1 mrg * Since there may also be guards implied by a combination 731 1.1 mrg * of these constraints, we first combine them before 732 1.1 mrg * deriving the implied constraints. 
733 1.1 mrg */ 734 1.1 mrg static __isl_give isl_set *add_implied_guards(__isl_take isl_set *guard, 735 1.1 mrg int degenerate, __isl_keep isl_basic_set *bounds, 736 1.1 mrg __isl_keep isl_ast_build *build) 737 1.1 mrg { 738 1.1 mrg isl_size depth; 739 1.1 mrg isl_bool has_stride; 740 1.1 mrg isl_space *space; 741 1.1 mrg isl_set *dom, *set; 742 1.1 mrg 743 1.1 mrg depth = isl_ast_build_get_depth(build); 744 1.1 mrg has_stride = isl_ast_build_has_stride(build, depth); 745 1.1 mrg if (depth < 0 || has_stride < 0) 746 1.1 mrg return isl_set_free(guard); 747 1.1 mrg if (!has_stride && !degenerate) 748 1.1 mrg return guard; 749 1.1 mrg 750 1.1 mrg space = isl_basic_set_get_space(bounds); 751 1.1 mrg dom = isl_set_universe(space); 752 1.1 mrg 753 1.1 mrg if (degenerate) { 754 1.1 mrg bounds = isl_basic_set_copy(bounds); 755 1.1 mrg bounds = isl_basic_set_drop_constraints_not_involving_dims( 756 1.1 mrg bounds, isl_dim_set, depth, 1); 757 1.1 mrg set = isl_set_from_basic_set(bounds); 758 1.1 mrg dom = isl_set_intersect(dom, set); 759 1.1 mrg } 760 1.1 mrg 761 1.1 mrg if (has_stride) { 762 1.1 mrg set = isl_ast_build_get_stride_constraint(build); 763 1.1 mrg dom = isl_set_intersect(dom, set); 764 1.1 mrg } 765 1.1 mrg 766 1.1 mrg dom = isl_set_eliminate(dom, isl_dim_set, depth, 1); 767 1.1 mrg dom = isl_ast_build_compute_gist(build, dom); 768 1.1 mrg guard = isl_set_intersect(guard, dom); 769 1.1 mrg 770 1.1 mrg return guard; 771 1.1 mrg } 772 1.1 mrg 773 1.1 mrg /* Update "graft" based on "sub_build" for the degenerate case. 774 1.1 mrg * 775 1.1 mrg * "build" is the build in which graft->node was created 776 1.1 mrg * "sub_build" contains information about the current level itself, 777 1.1 mrg * including the single value attained. 778 1.1 mrg * 779 1.1 mrg * We set the initialization part of the for loop to the single 780 1.1 mrg * value attained by the current dimension. 
 * The increment and condition are not strictly needed as they are known
 * to be "1" and "iterator <= value" respectively.
 */
static __isl_give isl_ast_graft *refine_degenerate(
	__isl_take isl_ast_graft *graft, __isl_keep isl_ast_build *build,
	__isl_keep isl_ast_build *sub_build)
{
	isl_pw_aff *value;
	isl_ast_expr *init;

	if (!graft || !sub_build)
		return isl_ast_graft_free(graft);

	/* The single value attained by the current dimension. */
	value = isl_pw_aff_copy(sub_build->value);

	init = isl_ast_build_expr_from_pw_aff_internal(build, value);
	graft->node = isl_ast_node_for_set_init(graft->node, init);
	if (!graft->node)
		return isl_ast_graft_free(graft);

	return graft;
}

/* Return the intersection of constraints in "list" as a set.
 */
static __isl_give isl_set *intersect_constraints(
	__isl_keep isl_constraint_list *list)
{
	int i;
	isl_size n;
	isl_basic_set *bset;

	n = isl_constraint_list_n_constraint(list);
	if (n < 0)
		return NULL;
	if (n < 1)
		isl_die(isl_constraint_list_get_ctx(list), isl_error_internal,
			"expecting at least one constraint", return NULL);

	bset = isl_basic_set_from_constraint(
			isl_constraint_list_get_constraint(list, 0));
	for (i = 1; i < n; ++i) {
		isl_basic_set *bset_i;

		bset_i = isl_basic_set_from_constraint(
			    isl_constraint_list_get_constraint(list, i));
		bset = isl_basic_set_intersect(bset, bset_i);
	}

	return isl_set_from_basic_set(bset);
}

/* Compute the constraints on the outer dimensions
enforced by
 * graft->node and add those constraints to graft->enforced,
 * in case the upper bound is expressed as a set "upper".
 *
 * In particular, if l(...) is a lower bound in "lower", and
 *
 *	-a i + f(...) >= 0		or	a i <= f(...)
 *
 * is an upper bound constraint on the current dimension i,
 * then the for loop enforces the constraint
 *
 *	-a l(...) + f(...) >= 0		or	a l(...) <= f(...)
 *
 * We therefore simply take each lower bound in turn, plug it into
 * the upper bounds and compute the intersection over all lower bounds.
 *
 * If a lower bound is a rational expression, then
 * isl_basic_set_preimage_multi_aff will force this rational
 * expression to have only integer values.  However, the loop
 * itself does not enforce this integrality constraint.  We therefore
 * use the ceil of the lower bounds instead of the lower bounds themselves.
 * Other constraints will make sure that the for loop is only executed
 * when each of the lower bounds attains an integral value.
 * In particular, potentially rational values only occur in
 * lower_bound if the offset is a (seemingly) rational expression,
 * but then outer conditions will make sure that this rational expression
 * only attains integer values.
 */
static __isl_give isl_ast_graft *set_enforced_from_set(
	__isl_take isl_ast_graft *graft,
	__isl_keep isl_pw_aff_list *lower, int pos, __isl_keep isl_set *upper)
{
	isl_space *space;
	isl_basic_set *enforced;
	isl_pw_multi_aff *pma;
	int i;
	isl_size n;

	n = isl_pw_aff_list_n_pw_aff(lower);
	if (!graft || n < 0)
		return isl_ast_graft_free(graft);

	space = isl_set_get_space(upper);
	enforced = isl_basic_set_universe(isl_space_copy(space));

	space = isl_space_map_from_set(space);
	pma = isl_pw_multi_aff_identity(space);

	for (i = 0; i < n; ++i) {
		isl_pw_aff *pa;
		isl_set *enforced_i;
		isl_basic_set *hull;
		isl_pw_multi_aff *pma_i;

		/* Use the ceil of the lower bound to avoid forcing
		 * integrality on a possibly rational expression.
		 */
		pa = isl_pw_aff_list_get_pw_aff(lower, i);
		pa = isl_pw_aff_ceil(pa);
		pma_i = isl_pw_multi_aff_copy(pma);
		pma_i = isl_pw_multi_aff_set_pw_aff(pma_i, pos, pa);
		enforced_i = isl_set_copy(upper);
		enforced_i = isl_set_preimage_pw_multi_aff(enforced_i, pma_i);
		hull = isl_set_simple_hull(enforced_i);
		enforced = isl_basic_set_intersect(enforced, hull);
	}

	isl_pw_multi_aff_free(pma);

	graft = isl_ast_graft_enforce(graft, enforced);

	return graft;
}

/* Compute the constraints on the outer dimensions enforced by
 * graft->node and add those constraints to graft->enforced,
 * in case the upper bound is expressed as
 * a list of affine expressions "upper".
 *
 * The enforced condition is that each lower bound expression is less
 * than or equal to each upper bound expression.
 */
static __isl_give isl_ast_graft *set_enforced_from_list(
	__isl_take isl_ast_graft *graft,
	__isl_keep isl_pw_aff_list *lower, __isl_keep isl_pw_aff_list *upper)
{
	isl_set *cond;
	isl_basic_set *enforced;

	lower = isl_pw_aff_list_copy(lower);
	upper = isl_pw_aff_list_copy(upper);
	cond = isl_pw_aff_list_le_set(lower, upper);
	enforced = isl_set_simple_hull(cond);
	graft = isl_ast_graft_enforce(graft, enforced);

	return graft;
}

/* Does "aff" have a negative constant term?
 *
 * The "set" and "user" arguments are unused; they are only present
 * to match the callback signature of isl_pw_aff_every_piece.
 */
static isl_bool aff_constant_is_negative(__isl_keep isl_set *set,
	__isl_keep isl_aff *aff, void *user)
{
	isl_bool is_neg;
	isl_val *v;

	v = isl_aff_get_constant_val(aff);
	is_neg = isl_val_is_neg(v);
	isl_val_free(v);

	return is_neg;
}

/* Does "pa" have a negative constant term over its entire domain?
 */
static isl_bool pw_aff_constant_is_negative(__isl_keep isl_pw_aff *pa,
	void *user)
{
	return isl_pw_aff_every_piece(pa, &aff_constant_is_negative, NULL);
}

/* Does each element in "list" have a negative constant term?
 */
static int list_constant_is_negative(__isl_keep isl_pw_aff_list *list)
{
	return isl_pw_aff_list_every(list, &pw_aff_constant_is_negative, NULL);
}

/* Add 1 to each of the elements in "list", where each of these elements
 * is defined over the internal schedule space of "build".
 */
static __isl_give isl_pw_aff_list *list_add_one(
	__isl_take isl_pw_aff_list *list, __isl_keep isl_ast_build *build)
{
	int i;
	isl_size n;
	isl_space *space;
	isl_aff *aff;
	isl_pw_aff *one;

	n = isl_pw_aff_list_n_pw_aff(list);
	if (n < 0)
		return isl_pw_aff_list_free(list);

	/* Construct the constant "1" on the internal schedule space. */
	space = isl_ast_build_get_space(build, 1);
	aff = isl_aff_zero_on_domain(isl_local_space_from_space(space));
	aff = isl_aff_add_constant_si(aff, 1);
	one = isl_pw_aff_from_aff(aff);

	for (i = 0; i < n; ++i) {
		isl_pw_aff *pa;
		pa = isl_pw_aff_list_get_pw_aff(list, i);
		pa = isl_pw_aff_add(pa, isl_pw_aff_copy(one));
		list = isl_pw_aff_list_set_pw_aff(list, i, pa);
	}

	isl_pw_aff_free(one);

	return list;
}

/* Set the condition part of the for node graft->node in case
 * the upper bound is represented as a list of piecewise affine expressions.
 *
 * In particular, set the condition to
 *
 *	iterator <= min(list of upper bounds)
 *
 * If each of the upper bounds has a negative constant term, then
 * set the condition to
 *
 *	iterator < min(list of (upper bound + 1)s)
 *
 */
static __isl_give isl_ast_graft *set_for_cond_from_list(
	__isl_take isl_ast_graft *graft, __isl_keep isl_pw_aff_list *list,
	__isl_keep isl_ast_build *build)
{
	int neg;
	isl_ast_expr *bound, *iterator, *cond;
	enum isl_ast_expr_op_type type = isl_ast_expr_op_le;

	if (!graft || !list)
		return isl_ast_graft_free(graft);

	neg = list_constant_is_negative(list);
	if (neg < 0)
		return isl_ast_graft_free(graft);
	list = isl_pw_aff_list_copy(list);
	if (neg) {
		/* Use a strict inequality on the incremented bounds. */
		list = list_add_one(list, build);
		type = isl_ast_expr_op_lt;
	}

	bound = reduce_list(isl_ast_expr_op_min, list, build);
	iterator = isl_ast_expr_copy(graft->node->u.f.iterator);
	cond = isl_ast_expr_alloc_binary(type, iterator, bound);
	graft->node = isl_ast_node_for_set_cond(graft->node, cond);

	isl_pw_aff_list_free(list);
	if (!graft->node)
		return isl_ast_graft_free(graft);
	return graft;
}

/* Set the condition part of the for node graft->node in case
 * the upper bound is represented as a set.
 */
static __isl_give isl_ast_graft *set_for_cond_from_set(
	__isl_take isl_ast_graft *graft, __isl_keep isl_set *set,
	__isl_keep isl_ast_build *build)
{
	isl_ast_expr *cond;

	if (!graft)
		return NULL;

	cond = isl_ast_build_expr_from_set_internal(build, isl_set_copy(set));
	graft->node = isl_ast_node_for_set_cond(graft->node, cond);
	if (!graft->node)
		return isl_ast_graft_free(graft);
	return graft;
}

/* Construct an isl_ast_expr for the increment (i.e., stride) of
 * the current dimension.
 *
 * The increment is "1" unless a stride was detected at this depth.
 */
static __isl_give isl_ast_expr *for_inc(__isl_keep isl_ast_build *build)
{
	isl_size depth;
	isl_val *v;
	isl_ctx *ctx;

	depth = isl_ast_build_get_depth(build);
	if (depth < 0)
		return NULL;
	ctx = isl_ast_build_get_ctx(build);

	if (!isl_ast_build_has_stride(build, depth))
		return isl_ast_expr_alloc_int_si(ctx, 1);

	v = isl_ast_build_get_stride(build, depth);
	return isl_ast_expr_from_val(v);
}

/* Should we express the loop condition as
 *
 *	iterator <= min(list of upper bounds)
 *
 * or as a conjunction of constraints?
 *
 * The first is constructed from a list of upper bounds.
 * The second is constructed from a set.
 *
 * If there are no upper bounds in "constraints", then this could mean
 * that "domain" simply doesn't have an upper bound or that we didn't
 * pick any upper bound.
In the first case, we want to generate the
 * loop condition as a(n empty) conjunction of constraints.
 * In the second case, we will compute
 * a single upper bound from "domain" and so we use the list form.
 *
 * If there are upper bounds in "constraints",
 * then we use the list form iff the atomic_upper_bound option is set.
 */
static int use_upper_bound_list(isl_ctx *ctx, int n_upper,
	__isl_keep isl_set *domain, int depth)
{
	if (n_upper <= 0)
		return isl_set_dim_has_upper_bound(domain, isl_dim_set, depth);
	return isl_options_get_ast_build_atomic_upper_bound(ctx);
}

/* Fill in the expressions of the for node in graft->node.
 *
 * In particular,
 * - set the initialization part of the loop to the maximum of the lower bounds
 * - extract the increment from the stride of the current dimension
 * - construct the for condition either based on a list of upper bounds
 *   or on a set of upper bound constraints.
 */
static __isl_give isl_ast_graft *set_for_node_expressions(
	__isl_take isl_ast_graft *graft, __isl_keep isl_pw_aff_list *lower,
	int use_list, __isl_keep isl_pw_aff_list *upper_list,
	__isl_keep isl_set *upper_set, __isl_keep isl_ast_build *build)
{
	isl_ast_expr *init;

	if (!graft)
		return NULL;

	init = reduce_list(isl_ast_expr_op_max, lower, build);
	graft->node = isl_ast_node_for_set_init(graft->node, init);
	graft->node = isl_ast_node_for_set_inc(graft->node, for_inc(build));

	if (!graft->node)
		graft = isl_ast_graft_free(graft);

	/* Failure is propagated through the NULL "graft". */
	if (use_list)
		graft = set_for_cond_from_list(graft, upper_list, build);
	else
		graft = set_for_cond_from_set(graft, upper_set, build);

	return graft;
}

/* Update "graft" based on "bounds" and "domain" for the generic,
 * non-degenerate, case.
 *
 * "c_lower" and "c_upper" contain the lower and upper bounds
 * that the loop node should express.
 * "domain" is the subset of the intersection of the constraints
 * for which some code is executed.
 *
 * There may be zero lower bounds or zero upper bounds in "constraints"
 * in case the list of constraints was created
 * based on the atomic option or based on separation with explicit bounds.
 * In that case, we use "domain" to derive lower and/or upper bounds.
 *
 * We first compute a list of one or more lower bounds.
 *
 * Then we decide if we want to express the condition as
 *
 *	iterator <= min(list of upper bounds)
 *
 * or as a conjunction of constraints.
 *
 * The set of enforced constraints is then computed either based on
 * a list of upper bounds or on a set of upper bound constraints.
 * We do not compute any enforced constraints if we were forced
 * to compute a lower or upper bound using exact_bound.  The domains
 * of the resulting expressions may imply some bounds on outer dimensions
 * that we do not want to appear in the enforced constraints since
 * they are not actually enforced by the corresponding code.
 *
 * Finally, we fill in the expressions of the for node.
 */
static __isl_give isl_ast_graft *refine_generic_bounds(
	__isl_take isl_ast_graft *graft,
	__isl_take isl_constraint_list *c_lower,
	__isl_take isl_constraint_list *c_upper,
	__isl_keep isl_set *domain, __isl_keep isl_ast_build *build)
{
	isl_size depth;
	isl_ctx *ctx;
	isl_pw_aff_list *lower;
	int use_list;
	isl_set *upper_set = NULL;
	isl_pw_aff_list *upper_list = NULL;
	isl_size n_lower, n_upper;

	depth = isl_ast_build_get_depth(build);
	if (!graft || !c_lower || !c_upper || depth < 0)
		goto error;

	ctx = isl_ast_graft_get_ctx(graft);

	n_lower = isl_constraint_list_n_constraint(c_lower);
	n_upper = isl_constraint_list_n_constraint(c_upper);
	if (n_lower < 0 || n_upper < 0)
		goto error;

	use_list = use_upper_bound_list(ctx, n_upper, domain, depth);

	lower = lower_bounds(c_lower, depth, domain, build);

	if (use_list)
		upper_list = upper_bounds(c_upper, depth, domain, build);
	else if (n_upper > 0)
		upper_set = intersect_constraints(c_upper);
	else
		upper_set = isl_set_universe(isl_set_get_space(domain));

	/* Skip the enforced constraints if a lower or upper bound
	 * had to be computed using exact_bound.
	 */
	if (n_lower == 0 || n_upper == 0)
		;
	else if (use_list)
		graft = set_enforced_from_list(graft, lower, upper_list);
	else
		graft = set_enforced_from_set(graft, lower, depth, upper_set);

	graft = set_for_node_expressions(graft, lower, use_list, upper_list,
					upper_set, build);

	isl_pw_aff_list_free(lower);
	isl_pw_aff_list_free(upper_list);
	isl_set_free(upper_set);
	isl_constraint_list_free(c_lower);
	isl_constraint_list_free(c_upper);

	return graft;
error:
	isl_constraint_list_free(c_lower);
	isl_constraint_list_free(c_upper);
	return isl_ast_graft_free(graft);
}

/* Internal data structure used inside count_constraints to keep
 * track of the number of constraints that are independent of dimension "pos",
 * the lower bounds in "pos" and the upper bounds in "pos".
 */
struct isl_ast_count_constraints_data {
	int pos;

	int n_indep;
	int n_lower;
	int n_upper;
};

/* Increment data->n_indep, data->n_lower or data->n_upper depending
 * on whether "c" is independent of dimension data->pos,
 * a lower bound or an upper bound.
 */
static isl_stat count_constraints(__isl_take isl_constraint *c, void *user)
{
	struct isl_ast_count_constraints_data *data = user;

	if (isl_constraint_is_lower_bound(c, isl_dim_set, data->pos))
		data->n_lower++;
	else if (isl_constraint_is_upper_bound(c, isl_dim_set, data->pos))
		data->n_upper++;
	else
		data->n_indep++;

	isl_constraint_free(c);

	return isl_stat_ok;
}

/* Update "graft" based on "bounds" and "domain" for the generic,
 * non-degenerate, case.
 *
 * "list" represents the list of bounds that need to be encoded by
 * the for loop.  Only the constraints that involve the iterator
 * are relevant here.  The other constraints are taken care of by
 * the caller and are included in the generated constraints of "build".
 * "domain" is the subset of the intersection of the constraints
 * for which some code is executed.
 * "build" is the build in which graft->node was created.
 *
 * We separate lower bounds, upper bounds and constraints that
 * are independent of the loop iterator.
 *
 * The actual for loop bounds are generated in refine_generic_bounds.
 */
static __isl_give isl_ast_graft *refine_generic_split(
	__isl_take isl_ast_graft *graft, __isl_take isl_constraint_list *list,
	__isl_keep isl_set *domain, __isl_keep isl_ast_build *build)
{
	struct isl_ast_count_constraints_data data;
	isl_size depth;
	isl_constraint_list *lower;
	isl_constraint_list *upper;

	depth = isl_ast_build_get_depth(build);
	if (depth < 0)
		list = isl_constraint_list_free(list);
	if (!list)
		return isl_ast_graft_free(graft);

	data.pos = depth;

	/* Sort the constraints so that independent constraints come first,
	 * followed by the lower bounds and then the upper bounds.
	 */
	list = isl_constraint_list_sort(list, &cmp_constraint, &data.pos);
	if (!list)
		return isl_ast_graft_free(graft);

	data.n_indep = data.n_lower = data.n_upper = 0;
	if (isl_constraint_list_foreach(list, &count_constraints, &data) < 0) {
		isl_constraint_list_free(list);
		return isl_ast_graft_free(graft);
	}

	/* Split the sorted list into the lower and the upper bounds. */
	lower = isl_constraint_list_drop(list, 0, data.n_indep);
	upper = isl_constraint_list_copy(lower);
	lower = isl_constraint_list_drop(lower, data.n_lower, data.n_upper);
	upper = isl_constraint_list_drop(upper, 0, data.n_lower);

	return refine_generic_bounds(graft, lower, upper, domain, build);
}

/* Update "graft" based on "bounds" and "domain" for the generic,
 * non-degenerate, case.
 *
 * "bounds" represents the bounds that need to be encoded by
 * the for loop (or a guard around the for loop).
 * "domain" is the subset of "bounds" for which some code is executed.
 * "build" is the build in which graft->node was created.
 *
 * We break up "bounds" into a list of constraints and continue with
 * refine_generic_split.
 */
static __isl_give isl_ast_graft *refine_generic(
	__isl_take isl_ast_graft *graft,
	__isl_keep isl_basic_set *bounds, __isl_keep isl_set *domain,
	__isl_keep isl_ast_build *build)
{
	isl_constraint_list *list;

	if (!build || !graft)
		return isl_ast_graft_free(graft);

	list = isl_basic_set_get_constraint_list(bounds);

	graft = refine_generic_split(graft, list, domain, build);

	return graft;
}

/* Create a for node for the current level.
 *
 * Mark the for node degenerate if "degenerate" is set.
 */
static __isl_give isl_ast_node *create_for(__isl_keep isl_ast_build *build,
	int degenerate)
{
	isl_size depth;
	isl_id *id;
	isl_ast_node *node;

	depth = isl_ast_build_get_depth(build);
	if (depth < 0)
		return NULL;

	id = isl_ast_build_get_iterator_id(build, depth);
	node = isl_ast_node_alloc_for(id);
	if (degenerate)
		node = isl_ast_node_for_mark_degenerate(node);

	return node;
}

/* If the ast_build_exploit_nested_bounds option is set, then return
 * the constraints enforced by all elements in "list".
 * Otherwise, return the universe.
 */
static __isl_give isl_basic_set *extract_shared_enforced(
	__isl_keep isl_ast_graft_list *list, __isl_keep isl_ast_build *build)
{
	isl_ctx *ctx;
	isl_space *space;

	if (!list)
		return NULL;

	ctx = isl_ast_graft_list_get_ctx(list);
	if (isl_options_get_ast_build_exploit_nested_bounds(ctx))
		return isl_ast_graft_list_extract_shared_enforced(list, build);

	space = isl_ast_build_get_space(build, 1);
	return isl_basic_set_universe(space);
}

/* Return the pending constraints of "build" that are not already taken
 * care of (by a combination of "enforced" and the generated constraints
 * of "build").
 */
static __isl_give isl_set *extract_pending(__isl_keep isl_ast_build *build,
	__isl_keep isl_basic_set *enforced)
{
	isl_set *guard, *context;

	guard = isl_ast_build_get_pending(build);
	context = isl_set_from_basic_set(isl_basic_set_copy(enforced));
	context = isl_set_intersect(context,
					isl_ast_build_get_generated(build));
	return isl_set_gist(guard, context);
}

/* Create an AST node for the current dimension based on
 * the schedule domain "bounds" and return the node encapsulated
 * in an isl_ast_graft.
 *
 * "executed" is the current inverse schedule, taking into account
 * the bounds in "bounds"
 * "domain" is the domain of "executed", with inner dimensions projected out.
 * It may be a strict subset of "bounds" in case "bounds" was created
 * based on the atomic option or based on separation with explicit bounds.
 *
 * "domain" may satisfy additional equalities that result
 * from intersecting "executed" with "bounds" in add_node.
 * It may also satisfy some global constraints that were dropped out because
 * we performed separation with explicit bounds.
 * The very first step is then to copy these constraints to "bounds".
 *
 * Since we may be calling before_each_for and after_each_for
 * callbacks, we record the current inverse schedule in the build.
 *
 * We consider three builds,
 * "build" is the one in which the current level is created,
 * "body_build" is the build in which the next level is created,
 * "sub_build" is essentially the same as "body_build", except that
 * the depth has not been increased yet.
 *
 * "build" already contains information (in strides and offsets)
 * about the strides at the current level, but this information is not
 * reflected in the build->domain.
 * We first add this information and the "bounds" to the sub_build->domain.
 * isl_ast_build_set_loop_bounds adds the stride information and
 * checks whether the current dimension attains
 * only a single value and whether this single value can be represented using
 * a single affine expression.
 * In the first case, the current level is considered "degenerate".
 * In the second, sub-case, the current level is considered "eliminated".
 * Eliminated levels don't need to be reflected in the AST since we can
 * simply plug in the affine expression.  For degenerate, but non-eliminated,
 * levels, we do introduce a for node, but mark it as degenerate so that
 * it can be printed as an assignment of the single value to the loop
 * "iterator".
 *
 * If the current level is eliminated, we explicitly plug in the value
 * for the current level found by isl_ast_build_set_loop_bounds in the
 * inverse schedule.  This ensures that if we are working on a slice
 * of the domain based on information available in the inverse schedule
 * and the build domain, that then this information is also reflected
 * in the inverse schedule.  This operation also eliminates the current
 * dimension from the inverse schedule making sure no inner dimensions depend
 * on the current dimension.  Otherwise, we create a for node, marking
 * it degenerate if appropriate.  The initial for node is still incomplete
 * and will be completed in either refine_degenerate or refine_generic.
 *
 * We then generate a sequence of grafts for the next level,
 * create a surrounding graft for the current level and insert
 * the for node we created (if the current level is not eliminated).
 * Before creating a graft for the current level, we first extract
 * hoistable constraints from the child guards and combine them
 * with the pending constraints in the build.  These constraints
 * are used to simplify the child guards and then added to the guard
 * of the current graft to ensure that they will be generated.
 * If the hoisted guard is a disjunction, then we use it directly
 * to gist the guards on the children before intersecting it with the
 * pending constraints.  We do so because this disjunction is typically
 * identical to the guards on the children such that these guards
 * can be effectively removed completely.  After the intersection,
 * the gist operation would have a harder time figuring this out.
 *
 * Finally, we set the bounds of the for loop in either
 * refine_degenerate or refine_generic.
 * We do so in a context where the pending constraints of the build
 * have been replaced by the guard of the current graft.
 */
static __isl_give isl_ast_graft *create_node_scaled(
	__isl_take isl_union_map *executed,
	__isl_take isl_basic_set *bounds, __isl_take isl_set *domain,
	__isl_take isl_ast_build *build)
{
	isl_size depth;
	int degenerate;
	isl_bool eliminated;
	isl_size n;
	isl_basic_set *hull;
	isl_basic_set *enforced;
	isl_set *guard, *hoisted;
	isl_ast_node *node = NULL;
	isl_ast_graft *graft;
	isl_ast_graft_list *children;
	isl_ast_build *sub_build;
	isl_ast_build *body_build;

	/* Copy the extra constraints satisfied by "domain" to "bounds". */
	domain = isl_ast_build_eliminate_divs(build, domain);
	domain = isl_set_detect_equalities(domain);
	hull = isl_set_unshifted_simple_hull(isl_set_copy(domain));
	bounds = isl_basic_set_intersect(bounds, hull);
	build = isl_ast_build_set_executed(build, isl_union_map_copy(executed));

	depth = isl_ast_build_get_depth(build);
	if (depth < 0)
		build = isl_ast_build_free(build);
	sub_build = isl_ast_build_copy(build);
	bounds = isl_basic_set_remove_redundancies(bounds);
	bounds = isl_ast_build_specialize_basic_set(sub_build, bounds);
	sub_build = isl_ast_build_set_loop_bounds(sub_build,
						isl_basic_set_copy(bounds));
	degenerate = isl_ast_build_has_value(sub_build);
	eliminated = isl_ast_build_has_affine_value(sub_build, depth);
	if (degenerate < 0 || eliminated < 0)
		executed = isl_union_map_free(executed);
	if (!degenerate)
		bounds = isl_ast_build_compute_gist_basic_set(build, bounds);
	sub_build = isl_ast_build_set_pending_generated(sub_build,
						isl_basic_set_copy(bounds));
	if (eliminated)
		executed = plug_in_values(executed, sub_build);
	else
		node = create_for(build, degenerate);

	body_build = isl_ast_build_copy(sub_build);
	body_build = isl_ast_build_increase_depth(body_build);
	if (!eliminated)
		node = before_each_for(node, body_build);
	children = generate_next_level(executed,
				    isl_ast_build_copy(body_build));

	enforced = extract_shared_enforced(children, build);
	guard = extract_pending(sub_build, enforced);
	hoisted = isl_ast_graft_list_extract_hoistable_guard(children, build);
	/* Only gist the child guards with a disjunctive hoisted guard. */
	n = isl_set_n_basic_set(hoisted);
	if (n < 0)
		children = isl_ast_graft_list_free(children);
	if (n > 1)
		children = isl_ast_graft_list_gist_guards(children,
						    isl_set_copy(hoisted));
	guard = isl_set_intersect(guard, hoisted);
	if (!eliminated)
		guard = add_implied_guards(guard, degenerate, bounds, build);

	graft = isl_ast_graft_alloc_from_children(children,
			    isl_set_copy(guard), enforced, build, sub_build);

	if (!eliminated) {
		isl_ast_build *for_build;

		graft = isl_ast_graft_insert_for(graft, node);
		for_build = isl_ast_build_copy(build);
		for_build = isl_ast_build_replace_pending_by_guard(for_build,
							isl_set_copy(guard));
		if (degenerate)
			graft = refine_degenerate(graft, for_build, sub_build);
		else
			graft = refine_generic(graft, bounds,
					domain, for_build);
		isl_ast_build_free(for_build);
	}
	isl_set_free(guard);
	if (!eliminated)
		graft = after_each_for(graft, body_build);

	isl_ast_build_free(body_build);
	isl_ast_build_free(sub_build);
	isl_ast_build_free(build);
	isl_basic_set_free(bounds);
	isl_set_free(domain);

	return graft;
}

/* Internal data structure for checking if all constraints involving
 * the input dimension "depth" are such that the other coefficients
 * are multiples of "m", reducing "m" if they are not.
 * If "m" is reduced all the way down to "1", then the check has failed
 * and we break out of the iteration.
 */
struct isl_check_scaled_data {
	int depth;
	isl_val *m;
};

/* If constraint "c" involves the input dimension data->depth,
 * then make sure that all the other coefficients are multiples of data->m,
 * reducing data->m if needed.
 * Break out of the iteration if data->m has become equal to "1".
1579 1.1 mrg */ 1580 1.1 mrg static isl_stat constraint_check_scaled(__isl_take isl_constraint *c, 1581 1.1 mrg void *user) 1582 1.1 mrg { 1583 1.1 mrg struct isl_check_scaled_data *data = user; 1584 1.1 mrg int i, j; 1585 1.1 mrg isl_size n; 1586 1.1 mrg enum isl_dim_type t[] = { isl_dim_param, isl_dim_in, isl_dim_out, 1587 1.1 mrg isl_dim_div }; 1588 1.1 mrg 1589 1.1 mrg if (!isl_constraint_involves_dims(c, isl_dim_in, data->depth, 1)) { 1590 1.1 mrg isl_constraint_free(c); 1591 1.1 mrg return isl_stat_ok; 1592 1.1 mrg } 1593 1.1 mrg 1594 1.1 mrg for (i = 0; i < 4; ++i) { 1595 1.1 mrg n = isl_constraint_dim(c, t[i]); 1596 1.1 mrg if (n < 0) 1597 1.1 mrg break; 1598 1.1 mrg for (j = 0; j < n; ++j) { 1599 1.1 mrg isl_val *d; 1600 1.1 mrg 1601 1.1 mrg if (t[i] == isl_dim_in && j == data->depth) 1602 1.1 mrg continue; 1603 1.1 mrg if (!isl_constraint_involves_dims(c, t[i], j, 1)) 1604 1.1 mrg continue; 1605 1.1 mrg d = isl_constraint_get_coefficient_val(c, t[i], j); 1606 1.1 mrg data->m = isl_val_gcd(data->m, d); 1607 1.1 mrg if (isl_val_is_one(data->m)) 1608 1.1 mrg break; 1609 1.1 mrg } 1610 1.1 mrg if (j < n) 1611 1.1 mrg break; 1612 1.1 mrg } 1613 1.1 mrg 1614 1.1 mrg isl_constraint_free(c); 1615 1.1 mrg 1616 1.1 mrg return i < 4 ? isl_stat_error : isl_stat_ok; 1617 1.1 mrg } 1618 1.1 mrg 1619 1.1 mrg /* For each constraint of "bmap" that involves the input dimension data->depth, 1620 1.1 mrg * make sure that all the other coefficients are multiples of data->m, 1621 1.1 mrg * reducing data->m if needed. 1622 1.1 mrg * Break out of the iteration if data->m has become equal to "1". 
1623 1.1 mrg */ 1624 1.1 mrg static isl_stat basic_map_check_scaled(__isl_take isl_basic_map *bmap, 1625 1.1 mrg void *user) 1626 1.1 mrg { 1627 1.1 mrg isl_stat r; 1628 1.1 mrg 1629 1.1 mrg r = isl_basic_map_foreach_constraint(bmap, 1630 1.1 mrg &constraint_check_scaled, user); 1631 1.1 mrg isl_basic_map_free(bmap); 1632 1.1 mrg 1633 1.1 mrg return r; 1634 1.1 mrg } 1635 1.1 mrg 1636 1.1 mrg /* For each constraint of "map" that involves the input dimension data->depth, 1637 1.1 mrg * make sure that all the other coefficients are multiples of data->m, 1638 1.1 mrg * reducing data->m if needed. 1639 1.1 mrg * Break out of the iteration if data->m has become equal to "1". 1640 1.1 mrg */ 1641 1.1 mrg static isl_stat map_check_scaled(__isl_take isl_map *map, void *user) 1642 1.1 mrg { 1643 1.1 mrg isl_stat r; 1644 1.1 mrg 1645 1.1 mrg r = isl_map_foreach_basic_map(map, &basic_map_check_scaled, user); 1646 1.1 mrg isl_map_free(map); 1647 1.1 mrg 1648 1.1 mrg return r; 1649 1.1 mrg } 1650 1.1 mrg 1651 1.1 mrg /* Create an AST node for the current dimension based on 1652 1.1 mrg * the schedule domain "bounds" and return the node encapsulated 1653 1.1 mrg * in an isl_ast_graft. 1654 1.1 mrg * 1655 1.1 mrg * "executed" is the current inverse schedule, taking into account 1656 1.1 mrg * the bounds in "bounds" 1657 1.1 mrg * "domain" is the domain of "executed", with inner dimensions projected out. 1658 1.1 mrg * 1659 1.1 mrg * 1660 1.1 mrg * Before moving on to the actual AST node construction in create_node_scaled, 1661 1.1 mrg * we first check if the current dimension is strided and if we can scale 1662 1.1 mrg * down this stride. Note that we only do this if the ast_build_scale_strides 1663 1.1 mrg * option is set. 1664 1.1 mrg * 1665 1.1 mrg * In particular, let the current dimension take on values 1666 1.1 mrg * 1667 1.1 mrg * f + s a 1668 1.1 mrg * 1669 1.1 mrg * with a an integer. 
We check if we can find an integer m that (obviously) 1670 1.1 mrg * divides both f and s. 1671 1.1 mrg * 1672 1.1 mrg * If so, we check if the current dimension only appears in constraints 1673 1.1 mrg * where the coefficients of the other variables are multiples of m. 1674 1.1 mrg * We perform this extra check to avoid the risk of introducing 1675 1.1 mrg * divisions by scaling down the current dimension. 1676 1.1 mrg * 1677 1.1 mrg * If so, we scale the current dimension down by a factor of m. 1678 1.1 mrg * That is, we plug in 1679 1.1 mrg * 1680 1.1 mrg * i = m i' (1) 1681 1.1 mrg * 1682 1.1 mrg * Note that in principle we could always scale down strided loops 1683 1.1 mrg * by plugging in 1684 1.1 mrg * 1685 1.1 mrg * i = f + s i' 1686 1.1 mrg * 1687 1.1 mrg * but this may result in i' taking on larger values than the original i, 1688 1.1 mrg * due to the shift by "f". 1689 1.1 mrg * By constrast, the scaling in (1) can only reduce the (absolute) value "i". 1690 1.1 mrg */ 1691 1.1 mrg static __isl_give isl_ast_graft *create_node(__isl_take isl_union_map *executed, 1692 1.1 mrg __isl_take isl_basic_set *bounds, __isl_take isl_set *domain, 1693 1.1 mrg __isl_take isl_ast_build *build) 1694 1.1 mrg { 1695 1.1 mrg struct isl_check_scaled_data data; 1696 1.1 mrg isl_size depth; 1697 1.1 mrg isl_ctx *ctx; 1698 1.1 mrg isl_aff *offset; 1699 1.1 mrg isl_val *d; 1700 1.1 mrg 1701 1.1 mrg ctx = isl_ast_build_get_ctx(build); 1702 1.1 mrg if (!isl_options_get_ast_build_scale_strides(ctx)) 1703 1.1 mrg return create_node_scaled(executed, bounds, domain, build); 1704 1.1 mrg 1705 1.1 mrg depth = isl_ast_build_get_depth(build); 1706 1.1 mrg if (depth < 0) 1707 1.1 mrg build = isl_ast_build_free(build); 1708 1.1 mrg data.depth = depth; 1709 1.1 mrg if (!isl_ast_build_has_stride(build, data.depth)) 1710 1.1 mrg return create_node_scaled(executed, bounds, domain, build); 1711 1.1 mrg 1712 1.1 mrg offset = isl_ast_build_get_offset(build, data.depth); 1713 1.1 mrg data.m = 
isl_ast_build_get_stride(build, data.depth); 1714 1.1 mrg if (!data.m) 1715 1.1 mrg offset = isl_aff_free(offset); 1716 1.1 mrg offset = isl_aff_scale_down_val(offset, isl_val_copy(data.m)); 1717 1.1 mrg d = isl_aff_get_denominator_val(offset); 1718 1.1 mrg if (!d) 1719 1.1 mrg executed = isl_union_map_free(executed); 1720 1.1 mrg 1721 1.1 mrg if (executed && isl_val_is_divisible_by(data.m, d)) 1722 1.1 mrg data.m = isl_val_div(data.m, d); 1723 1.1 mrg else { 1724 1.1 mrg data.m = isl_val_set_si(data.m, 1); 1725 1.1 mrg isl_val_free(d); 1726 1.1 mrg } 1727 1.1 mrg 1728 1.1 mrg if (!isl_val_is_one(data.m)) { 1729 1.1 mrg if (isl_union_map_foreach_map(executed, &map_check_scaled, 1730 1.1 mrg &data) < 0 && 1731 1.1 mrg !isl_val_is_one(data.m)) 1732 1.1 mrg executed = isl_union_map_free(executed); 1733 1.1 mrg } 1734 1.1 mrg 1735 1.1 mrg if (!isl_val_is_one(data.m)) { 1736 1.1 mrg isl_space *space; 1737 1.1 mrg isl_multi_aff *ma; 1738 1.1 mrg isl_aff *aff; 1739 1.1 mrg isl_map *map; 1740 1.1 mrg isl_union_map *umap; 1741 1.1 mrg 1742 1.1 mrg space = isl_ast_build_get_space(build, 1); 1743 1.1 mrg space = isl_space_map_from_set(space); 1744 1.1 mrg ma = isl_multi_aff_identity(space); 1745 1.1 mrg aff = isl_multi_aff_get_aff(ma, data.depth); 1746 1.1 mrg aff = isl_aff_scale_val(aff, isl_val_copy(data.m)); 1747 1.1 mrg ma = isl_multi_aff_set_aff(ma, data.depth, aff); 1748 1.1 mrg 1749 1.1 mrg bounds = isl_basic_set_preimage_multi_aff(bounds, 1750 1.1 mrg isl_multi_aff_copy(ma)); 1751 1.1 mrg domain = isl_set_preimage_multi_aff(domain, 1752 1.1 mrg isl_multi_aff_copy(ma)); 1753 1.1 mrg map = isl_map_reverse(isl_map_from_multi_aff(ma)); 1754 1.1 mrg umap = isl_union_map_from_map(map); 1755 1.1 mrg executed = isl_union_map_apply_domain(executed, 1756 1.1 mrg isl_union_map_copy(umap)); 1757 1.1 mrg build = isl_ast_build_scale_down(build, isl_val_copy(data.m), 1758 1.1 mrg umap); 1759 1.1 mrg } 1760 1.1 mrg isl_aff_free(offset); 1761 1.1 mrg isl_val_free(data.m); 1762 1.1 mrg 
1763 1.1 mrg return create_node_scaled(executed, bounds, domain, build); 1764 1.1 mrg } 1765 1.1 mrg 1766 1.1 mrg /* Add the basic set to the list that "user" points to. 1767 1.1 mrg */ 1768 1.1 mrg static isl_stat collect_basic_set(__isl_take isl_basic_set *bset, void *user) 1769 1.1 mrg { 1770 1.1 mrg isl_basic_set_list **list = user; 1771 1.1 mrg 1772 1.1 mrg *list = isl_basic_set_list_add(*list, bset); 1773 1.1 mrg 1774 1.1 mrg return isl_stat_ok; 1775 1.1 mrg } 1776 1.1 mrg 1777 1.1 mrg /* Extract the basic sets of "set" and collect them in an isl_basic_set_list. 1778 1.1 mrg */ 1779 1.1 mrg static __isl_give isl_basic_set_list *isl_basic_set_list_from_set( 1780 1.1 mrg __isl_take isl_set *set) 1781 1.1 mrg { 1782 1.1 mrg isl_size n; 1783 1.1 mrg isl_ctx *ctx; 1784 1.1 mrg isl_basic_set_list *list; 1785 1.1 mrg 1786 1.1 mrg n = isl_set_n_basic_set(set); 1787 1.1 mrg if (n < 0) 1788 1.1 mrg set = isl_set_free(set); 1789 1.1 mrg if (!set) 1790 1.1 mrg return NULL; 1791 1.1 mrg 1792 1.1 mrg ctx = isl_set_get_ctx(set); 1793 1.1 mrg 1794 1.1 mrg list = isl_basic_set_list_alloc(ctx, n); 1795 1.1 mrg if (isl_set_foreach_basic_set(set, &collect_basic_set, &list) < 0) 1796 1.1 mrg list = isl_basic_set_list_free(list); 1797 1.1 mrg 1798 1.1 mrg isl_set_free(set); 1799 1.1 mrg return list; 1800 1.1 mrg } 1801 1.1 mrg 1802 1.1 mrg /* Generate code for the schedule domain "bounds" 1803 1.1 mrg * and add the result to "list". 1804 1.1 mrg * 1805 1.1 mrg * We mainly detect strides here and check if the bounds do not 1806 1.1 mrg * conflict with the current build domain 1807 1.1 mrg * and then pass over control to create_node. 1808 1.1 mrg * 1809 1.1 mrg * "bounds" reflects the bounds on the current dimension and possibly 1810 1.1 mrg * some extra conditions on outer dimensions. 1811 1.1 mrg * It does not, however, include any divs involving the current dimension, 1812 1.1 mrg * so it does not capture any stride constraints. 
1813 1.1 mrg * We therefore need to compute that part of the schedule domain that 1814 1.1 mrg * intersects with "bounds" and derive the strides from the result. 1815 1.1 mrg */ 1816 1.1 mrg static __isl_give isl_ast_graft_list *add_node( 1817 1.1 mrg __isl_take isl_ast_graft_list *list, __isl_take isl_union_map *executed, 1818 1.1 mrg __isl_take isl_basic_set *bounds, __isl_take isl_ast_build *build) 1819 1.1 mrg { 1820 1.1 mrg isl_ast_graft *graft; 1821 1.1 mrg isl_set *domain = NULL; 1822 1.1 mrg isl_union_set *uset; 1823 1.1 mrg int empty, disjoint; 1824 1.1 mrg 1825 1.1 mrg uset = isl_union_set_from_basic_set(isl_basic_set_copy(bounds)); 1826 1.1 mrg executed = isl_union_map_intersect_domain(executed, uset); 1827 1.1 mrg empty = isl_union_map_is_empty(executed); 1828 1.1 mrg if (empty < 0) 1829 1.1 mrg goto error; 1830 1.1 mrg if (empty) 1831 1.1 mrg goto done; 1832 1.1 mrg 1833 1.1 mrg uset = isl_union_map_domain(isl_union_map_copy(executed)); 1834 1.1 mrg domain = isl_set_from_union_set(uset); 1835 1.1 mrg domain = isl_ast_build_specialize(build, domain); 1836 1.1 mrg 1837 1.1 mrg domain = isl_set_compute_divs(domain); 1838 1.1 mrg domain = isl_ast_build_eliminate_inner(build, domain); 1839 1.1 mrg disjoint = isl_set_is_disjoint(domain, build->domain); 1840 1.1 mrg if (disjoint < 0) 1841 1.1 mrg goto error; 1842 1.1 mrg if (disjoint) 1843 1.1 mrg goto done; 1844 1.1 mrg 1845 1.1 mrg build = isl_ast_build_detect_strides(build, isl_set_copy(domain)); 1846 1.1 mrg 1847 1.1 mrg graft = create_node(executed, bounds, domain, 1848 1.1 mrg isl_ast_build_copy(build)); 1849 1.1 mrg list = isl_ast_graft_list_add(list, graft); 1850 1.1 mrg isl_ast_build_free(build); 1851 1.1 mrg return list; 1852 1.1 mrg error: 1853 1.1 mrg list = isl_ast_graft_list_free(list); 1854 1.1 mrg done: 1855 1.1 mrg isl_set_free(domain); 1856 1.1 mrg isl_basic_set_free(bounds); 1857 1.1 mrg isl_union_map_free(executed); 1858 1.1 mrg isl_ast_build_free(build); 1859 1.1 mrg return list; 1860 1.1 
mrg } 1861 1.1 mrg 1862 1.1 mrg /* Does any element of i follow or coincide with any element of j 1863 1.1 mrg * at the current depth for equal values of the outer dimensions? 1864 1.1 mrg */ 1865 1.1 mrg static isl_bool domain_follows_at_depth(__isl_keep isl_basic_set *i, 1866 1.1 mrg __isl_keep isl_basic_set *j, void *user) 1867 1.1 mrg { 1868 1.1 mrg int depth = *(int *) user; 1869 1.1 mrg isl_basic_map *test; 1870 1.1 mrg isl_bool empty; 1871 1.1 mrg int l; 1872 1.1 mrg 1873 1.1 mrg test = isl_basic_map_from_domain_and_range(isl_basic_set_copy(i), 1874 1.1 mrg isl_basic_set_copy(j)); 1875 1.1 mrg for (l = 0; l < depth; ++l) 1876 1.1 mrg test = isl_basic_map_equate(test, isl_dim_in, l, 1877 1.1 mrg isl_dim_out, l); 1878 1.1 mrg test = isl_basic_map_order_ge(test, isl_dim_in, depth, 1879 1.1 mrg isl_dim_out, depth); 1880 1.1 mrg empty = isl_basic_map_is_empty(test); 1881 1.1 mrg isl_basic_map_free(test); 1882 1.1 mrg 1883 1.1 mrg return isl_bool_not(empty); 1884 1.1 mrg } 1885 1.1 mrg 1886 1.1 mrg /* Split up each element of "list" into a part that is related to "bset" 1887 1.1 mrg * according to "gt" and a part that is not. 1888 1.1 mrg * Return a list that consist of "bset" and all the pieces. 
1889 1.1 mrg */ 1890 1.1 mrg static __isl_give isl_basic_set_list *add_split_on( 1891 1.1 mrg __isl_take isl_basic_set_list *list, __isl_take isl_basic_set *bset, 1892 1.1 mrg __isl_keep isl_basic_map *gt) 1893 1.1 mrg { 1894 1.1 mrg int i; 1895 1.1 mrg isl_size n; 1896 1.1 mrg isl_basic_set_list *res; 1897 1.1 mrg 1898 1.1 mrg n = isl_basic_set_list_n_basic_set(list); 1899 1.1 mrg if (n < 0) 1900 1.1 mrg bset = isl_basic_set_free(bset); 1901 1.1 mrg 1902 1.1 mrg gt = isl_basic_map_copy(gt); 1903 1.1 mrg gt = isl_basic_map_intersect_domain(gt, isl_basic_set_copy(bset)); 1904 1.1 mrg res = isl_basic_set_list_from_basic_set(bset); 1905 1.1 mrg for (i = 0; res && i < n; ++i) { 1906 1.1 mrg isl_basic_set *bset; 1907 1.1 mrg isl_set *set1, *set2; 1908 1.1 mrg isl_basic_map *bmap; 1909 1.1 mrg int empty; 1910 1.1 mrg 1911 1.1 mrg bset = isl_basic_set_list_get_basic_set(list, i); 1912 1.1 mrg bmap = isl_basic_map_copy(gt); 1913 1.1 mrg bmap = isl_basic_map_intersect_range(bmap, bset); 1914 1.1 mrg bset = isl_basic_map_range(bmap); 1915 1.1 mrg empty = isl_basic_set_is_empty(bset); 1916 1.1 mrg if (empty < 0) 1917 1.1 mrg res = isl_basic_set_list_free(res); 1918 1.1 mrg if (empty) { 1919 1.1 mrg isl_basic_set_free(bset); 1920 1.1 mrg bset = isl_basic_set_list_get_basic_set(list, i); 1921 1.1 mrg res = isl_basic_set_list_add(res, bset); 1922 1.1 mrg continue; 1923 1.1 mrg } 1924 1.1 mrg 1925 1.1 mrg res = isl_basic_set_list_add(res, isl_basic_set_copy(bset)); 1926 1.1 mrg set1 = isl_set_from_basic_set(bset); 1927 1.1 mrg bset = isl_basic_set_list_get_basic_set(list, i); 1928 1.1 mrg set2 = isl_set_from_basic_set(bset); 1929 1.1 mrg set1 = isl_set_subtract(set2, set1); 1930 1.1 mrg set1 = isl_set_make_disjoint(set1); 1931 1.1 mrg 1932 1.1 mrg res = isl_basic_set_list_concat(res, 1933 1.1 mrg isl_basic_set_list_from_set(set1)); 1934 1.1 mrg } 1935 1.1 mrg isl_basic_map_free(gt); 1936 1.1 mrg isl_basic_set_list_free(list); 1937 1.1 mrg return res; 1938 1.1 mrg } 1939 1.1 mrg 
1940 1.1 mrg static __isl_give isl_ast_graft_list *generate_sorted_domains( 1941 1.1 mrg __isl_keep isl_basic_set_list *domain_list, 1942 1.1 mrg __isl_keep isl_union_map *executed, 1943 1.1 mrg __isl_keep isl_ast_build *build); 1944 1.1 mrg 1945 1.1 mrg /* Internal data structure for add_nodes. 1946 1.1 mrg * 1947 1.1 mrg * "executed" and "build" are extra arguments to be passed to add_node. 1948 1.1 mrg * "list" collects the results. 1949 1.1 mrg */ 1950 1.1 mrg struct isl_add_nodes_data { 1951 1.1 mrg isl_union_map *executed; 1952 1.1 mrg isl_ast_build *build; 1953 1.1 mrg 1954 1.1 mrg isl_ast_graft_list *list; 1955 1.1 mrg }; 1956 1.1 mrg 1957 1.1 mrg /* Generate code for the schedule domains in "scc" 1958 1.1 mrg * and add the results to "list". 1959 1.1 mrg * 1960 1.1 mrg * The domains in "scc" form a strongly connected component in the ordering. 1961 1.1 mrg * If the number of domains in "scc" is larger than 1, then this means 1962 1.1 mrg * that we cannot determine a valid ordering for the domains in the component. 1963 1.1 mrg * This should be fairly rare because the individual domains 1964 1.1 mrg * have been made disjoint first. 1965 1.1 mrg * The problem is that the domains may be integrally disjoint but not 1966 1.1 mrg * rationally disjoint. For example, we may have domains 1967 1.1 mrg * 1968 1.1 mrg * { [i,i] : 0 <= i <= 1 } and { [i,1-i] : 0 <= i <= 1 } 1969 1.1 mrg * 1970 1.1 mrg * These two domains have an empty intersection, but their rational 1971 1.1 mrg * relaxations do intersect. It is impossible to order these domains 1972 1.1 mrg * in the second dimension because the first should be ordered before 1973 1.1 mrg * the second for outer dimension equal to 0, while it should be ordered 1974 1.1 mrg * after for outer dimension equal to 1. 1975 1.1 mrg * 1976 1.1 mrg * This may happen in particular in case of unrolling since the domain 1977 1.1 mrg * of each slice is replaced by its simple hull. 
1978 1.1 mrg * 1979 1.1 mrg * For each basic set i in "scc" and for each of the following basic sets j, 1980 1.1 mrg * we split off that part of the basic set i that shares the outer dimensions 1981 1.1 mrg * with j and lies before j in the current dimension. 1982 1.1 mrg * We collect all the pieces in a new list that replaces "scc". 1983 1.1 mrg * 1984 1.1 mrg * While the elements in "scc" should be disjoint, we double-check 1985 1.1 mrg * this property to avoid running into an infinite recursion in case 1986 1.1 mrg * they intersect due to some internal error. 1987 1.1 mrg */ 1988 1.1 mrg static isl_stat add_nodes(__isl_take isl_basic_set_list *scc, void *user) 1989 1.1 mrg { 1990 1.1 mrg struct isl_add_nodes_data *data = user; 1991 1.1 mrg int i; 1992 1.1 mrg isl_size depth; 1993 1.1 mrg isl_size n; 1994 1.1 mrg isl_basic_set *bset, *first; 1995 1.1 mrg isl_basic_set_list *list; 1996 1.1 mrg isl_space *space; 1997 1.1 mrg isl_basic_map *gt; 1998 1.1 mrg 1999 1.1 mrg n = isl_basic_set_list_n_basic_set(scc); 2000 1.1 mrg if (n < 0) 2001 1.1 mrg goto error; 2002 1.1 mrg bset = isl_basic_set_list_get_basic_set(scc, 0); 2003 1.1 mrg if (n == 1) { 2004 1.1 mrg isl_basic_set_list_free(scc); 2005 1.1 mrg data->list = add_node(data->list, 2006 1.1 mrg isl_union_map_copy(data->executed), bset, 2007 1.1 mrg isl_ast_build_copy(data->build)); 2008 1.1 mrg return data->list ? 
isl_stat_ok : isl_stat_error; 2009 1.1 mrg } 2010 1.1 mrg 2011 1.1 mrg depth = isl_ast_build_get_depth(data->build); 2012 1.1 mrg if (depth < 0) 2013 1.1 mrg bset = isl_basic_set_free(bset); 2014 1.1 mrg space = isl_basic_set_get_space(bset); 2015 1.1 mrg space = isl_space_map_from_set(space); 2016 1.1 mrg gt = isl_basic_map_universe(space); 2017 1.1 mrg for (i = 0; i < depth; ++i) 2018 1.1 mrg gt = isl_basic_map_equate(gt, isl_dim_in, i, isl_dim_out, i); 2019 1.1 mrg gt = isl_basic_map_order_gt(gt, isl_dim_in, depth, isl_dim_out, depth); 2020 1.1 mrg 2021 1.1 mrg first = isl_basic_set_copy(bset); 2022 1.1 mrg list = isl_basic_set_list_from_basic_set(bset); 2023 1.1 mrg for (i = 1; i < n; ++i) { 2024 1.1 mrg int disjoint; 2025 1.1 mrg 2026 1.1 mrg bset = isl_basic_set_list_get_basic_set(scc, i); 2027 1.1 mrg 2028 1.1 mrg disjoint = isl_basic_set_is_disjoint(bset, first); 2029 1.1 mrg if (disjoint < 0) 2030 1.1 mrg list = isl_basic_set_list_free(list); 2031 1.1 mrg else if (!disjoint) 2032 1.1 mrg isl_die(isl_basic_set_list_get_ctx(scc), 2033 1.1 mrg isl_error_internal, 2034 1.1 mrg "basic sets in scc are assumed to be disjoint", 2035 1.1 mrg list = isl_basic_set_list_free(list)); 2036 1.1 mrg 2037 1.1 mrg list = add_split_on(list, bset, gt); 2038 1.1 mrg } 2039 1.1 mrg isl_basic_set_free(first); 2040 1.1 mrg isl_basic_map_free(gt); 2041 1.1 mrg isl_basic_set_list_free(scc); 2042 1.1 mrg scc = list; 2043 1.1 mrg data->list = isl_ast_graft_list_concat(data->list, 2044 1.1 mrg generate_sorted_domains(scc, data->executed, data->build)); 2045 1.1 mrg isl_basic_set_list_free(scc); 2046 1.1 mrg 2047 1.1 mrg return data->list ? 
isl_stat_ok : isl_stat_error; 2048 1.1 mrg error: 2049 1.1 mrg isl_basic_set_list_free(scc); 2050 1.1 mrg return isl_stat_error; 2051 1.1 mrg } 2052 1.1 mrg 2053 1.1 mrg /* Sort the domains in "domain_list" according to the execution order 2054 1.1 mrg * at the current depth (for equal values of the outer dimensions), 2055 1.1 mrg * generate code for each of them, collecting the results in a list. 2056 1.1 mrg * If no code is generated (because the intersection of the inverse schedule 2057 1.1 mrg * with the domains turns out to be empty), then an empty list is returned. 2058 1.1 mrg * 2059 1.1 mrg * The caller is responsible for ensuring that the basic sets in "domain_list" 2060 1.1 mrg * are pair-wise disjoint. It can, however, in principle happen that 2061 1.1 mrg * two basic sets should be ordered one way for one value of the outer 2062 1.1 mrg * dimensions and the other way for some other value of the outer dimensions. 2063 1.1 mrg * We therefore play safe and look for strongly connected components. 2064 1.1 mrg * The function add_nodes takes care of handling non-trivial components. 
2065 1.1 mrg */ 2066 1.1 mrg static __isl_give isl_ast_graft_list *generate_sorted_domains( 2067 1.1 mrg __isl_keep isl_basic_set_list *domain_list, 2068 1.1 mrg __isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build) 2069 1.1 mrg { 2070 1.1 mrg isl_ctx *ctx; 2071 1.1 mrg struct isl_add_nodes_data data; 2072 1.1 mrg isl_size depth; 2073 1.1 mrg isl_size n; 2074 1.1 mrg 2075 1.1 mrg n = isl_basic_set_list_n_basic_set(domain_list); 2076 1.1 mrg if (n < 0) 2077 1.1 mrg return NULL; 2078 1.1 mrg 2079 1.1 mrg ctx = isl_basic_set_list_get_ctx(domain_list); 2080 1.1 mrg data.list = isl_ast_graft_list_alloc(ctx, n); 2081 1.1 mrg if (n == 0) 2082 1.1 mrg return data.list; 2083 1.1 mrg if (n == 1) 2084 1.1 mrg return add_node(data.list, isl_union_map_copy(executed), 2085 1.1 mrg isl_basic_set_list_get_basic_set(domain_list, 0), 2086 1.1 mrg isl_ast_build_copy(build)); 2087 1.1 mrg 2088 1.1 mrg depth = isl_ast_build_get_depth(build); 2089 1.1 mrg data.executed = executed; 2090 1.1 mrg data.build = build; 2091 1.1 mrg if (depth < 0 || isl_basic_set_list_foreach_scc(domain_list, 2092 1.1 mrg &domain_follows_at_depth, &depth, 2093 1.1 mrg &add_nodes, &data) < 0) 2094 1.1 mrg data.list = isl_ast_graft_list_free(data.list); 2095 1.1 mrg 2096 1.1 mrg return data.list; 2097 1.1 mrg } 2098 1.1 mrg 2099 1.1 mrg /* Do i and j share any values for the outer dimensions? 
2100 1.1 mrg */ 2101 1.1 mrg static isl_bool shared_outer(__isl_keep isl_basic_set *i, 2102 1.1 mrg __isl_keep isl_basic_set *j, void *user) 2103 1.1 mrg { 2104 1.1 mrg int depth = *(int *) user; 2105 1.1 mrg isl_basic_map *test; 2106 1.1 mrg isl_bool empty; 2107 1.1 mrg int l; 2108 1.1 mrg 2109 1.1 mrg test = isl_basic_map_from_domain_and_range(isl_basic_set_copy(i), 2110 1.1 mrg isl_basic_set_copy(j)); 2111 1.1 mrg for (l = 0; l < depth; ++l) 2112 1.1 mrg test = isl_basic_map_equate(test, isl_dim_in, l, 2113 1.1 mrg isl_dim_out, l); 2114 1.1 mrg empty = isl_basic_map_is_empty(test); 2115 1.1 mrg isl_basic_map_free(test); 2116 1.1 mrg 2117 1.1 mrg return isl_bool_not(empty); 2118 1.1 mrg } 2119 1.1 mrg 2120 1.1 mrg /* Internal data structure for generate_sorted_domains_wrap. 2121 1.1 mrg * 2122 1.1 mrg * "n" is the total number of basic sets 2123 1.1 mrg * "executed" and "build" are extra arguments to be passed 2124 1.1 mrg * to generate_sorted_domains. 2125 1.1 mrg * 2126 1.1 mrg * "single" is set to 1 by generate_sorted_domains_wrap if there 2127 1.1 mrg * is only a single component. 2128 1.1 mrg * "list" collects the results. 2129 1.1 mrg */ 2130 1.1 mrg struct isl_ast_generate_parallel_domains_data { 2131 1.1 mrg isl_size n; 2132 1.1 mrg isl_union_map *executed; 2133 1.1 mrg isl_ast_build *build; 2134 1.1 mrg 2135 1.1 mrg int single; 2136 1.1 mrg isl_ast_graft_list *list; 2137 1.1 mrg }; 2138 1.1 mrg 2139 1.1 mrg /* Call generate_sorted_domains on "scc", fuse the result into a list 2140 1.1 mrg * with either zero or one graft and collect the these single element 2141 1.1 mrg * lists into data->list. 2142 1.1 mrg * 2143 1.1 mrg * If there is only one component, i.e., if the number of basic sets 2144 1.1 mrg * in the current component is equal to the total number of basic sets, 2145 1.1 mrg * then data->single is set to 1 and the result of generate_sorted_domains 2146 1.1 mrg * is not fused. 
2147 1.1 mrg */ 2148 1.1 mrg static isl_stat generate_sorted_domains_wrap(__isl_take isl_basic_set_list *scc, 2149 1.1 mrg void *user) 2150 1.1 mrg { 2151 1.1 mrg struct isl_ast_generate_parallel_domains_data *data = user; 2152 1.1 mrg isl_ast_graft_list *list; 2153 1.1 mrg isl_size n; 2154 1.1 mrg 2155 1.1 mrg n = isl_basic_set_list_n_basic_set(scc); 2156 1.1 mrg if (n < 0) 2157 1.1 mrg scc = isl_basic_set_list_free(scc); 2158 1.1 mrg list = generate_sorted_domains(scc, data->executed, data->build); 2159 1.1 mrg data->single = n == data->n; 2160 1.1 mrg if (!data->single) 2161 1.1 mrg list = isl_ast_graft_list_fuse(list, data->build); 2162 1.1 mrg if (!data->list) 2163 1.1 mrg data->list = list; 2164 1.1 mrg else 2165 1.1 mrg data->list = isl_ast_graft_list_concat(data->list, list); 2166 1.1 mrg 2167 1.1 mrg isl_basic_set_list_free(scc); 2168 1.1 mrg if (!data->list) 2169 1.1 mrg return isl_stat_error; 2170 1.1 mrg 2171 1.1 mrg return isl_stat_ok; 2172 1.1 mrg } 2173 1.1 mrg 2174 1.1 mrg /* Look for any (weakly connected) components in the "domain_list" 2175 1.1 mrg * of domains that share some values of the outer dimensions. 2176 1.1 mrg * That is, domains in different components do not share any values 2177 1.1 mrg * of the outer dimensions. This means that these components 2178 1.1 mrg * can be freely reordered. 2179 1.1 mrg * Within each of the components, we sort the domains according 2180 1.1 mrg * to the execution order at the current depth. 2181 1.1 mrg * 2182 1.1 mrg * If there is more than one component, then generate_sorted_domains_wrap 2183 1.1 mrg * fuses the result of each call to generate_sorted_domains 2184 1.1 mrg * into a list with either zero or one graft and collects these (at most) 2185 1.1 mrg * single element lists into a bigger list. This means that the elements of the 2186 1.1 mrg * final list can be freely reordered. 
In particular, we sort them 2187 1.1 mrg * according to an arbitrary but fixed ordering to ease merging of 2188 1.1 mrg * graft lists from different components. 2189 1.1 mrg */ 2190 1.1 mrg static __isl_give isl_ast_graft_list *generate_parallel_domains( 2191 1.1 mrg __isl_keep isl_basic_set_list *domain_list, 2192 1.1 mrg __isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build) 2193 1.1 mrg { 2194 1.1 mrg isl_size depth; 2195 1.1 mrg struct isl_ast_generate_parallel_domains_data data; 2196 1.1 mrg 2197 1.1 mrg data.n = isl_basic_set_list_n_basic_set(domain_list); 2198 1.1 mrg if (data.n < 0) 2199 1.1 mrg return NULL; 2200 1.1 mrg 2201 1.1 mrg if (data.n <= 1) 2202 1.1 mrg return generate_sorted_domains(domain_list, executed, build); 2203 1.1 mrg 2204 1.1 mrg depth = isl_ast_build_get_depth(build); 2205 1.1 mrg if (depth < 0) 2206 1.1 mrg return NULL; 2207 1.1 mrg data.list = NULL; 2208 1.1 mrg data.executed = executed; 2209 1.1 mrg data.build = build; 2210 1.1 mrg data.single = 0; 2211 1.1 mrg if (isl_basic_set_list_foreach_scc(domain_list, &shared_outer, &depth, 2212 1.1 mrg &generate_sorted_domains_wrap, 2213 1.1 mrg &data) < 0) 2214 1.1 mrg data.list = isl_ast_graft_list_free(data.list); 2215 1.1 mrg 2216 1.1 mrg if (!data.single) 2217 1.1 mrg data.list = isl_ast_graft_list_sort_guard(data.list); 2218 1.1 mrg 2219 1.1 mrg return data.list; 2220 1.1 mrg } 2221 1.1 mrg 2222 1.1 mrg /* Internal data for separate_domain. 2223 1.1 mrg * 2224 1.1 mrg * "explicit" is set if we only want to use explicit bounds. 2225 1.1 mrg * 2226 1.1 mrg * "domain" collects the separated domains. 2227 1.1 mrg */ 2228 1.1 mrg struct isl_separate_domain_data { 2229 1.1 mrg isl_ast_build *build; 2230 1.1 mrg int explicit; 2231 1.1 mrg isl_set *domain; 2232 1.1 mrg }; 2233 1.1 mrg 2234 1.1 mrg /* Extract implicit bounds on the current dimension for the executed "map". 
2235 1.1 mrg * 2236 1.1 mrg * The domain of "map" may involve inner dimensions, so we 2237 1.1 mrg * need to eliminate them. 2238 1.1 mrg */ 2239 1.1 mrg static __isl_give isl_set *implicit_bounds(__isl_take isl_map *map, 2240 1.1 mrg __isl_keep isl_ast_build *build) 2241 1.1 mrg { 2242 1.1 mrg isl_set *domain; 2243 1.1 mrg 2244 1.1 mrg domain = isl_map_domain(map); 2245 1.1 mrg domain = isl_ast_build_eliminate(build, domain); 2246 1.1 mrg 2247 1.1 mrg return domain; 2248 1.1 mrg } 2249 1.1 mrg 2250 1.1 mrg /* Extract explicit bounds on the current dimension for the executed "map". 2251 1.1 mrg * 2252 1.1 mrg * Rather than eliminating the inner dimensions as in implicit_bounds, 2253 1.1 mrg * we simply drop any constraints involving those inner dimensions. 2254 1.1 mrg * The idea is that most bounds that are implied by constraints on the 2255 1.1 mrg * inner dimensions will be enforced by for loops and not by explicit guards. 2256 1.1 mrg * There is then no need to separate along those bounds. 
2257 1.1 mrg */ 2258 1.1 mrg static __isl_give isl_set *explicit_bounds(__isl_take isl_map *map, 2259 1.1 mrg __isl_keep isl_ast_build *build) 2260 1.1 mrg { 2261 1.1 mrg isl_set *domain; 2262 1.1 mrg isl_size depth; 2263 1.1 mrg isl_size dim; 2264 1.1 mrg 2265 1.1 mrg depth = isl_ast_build_get_depth(build); 2266 1.1 mrg dim = isl_map_dim(map, isl_dim_out); 2267 1.1 mrg if (depth < 0 || dim < 0) 2268 1.1 mrg return isl_map_domain(isl_map_free(map)); 2269 1.1 mrg map = isl_map_drop_constraints_involving_dims(map, isl_dim_out, 0, dim); 2270 1.1 mrg 2271 1.1 mrg domain = isl_map_domain(map); 2272 1.1 mrg dim = isl_set_dim(domain, isl_dim_set); 2273 1.1 mrg domain = isl_set_detect_equalities(domain); 2274 1.1 mrg domain = isl_set_drop_constraints_involving_dims(domain, 2275 1.1 mrg isl_dim_set, depth + 1, dim - (depth + 1)); 2276 1.1 mrg domain = isl_set_remove_divs_involving_dims(domain, 2277 1.1 mrg isl_dim_set, depth, 1); 2278 1.1 mrg domain = isl_set_remove_unknown_divs(domain); 2279 1.1 mrg 2280 1.1 mrg return domain; 2281 1.1 mrg } 2282 1.1 mrg 2283 1.1 mrg /* Split data->domain into pieces that intersect with the range of "map" 2284 1.1 mrg * and pieces that do not intersect with the range of "map" 2285 1.1 mrg * and then add that part of the range of "map" that does not intersect 2286 1.1 mrg * with data->domain. 
2287 1.1 mrg */ 2288 1.1 mrg static isl_stat separate_domain(__isl_take isl_map *map, void *user) 2289 1.1 mrg { 2290 1.1 mrg struct isl_separate_domain_data *data = user; 2291 1.1 mrg isl_set *domain; 2292 1.1 mrg isl_set *d1, *d2; 2293 1.1 mrg 2294 1.1 mrg if (data->explicit) 2295 1.1 mrg domain = explicit_bounds(map, data->build); 2296 1.1 mrg else 2297 1.1 mrg domain = implicit_bounds(map, data->build); 2298 1.1 mrg 2299 1.1 mrg domain = isl_set_coalesce(domain); 2300 1.1 mrg domain = isl_set_make_disjoint(domain); 2301 1.1 mrg d1 = isl_set_subtract(isl_set_copy(domain), isl_set_copy(data->domain)); 2302 1.1 mrg d2 = isl_set_subtract(isl_set_copy(data->domain), isl_set_copy(domain)); 2303 1.1 mrg data->domain = isl_set_intersect(data->domain, domain); 2304 1.1 mrg data->domain = isl_set_union(data->domain, d1); 2305 1.1 mrg data->domain = isl_set_union(data->domain, d2); 2306 1.1 mrg 2307 1.1 mrg return isl_stat_ok; 2308 1.1 mrg } 2309 1.1 mrg 2310 1.1 mrg /* Separate the schedule domains of "executed". 2311 1.1 mrg * 2312 1.1 mrg * That is, break up the domain of "executed" into basic sets, 2313 1.1 mrg * such that for each basic set S, every element in S is associated with 2314 1.1 mrg * the same domain spaces. 2315 1.1 mrg * 2316 1.1 mrg * "space" is the (single) domain space of "executed". 
2317 1.1 mrg */ 2318 1.1 mrg static __isl_give isl_set *separate_schedule_domains( 2319 1.1 mrg __isl_take isl_space *space, __isl_take isl_union_map *executed, 2320 1.1 mrg __isl_keep isl_ast_build *build) 2321 1.1 mrg { 2322 1.1 mrg struct isl_separate_domain_data data = { build }; 2323 1.1 mrg isl_ctx *ctx; 2324 1.1 mrg 2325 1.1 mrg ctx = isl_ast_build_get_ctx(build); 2326 1.1 mrg data.explicit = isl_options_get_ast_build_separation_bounds(ctx) == 2327 1.1 mrg ISL_AST_BUILD_SEPARATION_BOUNDS_EXPLICIT; 2328 1.1 mrg data.domain = isl_set_empty(space); 2329 1.1 mrg if (isl_union_map_foreach_map(executed, &separate_domain, &data) < 0) 2330 1.1 mrg data.domain = isl_set_free(data.domain); 2331 1.1 mrg 2332 1.1 mrg isl_union_map_free(executed); 2333 1.1 mrg return data.domain; 2334 1.1 mrg } 2335 1.1 mrg 2336 1.1 mrg /* Temporary data used during the search for a lower bound for unrolling. 2337 1.1 mrg * 2338 1.1 mrg * "build" is the build in which the unrolling will be performed 2339 1.1 mrg * "domain" is the original set for which to find a lower bound 2340 1.1 mrg * "depth" is the dimension for which to find a lower boudn 2341 1.1 mrg * "expansion" is the expansion that needs to be applied to "domain" 2342 1.1 mrg * in the unrolling that will be performed 2343 1.1 mrg * 2344 1.1 mrg * "lower" is the best lower bound found so far. It is NULL if we have not 2345 1.1 mrg * found any yet. 2346 1.1 mrg * "n" is the corresponding size. If lower is NULL, then the value of n 2347 1.1 mrg * is undefined. 2348 1.1 mrg * "n_div" is the maximal number of integer divisions in the first 2349 1.1 mrg * unrolled iteration (after expansion). It is set to -1 if it hasn't 2350 1.1 mrg * been computed yet. 
2351 1.1 mrg */ 2352 1.1 mrg struct isl_find_unroll_data { 2353 1.1 mrg isl_ast_build *build; 2354 1.1 mrg isl_set *domain; 2355 1.1 mrg int depth; 2356 1.1 mrg isl_basic_map *expansion; 2357 1.1 mrg 2358 1.1 mrg isl_aff *lower; 2359 1.1 mrg int *n; 2360 1.1 mrg int n_div; 2361 1.1 mrg }; 2362 1.1 mrg 2363 1.1 mrg /* Return the constraint 2364 1.1 mrg * 2365 1.1 mrg * i_"depth" = aff + offset 2366 1.1 mrg */ 2367 1.1 mrg static __isl_give isl_constraint *at_offset(int depth, __isl_keep isl_aff *aff, 2368 1.1 mrg int offset) 2369 1.1 mrg { 2370 1.1 mrg aff = isl_aff_copy(aff); 2371 1.1 mrg aff = isl_aff_add_coefficient_si(aff, isl_dim_in, depth, -1); 2372 1.1 mrg aff = isl_aff_add_constant_si(aff, offset); 2373 1.1 mrg return isl_equality_from_aff(aff); 2374 1.1 mrg } 2375 1.1 mrg 2376 1.1 mrg /* Update *user to the number of integer divisions in the first element 2377 1.1 mrg * of "ma", if it is larger than the current value. 2378 1.1 mrg */ 2379 1.1 mrg static isl_stat update_n_div(__isl_take isl_set *set, 2380 1.1 mrg __isl_take isl_multi_aff *ma, void *user) 2381 1.1 mrg { 2382 1.1 mrg isl_aff *aff; 2383 1.1 mrg int *n = user; 2384 1.1 mrg isl_size n_div; 2385 1.1 mrg 2386 1.1 mrg aff = isl_multi_aff_get_aff(ma, 0); 2387 1.1 mrg n_div = isl_aff_dim(aff, isl_dim_div); 2388 1.1 mrg isl_aff_free(aff); 2389 1.1 mrg isl_multi_aff_free(ma); 2390 1.1 mrg isl_set_free(set); 2391 1.1 mrg 2392 1.1 mrg if (n_div > *n) 2393 1.1 mrg *n = n_div; 2394 1.1 mrg 2395 1.1 mrg return n_div >= 0 ? isl_stat_ok : isl_stat_error; 2396 1.1 mrg } 2397 1.1 mrg 2398 1.1 mrg /* Get the number of integer divisions in the expression for the iterator 2399 1.1 mrg * value at the first slice in the unrolling based on lower bound "lower", 2400 1.1 mrg * taking into account the expansion that needs to be performed on this slice. 
2401 1.1 mrg */ 2402 1.1 mrg static int get_expanded_n_div(struct isl_find_unroll_data *data, 2403 1.1 mrg __isl_keep isl_aff *lower) 2404 1.1 mrg { 2405 1.1 mrg isl_constraint *c; 2406 1.1 mrg isl_set *set; 2407 1.1 mrg isl_map *it_map, *expansion; 2408 1.1 mrg isl_pw_multi_aff *pma; 2409 1.1 mrg int n; 2410 1.1 mrg 2411 1.1 mrg c = at_offset(data->depth, lower, 0); 2412 1.1 mrg set = isl_set_copy(data->domain); 2413 1.1 mrg set = isl_set_add_constraint(set, c); 2414 1.1 mrg expansion = isl_map_from_basic_map(isl_basic_map_copy(data->expansion)); 2415 1.1 mrg set = isl_set_apply(set, expansion); 2416 1.1 mrg it_map = isl_ast_build_map_to_iterator(data->build, set); 2417 1.1 mrg pma = isl_pw_multi_aff_from_map(it_map); 2418 1.1 mrg n = 0; 2419 1.1 mrg if (isl_pw_multi_aff_foreach_piece(pma, &update_n_div, &n) < 0) 2420 1.1 mrg n = -1; 2421 1.1 mrg isl_pw_multi_aff_free(pma); 2422 1.1 mrg 2423 1.1 mrg return n; 2424 1.1 mrg } 2425 1.1 mrg 2426 1.1 mrg /* Is the lower bound "lower" with corresponding iteration count "n" 2427 1.1 mrg * better than the one stored in "data"? 2428 1.1 mrg * If there is no upper bound on the iteration count ("n" is infinity) or 2429 1.1 mrg * if the count is too large, then we cannot use this lower bound. 2430 1.1 mrg * Otherwise, if there was no previous lower bound or 2431 1.1 mrg * if the iteration count of the new lower bound is smaller than 2432 1.1 mrg * the iteration count of the previous lower bound, then we consider 2433 1.1 mrg * the new lower bound to be better. 2434 1.1 mrg * If the iteration count is the same, then compare the number 2435 1.1 mrg * of integer divisions that would be needed to express 2436 1.1 mrg * the iterator value at the first slice in the unrolling 2437 1.1 mrg * according to the lower bound. If we end up computing this 2438 1.1 mrg * number, then store the lowest value in data->n_div. 
2439 1.1 mrg */ 2440 1.1 mrg static int is_better_lower_bound(struct isl_find_unroll_data *data, 2441 1.1 mrg __isl_keep isl_aff *lower, __isl_keep isl_val *n) 2442 1.1 mrg { 2443 1.1 mrg int cmp; 2444 1.1 mrg int n_div; 2445 1.1 mrg 2446 1.1 mrg if (!n) 2447 1.1 mrg return -1; 2448 1.1 mrg if (isl_val_is_infty(n)) 2449 1.1 mrg return 0; 2450 1.1 mrg if (isl_val_cmp_si(n, INT_MAX) > 0) 2451 1.1 mrg return 0; 2452 1.1 mrg if (!data->lower) 2453 1.1 mrg return 1; 2454 1.1 mrg cmp = isl_val_cmp_si(n, *data->n); 2455 1.1 mrg if (cmp < 0) 2456 1.1 mrg return 1; 2457 1.1 mrg if (cmp > 0) 2458 1.1 mrg return 0; 2459 1.1 mrg if (data->n_div < 0) 2460 1.1 mrg data->n_div = get_expanded_n_div(data, data->lower); 2461 1.1 mrg if (data->n_div < 0) 2462 1.1 mrg return -1; 2463 1.1 mrg if (data->n_div == 0) 2464 1.1 mrg return 0; 2465 1.1 mrg n_div = get_expanded_n_div(data, lower); 2466 1.1 mrg if (n_div < 0) 2467 1.1 mrg return -1; 2468 1.1 mrg if (n_div >= data->n_div) 2469 1.1 mrg return 0; 2470 1.1 mrg data->n_div = n_div; 2471 1.1 mrg 2472 1.1 mrg return 1; 2473 1.1 mrg } 2474 1.1 mrg 2475 1.1 mrg /* Check if we can use "c" as a lower bound and if it is better than 2476 1.1 mrg * any previously found lower bound. 2477 1.1 mrg * 2478 1.1 mrg * If "c" does not involve the dimension at the current depth, 2479 1.1 mrg * then we cannot use it. 2480 1.1 mrg * Otherwise, let "c" be of the form 2481 1.1 mrg * 2482 1.1 mrg * i >= f(j)/a 2483 1.1 mrg * 2484 1.1 mrg * We compute the maximal value of 2485 1.1 mrg * 2486 1.1 mrg * -ceil(f(j)/a)) + i + 1 2487 1.1 mrg * 2488 1.1 mrg * over the domain. If there is such a value "n", then we know 2489 1.1 mrg * 2490 1.1 mrg * -ceil(f(j)/a)) + i + 1 <= n 2491 1.1 mrg * 2492 1.1 mrg * or 2493 1.1 mrg * 2494 1.1 mrg * i < ceil(f(j)/a)) + n 2495 1.1 mrg * 2496 1.1 mrg * meaning that we can use ceil(f(j)/a)) as a lower bound for unrolling. 
2497 1.1 mrg * We just need to check if we have found any lower bound before and 2498 1.1 mrg * if the new lower bound is better (smaller n or fewer integer divisions) 2499 1.1 mrg * than the previously found lower bounds. 2500 1.1 mrg */ 2501 1.1 mrg static isl_stat update_unrolling_lower_bound(struct isl_find_unroll_data *data, 2502 1.1 mrg __isl_keep isl_constraint *c) 2503 1.1 mrg { 2504 1.1 mrg isl_aff *aff, *lower; 2505 1.1 mrg isl_val *max; 2506 1.1 mrg int better; 2507 1.1 mrg 2508 1.1 mrg if (!isl_constraint_is_lower_bound(c, isl_dim_set, data->depth)) 2509 1.1 mrg return isl_stat_ok; 2510 1.1 mrg 2511 1.1 mrg lower = isl_constraint_get_bound(c, isl_dim_set, data->depth); 2512 1.1 mrg lower = isl_aff_ceil(lower); 2513 1.1 mrg aff = isl_aff_copy(lower); 2514 1.1 mrg aff = isl_aff_neg(aff); 2515 1.1 mrg aff = isl_aff_add_coefficient_si(aff, isl_dim_in, data->depth, 1); 2516 1.1 mrg aff = isl_aff_add_constant_si(aff, 1); 2517 1.1 mrg max = isl_set_max_val(data->domain, aff); 2518 1.1 mrg isl_aff_free(aff); 2519 1.1 mrg 2520 1.1 mrg better = is_better_lower_bound(data, lower, max); 2521 1.1 mrg if (better < 0 || !better) { 2522 1.1 mrg isl_val_free(max); 2523 1.1 mrg isl_aff_free(lower); 2524 1.1 mrg return better < 0 ? isl_stat_error : isl_stat_ok; 2525 1.1 mrg } 2526 1.1 mrg 2527 1.1 mrg isl_aff_free(data->lower); 2528 1.1 mrg data->lower = lower; 2529 1.1 mrg *data->n = isl_val_get_num_si(max); 2530 1.1 mrg isl_val_free(max); 2531 1.1 mrg 2532 1.1 mrg return isl_stat_ok; 2533 1.1 mrg } 2534 1.1 mrg 2535 1.1 mrg /* Check if we can use "c" as a lower bound and if it is better than 2536 1.1 mrg * any previously found lower bound. 
2537 1.1 mrg */ 2538 1.1 mrg static isl_stat constraint_find_unroll(__isl_take isl_constraint *c, void *user) 2539 1.1 mrg { 2540 1.1 mrg struct isl_find_unroll_data *data; 2541 1.1 mrg isl_stat r; 2542 1.1 mrg 2543 1.1 mrg data = (struct isl_find_unroll_data *) user; 2544 1.1 mrg r = update_unrolling_lower_bound(data, c); 2545 1.1 mrg isl_constraint_free(c); 2546 1.1 mrg 2547 1.1 mrg return r; 2548 1.1 mrg } 2549 1.1 mrg 2550 1.1 mrg /* Look for a lower bound l(i) on the dimension at "depth" 2551 1.1 mrg * and a size n such that "domain" is a subset of 2552 1.1 mrg * 2553 1.1 mrg * { [i] : l(i) <= i_d < l(i) + n } 2554 1.1 mrg * 2555 1.1 mrg * where d is "depth" and l(i) depends only on earlier dimensions. 2556 1.1 mrg * Furthermore, try and find a lower bound such that n is as small as possible. 2557 1.1 mrg * In particular, "n" needs to be finite. 2558 1.1 mrg * "build" is the build in which the unrolling will be performed. 2559 1.1 mrg * "expansion" is the expansion that needs to be applied to "domain" 2560 1.1 mrg * in the unrolling that will be performed. 2561 1.1 mrg * 2562 1.1 mrg * Inner dimensions have been eliminated from "domain" by the caller. 2563 1.1 mrg * 2564 1.1 mrg * We first construct a collection of lower bounds on the input set 2565 1.1 mrg * by computing its simple hull. We then iterate through them, 2566 1.1 mrg * discarding those that we cannot use (either because they do not 2567 1.1 mrg * involve the dimension at "depth" or because they have no corresponding 2568 1.1 mrg * upper bound, meaning that "n" would be unbounded) and pick out the 2569 1.1 mrg * best from the remaining ones. 2570 1.1 mrg * 2571 1.1 mrg * If we cannot find a suitable lower bound, then we consider that 2572 1.1 mrg * to be an error. 
2573 1.1 mrg */ 2574 1.1 mrg static __isl_give isl_aff *find_unroll_lower_bound( 2575 1.1 mrg __isl_keep isl_ast_build *build, __isl_keep isl_set *domain, 2576 1.1 mrg int depth, __isl_keep isl_basic_map *expansion, int *n) 2577 1.1 mrg { 2578 1.1 mrg struct isl_find_unroll_data data = 2579 1.1 mrg { build, domain, depth, expansion, NULL, n, -1 }; 2580 1.1 mrg isl_basic_set *hull; 2581 1.1 mrg 2582 1.1 mrg hull = isl_set_simple_hull(isl_set_copy(domain)); 2583 1.1 mrg 2584 1.1 mrg if (isl_basic_set_foreach_constraint(hull, 2585 1.1 mrg &constraint_find_unroll, &data) < 0) 2586 1.1 mrg goto error; 2587 1.1 mrg 2588 1.1 mrg isl_basic_set_free(hull); 2589 1.1 mrg 2590 1.1 mrg if (!data.lower) 2591 1.1 mrg isl_die(isl_set_get_ctx(domain), isl_error_invalid, 2592 1.1 mrg "cannot find lower bound for unrolling", return NULL); 2593 1.1 mrg 2594 1.1 mrg return data.lower; 2595 1.1 mrg error: 2596 1.1 mrg isl_basic_set_free(hull); 2597 1.1 mrg return isl_aff_free(data.lower); 2598 1.1 mrg } 2599 1.1 mrg 2600 1.1 mrg /* Call "fn" on each iteration of the current dimension of "domain". 2601 1.1 mrg * If "init" is not NULL, then it is called with the number of 2602 1.1 mrg * iterations before any call to "fn". 2603 1.1 mrg * Return -1 on failure. 2604 1.1 mrg * 2605 1.1 mrg * Since we are going to be iterating over the individual values, 2606 1.1 mrg * we first check if there are any strides on the current dimension. 2607 1.1 mrg * If there is, we rewrite the current dimension i as 2608 1.1 mrg * 2609 1.1 mrg * i = stride i' + offset 2610 1.1 mrg * 2611 1.1 mrg * and then iterate over individual values of i' instead. 2612 1.1 mrg * 2613 1.1 mrg * We then look for a lower bound on i' and a size such that the domain 2614 1.1 mrg * is a subset of 2615 1.1 mrg * 2616 1.1 mrg * { [j,i'] : l(j) <= i' < l(j) + n } 2617 1.1 mrg * 2618 1.1 mrg * and then take slices of the domain at values of i' 2619 1.1 mrg * between l(j) and l(j) + n - 1. 
2620 1.1 mrg * 2621 1.1 mrg * We compute the unshifted simple hull of each slice to ensure that 2622 1.1 mrg * we have a single basic set per offset. The slicing constraint 2623 1.1 mrg * may get simplified away before the unshifted simple hull is taken 2624 1.1 mrg * and may therefore in some rare cases disappear from the result. 2625 1.1 mrg * We therefore explicitly add the constraint back after computing 2626 1.1 mrg * the unshifted simple hull to ensure that the basic sets 2627 1.1 mrg * remain disjoint. The constraints that are dropped by taking the hull 2628 1.1 mrg * will be taken into account at the next level, as in the case of the 2629 1.1 mrg * atomic option. 2630 1.1 mrg * 2631 1.1 mrg * Finally, we map i' back to i and call "fn". 2632 1.1 mrg */ 2633 1.1 mrg static int foreach_iteration(__isl_take isl_set *domain, 2634 1.1 mrg __isl_keep isl_ast_build *build, int (*init)(int n, void *user), 2635 1.1 mrg int (*fn)(__isl_take isl_basic_set *bset, void *user), void *user) 2636 1.1 mrg { 2637 1.1 mrg int i, n; 2638 1.1 mrg isl_bool empty; 2639 1.1 mrg isl_size depth; 2640 1.1 mrg isl_multi_aff *expansion; 2641 1.1 mrg isl_basic_map *bmap; 2642 1.1 mrg isl_aff *lower = NULL; 2643 1.1 mrg isl_ast_build *stride_build; 2644 1.1 mrg 2645 1.1 mrg depth = isl_ast_build_get_depth(build); 2646 1.1 mrg if (depth < 0) 2647 1.1 mrg domain = isl_set_free(domain); 2648 1.1 mrg 2649 1.1 mrg domain = isl_ast_build_eliminate_inner(build, domain); 2650 1.1 mrg domain = isl_set_intersect(domain, isl_ast_build_get_domain(build)); 2651 1.1 mrg stride_build = isl_ast_build_copy(build); 2652 1.1 mrg stride_build = isl_ast_build_detect_strides(stride_build, 2653 1.1 mrg isl_set_copy(domain)); 2654 1.1 mrg expansion = isl_ast_build_get_stride_expansion(stride_build); 2655 1.1 mrg 2656 1.1 mrg domain = isl_set_preimage_multi_aff(domain, 2657 1.1 mrg isl_multi_aff_copy(expansion)); 2658 1.1 mrg domain = isl_ast_build_eliminate_divs(stride_build, domain); 2659 1.1 mrg 
isl_ast_build_free(stride_build); 2660 1.1 mrg 2661 1.1 mrg bmap = isl_basic_map_from_multi_aff(expansion); 2662 1.1 mrg 2663 1.1 mrg empty = isl_set_is_empty(domain); 2664 1.1 mrg if (empty < 0) { 2665 1.1 mrg n = -1; 2666 1.1 mrg } else if (empty) { 2667 1.1 mrg n = 0; 2668 1.1 mrg } else { 2669 1.1 mrg lower = find_unroll_lower_bound(build, domain, depth, bmap, &n); 2670 1.1 mrg if (!lower) 2671 1.1 mrg n = -1; 2672 1.1 mrg } 2673 1.1 mrg if (n >= 0 && init && init(n, user) < 0) 2674 1.1 mrg n = -1; 2675 1.1 mrg for (i = 0; i < n; ++i) { 2676 1.1 mrg isl_set *set; 2677 1.1 mrg isl_basic_set *bset; 2678 1.1 mrg isl_constraint *slice; 2679 1.1 mrg 2680 1.1 mrg slice = at_offset(depth, lower, i); 2681 1.1 mrg set = isl_set_copy(domain); 2682 1.1 mrg set = isl_set_add_constraint(set, isl_constraint_copy(slice)); 2683 1.1 mrg bset = isl_set_unshifted_simple_hull(set); 2684 1.1 mrg bset = isl_basic_set_add_constraint(bset, slice); 2685 1.1 mrg bset = isl_basic_set_apply(bset, isl_basic_map_copy(bmap)); 2686 1.1 mrg 2687 1.1 mrg if (fn(bset, user) < 0) 2688 1.1 mrg break; 2689 1.1 mrg } 2690 1.1 mrg 2691 1.1 mrg isl_aff_free(lower); 2692 1.1 mrg isl_set_free(domain); 2693 1.1 mrg isl_basic_map_free(bmap); 2694 1.1 mrg 2695 1.1 mrg return n < 0 || i < n ? -1 : 0; 2696 1.1 mrg } 2697 1.1 mrg 2698 1.1 mrg /* Data structure for storing the results and the intermediate objects 2699 1.1 mrg * of compute_domains. 2700 1.1 mrg * 2701 1.1 mrg * "list" is the main result of the function and contains a list 2702 1.1 mrg * of disjoint basic sets for which code should be generated. 2703 1.1 mrg * 2704 1.1 mrg * "executed" and "build" are inputs to compute_domains. 2705 1.1 mrg * "schedule_domain" is the domain of "executed". 2706 1.1 mrg * 2707 1.1 mrg * "option" contains the domains at the current depth that should by 2708 1.1 mrg * atomic, separated or unrolled. 
These domains are as specified by 2709 1.1 mrg * the user, except that inner dimensions have been eliminated and 2710 1.1 mrg * that they have been made pair-wise disjoint. 2711 1.1 mrg * 2712 1.1 mrg * "sep_class" contains the user-specified split into separation classes 2713 1.1 mrg * specialized to the current depth. 2714 1.1 mrg * "done" contains the union of the separation domains that have already 2715 1.1 mrg * been handled. 2716 1.1 mrg */ 2717 1.1 mrg struct isl_codegen_domains { 2718 1.1 mrg isl_basic_set_list *list; 2719 1.1 mrg 2720 1.1 mrg isl_union_map *executed; 2721 1.1 mrg isl_ast_build *build; 2722 1.1 mrg isl_set *schedule_domain; 2723 1.1 mrg 2724 1.1 mrg isl_set *option[4]; 2725 1.1 mrg 2726 1.1 mrg isl_map *sep_class; 2727 1.1 mrg isl_set *done; 2728 1.1 mrg }; 2729 1.1 mrg 2730 1.1 mrg /* Internal data structure for do_unroll. 2731 1.1 mrg * 2732 1.1 mrg * "domains" stores the results of compute_domains. 2733 1.1 mrg * "class_domain" is the original class domain passed to do_unroll. 2734 1.1 mrg * "unroll_domain" collects the unrolled iterations. 2735 1.1 mrg */ 2736 1.1 mrg struct isl_ast_unroll_data { 2737 1.1 mrg struct isl_codegen_domains *domains; 2738 1.1 mrg isl_set *class_domain; 2739 1.1 mrg isl_set *unroll_domain; 2740 1.1 mrg }; 2741 1.1 mrg 2742 1.1 mrg /* Given an iteration of an unrolled domain represented by "bset", 2743 1.1 mrg * add it to data->domains->list. 2744 1.1 mrg * Since we may have dropped some constraints, we intersect with 2745 1.1 mrg * the class domain again to ensure that each element in the list 2746 1.1 mrg * is disjoint from the other class domains. 
2747 1.1 mrg */ 2748 1.1 mrg static int do_unroll_iteration(__isl_take isl_basic_set *bset, void *user) 2749 1.1 mrg { 2750 1.1 mrg struct isl_ast_unroll_data *data = user; 2751 1.1 mrg isl_set *set; 2752 1.1 mrg isl_basic_set_list *list; 2753 1.1 mrg 2754 1.1 mrg set = isl_set_from_basic_set(bset); 2755 1.1 mrg data->unroll_domain = isl_set_union(data->unroll_domain, 2756 1.1 mrg isl_set_copy(set)); 2757 1.1 mrg set = isl_set_intersect(set, isl_set_copy(data->class_domain)); 2758 1.1 mrg set = isl_set_make_disjoint(set); 2759 1.1 mrg list = isl_basic_set_list_from_set(set); 2760 1.1 mrg data->domains->list = isl_basic_set_list_concat(data->domains->list, 2761 1.1 mrg list); 2762 1.1 mrg 2763 1.1 mrg return 0; 2764 1.1 mrg } 2765 1.1 mrg 2766 1.1 mrg /* Extend domains->list with a list of basic sets, one for each value 2767 1.1 mrg * of the current dimension in "domain" and remove the corresponding 2768 1.1 mrg * sets from the class domain. Return the updated class domain. 2769 1.1 mrg * The divs that involve the current dimension have not been projected out 2770 1.1 mrg * from this domain. 2771 1.1 mrg * 2772 1.1 mrg * We call foreach_iteration to iterate over the individual values and 2773 1.1 mrg * in do_unroll_iteration we collect the individual basic sets in 2774 1.1 mrg * domains->list and their union in data->unroll_domain, which is then 2775 1.1 mrg * used to update the class domain. 
2776 1.1 mrg */ 2777 1.1 mrg static __isl_give isl_set *do_unroll(struct isl_codegen_domains *domains, 2778 1.1 mrg __isl_take isl_set *domain, __isl_take isl_set *class_domain) 2779 1.1 mrg { 2780 1.1 mrg struct isl_ast_unroll_data data; 2781 1.1 mrg 2782 1.1 mrg if (!domain) 2783 1.1 mrg return isl_set_free(class_domain); 2784 1.1 mrg if (!class_domain) 2785 1.1 mrg return isl_set_free(domain); 2786 1.1 mrg 2787 1.1 mrg data.domains = domains; 2788 1.1 mrg data.class_domain = class_domain; 2789 1.1 mrg data.unroll_domain = isl_set_empty(isl_set_get_space(domain)); 2790 1.1 mrg 2791 1.1 mrg if (foreach_iteration(domain, domains->build, NULL, 2792 1.1 mrg &do_unroll_iteration, &data) < 0) 2793 1.1 mrg data.unroll_domain = isl_set_free(data.unroll_domain); 2794 1.1 mrg 2795 1.1 mrg class_domain = isl_set_subtract(class_domain, data.unroll_domain); 2796 1.1 mrg 2797 1.1 mrg return class_domain; 2798 1.1 mrg } 2799 1.1 mrg 2800 1.1 mrg /* Add domains to domains->list for each individual value of the current 2801 1.1 mrg * dimension, for that part of the schedule domain that lies in the 2802 1.1 mrg * intersection of the option domain and the class domain. 2803 1.1 mrg * Remove the corresponding sets from the class domain and 2804 1.1 mrg * return the updated class domain. 2805 1.1 mrg * 2806 1.1 mrg * We first break up the unroll option domain into individual pieces 2807 1.1 mrg * and then handle each of them separately. The unroll option domain 2808 1.1 mrg * has been made disjoint in compute_domains_init_options, 2809 1.1 mrg * 2810 1.1 mrg * Note that we actively want to combine different pieces of the 2811 1.1 mrg * schedule domain that have the same value at the current dimension. 2812 1.1 mrg * We therefore need to break up the unroll option domain before 2813 1.1 mrg * intersecting with class and schedule domain, hoping that the 2814 1.1 mrg * unroll option domain specified by the user is relatively simple. 
2815 1.1 mrg */ 2816 1.1 mrg static __isl_give isl_set *compute_unroll_domains( 2817 1.1 mrg struct isl_codegen_domains *domains, __isl_take isl_set *class_domain) 2818 1.1 mrg { 2819 1.1 mrg isl_set *unroll_domain; 2820 1.1 mrg isl_basic_set_list *unroll_list; 2821 1.1 mrg int i; 2822 1.1 mrg isl_size n; 2823 1.1 mrg isl_bool empty; 2824 1.1 mrg 2825 1.1 mrg empty = isl_set_is_empty(domains->option[isl_ast_loop_unroll]); 2826 1.1 mrg if (empty < 0) 2827 1.1 mrg return isl_set_free(class_domain); 2828 1.1 mrg if (empty) 2829 1.1 mrg return class_domain; 2830 1.1 mrg 2831 1.1 mrg unroll_domain = isl_set_copy(domains->option[isl_ast_loop_unroll]); 2832 1.1 mrg unroll_list = isl_basic_set_list_from_set(unroll_domain); 2833 1.1 mrg 2834 1.1 mrg n = isl_basic_set_list_n_basic_set(unroll_list); 2835 1.1 mrg if (n < 0) 2836 1.1 mrg class_domain = isl_set_free(class_domain); 2837 1.1 mrg for (i = 0; i < n; ++i) { 2838 1.1 mrg isl_basic_set *bset; 2839 1.1 mrg 2840 1.1 mrg bset = isl_basic_set_list_get_basic_set(unroll_list, i); 2841 1.1 mrg unroll_domain = isl_set_from_basic_set(bset); 2842 1.1 mrg unroll_domain = isl_set_intersect(unroll_domain, 2843 1.1 mrg isl_set_copy(class_domain)); 2844 1.1 mrg unroll_domain = isl_set_intersect(unroll_domain, 2845 1.1 mrg isl_set_copy(domains->schedule_domain)); 2846 1.1 mrg 2847 1.1 mrg empty = isl_set_is_empty(unroll_domain); 2848 1.1 mrg if (empty >= 0 && empty) { 2849 1.1 mrg isl_set_free(unroll_domain); 2850 1.1 mrg continue; 2851 1.1 mrg } 2852 1.1 mrg 2853 1.1 mrg class_domain = do_unroll(domains, unroll_domain, class_domain); 2854 1.1 mrg } 2855 1.1 mrg 2856 1.1 mrg isl_basic_set_list_free(unroll_list); 2857 1.1 mrg 2858 1.1 mrg return class_domain; 2859 1.1 mrg } 2860 1.1 mrg 2861 1.1 mrg /* Try and construct a single basic set that includes the intersection of 2862 1.1 mrg * the schedule domain, the atomic option domain and the class domain. 
2863 1.1 mrg * Add the resulting basic set(s) to domains->list and remove them 2864 1.1 mrg * from class_domain. Return the updated class domain. 2865 1.1 mrg * 2866 1.1 mrg * We construct a single domain rather than trying to combine 2867 1.1 mrg * the schedule domains of individual domains because we are working 2868 1.1 mrg * within a single component so that non-overlapping schedule domains 2869 1.1 mrg * should already have been separated. 2870 1.1 mrg * We do however need to make sure that this single domains is a subset 2871 1.1 mrg * of the class domain so that it would not intersect with any other 2872 1.1 mrg * class domains. This means that we may end up splitting up the atomic 2873 1.1 mrg * domain in case separation classes are being used. 2874 1.1 mrg * 2875 1.1 mrg * "domain" is the intersection of the schedule domain and the class domain, 2876 1.1 mrg * with inner dimensions projected out. 2877 1.1 mrg */ 2878 1.1 mrg static __isl_give isl_set *compute_atomic_domain( 2879 1.1 mrg struct isl_codegen_domains *domains, __isl_take isl_set *class_domain) 2880 1.1 mrg { 2881 1.1 mrg isl_basic_set *bset; 2882 1.1 mrg isl_basic_set_list *list; 2883 1.1 mrg isl_set *domain, *atomic_domain; 2884 1.1 mrg int empty; 2885 1.1 mrg 2886 1.1 mrg domain = isl_set_copy(domains->option[isl_ast_loop_atomic]); 2887 1.1 mrg domain = isl_set_intersect(domain, isl_set_copy(class_domain)); 2888 1.1 mrg domain = isl_set_intersect(domain, 2889 1.1 mrg isl_set_copy(domains->schedule_domain)); 2890 1.1 mrg empty = isl_set_is_empty(domain); 2891 1.1 mrg if (empty < 0) 2892 1.1 mrg class_domain = isl_set_free(class_domain); 2893 1.1 mrg if (empty) { 2894 1.1 mrg isl_set_free(domain); 2895 1.1 mrg return class_domain; 2896 1.1 mrg } 2897 1.1 mrg 2898 1.1 mrg domain = isl_ast_build_eliminate(domains->build, domain); 2899 1.1 mrg domain = isl_set_coalesce_preserve(domain); 2900 1.1 mrg bset = isl_set_unshifted_simple_hull(domain); 2901 1.1 mrg domain = isl_set_from_basic_set(bset); 
2902 1.1 mrg atomic_domain = isl_set_copy(domain); 2903 1.1 mrg domain = isl_set_intersect(domain, isl_set_copy(class_domain)); 2904 1.1 mrg class_domain = isl_set_subtract(class_domain, atomic_domain); 2905 1.1 mrg domain = isl_set_make_disjoint(domain); 2906 1.1 mrg list = isl_basic_set_list_from_set(domain); 2907 1.1 mrg domains->list = isl_basic_set_list_concat(domains->list, list); 2908 1.1 mrg 2909 1.1 mrg return class_domain; 2910 1.1 mrg } 2911 1.1 mrg 2912 1.1 mrg /* Split up the schedule domain into uniform basic sets, 2913 1.1 mrg * in the sense that each element in a basic set is associated to 2914 1.1 mrg * elements of the same domains, and add the result to domains->list. 2915 1.1 mrg * Do this for that part of the schedule domain that lies in the 2916 1.1 mrg * intersection of "class_domain" and the separate option domain. 2917 1.1 mrg * 2918 1.1 mrg * "class_domain" may or may not include the constraints 2919 1.1 mrg * of the schedule domain, but this does not make a difference 2920 1.1 mrg * since we are going to intersect it with the domain of the inverse schedule. 2921 1.1 mrg * If it includes schedule domain constraints, then they may involve 2922 1.1 mrg * inner dimensions, but we will eliminate them in separation_domain. 
2923 1.1 mrg */ 2924 1.1 mrg static int compute_separate_domain(struct isl_codegen_domains *domains, 2925 1.1 mrg __isl_keep isl_set *class_domain) 2926 1.1 mrg { 2927 1.1 mrg isl_space *space; 2928 1.1 mrg isl_set *domain; 2929 1.1 mrg isl_union_map *executed; 2930 1.1 mrg isl_basic_set_list *list; 2931 1.1 mrg int empty; 2932 1.1 mrg 2933 1.1 mrg domain = isl_set_copy(domains->option[isl_ast_loop_separate]); 2934 1.1 mrg domain = isl_set_intersect(domain, isl_set_copy(class_domain)); 2935 1.1 mrg executed = isl_union_map_copy(domains->executed); 2936 1.1 mrg executed = isl_union_map_intersect_domain(executed, 2937 1.1 mrg isl_union_set_from_set(domain)); 2938 1.1 mrg empty = isl_union_map_is_empty(executed); 2939 1.1 mrg if (empty < 0 || empty) { 2940 1.1 mrg isl_union_map_free(executed); 2941 1.1 mrg return empty < 0 ? -1 : 0; 2942 1.1 mrg } 2943 1.1 mrg 2944 1.1 mrg space = isl_set_get_space(class_domain); 2945 1.1 mrg domain = separate_schedule_domains(space, executed, domains->build); 2946 1.1 mrg 2947 1.1 mrg list = isl_basic_set_list_from_set(domain); 2948 1.1 mrg domains->list = isl_basic_set_list_concat(domains->list, list); 2949 1.1 mrg 2950 1.1 mrg return 0; 2951 1.1 mrg } 2952 1.1 mrg 2953 1.1 mrg /* Split up the domain at the current depth into disjoint 2954 1.1 mrg * basic sets for which code should be generated separately 2955 1.1 mrg * for the given separation class domain. 2956 1.1 mrg * 2957 1.1 mrg * If any separation classes have been defined, then "class_domain" 2958 1.1 mrg * is the domain of the current class and does not refer to inner dimensions. 2959 1.1 mrg * Otherwise, "class_domain" is the universe domain. 2960 1.1 mrg * 2961 1.1 mrg * We first make sure that the class domain is disjoint from 2962 1.1 mrg * previously considered class domains. 2963 1.1 mrg * 2964 1.1 mrg * The separate domains can be computed directly from the "class_domain". 
2965 1.1 mrg * 2966 1.1 mrg * The unroll, atomic and remainder domains need the constraints 2967 1.1 mrg * from the schedule domain. 2968 1.1 mrg * 2969 1.1 mrg * For unrolling, the actual schedule domain is needed (with divs that 2970 1.1 mrg * may refer to the current dimension) so that stride detection can be 2971 1.1 mrg * performed. 2972 1.1 mrg * 2973 1.1 mrg * For atomic and remainder domains, inner dimensions and divs involving 2974 1.1 mrg * the current dimensions should be eliminated. 2975 1.1 mrg * In case we are working within a separation class, we need to intersect 2976 1.1 mrg * the result with the current "class_domain" to ensure that the domains 2977 1.1 mrg * are disjoint from those generated from other class domains. 2978 1.1 mrg * 2979 1.1 mrg * The domain that has been made atomic may be larger than specified 2980 1.1 mrg * by the user since it needs to be representable as a single basic set. 2981 1.1 mrg * This possibly larger domain is removed from class_domain by 2982 1.1 mrg * compute_atomic_domain. It is computed first so that the extended domain 2983 1.1 mrg * would not overlap with any domains computed before. 2984 1.1 mrg * Similary, the unrolled domains may have some constraints removed and 2985 1.1 mrg * may therefore also be larger than specified by the user. 2986 1.1 mrg * 2987 1.1 mrg * If anything is left after handling separate, unroll and atomic, 2988 1.1 mrg * we split it up into basic sets and append the basic sets to domains->list. 
2989 1.1 mrg */ 2990 1.1 mrg static isl_stat compute_partial_domains(struct isl_codegen_domains *domains, 2991 1.1 mrg __isl_take isl_set *class_domain) 2992 1.1 mrg { 2993 1.1 mrg isl_basic_set_list *list; 2994 1.1 mrg isl_set *domain; 2995 1.1 mrg 2996 1.1 mrg class_domain = isl_set_subtract(class_domain, 2997 1.1 mrg isl_set_copy(domains->done)); 2998 1.1 mrg domains->done = isl_set_union(domains->done, 2999 1.1 mrg isl_set_copy(class_domain)); 3000 1.1 mrg 3001 1.1 mrg class_domain = compute_atomic_domain(domains, class_domain); 3002 1.1 mrg class_domain = compute_unroll_domains(domains, class_domain); 3003 1.1 mrg 3004 1.1 mrg domain = isl_set_copy(class_domain); 3005 1.1 mrg 3006 1.1 mrg if (compute_separate_domain(domains, domain) < 0) 3007 1.1 mrg goto error; 3008 1.1 mrg domain = isl_set_subtract(domain, 3009 1.1 mrg isl_set_copy(domains->option[isl_ast_loop_separate])); 3010 1.1 mrg 3011 1.1 mrg domain = isl_set_intersect(domain, 3012 1.1 mrg isl_set_copy(domains->schedule_domain)); 3013 1.1 mrg 3014 1.1 mrg domain = isl_ast_build_eliminate(domains->build, domain); 3015 1.1 mrg domain = isl_set_intersect(domain, isl_set_copy(class_domain)); 3016 1.1 mrg 3017 1.1 mrg domain = isl_set_coalesce_preserve(domain); 3018 1.1 mrg domain = isl_set_make_disjoint(domain); 3019 1.1 mrg 3020 1.1 mrg list = isl_basic_set_list_from_set(domain); 3021 1.1 mrg domains->list = isl_basic_set_list_concat(domains->list, list); 3022 1.1 mrg 3023 1.1 mrg isl_set_free(class_domain); 3024 1.1 mrg 3025 1.1 mrg return isl_stat_ok; 3026 1.1 mrg error: 3027 1.1 mrg isl_set_free(domain); 3028 1.1 mrg isl_set_free(class_domain); 3029 1.1 mrg return isl_stat_error; 3030 1.1 mrg } 3031 1.1 mrg 3032 1.1 mrg /* Split up the domain at the current depth into disjoint 3033 1.1 mrg * basic sets for which code should be generated separately 3034 1.1 mrg * for the separation class identified by "pnt". 
3035 1.1 mrg * 3036 1.1 mrg * We extract the corresponding class domain from domains->sep_class, 3037 1.1 mrg * eliminate inner dimensions and pass control to compute_partial_domains. 3038 1.1 mrg */ 3039 1.1 mrg static isl_stat compute_class_domains(__isl_take isl_point *pnt, void *user) 3040 1.1 mrg { 3041 1.1 mrg struct isl_codegen_domains *domains = user; 3042 1.1 mrg isl_set *class_set; 3043 1.1 mrg isl_set *domain; 3044 1.1 mrg int disjoint; 3045 1.1 mrg 3046 1.1 mrg class_set = isl_set_from_point(pnt); 3047 1.1 mrg domain = isl_map_domain(isl_map_intersect_range( 3048 1.1 mrg isl_map_copy(domains->sep_class), class_set)); 3049 1.1 mrg domain = isl_ast_build_compute_gist(domains->build, domain); 3050 1.1 mrg domain = isl_ast_build_eliminate(domains->build, domain); 3051 1.1 mrg 3052 1.1 mrg disjoint = isl_set_plain_is_disjoint(domain, domains->schedule_domain); 3053 1.1 mrg if (disjoint < 0) 3054 1.1 mrg return isl_stat_error; 3055 1.1 mrg if (disjoint) { 3056 1.1 mrg isl_set_free(domain); 3057 1.1 mrg return isl_stat_ok; 3058 1.1 mrg } 3059 1.1 mrg 3060 1.1 mrg return compute_partial_domains(domains, domain); 3061 1.1 mrg } 3062 1.1 mrg 3063 1.1 mrg /* Extract the domains at the current depth that should be atomic, 3064 1.1 mrg * separated or unrolled and store them in option. 3065 1.1 mrg * 3066 1.1 mrg * The domains specified by the user might overlap, so we make 3067 1.1 mrg * them disjoint by subtracting earlier domains from later domains. 
3068 1.1 mrg */ 3069 1.1 mrg static void compute_domains_init_options(isl_set *option[4], 3070 1.1 mrg __isl_keep isl_ast_build *build) 3071 1.1 mrg { 3072 1.1 mrg enum isl_ast_loop_type type, type2; 3073 1.1 mrg isl_set *unroll; 3074 1.1 mrg 3075 1.1 mrg for (type = isl_ast_loop_atomic; 3076 1.1 mrg type <= isl_ast_loop_separate; ++type) { 3077 1.1 mrg option[type] = isl_ast_build_get_option_domain(build, type); 3078 1.1 mrg for (type2 = isl_ast_loop_atomic; type2 < type; ++type2) 3079 1.1 mrg option[type] = isl_set_subtract(option[type], 3080 1.1 mrg isl_set_copy(option[type2])); 3081 1.1 mrg } 3082 1.1 mrg 3083 1.1 mrg unroll = option[isl_ast_loop_unroll]; 3084 1.1 mrg unroll = isl_set_coalesce(unroll); 3085 1.1 mrg unroll = isl_set_make_disjoint(unroll); 3086 1.1 mrg option[isl_ast_loop_unroll] = unroll; 3087 1.1 mrg } 3088 1.1 mrg 3089 1.1 mrg /* Split up the domain at the current depth into disjoint 3090 1.1 mrg * basic sets for which code should be generated separately, 3091 1.1 mrg * based on the user-specified options. 3092 1.1 mrg * Return the list of disjoint basic sets. 3093 1.1 mrg * 3094 1.1 mrg * There are three kinds of domains that we need to keep track of. 3095 1.1 mrg * - the "schedule domain" is the domain of "executed" 3096 1.1 mrg * - the "class domain" is the domain corresponding to the currrent 3097 1.1 mrg * separation class 3098 1.1 mrg * - the "option domain" is the domain corresponding to one of the options 3099 1.1 mrg * atomic, unroll or separate 3100 1.1 mrg * 3101 1.1 mrg * We first consider the individial values of the separation classes 3102 1.1 mrg * and split up the domain for each of them separately. 3103 1.1 mrg * Finally, we consider the remainder. If no separation classes were 3104 1.1 mrg * specified, then we call compute_partial_domains with the universe 3105 1.1 mrg * "class_domain". Otherwise, we take the "schedule_domain" as "class_domain", 3106 1.1 mrg * with inner dimensions removed. 
 * We do this because we want to
 * avoid computing the complement of the class domains (i.e., the difference
 * between the universe and domains->done).
 */
static __isl_give isl_basic_set_list *compute_domains(
        __isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build)
{
        struct isl_codegen_domains domains;
        isl_ctx *ctx;
        isl_set *domain;
        isl_union_set *schedule_domain;
        isl_set *classes;
        isl_space *space;
        int n_param;
        enum isl_ast_loop_type type;
        isl_bool empty;

        if (!executed)
                return NULL;

        ctx = isl_union_map_get_ctx(executed);
        domains.list = isl_basic_set_list_alloc(ctx, 0);

        schedule_domain = isl_union_map_domain(isl_union_map_copy(executed));
        domain = isl_set_from_union_set(schedule_domain);

        compute_domains_init_options(domains.option, build);

        /* The set of separation class values, with parameters projected out. */
        domains.sep_class = isl_ast_build_get_separation_class(build);
        classes = isl_map_range(isl_map_copy(domains.sep_class));
        n_param = isl_set_dim(classes, isl_dim_param);
        if (n_param < 0)
                classes = isl_set_free(classes);
        /* NOTE(review): on the error path "classes" is NULL here and
         * isl_set_project_out simply propagates NULL, ignoring the
         * negative n_param — TODO confirm this is the intended idiom. */
        classes = isl_set_project_out(classes, isl_dim_param, 0, n_param);

        space = isl_set_get_space(domain);
        domains.build = build;
        domains.schedule_domain = isl_set_copy(domain);
        domains.executed = executed;
        domains.done = isl_set_empty(space);

        /* Handle each separation class value separately. */
        if (isl_set_foreach_point(classes, &compute_class_domains, &domains) < 0)
                domains.list = isl_basic_set_list_free(domains.list);
        isl_set_free(classes);

        /* Then handle the remainder: use the universe if no class was
         * handled, otherwise the schedule domain with inner dimensions
         * removed (see the comment above).
         */
        empty = isl_set_is_empty(domains.done);
        if (empty < 0) {
                domains.list = isl_basic_set_list_free(domains.list);
                domain = isl_set_free(domain);
        } else if (empty) {
                isl_set_free(domain);
                domain = isl_set_universe(isl_set_get_space(domains.done));
        } else {
                domain = isl_ast_build_eliminate(build, domain);
        }
        if (compute_partial_domains(&domains, domain) < 0)
                domains.list = isl_basic_set_list_free(domains.list);

        isl_set_free(domains.schedule_domain);
        isl_set_free(domains.done);
        isl_map_free(domains.sep_class);
        for (type = isl_ast_loop_atomic; type <= isl_ast_loop_separate; ++type)
                isl_set_free(domains.option[type]);

        return domains.list;
}

/* Generate code for a single component, after shifting (if any)
 * has been applied, in case the schedule was specified as a union map.
 *
 * We first split up the domain at the current depth into disjoint
 * basic sets based on the user-specified options.
 * Then we generate code for each of them and concatenate the results.
3179 1.1 mrg */ 3180 1.1 mrg static __isl_give isl_ast_graft_list *generate_shifted_component_flat( 3181 1.1 mrg __isl_take isl_union_map *executed, __isl_take isl_ast_build *build) 3182 1.1 mrg { 3183 1.1 mrg isl_basic_set_list *domain_list; 3184 1.1 mrg isl_ast_graft_list *list = NULL; 3185 1.1 mrg 3186 1.1 mrg domain_list = compute_domains(executed, build); 3187 1.1 mrg list = generate_parallel_domains(domain_list, executed, build); 3188 1.1 mrg 3189 1.1 mrg isl_basic_set_list_free(domain_list); 3190 1.1 mrg isl_union_map_free(executed); 3191 1.1 mrg isl_ast_build_free(build); 3192 1.1 mrg 3193 1.1 mrg return list; 3194 1.1 mrg } 3195 1.1 mrg 3196 1.1 mrg /* Generate code for a single component, after shifting (if any) 3197 1.1 mrg * has been applied, in case the schedule was specified as a schedule tree 3198 1.1 mrg * and the separate option was specified. 3199 1.1 mrg * 3200 1.1 mrg * We perform separation on the domain of "executed" and then generate 3201 1.1 mrg * an AST for each of the resulting disjoint basic sets. 
 */
static __isl_give isl_ast_graft_list *generate_shifted_component_tree_separate(
        __isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
{
        isl_space *space;
        isl_set *domain;
        isl_basic_set_list *domain_list;
        isl_ast_graft_list *list;

        space = isl_ast_build_get_space(build, 1);
        /* Split the schedule domain into disjoint pieces on which the
         * inverse schedule is single-valued.
         */
        domain = separate_schedule_domains(space,
                                        isl_union_map_copy(executed), build);
        domain_list = isl_basic_set_list_from_set(domain);

        list = generate_parallel_domains(domain_list, executed, build);

        isl_basic_set_list_free(domain_list);
        isl_union_map_free(executed);
        isl_ast_build_free(build);

        return list;
}

/* Internal data structure for generate_shifted_component_tree_unroll.
 *
 * "executed" and "build" are inputs to generate_shifted_component_tree_unroll.
 * "list" collects the constructed grafts.
 */
struct isl_ast_unroll_tree_data {
        isl_union_map *executed;
        isl_ast_build *build;
        isl_ast_graft_list *list;
};

/* Initialize data->list to a list of "n" elements.
 */
static int init_unroll_tree(int n, void *user)
{
        struct isl_ast_unroll_tree_data *data = user;
        isl_ctx *ctx;

        ctx = isl_ast_build_get_ctx(data->build);
        data->list = isl_ast_graft_list_alloc(ctx, n);

        return 0;
}

/* Given an iteration of an unrolled domain represented by "bset",
 * generate the corresponding AST and add the result to data->list.
 */
static int do_unroll_tree_iteration(__isl_take isl_basic_set *bset, void *user)
{
        struct isl_ast_unroll_tree_data *data = user;

        data->list = add_node(data->list, isl_union_map_copy(data->executed),
                                bset, isl_ast_build_copy(data->build));

        return 0;
}

/* Generate code for a single component, after shifting (if any)
 * has been applied, in case the schedule was specified as a schedule tree
 * and the unroll option was specified.
 *
 * We call foreach_iteration to iterate over the individual values and
 * construct and collect the corresponding grafts in do_unroll_tree_iteration.
 */
static __isl_give isl_ast_graft_list *generate_shifted_component_tree_unroll(
        __isl_take isl_union_map *executed, __isl_take isl_set *domain,
        __isl_take isl_ast_build *build)
{
        struct isl_ast_unroll_tree_data data = { executed, build, NULL };

        if (foreach_iteration(domain, build, &init_unroll_tree,
                                &do_unroll_tree_iteration, &data) < 0)
                data.list = isl_ast_graft_list_free(data.list);

        isl_union_map_free(executed);
        isl_ast_build_free(build);

        return data.list;
}

/* Does "domain" involve a disjunction that is purely based on
 * constraints involving only outer dimensions?
 *
 * In particular, is there a disjunction such that the constraints
 * involving the current and later dimensions are the same over
 * all the disjuncts?
 */
static isl_bool has_pure_outer_disjunction(__isl_keep isl_set *domain,
        __isl_keep isl_ast_build *build)
{
        isl_basic_set *hull;
        isl_set *shared, *inner;
        isl_bool equal;
        isl_size depth;
        isl_size n;
        isl_size dim;

        /* A single disjunct cannot involve a disjunction. */
        n = isl_set_n_basic_set(domain);
        if (n < 0)
                return isl_bool_error;
        if (n <= 1)
                return isl_bool_false;
        dim = isl_set_dim(domain, isl_dim_set);
        depth = isl_ast_build_get_depth(build);
        if (dim < 0 || depth < 0)
                return isl_bool_error;

        /* Keep only the constraints involving the current and inner
         * dimensions and check whether they are the same across all
         * disjuncts, i.e., equal to their common simple hull.
         */
        inner = isl_set_copy(domain);
        inner = isl_set_drop_constraints_not_involving_dims(inner,
                                        isl_dim_set, depth, dim - depth);
        hull = isl_set_plain_unshifted_simple_hull(isl_set_copy(inner));
        shared = isl_set_from_basic_set(hull);
        equal = isl_set_plain_is_equal(inner, shared);
        isl_set_free(inner);
        isl_set_free(shared);

        return equal;
}

/* Generate code for a single component, after shifting (if any)
 * has been applied, in case the schedule was specified as a schedule tree.
 * In particular, handle the base case where there is either no isolated
 * set or we are within the isolated set (in which case "isolated" is set)
 * or the iterations that precede or follow the isolated set.
 *
 * The schedule domain is broken up or combined into basic sets
 * according to the AST generation option specified in the current
 * schedule node, which may be either atomic, separate, unroll or
 * unspecified.
 * If the option is unspecified, then we currently simply
 * split the schedule domain into disjoint basic sets.
 *
 * In case the separate option is specified, the AST generation is
 * handled by generate_shifted_component_tree_separate.
 * In the other cases, we need the global schedule domain.
 * In the unroll case, the AST generation is then handled by
 * generate_shifted_component_tree_unroll which needs the actual
 * schedule domain (with divs that may refer to the current dimension)
 * so that stride detection can be performed.
 * In the atomic or unspecified case, inner dimensions and divs involving
 * the current dimensions should be eliminated.
 * The result is then either combined into a single basic set or
 * split up into disjoint basic sets.
 * Finally an AST is generated for each basic set and the results are
 * concatenated.
 *
 * If the schedule domain involves a disjunction that is purely based on
 * constraints involving only outer dimensions, then it is treated as
 * if atomic was specified.  This ensures that only a single loop
 * is generated instead of a sequence of identical loops with
 * different guards.
 */
static __isl_give isl_ast_graft_list *generate_shifted_component_tree_base(
        __isl_take isl_union_map *executed, __isl_take isl_ast_build *build,
        int isolated)
{
        isl_bool outer_disjunction;
        isl_union_set *schedule_domain;
        isl_set *domain;
        isl_basic_set_list *domain_list;
        isl_ast_graft_list *list;
        enum isl_ast_loop_type type;

        type = isl_ast_build_get_loop_type(build, isolated);
        if (type < 0)
                goto error;

        /* "executed" and "build" are consumed by the callee on these
         * early-return paths.
         */
        if (type == isl_ast_loop_separate)
                return generate_shifted_component_tree_separate(executed,
                                                                build);

        schedule_domain = isl_union_map_domain(isl_union_map_copy(executed));
        domain = isl_set_from_union_set(schedule_domain);

        if (type == isl_ast_loop_unroll)
                return generate_shifted_component_tree_unroll(executed, domain,
                                                                build);

        domain = isl_ast_build_eliminate(build, domain);
        domain = isl_set_coalesce_preserve(domain);

        outer_disjunction = has_pure_outer_disjunction(domain, build);
        if (outer_disjunction < 0)
                domain = isl_set_free(domain);

        /* Either combine into a single basic set (atomic or pure outer
         * disjunction) or split up into disjoint basic sets.
         */
        if (outer_disjunction || type == isl_ast_loop_atomic) {
                isl_basic_set *hull;
                hull = isl_set_unshifted_simple_hull(domain);
                domain_list = isl_basic_set_list_from_basic_set(hull);
        } else {
                domain = isl_set_make_disjoint(domain);
                domain_list = isl_basic_set_list_from_set(domain);
        }

        list = generate_parallel_domains(domain_list, executed, build);

        isl_basic_set_list_free(domain_list);
        isl_union_map_free(executed);
        isl_ast_build_free(build);

        return list;
error:
        isl_union_map_free(executed);
        isl_ast_build_free(build);
        return NULL;
}

/* Extract out the disjunction imposed by "domain" on the outer
 * schedule dimensions.
 *
 * In particular, remove all inner dimensions from "domain" (including
 * the current dimension) and then remove the constraints that are shared
 * by all disjuncts in the result.
 */
static __isl_give isl_set *extract_disjunction(__isl_take isl_set *domain,
        __isl_keep isl_ast_build *build)
{
        isl_set *hull;
        isl_size depth;
        isl_size dim;

        domain = isl_ast_build_specialize(build, domain);
        depth = isl_ast_build_get_depth(build);
        dim = isl_set_dim(domain, isl_dim_set);
        if (depth < 0 || dim < 0)
                return isl_set_free(domain);
        domain = isl_set_eliminate(domain, isl_dim_set, depth, dim - depth);
        domain = isl_set_remove_unknown_divs(domain);
        /* Gist away the constraints shared by all disjuncts. */
        hull = isl_set_copy(domain);
        hull = isl_set_from_basic_set(isl_set_unshifted_simple_hull(hull));
        domain = isl_set_gist(domain, hull);

        return domain;
}

/* Add "guard" to the grafts in "list".
 * "build" is the outer AST build, while "sub_build" includes "guard"
 * in its generated domain.
 *
 * First combine the grafts into a single graft and then add the guard.
 * If the list is empty, or if some error occurred, then simply return
 * the list.
3446 1.1 mrg */ 3447 1.1 mrg static __isl_give isl_ast_graft_list *list_add_guard( 3448 1.1 mrg __isl_take isl_ast_graft_list *list, __isl_keep isl_set *guard, 3449 1.1 mrg __isl_keep isl_ast_build *build, __isl_keep isl_ast_build *sub_build) 3450 1.1 mrg { 3451 1.1 mrg isl_ast_graft *graft; 3452 1.1 mrg isl_size n; 3453 1.1 mrg 3454 1.1 mrg list = isl_ast_graft_list_fuse(list, sub_build); 3455 1.1 mrg 3456 1.1 mrg n = isl_ast_graft_list_n_ast_graft(list); 3457 1.1 mrg if (n < 0) 3458 1.1 mrg return isl_ast_graft_list_free(list); 3459 1.1 mrg if (n != 1) 3460 1.1 mrg return list; 3461 1.1 mrg 3462 1.1 mrg graft = isl_ast_graft_list_get_ast_graft(list, 0); 3463 1.1 mrg graft = isl_ast_graft_add_guard(graft, isl_set_copy(guard), build); 3464 1.1 mrg list = isl_ast_graft_list_set_ast_graft(list, 0, graft); 3465 1.1 mrg 3466 1.1 mrg return list; 3467 1.1 mrg } 3468 1.1 mrg 3469 1.1 mrg /* Generate code for a single component, after shifting (if any) 3470 1.1 mrg * has been applied, in case the schedule was specified as a schedule tree. 3471 1.1 mrg * In particular, do so for the specified subset of the schedule domain. 3472 1.1 mrg * 3473 1.1 mrg * If we are outside of the isolated part, then "domain" may include 3474 1.1 mrg * a disjunction. Explicitly generate this disjunction at this point 3475 1.1 mrg * instead of relying on the disjunction getting hoisted back up 3476 1.1 mrg * to this level. 
 */
static __isl_give isl_ast_graft_list *generate_shifted_component_tree_part(
        __isl_keep isl_union_map *executed, __isl_take isl_set *domain,
        __isl_keep isl_ast_build *build, int isolated)
{
        isl_union_set *uset;
        isl_ast_graft_list *list;
        isl_ast_build *sub_build;
        int empty;

        /* Restrict the inverse schedule to the given subset. */
        uset = isl_union_set_from_set(isl_set_copy(domain));
        executed = isl_union_map_copy(executed);
        executed = isl_union_map_intersect_domain(executed, uset);
        empty = isl_union_map_is_empty(executed);
        if (empty < 0)
                goto error;
        if (empty) {
                isl_ctx *ctx;
                isl_union_map_free(executed);
                isl_set_free(domain);
                ctx = isl_ast_build_get_ctx(build);
                return isl_ast_graft_list_alloc(ctx, 0);
        }

        /* Outside of the isolated part, generate the disjunction in
         * "domain" explicitly and add it back as a guard afterwards.
         */
        sub_build = isl_ast_build_copy(build);
        if (!isolated) {
                domain = extract_disjunction(domain, build);
                sub_build = isl_ast_build_restrict_generated(sub_build,
                                                isl_set_copy(domain));
        }
        list = generate_shifted_component_tree_base(executed,
                                isl_ast_build_copy(sub_build), isolated);
        if (!isolated)
                list = list_add_guard(list, domain, build, sub_build);
        isl_ast_build_free(sub_build);
        isl_set_free(domain);
        return list;
error:
        isl_union_map_free(executed);
        isl_set_free(domain);
        return NULL;
}

/* Generate code for a single component, after shifting (if any)
 * has been applied, in case the schedule was specified as a schedule tree.
3522 1.1 mrg * In particular, do so for the specified sequence of subsets 3523 1.1 mrg * of the schedule domain, "before", "isolated", "after" and "other", 3524 1.1 mrg * where only the "isolated" part is considered to be isolated. 3525 1.1 mrg */ 3526 1.1 mrg static __isl_give isl_ast_graft_list *generate_shifted_component_parts( 3527 1.1 mrg __isl_take isl_union_map *executed, __isl_take isl_set *before, 3528 1.1 mrg __isl_take isl_set *isolated, __isl_take isl_set *after, 3529 1.1 mrg __isl_take isl_set *other, __isl_take isl_ast_build *build) 3530 1.1 mrg { 3531 1.1 mrg isl_ast_graft_list *list, *res; 3532 1.1 mrg 3533 1.1 mrg res = generate_shifted_component_tree_part(executed, before, build, 0); 3534 1.1 mrg list = generate_shifted_component_tree_part(executed, isolated, 3535 1.1 mrg build, 1); 3536 1.1 mrg res = isl_ast_graft_list_concat(res, list); 3537 1.1 mrg list = generate_shifted_component_tree_part(executed, after, build, 0); 3538 1.1 mrg res = isl_ast_graft_list_concat(res, list); 3539 1.1 mrg list = generate_shifted_component_tree_part(executed, other, build, 0); 3540 1.1 mrg res = isl_ast_graft_list_concat(res, list); 3541 1.1 mrg 3542 1.1 mrg isl_union_map_free(executed); 3543 1.1 mrg isl_ast_build_free(build); 3544 1.1 mrg 3545 1.1 mrg return res; 3546 1.1 mrg } 3547 1.1 mrg 3548 1.1 mrg /* Does "set" intersect "first", but not "second"? 
3549 1.1 mrg */ 3550 1.1 mrg static isl_bool only_intersects_first(__isl_keep isl_set *set, 3551 1.1 mrg __isl_keep isl_set *first, __isl_keep isl_set *second) 3552 1.1 mrg { 3553 1.1 mrg isl_bool disjoint; 3554 1.1 mrg 3555 1.1 mrg disjoint = isl_set_is_disjoint(set, first); 3556 1.1 mrg if (disjoint < 0) 3557 1.1 mrg return isl_bool_error; 3558 1.1 mrg if (disjoint) 3559 1.1 mrg return isl_bool_false; 3560 1.1 mrg 3561 1.1 mrg return isl_set_is_disjoint(set, second); 3562 1.1 mrg } 3563 1.1 mrg 3564 1.1 mrg /* Generate code for a single component, after shifting (if any) 3565 1.1 mrg * has been applied, in case the schedule was specified as a schedule tree. 3566 1.1 mrg * In particular, do so in case of isolation where there is 3567 1.1 mrg * only an "isolated" part and an "after" part. 3568 1.1 mrg * "dead1" and "dead2" are freed by this function in order to simplify 3569 1.1 mrg * the caller. 3570 1.1 mrg * 3571 1.1 mrg * The "before" and "other" parts are set to empty sets. 3572 1.1 mrg */ 3573 1.1 mrg static __isl_give isl_ast_graft_list *generate_shifted_component_only_after( 3574 1.1 mrg __isl_take isl_union_map *executed, __isl_take isl_set *isolated, 3575 1.1 mrg __isl_take isl_set *after, __isl_take isl_ast_build *build, 3576 1.1 mrg __isl_take isl_set *dead1, __isl_take isl_set *dead2) 3577 1.1 mrg { 3578 1.1 mrg isl_set *empty; 3579 1.1 mrg 3580 1.1 mrg empty = isl_set_empty(isl_set_get_space(after)); 3581 1.1 mrg isl_set_free(dead1); 3582 1.1 mrg isl_set_free(dead2); 3583 1.1 mrg return generate_shifted_component_parts(executed, isl_set_copy(empty), 3584 1.1 mrg isolated, after, empty, build); 3585 1.1 mrg } 3586 1.1 mrg 3587 1.1 mrg /* Generate code for a single component, after shifting (if any) 3588 1.1 mrg * has been applied, in case the schedule was specified as a schedule tree. 
 *
 * We first check if the user has specified an isolated schedule domain
 * and that we are not already outside of this isolated schedule domain.
 * If so, we break up the schedule domain into iterations that
 * precede the isolated domain, the isolated domain itself,
 * the iterations that follow the isolated domain and
 * the remaining iterations (those that are incomparable
 * to the isolated domain).
 * We generate an AST for each piece and concatenate the results.
 *
 * If the isolated domain is not convex, then it is replaced
 * by a convex superset to ensure that the sets of preceding and
 * following iterations are properly defined and, in particular,
 * that there are no intermediate iterations that do not belong
 * to the isolated domain.
 *
 * In the special case where at least one element of the schedule
 * domain that does not belong to the isolated domain needs
 * to be scheduled after this isolated domain, but none of those
 * elements need to be scheduled before, break up the schedule domain
 * in only two parts, the isolated domain, and a part that will be
 * scheduled after the isolated domain.
 *
 * If no isolated set has been specified, then we generate an
 * AST for the entire inverse schedule.
 */
static __isl_give isl_ast_graft_list *generate_shifted_component_tree(
        __isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
{
        int i;
        isl_size depth;
        int empty, has_isolate;
        isl_space *space;
        isl_union_set *schedule_domain;
        isl_set *domain;
        isl_basic_set *hull;
        isl_set *isolated, *before, *after, *test;
        isl_map *gt, *lt;
        isl_bool pure;

        build = isl_ast_build_extract_isolated(build);
        has_isolate = isl_ast_build_has_isolated(build);
        if (has_isolate < 0)
                executed = isl_union_map_free(executed);
        else if (!has_isolate)
                return generate_shifted_component_tree_base(executed, build, 0);

        schedule_domain = isl_union_map_domain(isl_union_map_copy(executed));
        domain = isl_set_from_union_set(schedule_domain);

        isolated = isl_ast_build_get_isolated(build);
        isolated = isl_set_intersect(isolated, isl_set_copy(domain));
        /* Check whether we are already outside of the isolated domain. */
        test = isl_ast_build_specialize(build, isl_set_copy(isolated));
        empty = isl_set_is_empty(test);
        isl_set_free(test);
        if (empty < 0)
                goto error;
        if (empty) {
                isl_set_free(isolated);
                isl_set_free(domain);
                return generate_shifted_component_tree_base(executed, build, 0);
        }
        depth = isl_ast_build_get_depth(build);
        if (depth < 0)
                goto error;

        /* Replace the isolated domain by a convex superset (see above). */
        isolated = isl_ast_build_eliminate(build, isolated);
        hull = isl_set_unshifted_simple_hull(isolated);
        isolated = isl_set_from_basic_set(hull);

        /* "gt" relates iterations that share the first "depth" values and
         * are strictly later at the current depth; "lt" is its inverse.
         */
        space = isl_space_map_from_set(isl_set_get_space(isolated));
        gt = isl_map_universe(space);
        for (i = 0; i < depth; ++i)
                gt = isl_map_equate(gt, isl_dim_in, i, isl_dim_out, i);
        gt = isl_map_order_gt(gt, isl_dim_in, depth, isl_dim_out, depth);
        lt = isl_map_reverse(isl_map_copy(gt));
        before = isl_set_apply(isl_set_copy(isolated), gt);
        after = isl_set_apply(isl_set_copy(isolated), lt);

        domain = isl_set_subtract(domain, isl_set_copy(isolated));
        /* Special case: everything outside the isolated domain comes after. */
        pure = only_intersects_first(domain, after, before);
        if (pure < 0)
                executed = isl_union_map_free(executed);
        else if (pure)
                return generate_shifted_component_only_after(executed, isolated,
                                                domain, build, before, after);
        /* Make the four parts pairwise disjoint. */
        domain = isl_set_subtract(domain, isl_set_copy(before));
        domain = isl_set_subtract(domain, isl_set_copy(after));
        after = isl_set_subtract(after, isl_set_copy(isolated));
        after = isl_set_subtract(after, isl_set_copy(before));
        before = isl_set_subtract(before, isl_set_copy(isolated));

        return generate_shifted_component_parts(executed, before, isolated,
                                                after, domain, build);
error:
        isl_set_free(domain);
        isl_set_free(isolated);
        isl_union_map_free(executed);
        isl_ast_build_free(build);
        return NULL;
}

/* Generate code for a single component, after shifting (if any)
 * has been applied.
 *
 * Call generate_shifted_component_tree or generate_shifted_component_flat
 * depending on whether the schedule was specified as a schedule tree.
3696 1.1 mrg */ 3697 1.1 mrg static __isl_give isl_ast_graft_list *generate_shifted_component( 3698 1.1 mrg __isl_take isl_union_map *executed, __isl_take isl_ast_build *build) 3699 1.1 mrg { 3700 1.1 mrg if (isl_ast_build_has_schedule_node(build)) 3701 1.1 mrg return generate_shifted_component_tree(executed, build); 3702 1.1 mrg else 3703 1.1 mrg return generate_shifted_component_flat(executed, build); 3704 1.1 mrg } 3705 1.1 mrg 3706 1.1 mrg struct isl_set_map_pair { 3707 1.1 mrg isl_set *set; 3708 1.1 mrg isl_map *map; 3709 1.1 mrg }; 3710 1.1 mrg 3711 1.1 mrg /* Given an array "domain" of isl_set_map_pairs and an array "order" 3712 1.1 mrg * of indices into the "domain" array, 3713 1.1 mrg * return the union of the "map" fields of the elements 3714 1.1 mrg * indexed by the first "n" elements of "order". 3715 1.1 mrg */ 3716 1.1 mrg static __isl_give isl_union_map *construct_component_executed( 3717 1.1 mrg struct isl_set_map_pair *domain, int *order, int n) 3718 1.1 mrg { 3719 1.1 mrg int i; 3720 1.1 mrg isl_map *map; 3721 1.1 mrg isl_union_map *executed; 3722 1.1 mrg 3723 1.1 mrg map = isl_map_copy(domain[order[0]].map); 3724 1.1 mrg executed = isl_union_map_from_map(map); 3725 1.1 mrg for (i = 1; i < n; ++i) { 3726 1.1 mrg map = isl_map_copy(domain[order[i]].map); 3727 1.1 mrg executed = isl_union_map_add_map(executed, map); 3728 1.1 mrg } 3729 1.1 mrg 3730 1.1 mrg return executed; 3731 1.1 mrg } 3732 1.1 mrg 3733 1.1 mrg /* Generate code for a single component, after shifting (if any) 3734 1.1 mrg * has been applied. 3735 1.1 mrg * 3736 1.1 mrg * The component inverse schedule is specified as the "map" fields 3737 1.1 mrg * of the elements of "domain" indexed by the first "n" elements of "order". 
3738 1.1 mrg */ 3739 1.1 mrg static __isl_give isl_ast_graft_list *generate_shifted_component_from_list( 3740 1.1 mrg struct isl_set_map_pair *domain, int *order, int n, 3741 1.1 mrg __isl_take isl_ast_build *build) 3742 1.1 mrg { 3743 1.1 mrg isl_union_map *executed; 3744 1.1 mrg 3745 1.1 mrg executed = construct_component_executed(domain, order, n); 3746 1.1 mrg return generate_shifted_component(executed, build); 3747 1.1 mrg } 3748 1.1 mrg 3749 1.1 mrg /* Does set dimension "pos" of "set" have an obviously fixed value? 3750 1.1 mrg */ 3751 1.1 mrg static int dim_is_fixed(__isl_keep isl_set *set, int pos) 3752 1.1 mrg { 3753 1.1 mrg int fixed; 3754 1.1 mrg isl_val *v; 3755 1.1 mrg 3756 1.1 mrg v = isl_set_plain_get_val_if_fixed(set, isl_dim_set, pos); 3757 1.1 mrg if (!v) 3758 1.1 mrg return -1; 3759 1.1 mrg fixed = !isl_val_is_nan(v); 3760 1.1 mrg isl_val_free(v); 3761 1.1 mrg 3762 1.1 mrg return fixed; 3763 1.1 mrg } 3764 1.1 mrg 3765 1.1 mrg /* Given an array "domain" of isl_set_map_pairs and an array "order" 3766 1.1 mrg * of indices into the "domain" array, 3767 1.1 mrg * do all (except for at most one) of the "set" field of the elements 3768 1.1 mrg * indexed by the first "n" elements of "order" have a fixed value 3769 1.1 mrg * at position "depth"? 
3770 1.1 mrg */ 3771 1.1 mrg static int at_most_one_non_fixed(struct isl_set_map_pair *domain, 3772 1.1 mrg int *order, int n, int depth) 3773 1.1 mrg { 3774 1.1 mrg int i; 3775 1.1 mrg int non_fixed = -1; 3776 1.1 mrg 3777 1.1 mrg for (i = 0; i < n; ++i) { 3778 1.1 mrg int f; 3779 1.1 mrg 3780 1.1 mrg f = dim_is_fixed(domain[order[i]].set, depth); 3781 1.1 mrg if (f < 0) 3782 1.1 mrg return -1; 3783 1.1 mrg if (f) 3784 1.1 mrg continue; 3785 1.1 mrg if (non_fixed >= 0) 3786 1.1 mrg return 0; 3787 1.1 mrg non_fixed = i; 3788 1.1 mrg } 3789 1.1 mrg 3790 1.1 mrg return 1; 3791 1.1 mrg } 3792 1.1 mrg 3793 1.1 mrg /* Given an array "domain" of isl_set_map_pairs and an array "order" 3794 1.1 mrg * of indices into the "domain" array, 3795 1.1 mrg * eliminate the inner dimensions from the "set" field of the elements 3796 1.1 mrg * indexed by the first "n" elements of "order", provided the current 3797 1.1 mrg * dimension does not have a fixed value. 3798 1.1 mrg * 3799 1.1 mrg * Return the index of the first element in "order" with a corresponding 3800 1.1 mrg * "set" field that does not have an (obviously) fixed value. 
3801 1.1 mrg */ 3802 1.1 mrg static int eliminate_non_fixed(struct isl_set_map_pair *domain, 3803 1.1 mrg int *order, int n, int depth, __isl_keep isl_ast_build *build) 3804 1.1 mrg { 3805 1.1 mrg int i; 3806 1.1 mrg int base = -1; 3807 1.1 mrg 3808 1.1 mrg for (i = n - 1; i >= 0; --i) { 3809 1.1 mrg int f; 3810 1.1 mrg f = dim_is_fixed(domain[order[i]].set, depth); 3811 1.1 mrg if (f < 0) 3812 1.1 mrg return -1; 3813 1.1 mrg if (f) 3814 1.1 mrg continue; 3815 1.1 mrg domain[order[i]].set = isl_ast_build_eliminate_inner(build, 3816 1.1 mrg domain[order[i]].set); 3817 1.1 mrg base = i; 3818 1.1 mrg } 3819 1.1 mrg 3820 1.1 mrg return base; 3821 1.1 mrg } 3822 1.1 mrg 3823 1.1 mrg /* Given an array "domain" of isl_set_map_pairs and an array "order" 3824 1.1 mrg * of indices into the "domain" array, 3825 1.1 mrg * find the element of "domain" (amongst those indexed by the first "n" 3826 1.1 mrg * elements of "order") with the "set" field that has the smallest 3827 1.1 mrg * value for the current iterator. 3828 1.1 mrg * 3829 1.1 mrg * Note that the domain with the smallest value may depend on the parameters 3830 1.1 mrg * and/or outer loop dimension. Since the result of this function is only 3831 1.1 mrg * used as heuristic, we only make a reasonable attempt at finding the best 3832 1.1 mrg * domain, one that should work in case a single domain provides the smallest 3833 1.1 mrg * value for the current dimension over all values of the parameters 3834 1.1 mrg * and outer dimensions. 3835 1.1 mrg * 3836 1.1 mrg * In particular, we compute the smallest value of the first domain 3837 1.1 mrg * and replace it by that of any later domain if that later domain 3838 1.1 mrg * has a smallest value that is smaller for at least some value 3839 1.1 mrg * of the parameters and outer dimensions. 
 */
static int first_offset(struct isl_set_map_pair *domain, int *order, int n,
	__isl_keep isl_ast_build *build)
{
	int i;
	isl_map *min_first;
	int first = 0;

	/* Start with the smallest value of the current iterator
	 * over the first domain as the candidate.
	 */
	min_first = isl_ast_build_map_to_iterator(build,
			isl_set_copy(domain[order[0]].set));
	min_first = isl_map_lexmin(min_first);

	for (i = 1; i < n; ++i) {
		isl_map *min, *test;
		int empty;

		min = isl_ast_build_map_to_iterator(build,
				isl_set_copy(domain[order[i]].set));
		min = isl_map_lexmin(min);
		/* Is the minimum of domain "i" smaller than the current
		 * candidate for at least some value of the parameters
		 * and outer dimensions?  If "test" is non-empty, it is.
		 */
		test = isl_map_copy(min);
		test = isl_map_apply_domain(isl_map_copy(min_first), test);
		test = isl_map_order_lt(test, isl_dim_in, 0, isl_dim_out, 0);
		empty = isl_map_is_empty(test);
		isl_map_free(test);
		if (empty >= 0 && !empty) {
			/* Domain "i" becomes the new candidate,
			 * taking over ownership of "min".
			 */
			isl_map_free(min_first);
			first = i;
			min_first = min;
		} else
			isl_map_free(min);

		if (empty < 0)
			break;
	}

	isl_map_free(min_first);

	/* Leaving the loop early means isl_map_is_empty failed. */
	return i < n ? -1 : first;
}

/* Construct a shifted inverse schedule based on the original inverse schedule,
 * the stride and the offset.
 *
 * The original inverse schedule is specified as the "map" fields
 * of the elements of "domain" indexed by the first "n" elements of "order".
3885 1.1 mrg * 3886 1.1 mrg * "stride" and "offset" are such that the difference 3887 1.1 mrg * between the values of the current dimension of domain "i" 3888 1.1 mrg * and the values of the current dimension for some reference domain are 3889 1.1 mrg * equal to 3890 1.1 mrg * 3891 1.1 mrg * stride * integer + offset[i] 3892 1.1 mrg * 3893 1.1 mrg * Moreover, 0 <= offset[i] < stride. 3894 1.1 mrg * 3895 1.1 mrg * For each domain, we create a map 3896 1.1 mrg * 3897 1.1 mrg * { [..., j, ...] -> [..., j - offset[i], offset[i], ....] } 3898 1.1 mrg * 3899 1.1 mrg * where j refers to the current dimension and the other dimensions are 3900 1.1 mrg * unchanged, and apply this map to the original schedule domain. 3901 1.1 mrg * 3902 1.1 mrg * For example, for the original schedule 3903 1.1 mrg * 3904 1.1 mrg * { A[i] -> [2i]: 0 <= i < 10; B[i] -> [2i+1] : 0 <= i < 10 } 3905 1.1 mrg * 3906 1.1 mrg * and assuming the offset is 0 for the A domain and 1 for the B domain, 3907 1.1 mrg * we apply the mapping 3908 1.1 mrg * 3909 1.1 mrg * { [j] -> [j, 0] } 3910 1.1 mrg * 3911 1.1 mrg * to the schedule of the "A" domain and the mapping 3912 1.1 mrg * 3913 1.1 mrg * { [j - 1] -> [j, 1] } 3914 1.1 mrg * 3915 1.1 mrg * to the schedule of the "B" domain. 3916 1.1 mrg * 3917 1.1 mrg * 3918 1.1 mrg * Note that after the transformation, the differences between pairs 3919 1.1 mrg * of values of the current dimension over all domains are multiples 3920 1.1 mrg * of stride and that we have therefore exposed the stride. 3921 1.1 mrg * 3922 1.1 mrg * 3923 1.1 mrg * To see that the mapping preserves the lexicographic order, 3924 1.1 mrg * first note that each of the individual maps above preserves the order. 3925 1.1 mrg * If the value of the current iterator is j1 in one domain and j2 in another, 3926 1.1 mrg * then if j1 = j2, we know that the same map is applied to both domains 3927 1.1 mrg * and the order is preserved. 
 * Otherwise, let us assume, without loss of generality, that j1 < j2.
 * If c1 >= c2 (with c1 and c2 the corresponding offsets), then
 *
 *	j1 - c1 < j2 - c2
 *
 * and the order is preserved.
 * If c1 < c2, then we know
 *
 *	0 <= c2 - c1 < s
 *
 * We also have
 *
 *	j2 - j1 = n * s + r
 *
 * with n >= 0 and 0 <= r < s.
 * In other words, r = c2 - c1.
 * If n > 0, then
 *
 *	j1 - c1 < j2 - c2
 *
 * If n = 0, then
 *
 *	j1 - c1 = j2 - c2
 *
 * and so
 *
 *	(j1 - c1, c1) << (j2 - c2, c2)
 *
 * with "<<" the lexicographic order, proving that the order is preserved
 * in all cases.
 */
static __isl_give isl_union_map *construct_shifted_executed(
	struct isl_set_map_pair *domain, int *order, int n,
	__isl_keep isl_val *stride, __isl_keep isl_multi_val *offset,
	__isl_keep isl_ast_build *build)
{
	int i;
	isl_union_map *executed;
	isl_space *space;
	isl_map *map;
	isl_size depth;
	isl_constraint *c;

	depth = isl_ast_build_get_depth(build);
	if (depth < 0)
		return NULL;
	space = isl_ast_build_get_space(build, 1);
	executed = isl_union_map_empty(isl_space_copy(space));
	/* Template map: identity on all dimensions except the current one,
	 * with an extra output dimension inserted right after it.
	 */
	space = isl_space_map_from_set(space);
	map = isl_map_identity(isl_space_copy(space));
	map = isl_map_eliminate(map, isl_dim_out, depth, 1);
	map = isl_map_insert_dims(map, isl_dim_out, depth + 1, 1);
	space = isl_space_insert_dims(space, isl_dim_out, depth + 1, 1);

	/* Template equality: in[depth] - out[depth] + constant = 0,
	 * where the constant is filled in per domain below.
	 */
	c = isl_constraint_alloc_equality(isl_local_space_from_space(space));
	c = isl_constraint_set_coefficient_si(c, isl_dim_in, depth, 1);
	c = isl_constraint_set_coefficient_si(c, isl_dim_out, depth, -1);

	for (i = 0; i < n; ++i) {
		isl_map *map_i;
		isl_val *v;

		v = isl_multi_val_get_val(offset, i);
		if (!v)
			break;
		map_i = isl_map_copy(map);
		/* The new dimension carries the offset of domain "i"... */
		map_i = isl_map_fix_val(map_i, isl_dim_out, depth + 1,
					isl_val_copy(v));
		/* ...and the current dimension is shifted down by it:
		 * out[depth] = in[depth] - offset[i].
		 */
		v = isl_val_neg(v);
		c = isl_constraint_set_constant_val(c, v);
		map_i = isl_map_add_constraint(map_i, isl_constraint_copy(c));

		map_i = isl_map_apply_domain(isl_map_copy(domain[order[i]].map),
						map_i);
		executed = isl_union_map_add_map(executed, map_i);
	}

	isl_constraint_free(c);
	isl_map_free(map);

	/* An early exit from the loop means isl_multi_val_get_val failed. */
	if (i < n)
		executed = isl_union_map_free(executed);

	return executed;
}

/* Generate code for a single component, after exposing the stride,
 * given that the schedule domain is "shifted strided".
 *
 * The component inverse schedule is specified as the "map" fields
 * of the elements of "domain" indexed by the first "n" elements of "order".
4019 1.1 mrg * 4020 1.1 mrg * The schedule domain being "shifted strided" means that the differences 4021 1.1 mrg * between the values of the current dimension of domain "i" 4022 1.1 mrg * and the values of the current dimension for some reference domain are 4023 1.1 mrg * equal to 4024 1.1 mrg * 4025 1.1 mrg * stride * integer + offset[i] 4026 1.1 mrg * 4027 1.1 mrg * We first look for the domain with the "smallest" value for the current 4028 1.1 mrg * dimension and adjust the offsets such that the offset of the "smallest" 4029 1.1 mrg * domain is equal to zero. The other offsets are reduced modulo stride. 4030 1.1 mrg * 4031 1.1 mrg * Based on this information, we construct a new inverse schedule in 4032 1.1 mrg * construct_shifted_executed that exposes the stride. 4033 1.1 mrg * Since this involves the introduction of a new schedule dimension, 4034 1.1 mrg * the build needs to be changed accordingly. 4035 1.1 mrg * After computing the AST, the newly introduced dimension needs 4036 1.1 mrg * to be removed again from the list of grafts. We do this by plugging 4037 1.1 mrg * in a mapping that represents the new schedule domain in terms of the 4038 1.1 mrg * old schedule domain. 
 */
static __isl_give isl_ast_graft_list *generate_shift_component(
	struct isl_set_map_pair *domain, int *order, int n,
	__isl_keep isl_val *stride, __isl_keep isl_multi_val *offset,
	__isl_take isl_ast_build *build)
{
	isl_ast_graft_list *list;
	int first;
	isl_size depth;
	isl_val *val;
	isl_multi_val *mv;
	isl_space *space;
	isl_multi_aff *ma, *zero;
	isl_union_map *executed;

	depth = isl_ast_build_get_depth(build);

	/* Reference domain: the one with the smallest value
	 * for the current dimension.
	 */
	first = first_offset(domain, order, n, build);
	if (depth < 0 || first < 0)
		goto error;

	/* Normalize the offsets so that the reference domain has
	 * offset zero; reduce the others modulo the stride.
	 */
	mv = isl_multi_val_copy(offset);
	val = isl_multi_val_get_val(offset, first);
	val = isl_val_neg(val);
	mv = isl_multi_val_add_val(mv, val);
	mv = isl_multi_val_mod_val(mv, isl_val_copy(stride));

	executed = construct_shifted_executed(domain, order, n, stride, mv,
						build);
	/* "ma" maps the new (extended) schedule domain back to the old one
	 * by inserting a zero at position depth + 1; it is used below to
	 * remove the extra dimension from the generated grafts again.
	 */
	space = isl_ast_build_get_space(build, 1);
	space = isl_space_map_from_set(space);
	ma = isl_multi_aff_identity(isl_space_copy(space));
	space = isl_space_from_domain(isl_space_domain(space));
	space = isl_space_add_dims(space, isl_dim_out, 1);
	zero = isl_multi_aff_zero(space);
	ma = isl_multi_aff_range_splice(ma, depth + 1, zero);
	build = isl_ast_build_insert_dim(build, depth + 1);
	list = generate_shifted_component(executed, build);

	list = isl_ast_graft_list_preimage_multi_aff(list, ma);

	isl_multi_val_free(mv);

	return list;
error:
	isl_ast_build_free(build);
	return NULL;
}

/* Does
any node in the schedule tree rooted at the current schedule node 4089 1.1 mrg * of "build" depend on outer schedule nodes? 4090 1.1 mrg */ 4091 1.1 mrg static int has_anchored_subtree(__isl_keep isl_ast_build *build) 4092 1.1 mrg { 4093 1.1 mrg isl_schedule_node *node; 4094 1.1 mrg int dependent = 0; 4095 1.1 mrg 4096 1.1 mrg node = isl_ast_build_get_schedule_node(build); 4097 1.1 mrg dependent = isl_schedule_node_is_subtree_anchored(node); 4098 1.1 mrg isl_schedule_node_free(node); 4099 1.1 mrg 4100 1.1 mrg return dependent; 4101 1.1 mrg } 4102 1.1 mrg 4103 1.1 mrg /* Generate code for a single component. 4104 1.1 mrg * 4105 1.1 mrg * The component inverse schedule is specified as the "map" fields 4106 1.1 mrg * of the elements of "domain" indexed by the first "n" elements of "order". 4107 1.1 mrg * 4108 1.1 mrg * This function may modify the "set" fields of "domain". 4109 1.1 mrg * 4110 1.1 mrg * Before proceeding with the actual code generation for the component, 4111 1.1 mrg * we first check if there are any "shifted" strides, meaning that 4112 1.1 mrg * the schedule domains of the individual domains are all strided, 4113 1.1 mrg * but that they have different offsets, resulting in the union 4114 1.1 mrg * of schedule domains not being strided anymore. 4115 1.1 mrg * 4116 1.1 mrg * The simplest example is the schedule 4117 1.1 mrg * 4118 1.1 mrg * { A[i] -> [2i]: 0 <= i < 10; B[i] -> [2i+1] : 0 <= i < 10 } 4119 1.1 mrg * 4120 1.1 mrg * Both schedule domains are strided, but their union is not. 4121 1.1 mrg * This function detects such cases and then rewrites the schedule to 4122 1.1 mrg * 4123 1.1 mrg * { A[i] -> [2i, 0]: 0 <= i < 10; B[i] -> [2i, 1] : 0 <= i < 10 } 4124 1.1 mrg * 4125 1.1 mrg * In the new schedule, the schedule domains have the same offset (modulo 4126 1.1 mrg * the stride), ensuring that the union of schedule domains is also strided. 
4127 1.1 mrg * 4128 1.1 mrg * 4129 1.1 mrg * If there is only a single domain in the component, then there is 4130 1.1 mrg * nothing to do. Similarly, if the current schedule dimension has 4131 1.1 mrg * a fixed value for almost all domains then there is nothing to be done. 4132 1.1 mrg * In particular, we need at least two domains where the current schedule 4133 1.1 mrg * dimension does not have a fixed value. 4134 1.1 mrg * Finally, in case of a schedule map input, 4135 1.1 mrg * if any of the options refer to the current schedule dimension, 4136 1.1 mrg * then we bail out as well. It would be possible to reformulate the options 4137 1.1 mrg * in terms of the new schedule domain, but that would introduce constraints 4138 1.1 mrg * that separate the domains in the options and that is something we would 4139 1.1 mrg * like to avoid. 4140 1.1 mrg * In the case of a schedule tree input, we bail out if any of 4141 1.1 mrg * the descendants of the current schedule node refer to outer 4142 1.1 mrg * schedule nodes in any way. 4143 1.1 mrg * 4144 1.1 mrg * 4145 1.1 mrg * To see if there is any shifted stride, we look at the differences 4146 1.1 mrg * between the values of the current dimension in pairs of domains 4147 1.1 mrg * for equal values of outer dimensions. These differences should be 4148 1.1 mrg * of the form 4149 1.1 mrg * 4150 1.1 mrg * m x + r 4151 1.1 mrg * 4152 1.1 mrg * with "m" the stride and "r" a constant. Note that we cannot perform 4153 1.1 mrg * this analysis on individual domains as the lower bound in each domain 4154 1.1 mrg * may depend on parameters or outer dimensions and so the current dimension 4155 1.1 mrg * itself may not have a fixed remainder on division by the stride. 4156 1.1 mrg * 4157 1.1 mrg * In particular, we compare the first domain that does not have an 4158 1.1 mrg * obviously fixed value for the current dimension to itself and all 4159 1.1 mrg * other domains and collect the offsets and the gcd of the strides. 
 * If the gcd becomes one, then we failed to find shifted strides.
 * If the gcd is zero, then the differences were all fixed, meaning
 * that some domains had non-obviously fixed values for the current dimension.
 * If all the offsets are the same (for those domains that do not have
 * an obviously fixed value for the current dimension), then we do not
 * apply the transformation.
 * If none of the domains were skipped, then there is nothing to do.
 * If some of them were skipped, then if we apply separation, the schedule
 * domain should get split in pieces with a (non-shifted) stride.
 *
 * Otherwise, we apply a shift to expose the stride in
 * generate_shift_component.
 */
static __isl_give isl_ast_graft_list *generate_component(
	struct isl_set_map_pair *domain, int *order, int n,
	__isl_take isl_ast_build *build)
{
	int i, d;
	isl_size depth;
	isl_ctx *ctx;
	isl_map *map;
	isl_set *deltas;
	isl_val *gcd = NULL;
	isl_multi_val *mv;
	int fixed, skip;
	int base;
	isl_ast_graft_list *list;
	int res = 0;

	depth = isl_ast_build_get_depth(build);
	if (depth < 0)
		goto error;

	/* Bail out early in the cases described above:
	 * a single domain, at most one non-fixed domain or
	 * options/descendants referring to the current depth.
	 */
	skip = n == 1;
	if (skip >= 0 && !skip)
		skip = at_most_one_non_fixed(domain, order, n, depth);
	if (skip >= 0 && !skip) {
		if (isl_ast_build_has_schedule_node(build))
			skip = has_anchored_subtree(build);
		else
			skip = isl_ast_build_options_involve_depth(build);
	}
	if (skip < 0)
		goto error;
	if (skip)
		return generate_shifted_component_from_list(domain,
							order, n, build);

	base = eliminate_non_fixed(domain, order, n, depth, build);
	if (base < 0)
		goto error;

	ctx = isl_ast_build_get_ctx(build);

	/* mv collects the offset of each domain relative to "base". */
	mv = isl_multi_val_zero(isl_space_set_alloc(ctx, 0, n));

	fixed = 1;
	for (i = 0; i < n; ++i) {
		isl_val *r, *m;

		/* Differences between the current dimension of domain "i"
		 * and that of "base", for equal outer dimensions.
		 */
		map = isl_map_from_domain_and_range(
					isl_set_copy(domain[order[base]].set),
					isl_set_copy(domain[order[i]].set));
		for (d = 0; d < depth; ++d)
			map = isl_map_equate(map, isl_dim_in, d,
						isl_dim_out, d);
		deltas = isl_map_deltas(map);
		res = isl_set_dim_residue_class_val(deltas, depth, &m, &r);
		isl_set_free(deltas);
		if (res < 0)
			break;

		if (i == 0)
			gcd = m;
		else
			gcd = isl_val_gcd(gcd, m);
		/* gcd of one means there are no shifted strides. */
		if (isl_val_is_one(gcd)) {
			isl_val_free(r);
			break;
		}
		mv = isl_multi_val_set_val(mv, i, r);

		res = dim_is_fixed(domain[order[i]].set, depth);
		if (res < 0)
			break;
		if (res)
			continue;

		/* Track whether all non-fixed domains share the same offset. */
		if (fixed && i > base) {
			isl_val *a, *b;
			a = isl_multi_val_get_val(mv, i);
			b = isl_multi_val_get_val(mv, base);
			if (isl_val_ne(a, b))
				fixed = 0;
			isl_val_free(a);
			isl_val_free(b);
		}
	}

	if (res < 0 || !gcd) {
		isl_ast_build_free(build);
		list = NULL;
	} else if (i < n || fixed || isl_val_is_zero(gcd)) {
		list = generate_shifted_component_from_list(domain,
							order, n, build);
	} else {
		list = generate_shift_component(domain, order, n, gcd, mv,
						build);
	}

	isl_val_free(gcd);
	isl_multi_val_free(mv);

	return list;
error:
	isl_ast_build_free(build);
	return NULL;
}

/* Store both "map" itself and its domain in the
 * structure pointed to by *next and advance to the next array element.
 */
static isl_stat extract_domain(__isl_take isl_map *map, void *user)
{
	struct isl_set_map_pair **next = user;

	(*next)->map = isl_map_copy(map);
	(*next)->set = isl_map_domain(map);
	(*next)++;

	return isl_stat_ok;
}

static isl_bool after_in_tree(__isl_keep isl_union_map *umap,
	__isl_keep isl_schedule_node *node);

/* Is any domain element of "umap" scheduled after any of
 * the corresponding image elements by the tree rooted at
 * the child of "node"?
 */
static isl_bool after_in_child(__isl_keep isl_union_map *umap,
	__isl_keep isl_schedule_node *node)
{
	isl_schedule_node *child;
	isl_bool after;

	child = isl_schedule_node_get_child(node, 0);
	after = after_in_tree(umap, child);
	isl_schedule_node_free(child);

	return after;
}

/* Is any domain element of "umap" scheduled after any of
 * the corresponding image elements by the tree rooted at
 * the band node "node"?
 *
 * We first check if any domain element is scheduled after any
 * of the corresponding image elements by the band node itself.
 * If not, we restrict "map" to those pairs of element that
 * are scheduled together by the band node and continue with
 * the child of the band node.
 * If there are no such pairs then the map passed to after_in_child
 * will be empty causing it to return 0.
 */
static isl_bool after_in_band(__isl_keep isl_union_map *umap,
	__isl_keep isl_schedule_node *node)
{
	isl_multi_union_pw_aff *mupa;
	isl_union_map *partial, *test, *gt, *universe, *umap1, *umap2;
	isl_union_set *domain, *range;
	isl_space *space;
	isl_bool empty;
	isl_bool after;
	isl_size n;

	n = isl_schedule_node_band_n_member(node);
	if (n < 0)
		return isl_bool_error;
	/* A zero-member band does not order anything itself. */
	if (n == 0)
		return after_in_child(umap, node);

	/* Map both sides of "umap" to their band schedule values and
	 * check whether any domain value is lexicographically greater.
	 */
	mupa = isl_schedule_node_band_get_partial_schedule(node);
	space = isl_multi_union_pw_aff_get_space(mupa);
	partial = isl_union_map_from_multi_union_pw_aff(mupa);
	test = isl_union_map_copy(umap);
	test = isl_union_map_apply_domain(test, isl_union_map_copy(partial));
	test = isl_union_map_apply_range(test, isl_union_map_copy(partial));
	gt = isl_union_map_from_map(isl_map_lex_gt(space));
	test = isl_union_map_intersect(test, gt);
	empty = isl_union_map_is_empty(test);
	isl_union_map_free(test);

	if (empty < 0 || !empty) {
		isl_union_map_free(partial);
		return isl_bool_not(empty);
	}

	/* Restrict "umap" to pairs scheduled together by the band
	 * (equal partial schedule values) and recurse into the child.
	 */
	universe = isl_union_map_universe(isl_union_map_copy(umap));
	domain = isl_union_map_domain(isl_union_map_copy(universe));
	range = isl_union_map_range(universe);
	umap1 = isl_union_map_copy(partial);
	umap1 = isl_union_map_intersect_domain(umap1, domain);
	umap2 = isl_union_map_intersect_domain(partial, range);
	test = isl_union_map_apply_range(umap1, isl_union_map_reverse(umap2));
	test = isl_union_map_intersect(test, isl_union_map_copy(umap));
	after = after_in_child(test, node);
	isl_union_map_free(test);
	return after;
}

/* Is any domain element of "umap" scheduled after any of
 * the corresponding image elements by the tree rooted at
 * the context node "node"?
 *
 * The context constraints apply to the schedule domain,
 * so we cannot apply them directly to "umap", which contains
 * pairs of statement instances. Instead, we add them
 * to the range of the prefix schedule for both domain and
 * range of "umap".
 */
static isl_bool after_in_context(__isl_keep isl_union_map *umap,
	__isl_keep isl_schedule_node *node)
{
	isl_union_map *prefix, *universe, *umap1, *umap2;
	isl_union_set *domain, *range;
	isl_set *context;
	isl_bool after;

	umap = isl_union_map_copy(umap);
	context = isl_schedule_node_context_get_context(node);
	prefix = isl_schedule_node_get_prefix_schedule_union_map(node);
	universe = isl_union_map_universe(isl_union_map_copy(umap));
	domain = isl_union_map_domain(isl_union_map_copy(universe));
	range = isl_union_map_range(universe);
	umap1 = isl_union_map_copy(prefix);
	umap1 = isl_union_map_intersect_domain(umap1, domain);
	umap2 = isl_union_map_intersect_domain(prefix, range);
	/* Keep only pairs whose prefix schedule values satisfy
	 * the context and agree on both sides.
	 */
	umap1 = isl_union_map_intersect_range(umap1,
				isl_union_set_from_set(context));
	umap1 = isl_union_map_apply_range(umap1, isl_union_map_reverse(umap2));
	umap = isl_union_map_intersect(umap, umap1);

	after = after_in_child(umap, node);

	isl_union_map_free(umap);

	return after;
}

/* Is any domain element of "umap" scheduled after any of
 * the corresponding image elements by the tree rooted at
 * the expansion node "node"?
 *
 * We apply the expansion to domain and range of "umap" and
 * continue with its child.
 */
static isl_bool after_in_expansion(__isl_keep isl_union_map *umap,
	__isl_keep isl_schedule_node *node)
{
	isl_union_map *expansion;
	isl_bool after;

	expansion = isl_schedule_node_expansion_get_expansion(node);
	umap = isl_union_map_copy(umap);
	umap = isl_union_map_apply_domain(umap, isl_union_map_copy(expansion));
	umap = isl_union_map_apply_range(umap, expansion);

	after = after_in_child(umap, node);

	isl_union_map_free(umap);

	return after;
}

/* Is any domain element of "umap" scheduled after any of
 * the corresponding image elements by the tree rooted at
 * the extension node "node"?
 *
 * Since the extension node may add statement instances before or
 * after the pairs of statement instances in "umap", we return isl_bool_true
 * to ensure that these pairs are not broken up.
4442 1.1 mrg */ 4443 1.1 mrg static isl_bool after_in_extension(__isl_keep isl_union_map *umap, 4444 1.1 mrg __isl_keep isl_schedule_node *node) 4445 1.1 mrg { 4446 1.1 mrg return isl_bool_true; 4447 1.1 mrg } 4448 1.1 mrg 4449 1.1 mrg /* Is any domain element of "umap" scheduled after any of 4450 1.1 mrg * the corresponding image elements by the tree rooted at 4451 1.1 mrg * the filter node "node"? 4452 1.1 mrg * 4453 1.1 mrg * We intersect domain and range of "umap" with the filter and 4454 1.1 mrg * continue with its child. 4455 1.1 mrg */ 4456 1.1 mrg static isl_bool after_in_filter(__isl_keep isl_union_map *umap, 4457 1.1 mrg __isl_keep isl_schedule_node *node) 4458 1.1 mrg { 4459 1.1 mrg isl_union_set *filter; 4460 1.1 mrg isl_bool after; 4461 1.1 mrg 4462 1.1 mrg umap = isl_union_map_copy(umap); 4463 1.1 mrg filter = isl_schedule_node_filter_get_filter(node); 4464 1.1 mrg umap = isl_union_map_intersect_domain(umap, isl_union_set_copy(filter)); 4465 1.1 mrg umap = isl_union_map_intersect_range(umap, filter); 4466 1.1 mrg 4467 1.1 mrg after = after_in_child(umap, node); 4468 1.1 mrg 4469 1.1 mrg isl_union_map_free(umap); 4470 1.1 mrg 4471 1.1 mrg return after; 4472 1.1 mrg } 4473 1.1 mrg 4474 1.1 mrg /* Is any domain element of "umap" scheduled after any of 4475 1.1 mrg * the corresponding image elements by the tree rooted at 4476 1.1 mrg * the set node "node"? 4477 1.1 mrg * 4478 1.1 mrg * This is only the case if this condition holds in any 4479 1.1 mrg * of the (filter) children of the set node. 4480 1.1 mrg * In particular, if the domain and the range of "umap" 4481 1.1 mrg * are contained in different children, then the condition 4482 1.1 mrg * does not hold. 
4483 1.1 mrg */ 4484 1.1 mrg static isl_bool after_in_set(__isl_keep isl_union_map *umap, 4485 1.1 mrg __isl_keep isl_schedule_node *node) 4486 1.1 mrg { 4487 1.1 mrg int i; 4488 1.1 mrg isl_size n; 4489 1.1 mrg 4490 1.1 mrg n = isl_schedule_node_n_children(node); 4491 1.1 mrg if (n < 0) 4492 1.1 mrg return isl_bool_error; 4493 1.1 mrg for (i = 0; i < n; ++i) { 4494 1.1 mrg isl_schedule_node *child; 4495 1.1 mrg isl_bool after; 4496 1.1 mrg 4497 1.1 mrg child = isl_schedule_node_get_child(node, i); 4498 1.1 mrg after = after_in_tree(umap, child); 4499 1.1 mrg isl_schedule_node_free(child); 4500 1.1 mrg 4501 1.1 mrg if (after < 0 || after) 4502 1.1 mrg return after; 4503 1.1 mrg } 4504 1.1 mrg 4505 1.1 mrg return isl_bool_false; 4506 1.1 mrg } 4507 1.1 mrg 4508 1.1 mrg /* Return the filter of child "i" of "node". 4509 1.1 mrg */ 4510 1.1 mrg static __isl_give isl_union_set *child_filter( 4511 1.1 mrg __isl_keep isl_schedule_node *node, int i) 4512 1.1 mrg { 4513 1.1 mrg isl_schedule_node *child; 4514 1.1 mrg isl_union_set *filter; 4515 1.1 mrg 4516 1.1 mrg child = isl_schedule_node_get_child(node, i); 4517 1.1 mrg filter = isl_schedule_node_filter_get_filter(child); 4518 1.1 mrg isl_schedule_node_free(child); 4519 1.1 mrg 4520 1.1 mrg return filter; 4521 1.1 mrg } 4522 1.1 mrg 4523 1.1 mrg /* Is any domain element of "umap" scheduled after any of 4524 1.1 mrg * the corresponding image elements by the tree rooted at 4525 1.1 mrg * the sequence node "node"? 4526 1.1 mrg * 4527 1.1 mrg * This happens in particular if any domain element is 4528 1.1 mrg * contained in a later child than one containing a range element or 4529 1.1 mrg * if the condition holds within a given child in the sequence. 4530 1.1 mrg * The later part of the condition is checked by after_in_set. 
 */
static isl_bool after_in_sequence(__isl_keep isl_union_map *umap,
	__isl_keep isl_schedule_node *node)
{
	int i, j;
	isl_size n;
	isl_union_map *umap_i;
	isl_bool empty;
	isl_bool after = isl_bool_false;

	n = isl_schedule_node_n_children(node);
	if (n < 0)
		return isl_bool_error;
	/* Consider each child "i" (the first one cannot have an earlier
	 * sibling) as a potential container of domain elements and
	 * each earlier child "j" as a potential container of
	 * the corresponding range elements.
	 */
	for (i = 1; i < n; ++i) {
		isl_union_set *filter_i;

		umap_i = isl_union_map_copy(umap);
		filter_i = child_filter(node, i);
		umap_i = isl_union_map_intersect_domain(umap_i, filter_i);
		empty = isl_union_map_is_empty(umap_i);
		if (empty < 0)
			goto error;
		if (empty) {
			/* No domain element in child "i"; try the next one. */
			isl_union_map_free(umap_i);
			continue;
		}

		for (j = 0; j < i; ++j) {
			isl_union_set *filter_j;
			isl_union_map *umap_ij;

			umap_ij = isl_union_map_copy(umap_i);
			filter_j = child_filter(node, j);
			umap_ij = isl_union_map_intersect_range(umap_ij,
						filter_j);
			empty = isl_union_map_is_empty(umap_ij);
			isl_union_map_free(umap_ij);

			if (empty < 0)
				goto error;
			/* A non-empty intersection means some domain element
			 * in the later child "i" corresponds to a range
			 * element in the earlier child "j".
			 */
			if (!empty)
				after = isl_bool_true;
			if (after)
				break;
		}

		isl_union_map_free(umap_i);
		if (after)
			break;
	}

	if (after < 0 || after)
		return after;

	/* No violation across children; check within each child. */
	return after_in_set(umap, node);
error:
	isl_union_map_free(umap_i);
	return isl_bool_error;
}

/* Is any domain element of "umap" scheduled after any of
 * the corresponding
image elements by the tree rooted at "node"? 4593 1.1 mrg * 4594 1.1 mrg * If "umap" is empty, then clearly there is no such element. 4595 1.1 mrg * Otherwise, consider the different types of nodes separately. 4596 1.1 mrg */ 4597 1.1 mrg static isl_bool after_in_tree(__isl_keep isl_union_map *umap, 4598 1.1 mrg __isl_keep isl_schedule_node *node) 4599 1.1 mrg { 4600 1.1 mrg isl_bool empty; 4601 1.1 mrg enum isl_schedule_node_type type; 4602 1.1 mrg 4603 1.1 mrg empty = isl_union_map_is_empty(umap); 4604 1.1 mrg if (empty < 0) 4605 1.1 mrg return isl_bool_error; 4606 1.1 mrg if (empty) 4607 1.1 mrg return isl_bool_false; 4608 1.1 mrg if (!node) 4609 1.1 mrg return isl_bool_error; 4610 1.1 mrg 4611 1.1 mrg type = isl_schedule_node_get_type(node); 4612 1.1 mrg switch (type) { 4613 1.1 mrg case isl_schedule_node_error: 4614 1.1 mrg return isl_bool_error; 4615 1.1 mrg case isl_schedule_node_leaf: 4616 1.1 mrg return isl_bool_false; 4617 1.1 mrg case isl_schedule_node_band: 4618 1.1 mrg return after_in_band(umap, node); 4619 1.1 mrg case isl_schedule_node_domain: 4620 1.1 mrg isl_die(isl_schedule_node_get_ctx(node), isl_error_internal, 4621 1.1 mrg "unexpected internal domain node", 4622 1.1 mrg return isl_bool_error); 4623 1.1 mrg case isl_schedule_node_context: 4624 1.1 mrg return after_in_context(umap, node); 4625 1.1 mrg case isl_schedule_node_expansion: 4626 1.1 mrg return after_in_expansion(umap, node); 4627 1.1 mrg case isl_schedule_node_extension: 4628 1.1 mrg return after_in_extension(umap, node); 4629 1.1 mrg case isl_schedule_node_filter: 4630 1.1 mrg return after_in_filter(umap, node); 4631 1.1 mrg case isl_schedule_node_guard: 4632 1.1 mrg case isl_schedule_node_mark: 4633 1.1 mrg return after_in_child(umap, node); 4634 1.1 mrg case isl_schedule_node_set: 4635 1.1 mrg return after_in_set(umap, node); 4636 1.1 mrg case isl_schedule_node_sequence: 4637 1.1 mrg return after_in_sequence(umap, node); 4638 1.1 mrg } 4639 1.1 mrg 4640 1.1 mrg return isl_bool_true; 
4641 1.1 mrg } 4642 1.1 mrg 4643 1.1 mrg /* Is any domain element of "map1" scheduled after any domain 4644 1.1 mrg * element of "map2" by the subtree underneath the current band node, 4645 1.1 mrg * while at the same time being scheduled together by the current 4646 1.1 mrg * band node, i.e., by "map1" and "map2? 4647 1.1 mrg * 4648 1.1 mrg * If the child of the current band node is a leaf, then 4649 1.1 mrg * no element can be scheduled after any other element. 4650 1.1 mrg * 4651 1.1 mrg * Otherwise, we construct a relation between domain elements 4652 1.1 mrg * of "map1" and domain elements of "map2" that are scheduled 4653 1.1 mrg * together and then check if the subtree underneath the current 4654 1.1 mrg * band node determines their relative order. 4655 1.1 mrg */ 4656 1.1 mrg static isl_bool after_in_subtree(__isl_keep isl_ast_build *build, 4657 1.1 mrg __isl_keep isl_map *map1, __isl_keep isl_map *map2) 4658 1.1 mrg { 4659 1.1 mrg isl_schedule_node *node; 4660 1.1 mrg isl_map *map; 4661 1.1 mrg isl_union_map *umap; 4662 1.1 mrg isl_bool after; 4663 1.1 mrg 4664 1.1 mrg node = isl_ast_build_get_schedule_node(build); 4665 1.1 mrg if (!node) 4666 1.1 mrg return isl_bool_error; 4667 1.1 mrg node = isl_schedule_node_child(node, 0); 4668 1.1 mrg if (isl_schedule_node_get_type(node) == isl_schedule_node_leaf) { 4669 1.1 mrg isl_schedule_node_free(node); 4670 1.1 mrg return isl_bool_false; 4671 1.1 mrg } 4672 1.1 mrg map = isl_map_copy(map2); 4673 1.1 mrg map = isl_map_apply_domain(map, isl_map_copy(map1)); 4674 1.1 mrg umap = isl_union_map_from_map(map); 4675 1.1 mrg after = after_in_tree(umap, node); 4676 1.1 mrg isl_union_map_free(umap); 4677 1.1 mrg isl_schedule_node_free(node); 4678 1.1 mrg return after; 4679 1.1 mrg } 4680 1.1 mrg 4681 1.1 mrg /* Internal data for any_scheduled_after. 4682 1.1 mrg * 4683 1.1 mrg * "build" is the build in which the AST is constructed. 
4684 1.1 mrg * "depth" is the number of loops that have already been generated 4685 1.1 mrg * "group_coscheduled" is a local copy of options->ast_build_group_coscheduled 4686 1.1 mrg * "domain" is an array of set-map pairs corresponding to the different 4687 1.1 mrg * iteration domains. The set is the schedule domain, i.e., the domain 4688 1.1 mrg * of the inverse schedule, while the map is the inverse schedule itself. 4689 1.1 mrg */ 4690 1.1 mrg struct isl_any_scheduled_after_data { 4691 1.1 mrg isl_ast_build *build; 4692 1.1 mrg int depth; 4693 1.1 mrg int group_coscheduled; 4694 1.1 mrg struct isl_set_map_pair *domain; 4695 1.1 mrg }; 4696 1.1 mrg 4697 1.1 mrg /* Is any element of domain "i" scheduled after any element of domain "j" 4698 1.1 mrg * (for a common iteration of the first data->depth loops)? 4699 1.1 mrg * 4700 1.1 mrg * data->domain[i].set contains the domain of the inverse schedule 4701 1.1 mrg * for domain "i", i.e., elements in the schedule domain. 4702 1.1 mrg * 4703 1.1 mrg * If we are inside a band of a schedule tree and there is a pair 4704 1.1 mrg * of elements in the two domains that is schedule together by 4705 1.1 mrg * the current band, then we check if any element of "i" may be schedule 4706 1.1 mrg * after element of "j" by the descendants of the band node. 4707 1.1 mrg * 4708 1.1 mrg * If data->group_coscheduled is set, then we also return 1 if there 4709 1.1 mrg * is any pair of elements in the two domains that are scheduled together. 
4710 1.1 mrg */ 4711 1.1 mrg static isl_bool any_scheduled_after(int i, int j, void *user) 4712 1.1 mrg { 4713 1.1 mrg struct isl_any_scheduled_after_data *data = user; 4714 1.1 mrg isl_size dim = isl_set_dim(data->domain[i].set, isl_dim_set); 4715 1.1 mrg int pos; 4716 1.1 mrg 4717 1.1 mrg if (dim < 0) 4718 1.1 mrg return isl_bool_error; 4719 1.1 mrg 4720 1.1 mrg for (pos = data->depth; pos < dim; ++pos) { 4721 1.1 mrg int follows; 4722 1.1 mrg 4723 1.1 mrg follows = isl_set_follows_at(data->domain[i].set, 4724 1.1 mrg data->domain[j].set, pos); 4725 1.1 mrg 4726 1.1 mrg if (follows < -1) 4727 1.1 mrg return isl_bool_error; 4728 1.1 mrg if (follows > 0) 4729 1.1 mrg return isl_bool_true; 4730 1.1 mrg if (follows < 0) 4731 1.1 mrg return isl_bool_false; 4732 1.1 mrg } 4733 1.1 mrg 4734 1.1 mrg if (isl_ast_build_has_schedule_node(data->build)) { 4735 1.1 mrg isl_bool after; 4736 1.1 mrg 4737 1.1 mrg after = after_in_subtree(data->build, data->domain[i].map, 4738 1.1 mrg data->domain[j].map); 4739 1.1 mrg if (after < 0 || after) 4740 1.1 mrg return after; 4741 1.1 mrg } 4742 1.1 mrg 4743 1.1 mrg return isl_bool_ok(data->group_coscheduled); 4744 1.1 mrg } 4745 1.1 mrg 4746 1.1 mrg /* Look for independent components at the current depth and generate code 4747 1.1 mrg * for each component separately. The resulting lists of grafts are 4748 1.1 mrg * merged in an attempt to combine grafts with identical guards. 4749 1.1 mrg * 4750 1.1 mrg * Code for two domains can be generated separately if all the elements 4751 1.1 mrg * of one domain are scheduled before (or together with) all the elements 4752 1.1 mrg * of the other domain. We therefore consider the graph with as nodes 4753 1.1 mrg * the domains and an edge between two nodes if any element of the first 4754 1.1 mrg * node is scheduled after any element of the second node. 
 * If the ast_build_group_coscheduled is set, then we also add an edge if
 * there is any pair of elements in the two domains that are scheduled
 * together.
 * Code is then generated (by generate_component)
 * for each of the strongly connected components in this graph
 * in their topological order.
 *
 * Since the test is performed on the domain of the inverse schedules of
 * the different domains, we precompute these domains and store
 * them in data.domain.
 */
static __isl_give isl_ast_graft_list *generate_components(
	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
{
	int i;
	isl_ctx *ctx = isl_ast_build_get_ctx(build);
	isl_size n = isl_union_map_n_map(executed);
	isl_size depth;
	struct isl_any_scheduled_after_data data;
	struct isl_set_map_pair *next;
	struct isl_tarjan_graph *g = NULL;
	isl_ast_graft_list *list = NULL;
	/* Number of entries of data.domain that have been filled in
	 * and therefore need to be freed on exit.
	 */
	int n_domain = 0;

	data.domain = NULL;
	if (n < 0)
		goto error;
	data.domain = isl_calloc_array(ctx, struct isl_set_map_pair, n);
	if (!data.domain)
		goto error;
	n_domain = n;

	/* Fill in data.domain with one (set, map) pair per map in
	 * "executed"; "next" is advanced by the callback.
	 */
	next = data.domain;
	if (isl_union_map_foreach_map(executed, &extract_domain, &next) < 0)
		goto error;

	depth = isl_ast_build_get_depth(build);
	if (depth < 0)
		goto error;
	data.build = build;
	data.depth = depth;
	data.group_coscheduled = isl_options_get_ast_build_group_coscheduled(ctx);
	g = isl_tarjan_graph_init(ctx, n, &any_scheduled_after, &data);
	if (!g)
		goto error;

	list = isl_ast_graft_list_alloc(ctx, 0);

	/* g->order lists the node indices of the components in
	 * topological order, with each component terminated by -1.
	 * "n" counts the nodes that still need to be processed.
	 */
	i = 0;
	while (list && n) {
		isl_ast_graft_list *list_c;
		int first = i;

		if (g->order[i] == -1)
			isl_die(ctx, isl_error_internal, "cannot happen",
				goto error);
		++i; --n;
		while (g->order[i] != -1) {
			++i; --n;
		}

		list_c = generate_component(data.domain,
					g->order + first, i - first,
					isl_ast_build_copy(build));
		list = isl_ast_graft_list_merge(list, list_c, build);

		/* Skip the -1 component terminator. */
		++i;
	}

	/* Shared cleanup: the error path additionally frees "list";
	 * the "if (0)" makes the normal path skip that statement.
	 */
	if (0)
error:		list = isl_ast_graft_list_free(list);
	isl_tarjan_graph_free(g);
	for (i = 0; i < n_domain; ++i) {
		isl_map_free(data.domain[i].map);
		isl_set_free(data.domain[i].set);
	}
	free(data.domain);
	isl_union_map_free(executed);
	isl_ast_build_free(build);

	return list;
}

/* Generate code for the next level (and all inner levels).
 *
 * If "executed" is empty, i.e., no code needs to be generated,
 * then we return an empty list.
 *
 * If we have already generated code for all loop levels, then we pass
 * control to generate_inner_level.
 *
 * If "executed" lives in a single space, i.e., if code needs to be
 * generated for a single domain, then there can only be a single
 * component and we go directly to generate_shifted_component.
 * Otherwise, we call generate_components to detect the components
 * and to call generate_component on each of them separately.
4851 1.1 mrg */ 4852 1.1 mrg static __isl_give isl_ast_graft_list *generate_next_level( 4853 1.1 mrg __isl_take isl_union_map *executed, __isl_take isl_ast_build *build) 4854 1.1 mrg { 4855 1.1 mrg isl_size depth; 4856 1.1 mrg isl_size dim; 4857 1.1 mrg isl_size n; 4858 1.1 mrg 4859 1.1 mrg if (!build || !executed) 4860 1.1 mrg goto error; 4861 1.1 mrg 4862 1.1 mrg if (isl_union_map_is_empty(executed)) { 4863 1.1 mrg isl_ctx *ctx = isl_ast_build_get_ctx(build); 4864 1.1 mrg isl_union_map_free(executed); 4865 1.1 mrg isl_ast_build_free(build); 4866 1.1 mrg return isl_ast_graft_list_alloc(ctx, 0); 4867 1.1 mrg } 4868 1.1 mrg 4869 1.1 mrg depth = isl_ast_build_get_depth(build); 4870 1.1 mrg dim = isl_ast_build_dim(build, isl_dim_set); 4871 1.1 mrg if (depth < 0 || dim < 0) 4872 1.1 mrg goto error; 4873 1.1 mrg if (depth >= dim) 4874 1.1 mrg return generate_inner_level(executed, build); 4875 1.1 mrg 4876 1.1 mrg n = isl_union_map_n_map(executed); 4877 1.1 mrg if (n < 0) 4878 1.1 mrg goto error; 4879 1.1 mrg if (n == 1) 4880 1.1 mrg return generate_shifted_component(executed, build); 4881 1.1 mrg 4882 1.1 mrg return generate_components(executed, build); 4883 1.1 mrg error: 4884 1.1 mrg isl_union_map_free(executed); 4885 1.1 mrg isl_ast_build_free(build); 4886 1.1 mrg return NULL; 4887 1.1 mrg } 4888 1.1 mrg 4889 1.1 mrg /* Internal data structure used by isl_ast_build_node_from_schedule_map. 4890 1.1 mrg * internal, executed and build are the inputs to generate_code. 4891 1.1 mrg * list collects the output. 
4892 1.1 mrg */ 4893 1.1 mrg struct isl_generate_code_data { 4894 1.1 mrg int internal; 4895 1.1 mrg isl_union_map *executed; 4896 1.1 mrg isl_ast_build *build; 4897 1.1 mrg 4898 1.1 mrg isl_ast_graft_list *list; 4899 1.1 mrg }; 4900 1.1 mrg 4901 1.1 mrg /* Given an inverse schedule in terms of the external build schedule, i.e., 4902 1.1 mrg * 4903 1.1 mrg * [E -> S] -> D 4904 1.1 mrg * 4905 1.1 mrg * with E the external build schedule and S the additional schedule "space", 4906 1.1 mrg * reformulate the inverse schedule in terms of the internal schedule domain, 4907 1.1 mrg * i.e., return 4908 1.1 mrg * 4909 1.1 mrg * [I -> S] -> D 4910 1.1 mrg * 4911 1.1 mrg * We first obtain a mapping 4912 1.1 mrg * 4913 1.1 mrg * I -> E 4914 1.1 mrg * 4915 1.1 mrg * take the inverse and the product with S -> S, resulting in 4916 1.1 mrg * 4917 1.1 mrg * [I -> S] -> [E -> S] 4918 1.1 mrg * 4919 1.1 mrg * Applying the map to the input produces the desired result. 4920 1.1 mrg */ 4921 1.1 mrg static __isl_give isl_union_map *internal_executed( 4922 1.1 mrg __isl_take isl_union_map *executed, __isl_keep isl_space *space, 4923 1.1 mrg __isl_keep isl_ast_build *build) 4924 1.1 mrg { 4925 1.1 mrg isl_map *id, *proj; 4926 1.1 mrg 4927 1.1 mrg proj = isl_ast_build_get_schedule_map(build); 4928 1.1 mrg proj = isl_map_reverse(proj); 4929 1.1 mrg space = isl_space_map_from_set(isl_space_copy(space)); 4930 1.1 mrg id = isl_map_identity(space); 4931 1.1 mrg proj = isl_map_product(proj, id); 4932 1.1 mrg executed = isl_union_map_apply_domain(executed, 4933 1.1 mrg isl_union_map_from_map(proj)); 4934 1.1 mrg return executed; 4935 1.1 mrg } 4936 1.1 mrg 4937 1.1 mrg /* Generate an AST that visits the elements in the range of data->executed 4938 1.1 mrg * in the relative order specified by the corresponding domain element(s) 4939 1.1 mrg * for those domain elements that belong to "set". 4940 1.1 mrg * Add the result to data->list. 
4941 1.1 mrg * 4942 1.1 mrg * The caller ensures that "set" is a universe domain. 4943 1.1 mrg * "space" is the space of the additional part of the schedule. 4944 1.1 mrg * It is equal to the space of "set" if build->domain is parametric. 4945 1.1 mrg * Otherwise, it is equal to the range of the wrapped space of "set". 4946 1.1 mrg * 4947 1.1 mrg * If the build space is not parametric and 4948 1.1 mrg * if isl_ast_build_node_from_schedule_map 4949 1.1 mrg * was called from an outside user (data->internal not set), then 4950 1.1 mrg * the (inverse) schedule refers to the external build domain and needs to 4951 1.1 mrg * be transformed to refer to the internal build domain. 4952 1.1 mrg * 4953 1.1 mrg * If the build space is parametric, then we add some of the parameter 4954 1.1 mrg * constraints to the executed relation. Adding these constraints 4955 1.1 mrg * allows for an earlier detection of conflicts in some cases. 4956 1.1 mrg * However, we do not want to divide the executed relation into 4957 1.1 mrg * more disjuncts than necessary. We therefore approximate 4958 1.1 mrg * the constraints on the parameters by a single disjunct set. 4959 1.1 mrg * 4960 1.1 mrg * The build is extended to include the additional part of the schedule. 4961 1.1 mrg * If the original build space was not parametric, then the options 4962 1.1 mrg * in data->build refer only to the additional part of the schedule 4963 1.1 mrg * and they need to be adjusted to refer to the complete AST build 4964 1.1 mrg * domain. 4965 1.1 mrg * 4966 1.1 mrg * After having adjusted inverse schedule and build, we start generating 4967 1.1 mrg * code with the outer loop of the current code generation 4968 1.1 mrg * in generate_next_level. 4969 1.1 mrg * 4970 1.1 mrg * If the original build space was not parametric, we undo the embedding 4971 1.1 mrg * on the resulting isl_ast_node_list so that it can be used within 4972 1.1 mrg * the outer AST build. 
4973 1.1 mrg */ 4974 1.1 mrg static isl_stat generate_code_in_space(struct isl_generate_code_data *data, 4975 1.1 mrg __isl_take isl_set *set, __isl_take isl_space *space) 4976 1.1 mrg { 4977 1.1 mrg isl_union_map *executed; 4978 1.1 mrg isl_ast_build *build; 4979 1.1 mrg isl_ast_graft_list *list; 4980 1.1 mrg int embed; 4981 1.1 mrg 4982 1.1 mrg executed = isl_union_map_copy(data->executed); 4983 1.1 mrg executed = isl_union_map_intersect_domain(executed, 4984 1.1 mrg isl_union_set_from_set(set)); 4985 1.1 mrg 4986 1.1 mrg embed = !isl_set_is_params(data->build->domain); 4987 1.1 mrg if (embed && !data->internal) 4988 1.1 mrg executed = internal_executed(executed, space, data->build); 4989 1.1 mrg if (!embed) { 4990 1.1 mrg isl_set *domain; 4991 1.1 mrg domain = isl_ast_build_get_domain(data->build); 4992 1.1 mrg domain = isl_set_from_basic_set(isl_set_simple_hull(domain)); 4993 1.1 mrg executed = isl_union_map_intersect_params(executed, domain); 4994 1.1 mrg } 4995 1.1 mrg 4996 1.1 mrg build = isl_ast_build_copy(data->build); 4997 1.1 mrg build = isl_ast_build_product(build, space); 4998 1.1 mrg 4999 1.1 mrg list = generate_next_level(executed, build); 5000 1.1 mrg 5001 1.1 mrg list = isl_ast_graft_list_unembed(list, embed); 5002 1.1 mrg 5003 1.1 mrg data->list = isl_ast_graft_list_concat(data->list, list); 5004 1.1 mrg 5005 1.1 mrg return isl_stat_ok; 5006 1.1 mrg } 5007 1.1 mrg 5008 1.1 mrg /* Generate an AST that visits the elements in the range of data->executed 5009 1.1 mrg * in the relative order specified by the corresponding domain element(s) 5010 1.1 mrg * for those domain elements that belong to "set". 5011 1.1 mrg * Add the result to data->list. 5012 1.1 mrg * 5013 1.1 mrg * The caller ensures that "set" is a universe domain. 5014 1.1 mrg * 5015 1.1 mrg * If the build space S is not parametric, then the space of "set" 5016 1.1 mrg * need to be a wrapped relation with S as domain. 
That is, it needs 5017 1.1 mrg * to be of the form 5018 1.1 mrg * 5019 1.1 mrg * [S -> T] 5020 1.1 mrg * 5021 1.1 mrg * Check this property and pass control to generate_code_in_space 5022 1.1 mrg * passing along T. 5023 1.1 mrg * If the build space is not parametric, then T is the space of "set". 5024 1.1 mrg */ 5025 1.1 mrg static isl_stat generate_code_set(__isl_take isl_set *set, void *user) 5026 1.1 mrg { 5027 1.1 mrg struct isl_generate_code_data *data = user; 5028 1.1 mrg isl_space *space, *build_space; 5029 1.1 mrg int is_domain; 5030 1.1 mrg 5031 1.1 mrg space = isl_set_get_space(set); 5032 1.1 mrg 5033 1.1 mrg if (isl_set_is_params(data->build->domain)) 5034 1.1 mrg return generate_code_in_space(data, set, space); 5035 1.1 mrg 5036 1.1 mrg build_space = isl_ast_build_get_space(data->build, data->internal); 5037 1.1 mrg space = isl_space_unwrap(space); 5038 1.1 mrg is_domain = isl_space_is_domain(build_space, space); 5039 1.1 mrg isl_space_free(build_space); 5040 1.1 mrg space = isl_space_range(space); 5041 1.1 mrg 5042 1.1 mrg if (is_domain < 0) 5043 1.1 mrg goto error; 5044 1.1 mrg if (!is_domain) 5045 1.1 mrg isl_die(isl_set_get_ctx(set), isl_error_invalid, 5046 1.1 mrg "invalid nested schedule space", goto error); 5047 1.1 mrg 5048 1.1 mrg return generate_code_in_space(data, set, space); 5049 1.1 mrg error: 5050 1.1 mrg isl_set_free(set); 5051 1.1 mrg isl_space_free(space); 5052 1.1 mrg return isl_stat_error; 5053 1.1 mrg } 5054 1.1 mrg 5055 1.1 mrg /* Generate an AST that visits the elements in the range of "executed" 5056 1.1 mrg * in the relative order specified by the corresponding domain element(s). 5057 1.1 mrg * 5058 1.1 mrg * "build" is an isl_ast_build that has either been constructed by 5059 1.1 mrg * isl_ast_build_from_context or passed to a callback set by 5060 1.1 mrg * isl_ast_build_set_create_leaf. 
5061 1.1 mrg * In the first case, the space of the isl_ast_build is typically 5062 1.1 mrg * a parametric space, although this is currently not enforced. 5063 1.1 mrg * In the second case, the space is never a parametric space. 5064 1.1 mrg * If the space S is not parametric, then the domain space(s) of "executed" 5065 1.1 mrg * need to be wrapped relations with S as domain. 5066 1.1 mrg * 5067 1.1 mrg * If the domain of "executed" consists of several spaces, then an AST 5068 1.1 mrg * is generated for each of them (in arbitrary order) and the results 5069 1.1 mrg * are concatenated. 5070 1.1 mrg * 5071 1.1 mrg * If "internal" is set, then the domain "S" above refers to the internal 5072 1.1 mrg * schedule domain representation. Otherwise, it refers to the external 5073 1.1 mrg * representation, as returned by isl_ast_build_get_schedule_space. 5074 1.1 mrg * 5075 1.1 mrg * We essentially run over all the spaces in the domain of "executed" 5076 1.1 mrg * and call generate_code_set on each of them. 
5077 1.1 mrg */ 5078 1.1 mrg static __isl_give isl_ast_graft_list *generate_code( 5079 1.1 mrg __isl_take isl_union_map *executed, __isl_take isl_ast_build *build, 5080 1.1 mrg int internal) 5081 1.1 mrg { 5082 1.1 mrg isl_ctx *ctx; 5083 1.1 mrg struct isl_generate_code_data data = { 0 }; 5084 1.1 mrg isl_space *space; 5085 1.1 mrg isl_union_set *schedule_domain; 5086 1.1 mrg isl_union_map *universe; 5087 1.1 mrg 5088 1.1 mrg if (!build) 5089 1.1 mrg goto error; 5090 1.1 mrg space = isl_ast_build_get_space(build, 1); 5091 1.1 mrg space = isl_space_align_params(space, 5092 1.1 mrg isl_union_map_get_space(executed)); 5093 1.1 mrg space = isl_space_align_params(space, 5094 1.1 mrg isl_union_map_get_space(build->options)); 5095 1.1 mrg build = isl_ast_build_align_params(build, isl_space_copy(space)); 5096 1.1 mrg executed = isl_union_map_align_params(executed, space); 5097 1.1 mrg if (!executed || !build) 5098 1.1 mrg goto error; 5099 1.1 mrg 5100 1.1 mrg ctx = isl_ast_build_get_ctx(build); 5101 1.1 mrg 5102 1.1 mrg data.internal = internal; 5103 1.1 mrg data.executed = executed; 5104 1.1 mrg data.build = build; 5105 1.1 mrg data.list = isl_ast_graft_list_alloc(ctx, 0); 5106 1.1 mrg 5107 1.1 mrg universe = isl_union_map_universe(isl_union_map_copy(executed)); 5108 1.1 mrg schedule_domain = isl_union_map_domain(universe); 5109 1.1 mrg if (isl_union_set_foreach_set(schedule_domain, &generate_code_set, 5110 1.1 mrg &data) < 0) 5111 1.1 mrg data.list = isl_ast_graft_list_free(data.list); 5112 1.1 mrg 5113 1.1 mrg isl_union_set_free(schedule_domain); 5114 1.1 mrg isl_union_map_free(executed); 5115 1.1 mrg 5116 1.1 mrg isl_ast_build_free(build); 5117 1.1 mrg return data.list; 5118 1.1 mrg error: 5119 1.1 mrg isl_union_map_free(executed); 5120 1.1 mrg isl_ast_build_free(build); 5121 1.1 mrg return NULL; 5122 1.1 mrg } 5123 1.1 mrg 5124 1.1 mrg /* Generate an AST that visits the elements in the domain of "schedule" 5125 1.1 mrg * in the relative order specified by the 
corresponding image element(s). 5126 1.1 mrg * 5127 1.1 mrg * "build" is an isl_ast_build that has either been constructed by 5128 1.1 mrg * isl_ast_build_from_context or passed to a callback set by 5129 1.1 mrg * isl_ast_build_set_create_leaf. 5130 1.1 mrg * In the first case, the space of the isl_ast_build is typically 5131 1.1 mrg * a parametric space, although this is currently not enforced. 5132 1.1 mrg * In the second case, the space is never a parametric space. 5133 1.1 mrg * If the space S is not parametric, then the range space(s) of "schedule" 5134 1.1 mrg * need to be wrapped relations with S as domain. 5135 1.1 mrg * 5136 1.1 mrg * If the range of "schedule" consists of several spaces, then an AST 5137 1.1 mrg * is generated for each of them (in arbitrary order) and the results 5138 1.1 mrg * are concatenated. 5139 1.1 mrg * 5140 1.1 mrg * We first initialize the local copies of the relevant options. 5141 1.1 mrg * We do this here rather than when the isl_ast_build is created 5142 1.1 mrg * because the options may have changed between the construction 5143 1.1 mrg * of the isl_ast_build and the call to isl_generate_code. 5144 1.1 mrg * 5145 1.1 mrg * The main computation is performed on an inverse schedule (with 5146 1.1 mrg * the schedule domain in the domain and the elements to be executed 5147 1.1 mrg * in the range) called "executed". 
5148 1.1 mrg */ 5149 1.1 mrg __isl_give isl_ast_node *isl_ast_build_node_from_schedule_map( 5150 1.1 mrg __isl_keep isl_ast_build *build, __isl_take isl_union_map *schedule) 5151 1.1 mrg { 5152 1.1 mrg isl_ast_graft_list *list; 5153 1.1 mrg isl_ast_node *node; 5154 1.1 mrg isl_union_map *executed; 5155 1.1 mrg 5156 1.1 mrg build = isl_ast_build_copy(build); 5157 1.1 mrg build = isl_ast_build_set_single_valued(build, 0); 5158 1.1 mrg schedule = isl_union_map_coalesce(schedule); 5159 1.1 mrg schedule = isl_union_map_remove_redundancies(schedule); 5160 1.1 mrg executed = isl_union_map_reverse(schedule); 5161 1.1 mrg list = generate_code(executed, isl_ast_build_copy(build), 0); 5162 1.1 mrg node = isl_ast_node_from_graft_list(list, build); 5163 1.1 mrg isl_ast_build_free(build); 5164 1.1 mrg 5165 1.1 mrg return node; 5166 1.1 mrg } 5167 1.1 mrg 5168 1.1 mrg /* The old name for isl_ast_build_node_from_schedule_map. 5169 1.1 mrg * It is being kept for backward compatibility, but 5170 1.1 mrg * it will be removed in the future. 5171 1.1 mrg */ 5172 1.1 mrg __isl_give isl_ast_node *isl_ast_build_ast_from_schedule( 5173 1.1 mrg __isl_keep isl_ast_build *build, __isl_take isl_union_map *schedule) 5174 1.1 mrg { 5175 1.1 mrg return isl_ast_build_node_from_schedule_map(build, schedule); 5176 1.1 mrg } 5177 1.1 mrg 5178 1.1 mrg /* Generate an AST that visits the elements in the domain of "executed" 5179 1.1 mrg * in the relative order specified by the leaf node "node". 5180 1.1 mrg * 5181 1.1 mrg * The relation "executed" maps the outer generated loop iterators 5182 1.1 mrg * to the domain elements executed by those iterations. 5183 1.1 mrg * 5184 1.1 mrg * Simply pass control to generate_inner_level. 5185 1.1 mrg * Note that the current build does not refer to any band node, so 5186 1.1 mrg * that generate_inner_level will not try to visit the child of 5187 1.1 mrg * the leaf node. 
5188 1.1 mrg * 5189 1.1 mrg * If multiple statement instances reach a leaf, 5190 1.1 mrg * then they can be executed in any order. 5191 1.1 mrg * Group the list of grafts based on shared guards 5192 1.1 mrg * such that identical guards are only generated once 5193 1.1 mrg * when the list is eventually passed on to isl_ast_graft_list_fuse. 5194 1.1 mrg */ 5195 1.1 mrg static __isl_give isl_ast_graft_list *build_ast_from_leaf( 5196 1.1 mrg __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node, 5197 1.1 mrg __isl_take isl_union_map *executed) 5198 1.1 mrg { 5199 1.1 mrg isl_ast_graft_list *list; 5200 1.1 mrg 5201 1.1 mrg isl_schedule_node_free(node); 5202 1.1 mrg list = generate_inner_level(executed, isl_ast_build_copy(build)); 5203 1.1 mrg list = isl_ast_graft_list_group_on_guard(list, build); 5204 1.1 mrg isl_ast_build_free(build); 5205 1.1 mrg 5206 1.1 mrg return list; 5207 1.1 mrg } 5208 1.1 mrg 5209 1.1 mrg /* Check that the band partial schedule "partial" does not filter out 5210 1.1 mrg * any statement instances, as specified by the range of "executed". 
5211 1.1 mrg */ 5212 1.1 mrg static isl_stat check_band_schedule_total_on_instances( 5213 1.1 mrg __isl_keep isl_multi_union_pw_aff *partial, 5214 1.1 mrg __isl_keep isl_union_map *executed) 5215 1.1 mrg { 5216 1.1 mrg isl_bool subset; 5217 1.1 mrg isl_union_set *domain, *instances; 5218 1.1 mrg 5219 1.1 mrg instances = isl_union_map_range(isl_union_map_copy(executed)); 5220 1.1 mrg partial = isl_multi_union_pw_aff_copy(partial); 5221 1.1 mrg domain = isl_multi_union_pw_aff_domain(partial); 5222 1.1 mrg subset = isl_union_set_is_subset(instances, domain); 5223 1.1 mrg isl_union_set_free(domain); 5224 1.1 mrg isl_union_set_free(instances); 5225 1.1 mrg 5226 1.1 mrg if (subset < 0) 5227 1.1 mrg return isl_stat_error; 5228 1.1 mrg if (!subset) 5229 1.1 mrg isl_die(isl_union_map_get_ctx(executed), isl_error_invalid, 5230 1.1 mrg "band node is not allowed to drop statement instances", 5231 1.1 mrg return isl_stat_error); 5232 1.1 mrg return isl_stat_ok; 5233 1.1 mrg } 5234 1.1 mrg 5235 1.1 mrg /* Generate an AST that visits the elements in the domain of "executed" 5236 1.1 mrg * in the relative order specified by the band node "node" and its descendants. 5237 1.1 mrg * 5238 1.1 mrg * The relation "executed" maps the outer generated loop iterators 5239 1.1 mrg * to the domain elements executed by those iterations. 5240 1.1 mrg * 5241 1.1 mrg * If the band is empty, we continue with its descendants. 5242 1.1 mrg * Otherwise, we extend the build and the inverse schedule with 5243 1.1 mrg * the additional space/partial schedule and continue generating 5244 1.1 mrg * an AST in generate_next_level. 5245 1.1 mrg * As soon as we have extended the inverse schedule with the additional 5246 1.1 mrg * partial schedule, we look for equalities that may exists between 5247 1.1 mrg * the old and the new part. 
 */
static __isl_give isl_ast_graft_list *build_ast_from_band(
	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
	__isl_take isl_union_map *executed)
{
	isl_space *space;
	isl_multi_union_pw_aff *extra;
	isl_union_map *extra_umap;
	isl_ast_graft_list *list;
	isl_size n1, n2;
	isl_size n;

	n = isl_schedule_node_band_n_member(node);
	if (!build || n < 0 || !executed)
		goto error;

	/* An empty band has no partial schedule to add:
	 * simply continue with its (single) child.
	 */
	if (n == 0)
		return build_ast_from_child(build, node, executed);

	extra = isl_schedule_node_band_get_partial_schedule(node);
	extra = isl_multi_union_pw_aff_align_params(extra,
				isl_ast_build_get_space(build, 1));
	space = isl_multi_union_pw_aff_get_space(extra);

	/* If the band would drop statement instances, invalidate
	 * "executed"; the NULL value then propagates through
	 * the isl calls below.
	 */
	if (check_band_schedule_total_on_instances(extra, executed) < 0)
		executed = isl_union_map_free(executed);

	extra_umap = isl_union_map_from_multi_union_pw_aff(extra);
	extra_umap = isl_union_map_reverse(extra_umap);

	executed = isl_union_map_domain_product(executed, extra_umap);
	executed = isl_union_map_detect_equalities(executed);

	/* The product may not introduce any new parameters;
	 * compare the parameter counts before and after.
	 */
	n1 = isl_ast_build_dim(build, isl_dim_param);
	build = isl_ast_build_product(build, space);
	n2 = isl_ast_build_dim(build, isl_dim_param);
	if (n1 < 0 || n2 < 0)
		build = isl_ast_build_free(build);
	else if (n2 > n1)
		isl_die(isl_ast_build_get_ctx(build), isl_error_invalid,
			"band node is not allowed to introduce new parameters",
			build = isl_ast_build_free(build));
	build = isl_ast_build_set_schedule_node(build, node);

	list = generate_next_level(executed, build);

	list = isl_ast_graft_list_unembed(list, 1);

	return list;
error:
	isl_schedule_node_free(node);
	isl_union_map_free(executed);
	isl_ast_build_free(build);
	return NULL;
}

/* Hoist a list of grafts (in practice containing a single graft)
 * from "sub_build" (which includes extra context information)
 * to "build".
 *
 * In particular, project out all additional parameters introduced
 * by the context node from the enforced constraints and the guard
 * of the single graft.
 */
static __isl_give isl_ast_graft_list *hoist_out_of_context(
	__isl_take isl_ast_graft_list *list, __isl_keep isl_ast_build *build,
	__isl_keep isl_ast_build *sub_build)
{
	isl_ast_graft *graft;
	isl_basic_set *enforced;
	isl_set *guard;
	isl_size n_param, extra_param;

	n_param = isl_ast_build_dim(build, isl_dim_param);
	extra_param = isl_ast_build_dim(sub_build, isl_dim_param);
	if (n_param < 0 || extra_param < 0)
		return isl_ast_graft_list_free(list);

	/* No extra parameters were introduced: nothing to hoist. */
	if (extra_param == n_param)
		return list;

	/* "extra_param" now counts only the additional parameters,
	 * which occupy positions n_param..n_param+extra_param-1.
	 */
	extra_param -= n_param;
	enforced = isl_ast_graft_list_extract_shared_enforced(list, sub_build);
	enforced = isl_basic_set_project_out(enforced, isl_dim_param,
						n_param, extra_param);
	enforced = isl_basic_set_remove_unknown_divs(enforced);
	guard = isl_ast_graft_list_extract_hoistable_guard(list, sub_build);
	guard = isl_set_remove_divs_involving_dims(guard, isl_dim_param,
						n_param, extra_param);
	guard = isl_set_project_out(guard, isl_dim_param, n_param, extra_param);
	guard = isl_set_compute_divs(guard);
	graft = isl_ast_graft_alloc_from_children(list, guard, enforced,
						build, sub_build);
	list = isl_ast_graft_list_from_ast_graft(graft);

	return list;
}

/* Generate an AST that visits the elements in the domain of "executed"
 * in the relative order specified by the context node "node"
 * and its descendants.
 *
 * The relation "executed" maps the outer generated loop iterators
 * to the domain elements executed by those iterations.
 *
 * The context node may introduce additional parameters as well as
 * constraints on the outer schedule dimensions or original parameters.
 *
 * We add the extra parameters to a new build and the context
 * constraints to both the build and (as a single disjunct)
 * to the domain of "executed". Since the context constraints
 * are specified in terms of the input schedule, we first need
 * to map them to the internal schedule domain.
 *
 * After constructing the AST from the descendants of "node",
 * we combine the list of grafts into a single graft within
 * the new build, in order to be able to exploit the additional
 * context constraints during this combination.
 *
 * Additionally, if the current node is the outermost node in
 * the schedule tree (apart from the root domain node), we generate
 * all pending guards, again to be able to exploit the additional
 * context constraints. We currently do not do this for internal
 * context nodes since we may still want to hoist conditions
 * to outer AST nodes.
 *
 * If the context node introduced any new parameters, then they
 * are removed from the set of enforced constraints and guard
 * in hoist_out_of_context.
 */
static __isl_give isl_ast_graft_list *build_ast_from_context(
	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
	__isl_take isl_union_map *executed)
{
	isl_set *context;
	isl_space *space;
	isl_multi_aff *internal2input;
	isl_ast_build *sub_build;
	isl_ast_graft_list *list;
	isl_size n;
	isl_size depth;

	depth = isl_schedule_node_get_tree_depth(node);
	if (depth < 0)
		build = isl_ast_build_free(build);
	space = isl_ast_build_get_space(build, 1);
	context = isl_schedule_node_context_get_context(node);
	context = isl_set_align_params(context, space);
	/* "sub_build" picks up any parameters introduced by the context. */
	sub_build = isl_ast_build_copy(build);
	space = isl_set_get_space(context);
	sub_build = isl_ast_build_align_params(sub_build, space);
	/* Map the context constraints from the input schedule space
	 * to the internal schedule domain.
	 */
	internal2input = isl_ast_build_get_internal2input(sub_build);
	context = isl_set_preimage_multi_aff(context, internal2input);
	sub_build = isl_ast_build_restrict_generated(sub_build,
						isl_set_copy(context));
	/* Use a single-disjunct overapproximation to avoid breaking up
	 * "executed" into too many disjuncts.
	 */
	context = isl_set_from_basic_set(isl_set_simple_hull(context));
	executed = isl_union_map_intersect_domain(executed,
					isl_union_set_from_set(context));

	list = build_ast_from_child(isl_ast_build_copy(sub_build),
						node, executed);
	n = isl_ast_graft_list_n_ast_graft(list);
	if (n < 0)
		list = isl_ast_graft_list_free(list);

	list = isl_ast_graft_list_fuse(list, sub_build);
	/* depth == 1 means "node" is the outermost node below the root
	 * domain node, so generate all pending guards here.
	 */
	if (depth == 1)
		list = isl_ast_graft_list_insert_pending_guard_nodes(list,
								sub_build);
	if (n >= 1)
		list = hoist_out_of_context(list, build, sub_build);

	isl_ast_build_free(build);
	isl_ast_build_free(sub_build);

	return list;
}

/* Generate an AST that visits the elements in the domain of "executed"
 * in the relative order specified by the expansion node "node" and
 * its descendants.
 *
 * The relation "executed" maps the outer generated loop iterators
 * to the domain elements executed by those iterations.
 *
 * We expand the domain elements by the expansion and
 * continue with the descendants of the node.
 */
static __isl_give isl_ast_graft_list *build_ast_from_expansion(
	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
	__isl_take isl_union_map *executed)
{
	isl_union_map *expansion;
	isl_size n1, n2;

	expansion = isl_schedule_node_expansion_get_expansion(node);
	expansion = isl_union_map_align_params(expansion,
				isl_union_map_get_space(executed));

	/* The expansion may not introduce any new parameters;
	 * compare the parameter counts before and after applying it.
	 */
	n1 = isl_union_map_dim(executed, isl_dim_param);
	executed = isl_union_map_apply_range(executed, expansion);
	n2 = isl_union_map_dim(executed, isl_dim_param);
	if (n1 < 0 || n2 < 0)
		goto error;
	if (n2 > n1)
		isl_die(isl_ast_build_get_ctx(build), isl_error_invalid,
			"expansion node is not allowed to introduce "
			"new parameters", goto error);

	return build_ast_from_child(build, node, executed);
error:
	isl_ast_build_free(build);
	isl_schedule_node_free(node);
	isl_union_map_free(executed);
	return NULL;
}

/* Generate an AST that visits the elements in the domain of "executed"
 * in the relative order specified by the extension node "node" and
 * its descendants.
 *
 * The relation "executed" maps the outer generated loop iterators
 * to the domain elements executed by those iterations.
 *
 * Extend the inverse schedule with the extension applied to current
 * set of generated constraints. Since the extension is formulated
 * in terms of the input schedule, it first needs to be transformed
 * to refer to the internal schedule.
 */
static __isl_give isl_ast_graft_list *build_ast_from_extension(
	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
	__isl_take isl_union_map *executed)
{
	isl_union_set *schedule_domain;
	isl_union_map *extension;
	isl_set *set;

	/* Approximate the generated constraints by a single disjunct. */
	set = isl_ast_build_get_generated(build);
	set = isl_set_from_basic_set(isl_set_simple_hull(set));
	schedule_domain = isl_union_set_from_set(set);

	extension = isl_schedule_node_extension_get_extension(node);

	extension = isl_union_map_preimage_domain_multi_aff(extension,
			isl_multi_aff_copy(build->internal2input));
	extension = isl_union_map_intersect_domain(extension, schedule_domain);
	extension = isl_ast_build_substitute_values_union_map_domain(build,
								    extension);
	executed = isl_union_map_union(executed, extension);

	return build_ast_from_child(build, node, executed);
}

/* Generate an AST that visits the elements in the domain of "executed"
 * in
 * the relative order specified by the filter node "node" and
 * its descendants.
 *
 * The relation "executed" maps the outer generated loop iterators
 * to the domain elements executed by those iterations.
 *
 * We simply intersect the iteration domain (i.e., the range of "executed")
 * with the filter and continue with the descendants of the node,
 * unless the resulting inverse schedule is empty, in which
 * case we return an empty list.
 *
 * If the result of the intersection is equal to the original "executed"
 * relation, then keep the original representation since the intersection
 * may have unnecessarily broken up the relation into a greater number
 * of disjuncts.
 */
static __isl_give isl_ast_graft_list *build_ast_from_filter(
	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
	__isl_take isl_union_map *executed)
{
	isl_ctx *ctx;
	isl_union_set *filter;
	isl_union_map *orig;
	isl_ast_graft_list *list;
	int empty;
	isl_bool unchanged;
	isl_size n1, n2;

	/* Keep a copy of the original relation so that it can be
	 * reused if the intersection below turns out to be a no-op.
	 */
	orig = isl_union_map_copy(executed);
	if (!build || !node || !executed)
		goto error;

	filter = isl_schedule_node_filter_get_filter(node);
	filter = isl_union_set_align_params(filter,
				isl_union_map_get_space(executed));
	/* The filter may not introduce any new parameters;
	 * compare the parameter counts before and after.
	 */
	n1 = isl_union_map_dim(executed, isl_dim_param);
	executed = isl_union_map_intersect_range(executed, filter);
	n2 = isl_union_map_dim(executed, isl_dim_param);
	if (n1 < 0 || n2 < 0)
		goto error;
	if (n2 > n1)
		isl_die(isl_ast_build_get_ctx(build), isl_error_invalid,
			"filter node is not allowed to introduce "
			"new parameters", goto error);

	/* "executed" is a subset of "orig" by construction, so being
	 * a superset ("orig" subset of "executed") means it is unchanged.
	 */
	unchanged = isl_union_map_is_subset(orig, executed);
	empty = isl_union_map_is_empty(executed);
	if (unchanged < 0 || empty < 0)
		goto error;
	if (unchanged) {
		isl_union_map_free(executed);
		return build_ast_from_child(build, node, orig);
	}
	isl_union_map_free(orig);
	if (!empty)
		return build_ast_from_child(build, node, executed);

	/* The filtered inverse schedule is empty: return an empty list. */
	ctx = isl_ast_build_get_ctx(build);
	list = isl_ast_graft_list_alloc(ctx, 0);
	isl_ast_build_free(build);
	isl_schedule_node_free(node);
	isl_union_map_free(executed);
	return list;
error:
	isl_ast_build_free(build);
	isl_schedule_node_free(node);
	isl_union_map_free(executed);
	isl_union_map_free(orig);
	return NULL;
}

/* Generate an AST that visits the elements in the domain of "executed"
 * in the relative order specified by the guard node "node" and
 * its descendants.
 *
 * The relation "executed" maps the outer generated loop iterators
 * to the domain elements executed by those iterations.
 *
 * Ensure that the associated guard is enforced by the outer AST
 * constructs by adding it to the guard of the graft.
 * Since we know that we will enforce the guard, we can also include it
 * in the generated constraints used to construct an AST for
 * the descendant nodes.
 */
static __isl_give isl_ast_graft_list *build_ast_from_guard(
	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
	__isl_take isl_union_map *executed)
{
	isl_space *space;
	isl_set *guard, *hoisted;
	isl_basic_set *enforced;
	isl_ast_build *sub_build;
	isl_ast_graft *graft;
	isl_ast_graft_list *list;
	isl_size n1, n2, n;

	space = isl_ast_build_get_space(build, 1);
	guard = isl_schedule_node_guard_get_guard(node);
	/* The guard may not introduce any new parameters;
	 * compare the parameter counts before and after aligning.
	 */
	n1 = isl_space_dim(space, isl_dim_param);
	guard = isl_set_align_params(guard, space);
	n2 = isl_set_dim(guard, isl_dim_param);
	if (n1 < 0 || n2 < 0)
		guard = isl_set_free(guard);
	else if (n2 > n1)
		isl_die(isl_ast_build_get_ctx(build), isl_error_invalid,
			"guard node is not allowed to introduce "
			"new parameters", guard = isl_set_free(guard));
	/* The guard is formulated in terms of the input schedule,
	 * so map it to the internal schedule domain first.
	 */
	guard = isl_set_preimage_multi_aff(guard,
			isl_multi_aff_copy(build->internal2input));
	guard = isl_ast_build_specialize(build, guard);
	guard = isl_set_gist(guard, isl_set_copy(build->generated));

	/* The guard will be enforced, so it can also be exploited
	 * while generating the AST for the descendants.
	 */
	sub_build = isl_ast_build_copy(build);
	sub_build = isl_ast_build_restrict_generated(sub_build,
						isl_set_copy(guard));

	list = build_ast_from_child(isl_ast_build_copy(sub_build),
						node, executed);

	hoisted = isl_ast_graft_list_extract_hoistable_guard(list, sub_build);
	n = isl_set_n_basic_set(hoisted);
	if (n < 0)
		list = isl_ast_graft_list_free(list);
	if (n > 1)
		list = isl_ast_graft_list_gist_guards(list,
						isl_set_copy(hoisted));
	guard = isl_set_intersect(guard, hoisted);
	enforced = extract_shared_enforced(list, build);
	graft = isl_ast_graft_alloc_from_children(list, guard, enforced,
						build, sub_build);

	isl_ast_build_free(sub_build);
	isl_ast_build_free(build);
	return isl_ast_graft_list_from_ast_graft(graft);
}

/* Call the before_each_mark callback, if requested by the user.
 *
 * Return 0 on success and -1 on error.
 *
 * The caller is responsible for recording the current inverse schedule
 * in "build".
 */
static isl_stat before_each_mark(__isl_keep isl_id *mark,
	__isl_keep isl_ast_build *build)
{
	if (!build)
		return isl_stat_error;
	if (!build->before_each_mark)
		return isl_stat_ok;
	return build->before_each_mark(mark, build,
					build->before_each_mark_user);
}

/* Call the after_each_mark callback, if requested by the user.
 *
 * The caller is responsible for recording the current inverse schedule
 * in "build".
 */
static __isl_give isl_ast_graft *after_each_mark(
	__isl_take isl_ast_graft *graft, __isl_keep isl_ast_build *build)
{
	if (!graft || !build)
		return isl_ast_graft_free(graft);
	if (!build->after_each_mark)
		return graft;
	/* The callback may replace the AST node of the graft. */
	graft->node = build->after_each_mark(graft->node, build,
						build->after_each_mark_user);
	if (!graft->node)
		return isl_ast_graft_free(graft);
	return graft;
}


/* Generate an AST that visits the elements in the domain of "executed"
 * in the relative order specified by the mark node "node" and
 * its descendants.
 *
 * The relation "executed" maps the outer generated loop iterators
 * to the domain elements executed by those iterations.
 *
 * Since we may be calling before_each_mark and after_each_mark
 * callbacks, we record the current inverse schedule in the build.
 *
 * We generate an AST for the child of the mark node, combine
 * the graft list into a single graft and then insert the mark
 * in the AST of that single graft.
 */
static __isl_give isl_ast_graft_list *build_ast_from_mark(
	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
	__isl_take isl_union_map *executed)
{
	isl_id *mark;
	isl_ast_graft *graft;
	isl_ast_graft_list *list;
	isl_size n;

	/* Record the current inverse schedule for the callbacks. */
	build = isl_ast_build_set_executed(build, isl_union_map_copy(executed));

	mark = isl_schedule_node_mark_get_id(node);
	if (before_each_mark(mark, build) < 0)
		node = isl_schedule_node_free(node);

	list = build_ast_from_child(isl_ast_build_copy(build), node, executed);
	list = isl_ast_graft_list_fuse(list, build);
	n = isl_ast_graft_list_n_ast_graft(list);
	if (n < 0)
		list = isl_ast_graft_list_free(list);
	if (n == 0) {
		/* Nothing was generated, so there is no graft to mark. */
		isl_id_free(mark);
	} else {
		graft = isl_ast_graft_list_get_ast_graft(list, 0);
		graft = isl_ast_graft_insert_mark(graft, mark);
		graft = after_each_mark(graft, build);
		list = isl_ast_graft_list_set_ast_graft(list, 0, graft);
	}
	isl_ast_build_free(build);

	return list;
}

static __isl_give isl_ast_graft_list *build_ast_from_schedule_node(
	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
	__isl_take isl_union_map *executed);

/* Generate an AST that visits the elements in the domain of "executed"
 * in the relative order specified by the sequence (or set) node "node" and
 * its descendants.
 *
 * The relation "executed" maps the outer generated loop iterators
 * to the domain elements executed by those iterations.
5733 1.1 mrg * 5734 1.1 mrg * We simply generate an AST for each of the children and concatenate 5735 1.1 mrg * the results. 5736 1.1 mrg */ 5737 1.1 mrg static __isl_give isl_ast_graft_list *build_ast_from_sequence( 5738 1.1 mrg __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node, 5739 1.1 mrg __isl_take isl_union_map *executed) 5740 1.1 mrg { 5741 1.1 mrg int i; 5742 1.1 mrg isl_size n; 5743 1.1 mrg isl_ctx *ctx; 5744 1.1 mrg isl_ast_graft_list *list; 5745 1.1 mrg 5746 1.1 mrg ctx = isl_ast_build_get_ctx(build); 5747 1.1 mrg list = isl_ast_graft_list_alloc(ctx, 0); 5748 1.1 mrg 5749 1.1 mrg n = isl_schedule_node_n_children(node); 5750 1.1 mrg if (n < 0) 5751 1.1 mrg list = isl_ast_graft_list_free(list); 5752 1.1 mrg for (i = 0; i < n; ++i) { 5753 1.1 mrg isl_schedule_node *child; 5754 1.1 mrg isl_ast_graft_list *list_i; 5755 1.1 mrg 5756 1.1 mrg child = isl_schedule_node_get_child(node, i); 5757 1.1 mrg list_i = build_ast_from_schedule_node(isl_ast_build_copy(build), 5758 1.1 mrg child, isl_union_map_copy(executed)); 5759 1.1 mrg list = isl_ast_graft_list_concat(list, list_i); 5760 1.1 mrg } 5761 1.1 mrg isl_ast_build_free(build); 5762 1.1 mrg isl_schedule_node_free(node); 5763 1.1 mrg isl_union_map_free(executed); 5764 1.1 mrg 5765 1.1 mrg return list; 5766 1.1 mrg } 5767 1.1 mrg 5768 1.1 mrg /* Generate an AST that visits the elements in the domain of "executed" 5769 1.1 mrg * in the relative order specified by the node "node" and its descendants. 5770 1.1 mrg * 5771 1.1 mrg * The relation "executed" maps the outer generated loop iterators 5772 1.1 mrg * to the domain elements executed by those iterations. 5773 1.1 mrg * 5774 1.1 mrg * The node types are handled in separate functions. 5775 1.1 mrg * Set nodes are currently treated in the same way as sequence nodes. 5776 1.1 mrg * The children of a set node may be executed in any order, 5777 1.1 mrg * including the order of the children. 
 */
static __isl_give isl_ast_graft_list *build_ast_from_schedule_node(
	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
	__isl_take isl_union_map *executed)
{
	enum isl_schedule_node_type type;

	type = isl_schedule_node_get_type(node);

	switch (type) {
	case isl_schedule_node_error:
		goto error;
	case isl_schedule_node_leaf:
		return build_ast_from_leaf(build, node, executed);
	case isl_schedule_node_band:
		return build_ast_from_band(build, node, executed);
	case isl_schedule_node_context:
		return build_ast_from_context(build, node, executed);
	case isl_schedule_node_domain:
		/* Domain nodes may only appear at the root of the tree. */
		isl_die(isl_schedule_node_get_ctx(node), isl_error_unsupported,
			"unexpected internal domain node", goto error);
	case isl_schedule_node_expansion:
		return build_ast_from_expansion(build, node, executed);
	case isl_schedule_node_extension:
		return build_ast_from_extension(build, node, executed);
	case isl_schedule_node_filter:
		return build_ast_from_filter(build, node, executed);
	case isl_schedule_node_guard:
		return build_ast_from_guard(build, node, executed);
	case isl_schedule_node_mark:
		return build_ast_from_mark(build, node, executed);
	case isl_schedule_node_sequence:
	case isl_schedule_node_set:
		return build_ast_from_sequence(build, node, executed);
	}

	isl_die(isl_ast_build_get_ctx(build), isl_error_internal,
		"unhandled type", goto error);
error:
	isl_union_map_free(executed);
	isl_schedule_node_free(node);
	isl_ast_build_free(build);

	return NULL;
}

/* Generate an AST that visits the elements in the domain of "executed"
 * in the relative order specified by the (single) child of "node" and
 * its descendants.
 *
 * The relation "executed" maps the outer generated loop iterators
 * to the domain elements executed by those iterations.
 *
 * This function is never called on a leaf, set or sequence node,
 * so the node always has exactly one child.
 */
static __isl_give isl_ast_graft_list *build_ast_from_child(
	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
	__isl_take isl_union_map *executed)
{
	node = isl_schedule_node_child(node, 0);
	return build_ast_from_schedule_node(build, node, executed);
}

/* Generate an AST that visits the elements in the domain of the domain
 * node "node" in the relative order specified by its descendants.
 *
 * An initial inverse schedule is created that maps a zero-dimensional
 * schedule space to the node domain.
 * The input "build" is assumed to have a parametric domain and
 * is replaced by the same zero-dimensional schedule space.
 *
 * We also add some of the parameter constraints in the build domain
 * to the executed relation. Adding these constraints
 * allows for an earlier detection of conflicts in some cases.
 * However, we do not want to divide the executed relation into
 * more disjuncts than necessary. We therefore approximate
 * the constraints on the parameters by a single disjunct set.
 */
static __isl_give isl_ast_node *build_ast_from_domain(
	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node)
{
	isl_ctx *ctx;
	isl_union_set *domain, *schedule_domain;
	isl_union_map *executed;
	isl_space *space;
	isl_set *set;
	isl_ast_graft_list *list;
	isl_ast_node *ast;
	int is_params;

	if (!build)
		goto error;

	/* The initial build must have a parametric domain. */
	ctx = isl_ast_build_get_ctx(build);
	space = isl_ast_build_get_space(build, 1);
	is_params = isl_space_is_params(space);
	isl_space_free(space);
	if (is_params < 0)
		goto error;
	if (!is_params)
		isl_die(ctx, isl_error_unsupported,
			"expecting parametric initial context", goto error);

	domain = isl_schedule_node_domain_get_domain(node);
	domain = isl_union_set_coalesce(domain);

	/* Replace the parametric build domain by a zero-dimensional
	 * schedule space.
	 */
	space = isl_union_set_get_space(domain);
	space = isl_space_set_from_params(space);
	build = isl_ast_build_product(build, space);

	/* Approximate the parameter constraints by a single disjunct. */
	set = isl_ast_build_get_domain(build);
	set = isl_set_from_basic_set(isl_set_simple_hull(set));
	schedule_domain = isl_union_set_from_set(set);

	executed = isl_union_map_from_domain_and_range(schedule_domain, domain);
	list = build_ast_from_child(isl_ast_build_copy(build), node, executed);
	ast = isl_ast_node_from_graft_list(list, build);
	isl_ast_build_free(build);

	return ast;
error:
	isl_schedule_node_free(node);
	isl_ast_build_free(build);
	return NULL;
}

/* Generate an AST that visits the elements in the domain of "schedule"
 * in the relative order specified by the schedule tree.
 *
 * "build" is an isl_ast_build that has been created using
 * isl_ast_build_alloc or isl_ast_build_from_context based
 * on a parametric set.
 *
 * The construction starts at the root node of the schedule,
 * which is assumed to be a domain node.
 */
__isl_give isl_ast_node *isl_ast_build_node_from_schedule(
	__isl_keep isl_ast_build *build, __isl_take isl_schedule *schedule)
{
	isl_ctx *ctx;
	isl_schedule_node *node;

	if (!build || !schedule)
		goto error;

	ctx = isl_ast_build_get_ctx(build);

	node = isl_schedule_get_root(schedule);
	if (!node)
		goto error;
	isl_schedule_free(schedule);

	/* Work on a private copy of the borrowed "build". */
	build = isl_ast_build_copy(build);
	build = isl_ast_build_set_single_valued(build, 0);
	if (isl_schedule_node_get_type(node) != isl_schedule_node_domain)
		isl_die(ctx, isl_error_unsupported,
			"expecting root domain node",
			build = isl_ast_build_free(build));
	return build_ast_from_domain(build, node);
error:
	isl_schedule_free(schedule);
	return NULL;
}