1 1.1 mrg /* 2 1.1 mrg * Copyright 2011 INRIA Saclay 3 1.1 mrg * Copyright 2012 Ecole Normale Superieure 4 1.1 mrg * Copyright 2020 Cerebras Systems 5 1.1 mrg * 6 1.1 mrg * Use of this software is governed by the MIT license 7 1.1 mrg * 8 1.1 mrg * Written by Sven Verdoolaege, INRIA Saclay - Ile-de-France, 9 1.1 mrg * Parc Club Orsay Universite, ZAC des vignes, 4 rue Jacques Monod, 10 1.1 mrg * 91893 Orsay, France 11 1.1 mrg * and Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France 12 1.1 mrg * and Cerebras Systems, 175 S San Antonio Rd, Los Altos, CA, USA 13 1.1 mrg */ 14 1.1 mrg 15 1.1 mrg #include <isl_pw_macro.h> 16 1.1 mrg 17 1.1 mrg /* Given a function "cmp" that returns the set of elements where 18 1.1 mrg * "el1" is "better" than "el2", return this set. 19 1.1 mrg */ 20 1.1 mrg static __isl_give isl_set *FN(PW,better)(__isl_keep EL *el1, __isl_keep EL *el2, 21 1.1 mrg __isl_give isl_set *(*cmp)(__isl_take EL *el1, __isl_take EL *el2)) 22 1.1 mrg { 23 1.1 mrg return cmp(FN(EL,copy)(el1), FN(EL,copy)(el2)); 24 1.1 mrg } 25 1.1 mrg 26 1.1 mrg /* Return a list containing the domains of the pieces of "pw". 27 1.1 mrg */ 28 1.1 mrg static __isl_give isl_set_list *FN(PW,extract_domains)(__isl_keep PW *pw) 29 1.1 mrg { 30 1.1 mrg int i; 31 1.1 mrg isl_ctx *ctx; 32 1.1 mrg isl_set_list *list; 33 1.1 mrg 34 1.1 mrg if (!pw) 35 1.1 mrg return NULL; 36 1.1 mrg ctx = FN(PW,get_ctx)(pw); 37 1.1 mrg list = isl_set_list_alloc(ctx, pw->n); 38 1.1 mrg for (i = 0; i < pw->n; ++i) 39 1.1 mrg list = isl_set_list_add(list, isl_set_copy(pw->p[i].set)); 40 1.1 mrg 41 1.1 mrg return list; 42 1.1 mrg } 43 1.1 mrg 44 1.1 mrg /* Given sets B ("set"), C ("better") and A' ("out"), return 45 1.1 mrg * 46 1.1 mrg * (B \cap C) \cup ((B \setminus C) \setminus A') 47 1.1 mrg */ 48 1.1 mrg static __isl_give isl_set *FN(PW,better_or_out)(__isl_take isl_set *set, 49 1.1 mrg __isl_take isl_set *better, __isl_take isl_set *out) 50 1.1 mrg { 51 1.1 mrg isl_set *set_better, *set_out; 52 1.1 mrg 53 1.1 mrg set_better = isl_set_intersect(isl_set_copy(set), isl_set_copy(better)); 54 1.1 mrg set_out = isl_set_subtract(isl_set_subtract(set, better), out); 55 1.1 mrg 56 1.1 mrg return isl_set_union(set_better, set_out); 57 1.1 mrg } 58 1.1 mrg 59 1.1 mrg /* Given sets A ("set"), C ("better") and B' ("out"), return 60 1.1 mrg * 61 1.1 mrg * (A \setminus C) \cup ((A \cap C) \setminus B') 62 1.1 mrg */ 63 1.1 mrg static __isl_give isl_set *FN(PW,worse_or_out)(__isl_take isl_set *set, 64 1.1 mrg __isl_take isl_set *better, __isl_take isl_set *out) 65 1.1 mrg { 66 1.1 mrg isl_set *set_worse, *set_out; 67 1.1 mrg 68 1.1 mrg set_worse = isl_set_subtract(isl_set_copy(set), isl_set_copy(better)); 69 1.1 mrg set_out = isl_set_subtract(isl_set_intersect(set, better), out); 70 1.1 mrg 71 1.1 mrg return isl_set_union(set_worse, set_out); 72 1.1 mrg } 73 1.1 mrg 74 1.1 mrg /* Internal data structure used by isl_pw_*_union_opt_cmp 75 1.1 mrg * that keeps track of a piecewise expression with updated cells. 76 1.1 mrg * "pw" holds the original piecewise expression. 77 1.1 mrg * "list" holds the updated cells. 78 1.1 mrg */ 79 1.1 mrg S(PW,union_opt_cmp_data) { 80 1.1 mrg PW *pw; 81 1.1 mrg isl_set_list *cell; 82 1.1 mrg }; 83 1.1 mrg 84 1.1 mrg /* Free all memory allocated for "data". 85 1.1 mrg */ 86 1.1 mrg static void FN(PW,union_opt_cmp_data_clear)(S(PW,union_opt_cmp_data) *data) 87 1.1 mrg { 88 1.1 mrg isl_set_list_free(data->cell); 89 1.1 mrg FN(PW,free)(data->pw); 90 1.1 mrg } 91 1.1 mrg 92 1.1 mrg /* Given (potentially) updated cells "i" of data_i->pw and "j" of data_j->pw and 93 1.1 mrg * a set "better" where the piece from data_j->pw is better 94 1.1 mrg * than the piece from data_i->pw, 95 1.1 mrg * (further) update the specified cells such that only the better elements 96 1.1 mrg * remain on the (non-empty) intersection. 97 1.1 mrg * 98 1.1 mrg * Let C be the set "better". 99 1.1 mrg * Let A be the cell data_i->cell[i] and B the cell data_j->cell[j]. 100 1.1 mrg * 101 1.1 mrg * The elements in C need to be removed from A, except for those parts 102 1.1 mrg * that lie outside of B. That is, 103 1.1 mrg * 104 1.1 mrg * A <- (A \setminus C) \cup ((A \cap C) \setminus B') 105 1.1 mrg * 106 1.1 mrg * Conversely, the elements in B need to be restricted to C, except 107 1.1 mrg * for those parts that lie outside of A. That is 108 1.1 mrg * 109 1.1 mrg * B <- (B \cap C) \cup ((B \setminus C) \setminus A') 110 1.1 mrg * 111 1.1 mrg * Since all pairs of pieces are considered, the domains are updated 112 1.1 mrg * several times. A and B refer to these updated domains 113 1.1 mrg * (kept track of in data_i->cell[i] and data_j->cell[j]), while A' and B' refer 114 1.1 mrg * to the original domains of the pieces. It is safe to use these 115 1.1 mrg * original domains because the difference between, say, A' and A is 116 1.1 mrg * the domains of pw2-pieces that have been removed before and 117 1.1 mrg * those domains are disjoint from B. A' is used instead of A 118 1.1 mrg * because the continued updating of A may result in this domain 119 1.1 mrg * getting broken up into more disjuncts. 120 1.1 mrg */ 121 1.1 mrg static isl_stat FN(PW,union_opt_cmp_split)(S(PW,union_opt_cmp_data) *data_i, 122 1.1 mrg int i, S(PW,union_opt_cmp_data) *data_j, int j, 123 1.1 mrg __isl_take isl_set *better) 124 1.1 mrg { 125 1.1 mrg isl_set *set_i, *set_j; 126 1.1 mrg 127 1.1 mrg set_i = isl_set_list_get_set(data_i->cell, i); 128 1.1 mrg set_j = FN(PW,get_domain_at)(data_j->pw, j); 129 1.1 mrg set_i = FN(PW,worse_or_out)(set_i, isl_set_copy(better), set_j); 130 1.1 mrg data_i->cell = isl_set_list_set_set(data_i->cell, i, set_i); 131 1.1 mrg set_i = FN(PW,get_domain_at)(data_i->pw, i); 132 1.1 mrg set_j = isl_set_list_get_set(data_j->cell, j); 133 1.1 mrg set_j = FN(PW,better_or_out)(set_j, better, set_i); 134 1.1 mrg data_j->cell = isl_set_list_set_set(data_j->cell, j, set_j); 135 1.1 mrg 136 1.1 mrg return isl_stat_ok; 137 1.1 mrg } 138 1.1 mrg 139 1.1 mrg /* Given (potentially) updated cells "i" of data_i->pw and "j" of data_j->pw and 140 1.1 mrg * a function "cmp" that returns the set of elements where 141 1.1 mrg * "el1" is "better" than "el2", 142 1.1 mrg * (further) update the specified cells such that only the "better" elements 143 1.1 mrg * remain on the (non-empty) intersection. 144 1.1 mrg */ 145 1.1 mrg static isl_stat FN(PW,union_opt_cmp_pair)(S(PW,union_opt_cmp_data) *data_i, 146 1.1 mrg int i, S(PW,union_opt_cmp_data) *data_j, int j, 147 1.1 mrg __isl_give isl_set *(*cmp)(__isl_take EL *el1, __isl_take EL *el2)) 148 1.1 mrg { 149 1.1 mrg isl_set *better; 150 1.1 mrg EL *el_i, *el_j; 151 1.1 mrg 152 1.1 mrg el_i = FN(PW,peek_base_at)(data_i->pw, i); 153 1.1 mrg el_j = FN(PW,peek_base_at)(data_j->pw, j); 154 1.1 mrg better = FN(PW,better)(el_j, el_i, cmp); 155 1.1 mrg return FN(PW,union_opt_cmp_split)(data_i, i, data_j, j, better); 156 1.1 mrg } 157 1.1 mrg 158 1.1 mrg /* Given (potentially) updated cells "i" of data_i->pw and "j" of data_j->pw and 159 1.1 mrg * a function "cmp" that returns the set of elements where 160 1.1 mrg * "el1" is "better" than "el2", 161 1.1 mrg * (further) update the specified cells such that only the "better" elements 162 1.1 mrg * remain on the (non-empty) intersection. 163 1.1 mrg * 164 1.1 mrg * The base computation is performed by isl_pw_*_union_opt_cmp_pair, 165 1.1 mrg * which splits the cells according to the set of elements 166 1.1 mrg * where the piece from data_j->pw is better than the piece from data_i->pw. 167 1.1 mrg * 168 1.1 mrg * In some cases, there may be a subset of the intersection 169 1.1 mrg * where both pieces have the same value and can therefore 170 1.1 mrg * both be considered to be "better" than the other. 171 1.1 mrg * This can result in unnecessary splitting on this subset. 172 1.1 mrg * Avoid some of these cases by checking whether 173 1.1 mrg * data_i->pw is always better than data_j->pw on the intersection. 174 1.1 mrg * In particular, do this for the special case where this intersection 175 1.1 mrg * is equal to the cell "j" and data_i->pw is better on its entire cell. 176 1.1 mrg * 177 1.1 mrg * Similarly, if data_i->pw is never better than data_j->pw, 178 1.1 mrg * then no splitting will occur and there is no need to check 179 1.1 mrg * where data_j->pw is better than data_i->pw. 180 1.1 mrg */ 181 1.1 mrg static isl_stat FN(PW,union_opt_cmp_two)(S(PW,union_opt_cmp_data) *data_i, 182 1.1 mrg int i, S(PW,union_opt_cmp_data) *data_j, int j, 183 1.1 mrg __isl_give isl_set *(*cmp)(__isl_take EL *el1, __isl_take EL *el2)) 184 1.1 mrg { 185 1.1 mrg isl_bool is_subset, is_empty; 186 1.1 mrg isl_set *better, *set_i, *set_j; 187 1.1 mrg EL *el_i, *el_j; 188 1.1 mrg 189 1.1 mrg set_i = FN(PW,peek_domain_at)(data_i->pw, i); 190 1.1 mrg set_j = FN(PW,peek_domain_at)(data_j->pw, j); 191 1.1 mrg is_subset = isl_set_is_subset(set_j, set_i); 192 1.1 mrg if (is_subset < 0) 193 1.1 mrg return isl_stat_error; 194 1.1 mrg if (!is_subset) 195 1.1 mrg return FN(PW,union_opt_cmp_pair)(data_i, i, data_j, j, cmp); 196 1.1 mrg 197 1.1 mrg el_i = FN(PW,peek_base_at)(data_i->pw, i); 198 1.1 mrg el_j = FN(PW,peek_base_at)(data_j->pw, j); 199 1.1 mrg better = FN(PW,better)(el_i, el_j, cmp); 200 1.1 mrg is_empty = isl_set_is_empty(better); 201 1.1 mrg if (is_empty >= 0 && is_empty) 202 1.1 mrg return FN(PW,union_opt_cmp_split)(data_j, j, data_i, i, better); 203 1.1 mrg is_subset = isl_set_is_subset(set_i, better); 204 1.1 mrg if (is_subset >= 0 && is_subset) 205 1.1 mrg return FN(PW,union_opt_cmp_split)(data_j, j, data_i, i, better); 206 1.1 mrg isl_set_free(better); 207 1.1 mrg if (is_empty < 0 || is_subset < 0) 208 1.1 mrg return isl_stat_error; 209 1.1 mrg 210 1.1 mrg return FN(PW,union_opt_cmp_pair)(data_i, i, data_j, j, cmp); 211 1.1 mrg } 212 1.1 mrg 213 1.1 mrg /* Given two piecewise expressions data1->pw and data2->pw, replace 214 1.1 mrg * their domains 215 1.1 mrg * by the sets in data1->cell and data2->cell and combine the results into 216 1.1 mrg * a single piecewise expression. 217 1.1 mrg * The pieces of data1->pw and data2->pw are assumed to have been sorted 218 1.1 mrg * according to the function value expressions. 219 1.1 mrg * The pieces of the result are also sorted in this way. 220 1.1 mrg * 221 1.1 mrg * Run through the pieces of data1->pw and data2->pw in order until they 222 1.1 mrg * have both been exhausted, picking the piece from data1->pw or data2->pw 223 1.1 mrg * depending on which should come first, together with the corresponding 224 1.1 mrg * domain from data1->cell or data2->cell. In cases where the next pieces 225 1.1 mrg * in both data1->pw and data2->pw have the same function value expression, 226 1.1 mrg * construct only a single piece in the result with as domain 227 1.1 mrg * the union of the domains in data1->cell and data2->cell. 228 1.1 mrg */ 229 1.1 mrg static __isl_give PW *FN(PW,merge)(S(PW,union_opt_cmp_data) *data1, 230 1.1 mrg S(PW,union_opt_cmp_data) *data2) 231 1.1 mrg { 232 1.1 mrg int i, j; 233 1.1 mrg PW *res; 234 1.1 mrg PW *pw1 = data1->pw; 235 1.1 mrg PW *pw2 = data2->pw; 236 1.1 mrg isl_set_list *list1 = data1->cell; 237 1.1 mrg isl_set_list *list2 = data2->cell; 238 1.1 mrg 239 1.1 mrg if (!pw1 || !pw2) 240 1.1 mrg return NULL; 241 1.1 mrg 242 1.1 mrg res = FN(PW,alloc_size)(isl_space_copy(pw1->dim), pw1->n + pw2->n); 243 1.1 mrg 244 1.1 mrg i = 0; j = 0; 245 1.1 mrg while (i < pw1->n || j < pw2->n) { 246 1.1 mrg int cmp; 247 1.1 mrg isl_set *set; 248 1.1 mrg EL *el; 249 1.1 mrg 250 1.1 mrg if (i < pw1->n && j < pw2->n) 251 1.1 mrg cmp = FN(EL,plain_cmp)(pw1->p[i].FIELD, 252 1.1 mrg pw2->p[j].FIELD); 253 1.1 mrg else 254 1.1 mrg cmp = i < pw1->n ? -1 : 1; 255 1.1 mrg 256 1.1 mrg if (cmp < 0) { 257 1.1 mrg set = isl_set_list_get_set(list1, i); 258 1.1 mrg el = FN(EL,copy)(pw1->p[i].FIELD); 259 1.1 mrg ++i; 260 1.1 mrg } else if (cmp > 0) { 261 1.1 mrg set = isl_set_list_get_set(list2, j); 262 1.1 mrg el = FN(EL,copy)(pw2->p[j].FIELD); 263 1.1 mrg ++j; 264 1.1 mrg } else { 265 1.1 mrg set = isl_set_union(isl_set_list_get_set(list1, i), 266 1.1 mrg isl_set_list_get_set(list2, j)); 267 1.1 mrg el = FN(EL,copy)(pw1->p[i].FIELD); 268 1.1 mrg ++i; 269 1.1 mrg ++j; 270 1.1 mrg } 271 1.1 mrg res = FN(PW,add_piece)(res, set, el); 272 1.1 mrg } 273 1.1 mrg 274 1.1 mrg return res; 275 1.1 mrg } 276 1.1 mrg 277 1.1 mrg /* Given a function "cmp" that returns the set of elements where 278 1.1 mrg * "el1" is "better" than "el2", return a piecewise 279 1.1 mrg * expression defined on the union of the definition domains 280 1.1 mrg * of "pw1" and "pw2" that maps to the "best" of "pw1" and 281 1.1 mrg * "pw2" on each cell. If only one of the two input functions 282 1.1 mrg * is defined on a given cell, then it is considered the best. 283 1.1 mrg * 284 1.1 mrg * Run through all pairs of pieces in "pw1" and "pw2". 285 1.1 mrg * If the domains of these pieces intersect, then the intersection 286 1.1 mrg * needs to be distributed over the two pieces based on "cmp". 287 1.1 mrg * 288 1.1 mrg * After the updated domains have been computed, the result is constructed 289 1.1 mrg * from "pw1", "pw2", data[0].cell and data[1].cell. If there are any pieces 290 1.1 mrg * in "pw1" and "pw2" with the same function value expression, then 291 1.1 mrg * they are combined into a single piece in the result. 292 1.1 mrg * In order to be able to do this efficiently, the pieces of "pw1" and 293 1.1 mrg * "pw2" are first sorted according to their function value expressions. 294 1.1 mrg */ 295 1.1 mrg static __isl_give PW *FN(PW,union_opt_cmp)( 296 1.1 mrg __isl_take PW *pw1, __isl_take PW *pw2, 297 1.1 mrg __isl_give isl_set *(*cmp)(__isl_take EL *el1, __isl_take EL *el2)) 298 1.1 mrg { 299 1.1 mrg S(PW,union_opt_cmp_data) data[2] = { { pw1, NULL }, { pw2, NULL } }; 300 1.1 mrg int i, j; 301 1.1 mrg isl_size n1, n2; 302 1.1 mrg PW *res = NULL; 303 1.1 mrg isl_ctx *ctx; 304 1.1 mrg 305 1.1 mrg if (!pw1 || !pw2) 306 1.1 mrg goto error; 307 1.1 mrg 308 1.1 mrg ctx = isl_space_get_ctx(pw1->dim); 309 1.1 mrg if (!isl_space_is_equal(pw1->dim, pw2->dim)) 310 1.1 mrg isl_die(ctx, isl_error_invalid, 311 1.1 mrg "arguments should live in the same space", goto error); 312 1.1 mrg 313 1.1 mrg if (FN(PW,is_empty)(pw1)) { 314 1.1 mrg FN(PW,free)(pw1); 315 1.1 mrg return pw2; 316 1.1 mrg } 317 1.1 mrg 318 1.1 mrg if (FN(PW,is_empty)(pw2)) { 319 1.1 mrg FN(PW,free)(pw2); 320 1.1 mrg return pw1; 321 1.1 mrg } 322 1.1 mrg 323 1.1 mrg for (i = 0; i < 2; ++i) { 324 1.1 mrg data[i].pw = FN(PW,sort_unique)(data[i].pw); 325 1.1 mrg data[i].cell = FN(PW,extract_domains)(data[i].pw); 326 1.1 mrg } 327 1.1 mrg 328 1.1 mrg n1 = FN(PW,n_piece)(data[0].pw); 329 1.1 mrg n2 = FN(PW,n_piece)(data[1].pw); 330 1.1 mrg if (n1 < 0 || n2 < 0) 331 1.1 mrg goto error; 332 1.1 mrg for (i = 0; i < n1; ++i) { 333 1.1 mrg for (j = 0; j < n2; ++j) { 334 1.1 mrg isl_bool disjoint; 335 1.1 mrg isl_set *set_i, *set_j; 336 1.1 mrg 337 1.1 mrg set_i = FN(PW,peek_domain_at)(data[0].pw, i); 338 1.1 mrg set_j = FN(PW,peek_domain_at)(data[1].pw, j); 339 1.1 mrg disjoint = isl_set_is_disjoint(set_i, set_j); 340 1.1 mrg if (disjoint < 0) 341 1.1 mrg goto error; 342 1.1 mrg if (disjoint) 343 1.1 mrg continue; 344 1.1 mrg if (FN(PW,union_opt_cmp_two)(&data[0], i, 345 1.1 mrg &data[1], j, cmp) < 0) 346 1.1 mrg goto error; 347 1.1 mrg } 348 1.1 mrg } 349 1.1 mrg 350 1.1 mrg res = FN(PW,merge)(&data[0], &data[1]); 351 1.1 mrg for (i = 0; i < 2; ++i) 352 1.1 mrg FN(PW,union_opt_cmp_data_clear)(&data[i]); 353 1.1 mrg 354 1.1 mrg return res; 355 1.1 mrg error: 356 1.1 mrg for (i = 0; i < 2; ++i) 357 1.1 mrg FN(PW,union_opt_cmp_data_clear)(&data[i]); 358 1.1 mrg return FN(PW,free)(res); 359 1.1 mrg } 360