/*
 * Copyright (C) 2021 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "util/rb_tree.h"
#include "ir3_ra.h"
#include "ir3_shader.h"

/*
 * This pass does two things:
 *
 * 1. Calculates the maximum register pressure. To do this, we need to use the
 *    exact same technique that RA uses for combining meta_split instructions
 *    with their sources, so that our calculation agrees with RA.
 * 2. Spills when the register pressure exceeds a limit calculated by RA.
 *    The implementation is based on "Register Spilling and Live-Range Splitting
 *    for SSA-Form Programs" by Braun and Hack, although again care has to be
 *    taken to handle combining split/collect instructions.
 */
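
/* A rough sketch of the heuristic from the paper, as used by limit() below:
 * whenever adding a value would push the register pressure over the limit,
 * evict the live value whose next use is furthest away, and only emit a spill
 * for it if it hasn't already been spilled on the current path. For example,
 * with room for two values:
 *
 *    x = ...
 *    y = ...
 *    z = ...     <- three values now live; y's next use is furthest, spill y
 *    ... = x
 *    ... = z
 *    ... = y     <- reload y from its spill slot shortly before this use
 *
 * Most of the extra machinery below deals with making this interact correctly
 * with split/collect and parallel-copy instructions.
 */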

struct reg_or_immed {
   unsigned flags;
   union {
      struct ir3_register *def;
      uint32_t uimm;
      unsigned const_num;
   };
};

struct ra_spill_interval {
   struct ir3_reg_interval interval;

   struct rb_node node;
   struct rb_node half_node;

   /* The current SSA value/const/immed this source is mapped to. */
   struct reg_or_immed dst;

   /* When computing use distances we use the distance relative to the start
    * of the block. So, for example, a value that's defined in cycle 5 of the
    * block and used 6 cycles later will always have a next_use_distance of 11
    * until we reach that use.
    */
   unsigned next_use_distance;

   /* Whether this value was reloaded and therefore doesn't need to be
    * spilled again. Corresponds to the S set in the paper.
    */
   bool already_spilled;

   /* We need to add sources early for accounting purposes, but we have to
    * insert the reload code for them last. Keep track of whether this interval
    * needs to be reloaded later.
    */
   bool needs_reload;

   /* Keep track of whether this interval currently can't be spilled because:
    * - It or one of its children is a source and we're making space for
    *   sources.
    * - It is a destination and we're making space for destinations.
    */
   bool cant_spill;
};

struct ra_spill_block_state {
   unsigned *next_use_end;
   unsigned *next_use_start;

   unsigned cycles;

   /* Map from SSA def to reg_or_immed it is mapped to at the end of the block.
    * This map only contains values which we didn't spill, so it also serves as
    * a record of the new live-out set for this block.
    */
   struct hash_table *remap;

   /* For blocks whose successors are visited first (i.e. loop backedges), which
    * values should be live at the end.
    */
   BITSET_WORD *live_out;

   bool visited;
};

struct ra_spill_ctx {
   struct ir3_reg_ctx reg_ctx;

   struct ra_spill_interval **intervals;
   unsigned intervals_count;

   /* rb tree of live intervals that we can spill, ordered by next-use distance.
    * full_live_intervals contains the full+shared intervals in the merged_regs
    * case. We use this list to determine what to spill.
    */
   struct rb_tree full_live_intervals;
   struct rb_tree half_live_intervals;

   struct ir3_pressure cur_pressure, max_pressure;

   struct ir3_pressure limit_pressure;

   /* When spilling, we need to reserve a register to serve as the zero'd
    * "base". For simplicity we reserve a register at the beginning so that it's
    * always available.
    */
   struct ir3_register *base_reg;

   /* Current pvtmem offset in bytes. */
   unsigned spill_slot;

   struct ir3_liveness *live;

   const struct ir3_compiler *compiler;

   struct ra_spill_block_state *blocks;

   bool spilling;

   bool merged_regs;
};

static void
add_base_reg(struct ra_spill_ctx *ctx, struct ir3 *ir)
{
   struct ir3_block *start = ir3_start_block(ir);

   /* We need to stick it after any meta instructions which need to be first. */
   struct ir3_instruction *after = NULL;
   foreach_instr (instr, &start->instr_list) {
      if (instr->opc != OPC_META_INPUT &&
          instr->opc != OPC_META_TEX_PREFETCH) {
         after = instr;
         break;
      }
   }

   struct ir3_instruction *mov = create_immed(start, 0);

   if (after)
      ir3_instr_move_before(mov, after);

   ctx->base_reg = mov->dsts[0];

   /* We don't create an interval, etc. for the base reg, so just lower the
    * register pressure limit to account for it. We assume it's always
    * available for simplicity.
    */
   ctx->limit_pressure.full -= reg_size(ctx->base_reg);
}

/* Compute the number of cycles per instruction used for next-use-distance
 * analysis. This is just approximate, obviously.
 */
static unsigned
instr_cycles(struct ir3_instruction *instr)
{
   if (instr->opc == OPC_META_PARALLEL_COPY) {
      unsigned cycles = 0;
      for (unsigned i = 0; i < instr->dsts_count; i++) {
         if (!instr->srcs[i]->def ||
             instr->srcs[i]->def->merge_set != instr->dsts[i]->merge_set) {
            cycles += reg_elems(instr->srcs[i]);
         }
      }

      return cycles;
   }

   if (instr->opc == OPC_META_COLLECT) {
      unsigned cycles = 0;
      for (unsigned i = 0; i < instr->srcs_count; i++) {
         if (!instr->srcs[i]->def ||
             instr->srcs[i]->def->merge_set != instr->dsts[0]->merge_set) {
            cycles++;
         }
      }

      return cycles;
   }

   if (is_meta(instr))
      return 0;

   return 1 + instr->repeat;
}

static bool
compute_block_next_distance(struct ra_spill_ctx *ctx, struct ir3_block *block,
                            unsigned *tmp_next_use)
{
   struct ra_spill_block_state *state = &ctx->blocks[block->index];
   memcpy(tmp_next_use, state->next_use_end,
          ctx->live->definitions_count * sizeof(*tmp_next_use));

   unsigned cycle = state->cycles;
   foreach_instr_rev (instr, &block->instr_list) {
      ra_foreach_dst (dst, instr) {
         dst->next_use = tmp_next_use[dst->name];
      }

      ra_foreach_src (src, instr) {
         src->next_use = tmp_next_use[src->def->name];
      }

      cycle -= instr_cycles(instr);

      if (instr->opc == OPC_META_PARALLEL_COPY) {
         ra_foreach_src_n (src, i, instr) {
            if (src->def->merge_set == instr->dsts[i]->merge_set &&
                src->def->merge_set_offset == instr->dsts[i]->merge_set_offset) {
               tmp_next_use[src->def->name] =
                  tmp_next_use[instr->dsts[i]->name];
            } else {
               tmp_next_use[src->def->name] = cycle;
            }
         }
      } else if (instr->opc != OPC_META_PHI) {
         ra_foreach_src (src, instr) {
            tmp_next_use[src->def->name] = cycle;
         }
      }

      ra_foreach_dst (dst, instr) {
         tmp_next_use[dst->name] = UINT_MAX;
      }
   }

   memcpy(state->next_use_start, tmp_next_use,
          ctx->live->definitions_count * sizeof(*tmp_next_use));

   bool progress = false;
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      const struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *pred_state = &ctx->blocks[pred->index];

      /* Add a large-enough distance in front of edges exiting the loop so that
       * variables that are live-through the loop but not used inside it are
       * prioritized for spilling, as per the paper. This just needs to be
       * larger than the longest path through the loop.
       */
      bool loop_exit = pred->loop_depth < block->loop_depth;
      unsigned block_distance = pred_state->cycles + (loop_exit ? 100000 : 0);

      for (unsigned j = 0; j < ctx->live->definitions_count; j++) {
         if (state->next_use_start[j] < UINT_MAX &&
             state->next_use_start[j] + block_distance <
             pred_state->next_use_end[j]) {
            pred_state->next_use_end[j] =
               state->next_use_start[j] + block_distance;
            progress = true;
         }
      }

      foreach_instr (phi, &block->instr_list) {
         if (phi->opc != OPC_META_PHI)
            break;
         if (!phi->srcs[i]->def)
            continue;
         unsigned src = phi->srcs[i]->def->name;
         if (phi->dsts[0]->next_use < UINT_MAX &&
             phi->dsts[0]->next_use + block_distance <
             pred_state->next_use_end[src]) {
            pred_state->next_use_end[src] =
               phi->dsts[0]->next_use + block_distance;
            progress = true;
         }
      }
   }

   return progress;
}

static void
compute_next_distance(struct ra_spill_ctx *ctx, struct ir3 *ir)
{
   for (unsigned i = 0; i < ctx->live->block_count; i++) {
      ctx->blocks[i].next_use_start =
         ralloc_array(ctx, unsigned, ctx->live->definitions_count);
      ctx->blocks[i].next_use_end =
         ralloc_array(ctx, unsigned, ctx->live->definitions_count);

      for (unsigned j = 0; j < ctx->live->definitions_count; j++) {
         ctx->blocks[i].next_use_start[j] = UINT_MAX;
         ctx->blocks[i].next_use_end[j] = UINT_MAX;
      }
   }

   foreach_block (block, &ir->block_list) {
      struct ra_spill_block_state *state = &ctx->blocks[block->index];
      state->cycles = 0;
      foreach_instr (instr, &block->instr_list) {
         state->cycles += instr_cycles(instr);
         foreach_dst (dst, instr) {
            dst->spill_slot = ~0;
         }
      }
   }

   unsigned *tmp_next_use =
      ralloc_array(ctx, unsigned, ctx->live->definitions_count);

   bool progress = true;
   while (progress) {
      progress = false;
      foreach_block_rev (block, &ir->block_list) {
         progress |= compute_block_next_distance(ctx, block, tmp_next_use);
      }
   }
}

static void
ra_spill_interval_init(struct ra_spill_interval *interval,
                       struct ir3_register *reg)
{
   ir3_reg_interval_init(&interval->interval, reg);
   interval->dst.flags = reg->flags;
   interval->dst.def = reg;
   interval->already_spilled = false;
   interval->needs_reload = false;
   interval->cant_spill = false;
}

static struct ra_spill_interval *
ir3_reg_interval_to_interval(struct ir3_reg_interval *interval)
{
   return rb_node_data(struct ra_spill_interval, interval, interval);
}

static struct ra_spill_interval *
ra_spill_interval_root(struct ra_spill_interval *interval)
{
   struct ir3_reg_interval *ir3_interval = &interval->interval;
   while (ir3_interval->parent)
      ir3_interval = ir3_interval->parent;
   return ir3_reg_interval_to_interval(ir3_interval);
}

static struct ra_spill_ctx *
ir3_reg_ctx_to_ctx(struct ir3_reg_ctx *ctx)
{
   return rb_node_data(struct ra_spill_ctx, ctx, reg_ctx);
}

static int
ra_spill_interval_cmp(const struct rb_node *_a, const struct rb_node *_b)
{
   const struct ra_spill_interval *a =
      rb_node_data(const struct ra_spill_interval, _a, node);
   const struct ra_spill_interval *b =
      rb_node_data(const struct ra_spill_interval, _b, node);
   return a->next_use_distance - b->next_use_distance;
}

static int
ra_spill_interval_half_cmp(const struct rb_node *_a, const struct rb_node *_b)
{
   const struct ra_spill_interval *a =
      rb_node_data(const struct ra_spill_interval, _a, half_node);
   const struct ra_spill_interval *b =
      rb_node_data(const struct ra_spill_interval, _b, half_node);
   return a->next_use_distance - b->next_use_distance;
}

static void
interval_add(struct ir3_reg_ctx *_ctx, struct ir3_reg_interval *_interval)
{
   struct ra_spill_interval *interval = ir3_reg_interval_to_interval(_interval);
   struct ra_spill_ctx *ctx = ir3_reg_ctx_to_ctx(_ctx);

   unsigned size = reg_size(interval->interval.reg);
   if (interval->interval.reg->flags & IR3_REG_SHARED) {
      ctx->cur_pressure.shared += size;
   } else {
      if (interval->interval.reg->flags & IR3_REG_HALF) {
         ctx->cur_pressure.half += size;
         if (ctx->spilling) {
            rb_tree_insert(&ctx->half_live_intervals, &interval->half_node,
                           ra_spill_interval_half_cmp);
         }
      }
      if (ctx->merged_regs || !(interval->interval.reg->flags & IR3_REG_HALF)) {
         ctx->cur_pressure.full += size;
         if (ctx->spilling) {
            rb_tree_insert(&ctx->full_live_intervals, &interval->node,
                           ra_spill_interval_cmp);
         }
      }
   }
}

static void
interval_delete(struct ir3_reg_ctx *_ctx, struct ir3_reg_interval *_interval)
{
   struct ra_spill_interval *interval = ir3_reg_interval_to_interval(_interval);
   struct ra_spill_ctx *ctx = ir3_reg_ctx_to_ctx(_ctx);

   unsigned size = reg_size(interval->interval.reg);
   if (interval->interval.reg->flags & IR3_REG_SHARED) {
      ctx->cur_pressure.shared -= size;
   } else {
      if (interval->interval.reg->flags & IR3_REG_HALF) {
         ctx->cur_pressure.half -= size;
         if (ctx->spilling) {
            rb_tree_remove(&ctx->half_live_intervals, &interval->half_node);
         }
      }
      if (ctx->merged_regs || !(interval->interval.reg->flags & IR3_REG_HALF)) {
         ctx->cur_pressure.full -= size;
         if (ctx->spilling) {
            rb_tree_remove(&ctx->full_live_intervals, &interval->node);
         }
      }
   }
}

static void
interval_readd(struct ir3_reg_ctx *_ctx, struct ir3_reg_interval *_parent,
               struct ir3_reg_interval *_child)
{
   interval_add(_ctx, _child);
}

static void
spill_ctx_init(struct ra_spill_ctx *ctx, struct ir3_shader_variant *v,
               struct ir3_liveness *live)
{
   ctx->live = live;
   ctx->intervals = ralloc_array(ctx, struct ra_spill_interval *,
                                 ctx->live->definitions_count);
   struct ra_spill_interval *intervals =
      rzalloc_array(ctx, struct ra_spill_interval,
                    ctx->live->definitions_count);
   for (unsigned i = 0; i < ctx->live->definitions_count; i++)
      ctx->intervals[i] = &intervals[i];

   ctx->intervals_count = ctx->live->definitions_count;
   ctx->compiler = v->shader->compiler;
   ctx->merged_regs = v->mergedregs;

   rb_tree_init(&ctx->reg_ctx.intervals);
   ctx->reg_ctx.interval_add = interval_add;
   ctx->reg_ctx.interval_delete = interval_delete;
   ctx->reg_ctx.interval_readd = interval_readd;
}

static void
ra_spill_ctx_insert(struct ra_spill_ctx *ctx,
                    struct ra_spill_interval *interval)
{
   ir3_reg_interval_insert(&ctx->reg_ctx, &interval->interval);
}

static void
ra_spill_ctx_remove(struct ra_spill_ctx *ctx,
                    struct ra_spill_interval *interval)
{
   ir3_reg_interval_remove(&ctx->reg_ctx, &interval->interval);
}

static void
init_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst)
{
   struct ra_spill_interval *interval = ctx->intervals[dst->name];
   ra_spill_interval_init(interval, dst);
   if (ctx->spilling)
      interval->next_use_distance = dst->next_use;
}

static void
insert_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst)
{
   struct ra_spill_interval *interval = ctx->intervals[dst->name];
   if (interval->interval.inserted)
      return;

   ra_spill_ctx_insert(ctx, interval);
   interval->cant_spill = true;

   /* For precolored inputs, make sure we leave enough registers to allow for
    * holes in the inputs. It can happen that the binning shader has a lower
    * register pressure than the main shader, but the main shader decided to
    * add holes between the inputs which means that the binning shader has a
    * higher register demand.
    */
   if (dst->instr->opc == OPC_META_INPUT && dst->num != INVALID_REG) {
      physreg_t physreg = ra_reg_get_physreg(dst);
      physreg_t max = physreg + reg_size(dst);

      if (interval->interval.reg->flags & IR3_REG_SHARED)
         ctx->max_pressure.shared = MAX2(ctx->max_pressure.shared, max);
      else if (interval->interval.reg->flags & IR3_REG_HALF)
         ctx->max_pressure.half = MAX2(ctx->max_pressure.half, max);
      else
         ctx->max_pressure.full = MAX2(ctx->max_pressure.full, max);
   }
}

static void
insert_src(struct ra_spill_ctx *ctx, struct ir3_register *src)
{
   struct ra_spill_interval *interval = ctx->intervals[src->def->name];

   if (!interval->interval.inserted) {
      ra_spill_ctx_insert(ctx, interval);
      interval->needs_reload = true;
      interval->already_spilled = true;
   }

   ra_spill_interval_root(interval)->cant_spill = true;
}

static void
remove_src_early(struct ra_spill_ctx *ctx, struct ir3_instruction *instr,
                 struct ir3_register *src)
{
   struct ra_spill_interval *interval = ctx->intervals[src->def->name];

   if (!interval->interval.inserted || interval->interval.parent ||
       !rb_tree_is_empty(&interval->interval.children))
      return;

   ra_spill_ctx_remove(ctx, interval);
}

static void
remove_src(struct ra_spill_ctx *ctx, struct ir3_instruction *instr,
           struct ir3_register *src)
{
   struct ra_spill_interval *interval = ctx->intervals[src->def->name];

   if (!interval->interval.inserted)
      return;

   ra_spill_ctx_remove(ctx, interval);
}

static void
finish_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst)
{
   struct ra_spill_interval *interval = ctx->intervals[dst->name];
   interval->cant_spill = false;
}

static void
remove_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst)
{
   struct ra_spill_interval *interval = ctx->intervals[dst->name];

   if (!interval->interval.inserted)
      return;

   ra_spill_ctx_remove(ctx, interval);
}

static void
update_src_next_use(struct ra_spill_ctx *ctx, struct ir3_register *src)
{
   struct ra_spill_interval *interval = ctx->intervals[src->def->name];

   assert(interval->interval.inserted);

   interval->next_use_distance = src->next_use;

   /* If this node is inserted in one of the trees, then it needs to be resorted
    * as its key has changed.
    */
   if (!interval->interval.parent && !(src->flags & IR3_REG_SHARED)) {
      if (src->flags & IR3_REG_HALF) {
         rb_tree_remove(&ctx->half_live_intervals, &interval->half_node);
         rb_tree_insert(&ctx->half_live_intervals, &interval->half_node,
                        ra_spill_interval_half_cmp);
      }
      if (ctx->merged_regs || !(src->flags & IR3_REG_HALF)) {
         rb_tree_remove(&ctx->full_live_intervals, &interval->node);
         rb_tree_insert(&ctx->full_live_intervals, &interval->node,
                        ra_spill_interval_cmp);
      }
   }
}
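
/* A note on the spill slot bookkeeping in get_spill_slot() below:
 * ctx->spill_slot is a byte offset into pvtmem, while reg_size(),
 * merge_set_offset, and the merge set size appear to be counted in
 * half-register (16-bit) units, hence the "* 2" scaling when advancing the
 * slot. All defs belonging to one merge set share a single slot region,
 * offset by their merge_set_offset, so that a value and the splits/collects
 * merged with it spill to (and reload from) the same memory.
 */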

static unsigned
get_spill_slot(struct ra_spill_ctx *ctx, struct ir3_register *reg)
{
   if (reg->merge_set) {
      if (reg->merge_set->spill_slot == ~0) {
         reg->merge_set->spill_slot = ALIGN_POT(ctx->spill_slot,
                                                reg->merge_set->alignment);
         ctx->spill_slot = reg->merge_set->spill_slot + reg->merge_set->size * 2;
      }
      return reg->merge_set->spill_slot + reg->merge_set_offset * 2;
   } else {
      if (reg->spill_slot == ~0) {
         reg->spill_slot = ALIGN_POT(ctx->spill_slot, reg_elem_size(reg));
         ctx->spill_slot = reg->spill_slot + reg_size(reg) * 2;
      }
      return reg->spill_slot;
   }
}

static void
set_src_val(struct ir3_register *src, const struct reg_or_immed *val)
{
   if (val->flags & IR3_REG_IMMED) {
      src->flags = IR3_REG_IMMED | (val->flags & IR3_REG_HALF);
      src->uim_val = val->uimm;
      src->def = NULL;
   } else if (val->flags & IR3_REG_CONST) {
      src->flags = IR3_REG_CONST | (val->flags & IR3_REG_HALF);
      src->num = val->const_num;
      src->def = NULL;
   } else {
      src->def = val->def;
   }
}

static struct ir3_register *
materialize_pcopy_src(const struct reg_or_immed *src,
                      struct ir3_instruction *instr,
                      struct ir3_block *block)
{
   struct ir3_instruction *mov = ir3_instr_create(block, OPC_MOV, 1, 1);
   struct ir3_register *dst = __ssa_dst(mov);
   dst->flags |= src->flags & IR3_REG_HALF;
   struct ir3_register *mov_src = ir3_src_create(mov, INVALID_REG, src->flags);
   set_src_val(mov_src, src);
   mov->cat1.src_type = mov->cat1.dst_type =
      (src->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;

   if (instr)
      ir3_instr_move_before(mov, instr);
   return dst;
}

static void
spill(struct ra_spill_ctx *ctx, const struct reg_or_immed *val,
      unsigned spill_slot, struct ir3_instruction *instr,
      struct ir3_block *block)
{
   struct ir3_register *reg;

   /* If spilling an immed/const pcopy src, we need to actually materialize it
    * first with a mov.
    */
   if (val->flags & (IR3_REG_CONST | IR3_REG_IMMED)) {
      reg = materialize_pcopy_src(val, instr, block);
   } else {
      reg = val->def;
   }

   d("spilling ssa_%u:%u to %u", reg->instr->serialno, reg->name,
     spill_slot);

   unsigned elems = reg_elems(reg);
   struct ir3_instruction *spill =
      ir3_instr_create(block, OPC_SPILL_MACRO, 0, 3);
   ir3_src_create(spill, INVALID_REG, ctx->base_reg->flags)->def = ctx->base_reg;
   unsigned src_flags = reg->flags & (IR3_REG_HALF | IR3_REG_IMMED |
                                      IR3_REG_CONST | IR3_REG_SSA |
                                      IR3_REG_ARRAY);
   struct ir3_register *src = ir3_src_create(spill, INVALID_REG, src_flags);
   ir3_src_create(spill, INVALID_REG, IR3_REG_IMMED)->uim_val = elems;
   spill->cat6.dst_offset = spill_slot;
   spill->cat6.type = (reg->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;

   src->def = reg;
   if (reg->flags & IR3_REG_ARRAY) {
      src->size = reg->size;
      src->array.id = reg->array.id;
      src->array.offset = 0;
   } else {
      src->wrmask = reg->wrmask;
   }

   if (instr)
      ir3_instr_move_before(spill, instr);
}

static void
spill_interval(struct ra_spill_ctx *ctx, struct ra_spill_interval *interval,
               struct ir3_instruction *instr, struct ir3_block *block)
{
   spill(ctx, &interval->dst, get_spill_slot(ctx, interval->interval.reg),
         instr, block);
}

/* This is similar to "limit" in the paper. */
static void
limit(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
{
   if (ctx->cur_pressure.half > ctx->limit_pressure.half) {
      d("cur half pressure %u exceeds %u", ctx->cur_pressure.half,
        ctx->limit_pressure.half);
      rb_tree_foreach_safe (struct ra_spill_interval, interval,
                            &ctx->half_live_intervals, half_node) {
         d("trying ssa_%u:%u", interval->interval.reg->instr->serialno,
           interval->interval.reg->name);
         if (!interval->cant_spill) {
            if (!interval->already_spilled)
               spill_interval(ctx, interval, instr, instr->block);
            ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
            if (ctx->cur_pressure.half <= ctx->limit_pressure.half)
               break;
         }
      }

      assert(ctx->cur_pressure.half <= ctx->limit_pressure.half);
   }

   if (ctx->cur_pressure.full > ctx->limit_pressure.full) {
      d("cur full pressure %u exceeds %u", ctx->cur_pressure.full,
        ctx->limit_pressure.full);
      rb_tree_foreach_safe (struct ra_spill_interval, interval,
                            &ctx->full_live_intervals, node) {
         d("trying ssa_%u:%u", interval->interval.reg->instr->serialno,
           interval->interval.reg->name);
         if (!interval->cant_spill) {
            if (!interval->already_spilled)
               spill_interval(ctx, interval, instr, instr->block);
            ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
            if (ctx->cur_pressure.full <= ctx->limit_pressure.full)
               break;
         } else {
            d("can't spill");
         }
      }

      assert(ctx->cur_pressure.full <= ctx->limit_pressure.full);
   }
}

/* There's a corner case where we reload a value which has overlapping live
 * values already reloaded, either because it's the child of some other interval
 * that was already reloaded or some of its children have already been
 * reloaded. Because RA only expects overlapping source/dest intervals for meta
 * instructions (split/collect), and we don't want to add register pressure by
 * creating an entirely separate value, we need to add splits and collects to
 * deal with this case. These splits/collects also have to have correct merge
 * set information so that they don't result in any actual code or register
 * pressure in practice.
 */

static void
add_to_merge_set(struct ir3_merge_set *set, struct ir3_register *def,
                 unsigned offset)
{
   def->merge_set = set;
   def->merge_set_offset = offset;
   def->interval_start = set->interval_start + offset;
   def->interval_end = set->interval_start + offset + reg_size(def);
}

static struct ir3_register *
split(struct ir3_register *def, unsigned offset,
      struct ir3_instruction *after, struct ir3_block *block)
{
   if (reg_elems(def) == 1) {
      assert(offset == 0);
      return def;
   }

   assert(!(def->flags & IR3_REG_ARRAY));
   assert(def->merge_set);
   struct ir3_instruction *split =
      ir3_instr_create(after->block, OPC_META_SPLIT, 1, 1);
   struct ir3_register *dst = __ssa_dst(split);
   dst->flags |= def->flags & IR3_REG_HALF;
   struct ir3_register *src = ir3_src_create(split, INVALID_REG, def->flags);
   src->wrmask = def->wrmask;
   src->def = def;
   add_to_merge_set(def->merge_set, dst,
                    def->merge_set_offset + offset * reg_elem_size(def));
   if (after)
      ir3_instr_move_before(split, after);
   return dst;
}

static struct ir3_register *
extract(struct ir3_register *parent_def, unsigned offset, unsigned elems,
        struct ir3_instruction *after, struct ir3_block *block)
{
   if (offset == 0 && elems == reg_elems(parent_def))
      return parent_def;

   struct ir3_instruction *collect =
      ir3_instr_create(after->block, OPC_META_COLLECT, 1, elems);
   struct ir3_register *dst = __ssa_dst(collect);
   dst->flags |= parent_def->flags & IR3_REG_HALF;
   dst->wrmask = MASK(elems);
   add_to_merge_set(parent_def->merge_set, dst, parent_def->merge_set_offset);

   for (unsigned i = 0; i < elems; i++) {
      ir3_src_create(collect, INVALID_REG, parent_def->flags)->def =
         split(parent_def, offset + i, after, block);
   }

   if (after)
      ir3_instr_move_before(collect, after);
   return dst;
}

static struct ir3_register *
reload(struct ra_spill_ctx *ctx, struct ir3_register *reg,
       struct ir3_instruction *after, struct ir3_block *block)
{
   unsigned spill_slot = get_spill_slot(ctx, reg);

   d("reloading ssa_%u:%u from %u", reg->instr->serialno, reg->name,
     spill_slot);

   unsigned elems = reg_elems(reg);
   struct ir3_instruction *reload =
      ir3_instr_create(block, OPC_RELOAD_MACRO, 1, 3);
   struct ir3_register *dst = __ssa_dst(reload);
   dst->flags |= reg->flags & (IR3_REG_HALF | IR3_REG_ARRAY);
   ir3_src_create(reload, INVALID_REG, ctx->base_reg->flags)->def = ctx->base_reg;
   struct ir3_register *offset_reg =
      ir3_src_create(reload, INVALID_REG, IR3_REG_IMMED);
   offset_reg->uim_val = spill_slot;
   ir3_src_create(reload, INVALID_REG, IR3_REG_IMMED)->uim_val = elems;
   reload->cat6.type = (reg->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;

   if (reg->flags & IR3_REG_ARRAY) {
      dst->array.offset = 0;
      dst->array.id = reg->array.id;
      dst->size = reg->size;
   } else {
      dst->wrmask = MASK(elems);
   }

   dst->merge_set = reg->merge_set;
   dst->merge_set_offset = reg->merge_set_offset;
   dst->interval_start = reg->interval_start;
   dst->interval_end = reg->interval_end;

   if (after)
      ir3_instr_move_before(reload, after);

   return dst;
}

static void
rewrite_src_interval(struct ra_spill_ctx *ctx,
                     struct ra_spill_interval *interval,
                     struct ir3_register *def,
                     struct ir3_instruction *instr,
                     struct ir3_block *block)
{
   interval->dst.flags = def->flags;
   interval->dst.def = def;
   interval->needs_reload = false;

   rb_tree_foreach (struct ra_spill_interval, child,
                    &interval->interval.children, interval.node) {
      struct ir3_register *child_reg = child->interval.reg;
      struct ir3_register *child_def =
         extract(def, (child_reg->interval_start -
                       interval->interval.reg->interval_start) / reg_elem_size(def),
                 reg_elems(child_reg), instr, block);
      rewrite_src_interval(ctx, child, child_def, instr, block);
   }
}

static void
reload_def(struct ra_spill_ctx *ctx, struct ir3_register *def,
           struct ir3_instruction *instr, struct ir3_block *block)
{
   unsigned elems = reg_elems(def);
   struct ra_spill_interval *interval = ctx->intervals[def->name];

   struct ir3_reg_interval *ir3_parent = interval->interval.parent;

   if (ir3_parent) {
      struct ra_spill_interval *parent =
         ir3_reg_interval_to_interval(ir3_parent);
      if (!parent->needs_reload) {
         interval->dst.flags = def->flags;
         interval->dst.def = extract(
            parent->dst.def,
            (def->interval_start - parent->dst.def->interval_start) /
            reg_elem_size(def), elems, instr, block);
         return;
      }
   }

   struct ir3_register *dst = reload(ctx, def, instr, block);

   rewrite_src_interval(ctx, interval, dst, instr, block);
}

static void
reload_src(struct ra_spill_ctx *ctx, struct ir3_instruction *instr,
           struct ir3_register *src)
{
   struct ra_spill_interval *interval = ctx->intervals[src->def->name];

   if (interval->needs_reload) {
      reload_def(ctx, src->def, instr, instr->block);
   }

   ra_spill_interval_root(interval)->cant_spill = false;
}

static void
rewrite_src(struct ra_spill_ctx *ctx, struct ir3_instruction *instr,
            struct ir3_register *src)
{
   struct ra_spill_interval *interval = ctx->intervals[src->def->name];

   set_src_val(src, &interval->dst);
}

static void
update_max_pressure(struct ra_spill_ctx *ctx)
{
   d("pressure:");
   d("\tfull: %u", ctx->cur_pressure.full);
   d("\thalf: %u", ctx->cur_pressure.half);
   d("\tshared: %u", ctx->cur_pressure.shared);

   ctx->max_pressure.full =
      MAX2(ctx->max_pressure.full, ctx->cur_pressure.full);
   ctx->max_pressure.half =
      MAX2(ctx->max_pressure.half, ctx->cur_pressure.half);
   ctx->max_pressure.shared =
      MAX2(ctx->max_pressure.shared, ctx->cur_pressure.shared);
}

static void
handle_instr(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
{
   ra_foreach_dst (dst, instr) {
      init_dst(ctx, dst);
   }

   if (ctx->spilling) {
      ra_foreach_src (src, instr)
         insert_src(ctx, src);
   }

   /* Handle tied destinations. If a destination is tied to a source and that
    * source is live-through, then we need to allocate a new register for the
    * destination which is live-through itself and cannot overlap the
    * sources.
    */

   ra_foreach_dst (dst, instr) {
      struct ir3_register *tied_src = dst->tied;
      if (tied_src && !(tied_src->flags & IR3_REG_FIRST_KILL))
         insert_dst(ctx, dst);
   }

   if (ctx->spilling)
      limit(ctx, instr);
   else
      update_max_pressure(ctx);

   if (ctx->spilling) {
      ra_foreach_src (src, instr) {
         reload_src(ctx, instr, src);
         update_src_next_use(ctx, src);
      }
   }

   ra_foreach_src (src, instr) {
      if (src->flags & IR3_REG_FIRST_KILL)
         remove_src_early(ctx, instr, src);
   }

   ra_foreach_dst (dst, instr) {
      insert_dst(ctx, dst);
   }

   if (ctx->spilling)
      limit(ctx, instr);
   else
      update_max_pressure(ctx);

   /* We have to remove sources before rewriting them so that we can lookup the
    * interval to remove before the source itself is changed.
    */
   ra_foreach_src (src, instr) {
      if (src->flags & IR3_REG_FIRST_KILL)
         remove_src(ctx, instr, src);
   }

   if (ctx->spilling) {
      ra_foreach_src (src, instr) {
         rewrite_src(ctx, instr, src);
      }
   }

   ra_foreach_dst (dst, instr) {
      finish_dst(ctx, dst);
   }

   for (unsigned i = 0; i < instr->dsts_count; i++) {
      if (ra_reg_is_dst(instr->dsts[i]) &&
          (instr->dsts[i]->flags & IR3_REG_UNUSED))
         remove_dst(ctx, instr->dsts[i]);
   }
}

static struct ra_spill_interval *
create_temp_interval(struct ra_spill_ctx *ctx, struct ir3_register *def)
{
   unsigned name = ctx->intervals_count++;
   unsigned offset = ctx->live->interval_offset;

   /* This is kinda hacky, but we need to create a fake SSA def here that is
    * only used as part of the pcopy accounting. See below.
    */
   struct ir3_register *reg = rzalloc(ctx, struct ir3_register);
   *reg = *def;
   reg->name = name;
   reg->interval_start = offset;
   reg->interval_end = offset + reg_size(def);
   reg->merge_set = NULL;

   ctx->intervals = reralloc(ctx, ctx->intervals, struct ra_spill_interval *,
                             ctx->intervals_count);
   struct ra_spill_interval *interval = rzalloc(ctx, struct ra_spill_interval);
   ra_spill_interval_init(interval, reg);
   ctx->intervals[name] = interval;
   ctx->live->interval_offset += reg_size(def);
   return interval;
}

/* In the sequence of copies generated (see below), would this source be killed?
 */
static bool
is_last_pcopy_src(struct ir3_instruction *pcopy, unsigned src_n)
{
   struct ir3_register *src = pcopy->srcs[src_n];
   if (!(src->flags & IR3_REG_KILL))
      return false;
   for (unsigned j = src_n + 1; j < pcopy->srcs_count; j++) {
      if (pcopy->srcs[j]->def == src->def)
         return false;
   }
   return true;
}

/* Parallel copies are different from normal instructions. The sources together
 * may be larger than the entire register file, so we cannot just reload every
 * source like normal, and indeed that probably wouldn't be a great idea.
 * Instead we essentially need to lower the parallel copy to "copies," just like
 * in the normal CSSA construction, although we implement the copies by
 * reloading and then possibly spilling values. We essentially just shuffle
 * around the sources until each source either (a) is live or (b) has the same
 * spill slot as its corresponding destination. We do this by decomposing the
 * copy into a series of copies, so:
 *
 * a, b, c = d, e, f
 *
 * becomes:
 *
 * d' = d
 * e' = e
 * f' = f
 * a = d'
 * b = e'
 * c = f'
 *
 * The temporary SSA values d', e', and f' never actually show up in the result.
 * They are only used for our internal accounting. They may, however, have their
 * own spill slot created for them. Similarly, we don't actually emit any copy
 * instructions, although we emit the spills/reloads that *would've* been
 * required if those copies were there.
 *
 * TODO: in order to reduce the number of temporaries and therefore spill slots,
 * we could instead do a more complicated analysis that considers the location
 * transfer graph.
 *
 * In addition, we actually remove the parallel copy and rewrite all its uses
 * (in the phi nodes) rather than rewrite its sources at the end. Recreating it
 * later turns out to be easier than keeping it up-to-date throughout this pass,
 * since we may have to remove entries for phi sources that are spilled and add
 * entries for live-outs that are spilled and reloaded, which can happen here
 * and then possibly be undone or done again when processing live-ins of the
 * successor block.
 */

static void
handle_pcopy(struct ra_spill_ctx *ctx, struct ir3_instruction *pcopy)
{
   foreach_dst (dst, pcopy) {
      struct ra_spill_interval *dst_interval = ctx->intervals[dst->name];
      ra_spill_interval_init(dst_interval, dst);
   }

   foreach_src_n (src, i, pcopy) {
      d("processing src %u", i);
      struct ir3_register *dst = pcopy->dsts[i];

      /* Skip the intermediate copy for cases where the source is merged with
       * the destination. Crucially this means that we also don't reload/spill
       * it if it's been spilled, because it shares the same spill slot.
       */
      if (src->def && src->def->merge_set &&
          src->def->merge_set == dst->merge_set &&
          src->def->merge_set_offset == dst->merge_set_offset) {
         struct ra_spill_interval *src_interval = ctx->intervals[src->def->name];
         struct ra_spill_interval *dst_interval = ctx->intervals[dst->name];
         if (src_interval->interval.inserted) {
            update_src_next_use(ctx, src);
            if (is_last_pcopy_src(pcopy, i))
               ra_spill_ctx_remove(ctx, src_interval);
            dst_interval->cant_spill = true;
            ra_spill_ctx_insert(ctx, dst_interval);
            limit(ctx, pcopy);
            dst_interval->cant_spill = false;
            dst_interval->dst = src_interval->dst;
         }
      } else if (src->def) {
         struct ra_spill_interval *temp_interval =
            create_temp_interval(ctx, dst);
         struct ir3_register *temp = temp_interval->interval.reg;
         temp_interval->next_use_distance = src->next_use;

         insert_src(ctx, src);
         limit(ctx, pcopy);
         reload_src(ctx, pcopy, src);
         update_src_next_use(ctx, src);
         if (is_last_pcopy_src(pcopy, i))
            remove_src(ctx, pcopy, src);
         struct ra_spill_interval *src_interval =
            ctx->intervals[src->def->name];
         temp_interval->dst = src_interval->dst;

         temp_interval->cant_spill = true;
         ra_spill_ctx_insert(ctx, temp_interval);
         limit(ctx, pcopy);
         temp_interval->cant_spill = false;

         src->flags = temp->flags;
         src->def = temp;
      }
   }

   d("done with pcopy srcs");

   foreach_src_n (src, i, pcopy) {
      struct ir3_register *dst = pcopy->dsts[i];

      if (src->def && src->def->merge_set &&
          src->def->merge_set == dst->merge_set &&
          src->def->merge_set_offset == dst->merge_set_offset)
         continue;

      struct ra_spill_interval *dst_interval = ctx->intervals[dst->name];

      if (!src->def) {
         dst_interval->cant_spill = true;
         ra_spill_ctx_insert(ctx, dst_interval);
         limit(ctx, pcopy);
         dst_interval->cant_spill = false;

         assert(src->flags & (IR3_REG_CONST | IR3_REG_IMMED));
         if (src->flags & IR3_REG_CONST) {
            dst_interval->dst.flags = src->flags;
            dst_interval->dst.const_num = src->num;
         } else {
            dst_interval->dst.flags = src->flags;
            dst_interval->dst.uimm = src->uim_val;
         }
      } else {
         struct ra_spill_interval *temp_interval = ctx->intervals[src->def->name];

         insert_src(ctx, src);
         limit(ctx, pcopy);
         reload_src(ctx, pcopy, src);
         remove_src(ctx, pcopy, src);

         dst_interval->dst = temp_interval->dst;
         ra_spill_ctx_insert(ctx, dst_interval);
      }
   }

   pcopy->flags |= IR3_INSTR_UNUSED;
}

static void
handle_input_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
{
   init_dst(ctx, instr->dsts[0]);
   insert_dst(ctx, instr->dsts[0]);
   finish_dst(ctx, instr->dsts[0]);
}

static void
remove_input_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
{
   if (instr->opc == OPC_META_TEX_PREFETCH) {
      ra_foreach_src (src, instr)
         remove_src(ctx, instr, src);
   }
   if (instr->dsts[0]->flags & IR3_REG_UNUSED)
      remove_dst(ctx, instr->dsts[0]);
}

static void
handle_live_in(struct ra_spill_ctx *ctx, struct ir3_block *block,
               struct ir3_register *def)
{
   struct ra_spill_interval *interval = ctx->intervals[def->name];
   ra_spill_interval_init(interval, def);
   if (ctx->spilling) {
      interval->next_use_distance =
         ctx->blocks[block->index].next_use_start[def->name];
   }

   ra_spill_ctx_insert(ctx, interval);
}

static bool
is_live_in_phi(struct ir3_register *def, struct ir3_block *block)
{
   return def->instr->opc == OPC_META_PHI && def->instr->block == block;
}

static bool
is_live_in_pred(struct ra_spill_ctx *ctx, struct ir3_register *def,
                struct ir3_block *block, unsigned pred_idx)
{
   struct ir3_block *pred = block->predecessors[pred_idx];
   struct ra_spill_block_state *state = &ctx->blocks[pred->index];
   if (is_live_in_phi(def, block)) {
      def = def->instr->srcs[pred_idx]->def;
      if (!def)
         return false;
   }

   return _mesa_hash_table_search(state->remap, def);
}

static bool
is_live_in_undef(struct ir3_register *def,
                 struct ir3_block *block, unsigned pred_idx)
{
   if (!is_live_in_phi(def, block))
      return false;

   return !def->instr->srcs[pred_idx]->def;
}

static struct reg_or_immed *
read_live_in(struct ra_spill_ctx *ctx, struct ir3_register *def,
             struct ir3_block *block, unsigned pred_idx)
{
   struct ir3_block *pred = block->predecessors[pred_idx];
   struct ra_spill_block_state *state = &ctx->blocks[pred->index];

   if (is_live_in_phi(def, block)) {
      def = def->instr->srcs[pred_idx]->def;
      if (!def)
         return NULL;
   }

   struct hash_entry *entry = _mesa_hash_table_search(state->remap, def);
   if (entry)
      return entry->data;
   else
      return NULL;
}

static bool
is_live_in_all_preds(struct ra_spill_ctx *ctx, struct ir3_register *def,
                     struct ir3_block *block)
{
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      if (!is_live_in_pred(ctx, def, block, i))
         return false;
   }

   return true;
}

static void
spill_live_in(struct ra_spill_ctx *ctx, struct ir3_register *def,
              struct ir3_block *block)
{
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];

      if (!state->visited)
         continue;

      struct reg_or_immed *pred_def = read_live_in(ctx, def, block, i);
      if (pred_def) {
         spill(ctx, pred_def, get_spill_slot(ctx, def), NULL, pred);
      }
   }
}

static void
spill_live_ins(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   bool all_preds_visited = true;
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];
      if (!state->visited) {
         all_preds_visited = false;
         break;
      }
   }

   /* Note: in the paper they explicitly spill live-through values first, but we
    * should be doing that automatically by virtue of picking the largest
    * distance due to the extra distance added to edges out of loops.
    *
    * TODO: Keep track of pressure in each block and preemptively spill
    * live-through values as described in the paper to avoid spilling them
    * inside the loop.
    */

   if (ctx->cur_pressure.half > ctx->limit_pressure.half) {
      rb_tree_foreach_safe (struct ra_spill_interval, interval,
                            &ctx->half_live_intervals, half_node) {
         if (all_preds_visited &&
             is_live_in_all_preds(ctx, interval->interval.reg, block))
            continue;
         spill_live_in(ctx, interval->interval.reg, block);
         ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
         if (ctx->cur_pressure.half <= ctx->limit_pressure.half)
            break;
      }
   }

   if (ctx->cur_pressure.full > ctx->limit_pressure.full) {
      rb_tree_foreach_safe (struct ra_spill_interval, interval,
                            &ctx->full_live_intervals, node) {
         if (all_preds_visited &&
             is_live_in_all_preds(ctx, interval->interval.reg, block))
            continue;
         spill_live_in(ctx, interval->interval.reg, block);
         ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
         if (ctx->cur_pressure.full <= ctx->limit_pressure.full)
            break;
      }
   }
}

static void
live_in_rewrite(struct ra_spill_ctx *ctx,
                struct ra_spill_interval *interval,
                struct reg_or_immed *new_val,
                struct ir3_block *block, unsigned pred_idx)
{
   struct ir3_block *pred = block->predecessors[pred_idx];
   struct ra_spill_block_state *state = &ctx->blocks[pred->index];
   struct ir3_register *def = interval->interval.reg;
   if (is_live_in_phi(def, block)) {
      def = def->instr->srcs[pred_idx]->def;
   }

   if (def)
      _mesa_hash_table_insert(state->remap, def, new_val);

   rb_tree_foreach (struct ra_spill_interval, child,
                    &interval->interval.children, interval.node) {
      assert(new_val->flags & IR3_REG_SSA);
      struct ir3_register *child_def =
         extract(new_val->def,
                 (child->interval.reg->interval_start - def->interval_start) /
                 reg_elem_size(def), reg_elems(child->interval.reg),
                 NULL, pred);
      struct reg_or_immed *child_val = ralloc(ctx, struct reg_or_immed);
      child_val->def = child_def;
      child_val->flags = child_def->flags;
      live_in_rewrite(ctx, child, child_val, block, pred_idx);
   }
}

static void
reload_live_in(struct ra_spill_ctx *ctx, struct ir3_register *def,
               struct ir3_block *block)
{
   struct ra_spill_interval *interval = ctx->intervals[def->name];
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];
      if (!state->visited)
         continue;

      if (is_live_in_undef(def, block, i))
         continue;

      struct reg_or_immed *new_val = read_live_in(ctx, def, block, i);

      if (!new_val) {
         new_val = ralloc(ctx, struct reg_or_immed);
         new_val->def = reload(ctx, def, NULL, pred);
         new_val->flags = new_val->def->flags;
      }
      live_in_rewrite(ctx, interval, new_val, block, i);
   }
}

static void
reload_live_ins(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   rb_tree_foreach (struct ra_spill_interval, interval, &ctx->reg_ctx.intervals,
                    interval.node) {
      reload_live_in(ctx, interval->interval.reg, block);
   }
}

static void
add_live_in_phi(struct ra_spill_ctx *ctx, struct ir3_register *def,
                struct ir3_block *block)
{
   struct ra_spill_interval *interval = ctx->intervals[def->name];
   if (!interval->interval.inserted)
      return;

   bool needs_phi = false;
   struct ir3_register *cur_def = NULL;
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];

      if (!state->visited) {
         needs_phi = true;
         break;
      }

      struct hash_entry *entry =
         _mesa_hash_table_search(state->remap, def);
      assert(entry);
      struct reg_or_immed *pred_val = entry->data;
      if ((pred_val->flags & (IR3_REG_IMMED | IR3_REG_CONST)) ||
          !pred_val->def ||
          (cur_def && cur_def != pred_val->def)) {
         needs_phi = true;
         break;
      }
      cur_def = pred_val->def;
   }

   if (!needs_phi) {
      interval->dst.def = cur_def;
      interval->dst.flags = cur_def->flags;
      return;
   }

   struct ir3_instruction *phi =
      ir3_instr_create(block, OPC_META_PHI, 1, block->predecessors_count);
   struct ir3_register *dst = __ssa_dst(phi);
   dst->flags |= def->flags & (IR3_REG_HALF | IR3_REG_ARRAY);
   dst->size = def->size;
   dst->wrmask = def->wrmask;

   dst->interval_start = def->interval_start;
   dst->interval_end = def->interval_end;
   dst->merge_set = def->merge_set;
   dst->merge_set_offset = def->merge_set_offset;

   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];
      struct ir3_register *src = ir3_src_create(phi, INVALID_REG, dst->flags);
      src->size = def->size;
      src->wrmask = def->wrmask;

      if (state->visited) {
         struct hash_entry *entry =
            _mesa_hash_table_search(state->remap, def);
         assert(entry);
         struct reg_or_immed *new_val = entry->data;
         set_src_val(src, new_val);
      } else {
         src->def = def;
      }
   }

   interval->dst.def = dst;
   interval->dst.flags = dst->flags;

   ir3_instr_move_before_block(phi, block);
}

/* When spilling a block with a single predecessor, the pred may have other
 * successors so we can't choose what's live in and we can't spill/restore
 * anything. Just make the inserted intervals exactly match the predecessor. If
 * it wasn't live in the predecessor then it must've already been spilled. Also,
 * there are no phi nodes and no live-ins.
 */
static void
spill_single_pred_live_in(struct ra_spill_ctx *ctx,
                          struct ir3_block *block)
{
   unsigned name;
   BITSET_FOREACH_SET (name, ctx->live->live_in[block->index],
                       ctx->live->definitions_count) {
      struct ir3_register *reg = ctx->live->definitions[name];
      struct ra_spill_interval *interval = ctx->intervals[reg->name];
      struct reg_or_immed *val = read_live_in(ctx, reg, block, 0);
      if (val)
         interval->dst = *val;
      else
         ra_spill_ctx_remove(ctx, interval);
   }
}

static void
rewrite_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *phi,
            struct ir3_block *block)
{
   if (!ctx->intervals[phi->dsts[0]->name]->interval.inserted) {
      phi->flags |= IR3_INSTR_UNUSED;
      return;
   }

   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];

      if (!state->visited)
         continue;

      struct ir3_register *src = phi->srcs[i];
      if (!src->def)
         continue;

      struct hash_entry *entry =
         _mesa_hash_table_search(state->remap, src->def);
      assert(entry);
      struct reg_or_immed *new_val = entry->data;
      set_src_val(src, new_val);
   }
}

static void
spill_live_out(struct ra_spill_ctx *ctx, struct ra_spill_interval *interval,
               struct ir3_block *block)
{
   struct ir3_register *def = interval->interval.reg;

   spill(ctx, &interval->dst, get_spill_slot(ctx, def), NULL, block);
   ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
}

static void
spill_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   struct ra_spill_block_state *state = &ctx->blocks[block->index];
   rb_tree_foreach_safe (struct ra_spill_interval, interval,
                         &ctx->reg_ctx.intervals, interval.node) {
      if (!BITSET_TEST(state->live_out, interval->interval.reg->name)) {
         spill_live_out(ctx, interval, block);
      }
   }
}

static void
reload_live_out(struct ra_spill_ctx *ctx, struct ir3_register *def,
                struct ir3_block *block)
{
   struct ra_spill_interval *interval = ctx->intervals[def->name];
   ir3_reg_interval_insert(&ctx->reg_ctx, &interval->interval);

   reload_def(ctx, def, NULL, block);
}

static void
reload_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   struct ra_spill_block_state *state = &ctx->blocks[block->index];
   unsigned name;
   BITSET_FOREACH_SET (name, state->live_out, ctx->live->definitions_count) {
      struct ir3_register *reg = ctx->live->definitions[name];
      struct ra_spill_interval *interval = ctx->intervals[name];
      if (!interval->interval.inserted)
         reload_live_out(ctx, reg, block);
   }
}

static void
update_live_out_phis(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   assert(!block->successors[1]);
   struct ir3_block *succ = block->successors[0];
   unsigned pred_idx = ir3_block_get_pred_index(succ, block);

   foreach_instr (instr, &succ->instr_list) {
      if (instr->opc != OPC_META_PHI)
         break;

      struct ir3_register *def = instr->srcs[pred_idx]->def;
      if (!def)
         continue;

      struct ra_spill_interval *interval = ctx->intervals[def->name];
      if (!interval->interval.inserted)
         continue;
      set_src_val(instr->srcs[pred_idx], &interval->dst);
   }
}
1622record_pred_live_out(struct ra_spill_ctx *ctx, 1623 struct ra_spill_interval *interval, 1624 struct ir3_block *block, unsigned pred_idx) 1625{ 1626 struct ir3_block *pred = block->predecessors[pred_idx]; 1627 struct ra_spill_block_state *state = &ctx->blocks[pred->index]; 1628 1629 struct ir3_register *def = interval->interval.reg; 1630 if (is_live_in_phi(def, block)) { 1631 def = def->instr->srcs[pred_idx]->def; 1632 } 1633 BITSET_SET(state->live_out, def->name); 1634 1635 rb_tree_foreach (struct ra_spill_interval, child, 1636 &interval->interval.children, interval.node) { 1637 record_pred_live_out(ctx, child, block, pred_idx); 1638 } 1639} 1640 1641static void 1642record_pred_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block) 1643{ 1644 for (unsigned i = 0; i < block->predecessors_count; i++) { 1645 struct ir3_block *pred = block->predecessors[i]; 1646 struct ra_spill_block_state *state = &ctx->blocks[pred->index]; 1647 if (state->visited) 1648 continue; 1649 1650 state->live_out = rzalloc_array(ctx, BITSET_WORD, 1651 BITSET_WORDS(ctx->live->definitions_count)); 1652 1653 1654 rb_tree_foreach (struct ra_spill_interval, interval, 1655 &ctx->reg_ctx.intervals, interval.node) { 1656 record_pred_live_out(ctx, interval, block, i); 1657 } 1658 } 1659} 1660 1661static void 1662record_live_out(struct ra_spill_ctx *ctx, 1663 struct ra_spill_block_state *state, 1664 struct ra_spill_interval *interval) 1665{ 1666 if (!(interval->dst.flags & IR3_REG_SSA) || 1667 interval->dst.def) { 1668 struct reg_or_immed *val = ralloc(ctx, struct reg_or_immed); 1669 *val = interval->dst; 1670 _mesa_hash_table_insert(state->remap, interval->interval.reg, val); 1671 } 1672 rb_tree_foreach (struct ra_spill_interval, child, 1673 &interval->interval.children, interval.node) { 1674 record_live_out(ctx, state, child); 1675 } 1676} 1677 1678static void 1679record_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block) 1680{ 1681 struct ra_spill_block_state *state = &ctx->blocks[block->index]; 1682 state->remap = _mesa_pointer_hash_table_create(ctx); 1683 1684 rb_tree_foreach (struct ra_spill_interval, interval, &ctx->reg_ctx.intervals, 1685 interval.node) { 1686 record_live_out(ctx, state, interval); 1687 } 1688} 1689 1690static void 1691handle_block(struct ra_spill_ctx *ctx, struct ir3_block *block) 1692{ 1693 memset(&ctx->cur_pressure, 0, sizeof(ctx->cur_pressure)); 1694 rb_tree_init(&ctx->reg_ctx.intervals); 1695 rb_tree_init(&ctx->full_live_intervals); 1696 rb_tree_init(&ctx->half_live_intervals); 1697 1698 unsigned name; 1699 BITSET_FOREACH_SET (name, ctx->live->live_in[block->index], 1700 ctx->live->definitions_count) { 1701 struct ir3_register *reg = ctx->live->definitions[name]; 1702 handle_live_in(ctx, block, reg); 1703 } 1704 1705 foreach_instr (instr, &block->instr_list) { 1706 if (instr->opc != OPC_META_PHI && instr->opc != OPC_META_INPUT && 1707 instr->opc != OPC_META_TEX_PREFETCH) 1708 break; 1709 handle_input_phi(ctx, instr); 1710 } 1711 1712 if (ctx->spilling) { 1713 if (block->predecessors_count == 1) { 1714 spill_single_pred_live_in(ctx, block); 1715 } else { 1716 spill_live_ins(ctx, block); 1717 reload_live_ins(ctx, block); 1718 record_pred_live_outs(ctx, block); 1719 foreach_instr (instr, &block->instr_list) { 1720 if (instr->opc != OPC_META_PHI) 1721 break; 1722 rewrite_phi(ctx, instr, block); 1723 } 1724 BITSET_FOREACH_SET (name, ctx->live->live_in[block->index], 1725 ctx->live->definitions_count) { 1726 struct ir3_register *reg = ctx->live->definitions[name]; 1727 
add_live_in_phi(ctx, reg, block); 1728 } 1729 } 1730 } else { 1731 update_max_pressure(ctx); 1732 } 1733 1734 foreach_instr (instr, &block->instr_list) { 1735 di(instr, "processing"); 1736 1737 if (instr->opc == OPC_META_PHI || instr->opc == OPC_META_INPUT || 1738 instr->opc == OPC_META_TEX_PREFETCH) 1739 remove_input_phi(ctx, instr); 1740 else if (ctx->spilling && instr->opc == OPC_META_PARALLEL_COPY) 1741 handle_pcopy(ctx, instr); 1742 else if (ctx->spilling && instr->opc == OPC_MOV && 1743 instr->dsts[0] == ctx->base_reg) 1744 /* skip */; 1745 else 1746 handle_instr(ctx, instr); 1747 } 1748 1749 if (ctx->spilling && block->successors[0]) { 1750 struct ra_spill_block_state *state = 1751 &ctx->blocks[block->successors[0]->index]; 1752 if (state->visited) { 1753 assert(!block->successors[1]); 1754 1755 spill_live_outs(ctx, block); 1756 reload_live_outs(ctx, block); 1757 update_live_out_phis(ctx, block); 1758 } 1759 } 1760 1761 if (ctx->spilling) { 1762 record_live_outs(ctx, block); 1763 ctx->blocks[block->index].visited = true; 1764 } 1765} 1766 1767static bool 1768simplify_phi_node(struct ir3_instruction *phi) 1769{ 1770 struct ir3_register *def = NULL; 1771 foreach_src (src, phi) { 1772 /* Ignore phi sources which point to the phi itself. */ 1773 if (src->def == phi->dsts[0]) 1774 continue; 1775 /* If it's undef or it doesn't match the previous sources, bail */ 1776 if (!src->def || (def && def != src->def)) 1777 return false; 1778 def = src->def; 1779 } 1780 1781 phi->data = def; 1782 phi->flags |= IR3_INSTR_UNUSED; 1783 return true; 1784} 1785 1786static struct ir3_register * 1787simplify_phi_def(struct ir3_register *def) 1788{ 1789 if (def->instr->opc == OPC_META_PHI) { 1790 struct ir3_instruction *phi = def->instr; 1791 1792 /* Note: this function is always called at least once after visiting the 1793 * phi, so either there has been a simplified phi in the meantime, in 1794 * which case we will set progress=true and visit the definition again, or 1795 * phi->data already has the most up-to-date value. Therefore we don't 1796 * have to recursively check phi->data. 1797 */ 1798 if (phi->data) 1799 return phi->data; 1800 } 1801 1802 return def; 1803} 1804 1805static void 1806simplify_phi_srcs(struct ir3_instruction *instr) 1807{ 1808 foreach_src (src, instr) { 1809 if (src->def) 1810 src->def = simplify_phi_def(src->def); 1811 } 1812} 1813 1814/* We insert phi nodes for all live-ins of loops in case we need to split the 1815 * live range. This pass cleans that up for the case where the live range didn't 1816 * actually need to be split. 1817 */ 1818static void 1819simplify_phi_nodes(struct ir3 *ir) 1820{ 1821 foreach_block (block, &ir->block_list) { 1822 foreach_instr (instr, &block->instr_list) { 1823 if (instr->opc != OPC_META_PHI) 1824 break; 1825 instr->data = NULL; 1826 } 1827 } 1828 1829 bool progress; 1830 do { 1831 progress = false; 1832 foreach_block (block, &ir->block_list) { 1833 foreach_instr (instr, &block->instr_list) { 1834 if (instr->opc == OPC_META_PHI || (instr->flags & IR3_INSTR_UNUSED)) 1835 continue; 1836 1837 simplify_phi_srcs(instr); 1838 } 1839 1840 /* Visit phi nodes in the sucessors to make sure that phi sources are 1841 * always visited at least once after visiting the definition they 1842 * point to. See note in simplify_phi_def() for why this is necessary. 
          */
         for (unsigned i = 0; i < 2; i++) {
            struct ir3_block *succ = block->successors[i];
            if (!succ)
               continue;
            foreach_instr (instr, &succ->instr_list) {
               if (instr->opc != OPC_META_PHI)
                  break;
               if (instr->flags & IR3_INSTR_UNUSED) {
                  if (instr->data)
                     instr->data = simplify_phi_def(instr->data);
               } else {
                  simplify_phi_srcs(instr);
                  progress |= simplify_phi_node(instr);
               }
            }
         }
      }
   } while (progress);
}

static void
unmark_dead(struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         instr->flags &= ~IR3_INSTR_UNUSED;
      }
   }
}

/* Simple pass to remove now-dead phi nodes and pcopy instructions. We mark
 * which ones are dead along the way, so there's nothing to compute here.
 */
static void
cleanup_dead(struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr_safe (instr, &block->instr_list) {
         if (instr->flags & IR3_INSTR_UNUSED)
            list_delinit(&instr->node);
      }
   }
}

/* Deal with merge sets after spilling. Spilling generally leaves the merge
 * sets in a mess, and even if we properly cleaned up after ourselves, we
 * would want to recompute the merge sets afterward anyway. That's because
 * spilling/reloading can "break up" phi webs and split/collect webs so that
 * allocating them to the same register no longer gives any benefit. For
 * example, imagine we have this:
 *
 * if (...) {
 *    foo = ...
 * } else {
 *    bar = ...
 * }
 * baz = phi(foo, bar)
 *
 * and we spill "baz":
 *
 * if (...) {
 *    foo = ...
 *    spill(foo)
 * } else {
 *    bar = ...
 *    spill(bar)
 * }
 * baz = reload()
 *
 * now foo, bar, and baz don't have to be allocated to the same register. How
 * exactly the merge sets change can be complicated, so it's easier just to
 * recompute them.
 *
 * However, there's a wrinkle in this: those same merge sets determine the
 * register pressure, due to multiple values inhabiting the same register! And
 * we assume that this sharing happens when spilling. Therefore we need a
 * three-step procedure:
 *
 * 1. Drop the original merge sets.
 * 2. Calculate which values *must* be merged, being careful to only use the
 *    interval information which isn't trashed by spilling, and forcibly merge
 *    them.
 * 3. Let ir3_merge_regs() finish the job, including recalculating the
 *    intervals.
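 *
 * For step 2, the code below walks the split/collect instructions and
 * force-merges each destination with any source that isn't killed and whose
 * live interval still overlaps the destination's, since the pressure
 * accounting done while spilling assumed those values share a register.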
 */

static void
fixup_merge_sets(struct ir3_liveness *live, struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         ra_foreach_dst (dst, instr) {
            dst->merge_set = NULL;
            dst->merge_set_offset = 0;
         }
      }
   }

   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         if (instr->opc != OPC_META_SPLIT &&
             instr->opc != OPC_META_COLLECT)
            continue;

         struct ir3_register *dst = instr->dsts[0];
         ra_foreach_src (src, instr) {
            if (!(src->flags & IR3_REG_KILL) &&
                src->def->interval_start < dst->interval_end &&
                dst->interval_start < src->def->interval_end) {
               ir3_force_merge(dst, src->def,
                               src->def->interval_start - dst->interval_start);
            }
         }
      }
   }

   ir3_merge_regs(live, ir);
}

void
ir3_calc_pressure(struct ir3_shader_variant *v, struct ir3_liveness *live,
                  struct ir3_pressure *max_pressure)
{
   struct ra_spill_ctx *ctx = rzalloc(NULL, struct ra_spill_ctx);
   spill_ctx_init(ctx, v, live);

   foreach_block (block, &v->ir->block_list) {
      handle_block(ctx, block);
   }

   assert(ctx->cur_pressure.full == 0);
   assert(ctx->cur_pressure.half == 0);
   assert(ctx->cur_pressure.shared == 0);

   *max_pressure = ctx->max_pressure;
   ralloc_free(ctx);
}

bool
ir3_spill(struct ir3 *ir, struct ir3_shader_variant *v,
          struct ir3_liveness **live,
          const struct ir3_pressure *limit_pressure)
{
   void *mem_ctx = ralloc_parent(*live);
   struct ra_spill_ctx *ctx = rzalloc(mem_ctx, struct ra_spill_ctx);
   spill_ctx_init(ctx, v, *live);

   ctx->spilling = true;

   ctx->blocks = rzalloc_array(ctx, struct ra_spill_block_state,
                               ctx->live->block_count);
   rb_tree_init(&ctx->full_live_intervals);
   rb_tree_init(&ctx->half_live_intervals);

   ctx->limit_pressure = *limit_pressure;
   ctx->spill_slot = v->pvtmem_size;

   add_base_reg(ctx, ir);
   compute_next_distance(ctx, ir);

   unmark_dead(ir);

   foreach_block (block, &ir->block_list) {
      handle_block(ctx, block);
   }

   simplify_phi_nodes(ir);

   cleanup_dead(ir);

   ir3_create_parallel_copies(ir);

   /* After this point, we're done mutating the IR. Liveness has been trashed,
    * so recalculate it. We'll need it for recalculating the merge sets.
    */
   ralloc_free(ctx->live);
   *live = ir3_calc_liveness(mem_ctx, ir);

   fixup_merge_sets(*live, ir);

   v->pvtmem_size = ctx->spill_slot;
   ralloc_free(ctx);

   return true;
}
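
/* A rough sketch of how these entry points are expected to fit into the RA
 * flow; the actual caller and how limit_pressure is derived live in RA and
 * are only assumed here:
 *
 *    struct ir3_pressure max_pressure;
 *    ir3_calc_pressure(v, live, &max_pressure);
 *    if (max_pressure.full > limit_pressure.full ||
 *        max_pressure.half > limit_pressure.half)
 *       ir3_spill(ir, v, &live, &limit_pressure);
 */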