/*
 * Copyright (C) 2021 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "util/rb_tree.h"
#include "ir3_ra.h"
#include "ir3_shader.h"

/*
 * This pass does two things:
 *
 * 1. Calculates the maximum register pressure. To do this, we need to use the
 *    exact same technique that RA uses for combining meta_split instructions
 *    with their sources, so that our calculation agrees with RA.
 * 2. Spills when the register pressure exceeds a limit calculated by RA.
 *    The implementation is based on "Register Spilling and Live-Range Splitting
 *    for SSA-Form Programs" by Braun and Hack, although again care has to be
 *    taken to handle combining split/collect instructions.
 */
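/* As in the paper, the guiding heuristic is Belady's: when we have to evict
 * something, prefer the live value whose next use is furthest away. The live
 * interval trees below are kept ordered by next-use distance so that limit()
 * can walk the spill candidates in that order.
 */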
struct reg_or_immed {
   unsigned flags;
   union {
      struct ir3_register *def;
      uint32_t uimm;
      unsigned const_num;
   };
};

struct ra_spill_interval {
   struct ir3_reg_interval interval;

   struct rb_node node;
   struct rb_node half_node;

   /* The current SSA value/const/immed this source is mapped to. */
   struct reg_or_immed dst;

   /* When computing use distances we use the distance relative to the start
    * of the block. So, for example, a value that's defined in cycle 5 of the
    * block and used 6 cycles later will always have a next_use_distance of 11
    * until we reach that use.
    */
   unsigned next_use_distance;

   /* Whether this value was reloaded and therefore doesn't need to be
    * spilled again. Corresponds to the S set in the paper.
    */
   bool already_spilled;

   /* We need to add sources early for accounting purposes, but we have to
    * insert the reload code for them last. Keep track of whether this interval
    * needs to be reloaded later.
    */
   bool needs_reload;

   /* Keep track of whether this interval currently can't be spilled because:
    * - It or one of its children is a source and we're making space for
    *   sources.
    * - It is a destination and we're making space for destinations.
    */
   bool cant_spill;
};

struct ra_spill_block_state {
   unsigned *next_use_end;
   unsigned *next_use_start;

   unsigned cycles;

   /* Map from SSA def to reg_or_immed it is mapped to at the end of the block.
    * This map only contains values which we didn't spill, so it also serves as
    * a record of the new live-out set for this block.
    */
   struct hash_table *remap;

   /* For blocks whose successors are visited first (i.e. loop backedges), which
    * values should be live at the end.
    */
   BITSET_WORD *live_out;

   bool visited;
};

struct ra_spill_ctx {
   struct ir3_reg_ctx reg_ctx;

   struct ra_spill_interval **intervals;
   unsigned intervals_count;

   /* rb tree of live intervals that we can spill, ordered by next-use distance.
    * full_live_intervals contains the full+shared intervals in the merged_regs
    * case. We use this list to determine what to spill.
    */
   struct rb_tree full_live_intervals;
   struct rb_tree half_live_intervals;

   struct ir3_pressure cur_pressure, max_pressure;

   struct ir3_pressure limit_pressure;

   /* When spilling, we need to reserve a register to serve as the zero'd
    * "base". For simplicity we reserve a register at the beginning so that it's
    * always available.
    */
   struct ir3_register *base_reg;

   /* Current pvtmem offset in bytes. */
   unsigned spill_slot;

   struct ir3_liveness *live;

   const struct ir3_compiler *compiler;

   struct ra_spill_block_state *blocks;

   bool spilling;

   bool merged_regs;
};
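/* The spill/reload macros emitted below address private memory relative to a
 * "base" register that just holds zero. We materialize that register once at
 * the top of the shader, after any meta instructions that must stay first,
 * so that every spill/reload can simply reference it.
 */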
static void
add_base_reg(struct ra_spill_ctx *ctx, struct ir3 *ir)
{
   struct ir3_block *start = ir3_start_block(ir);

   /* We need to stick it after any meta instructions which need to be first. */
   struct ir3_instruction *after = NULL;
   foreach_instr (instr, &start->instr_list) {
      if (instr->opc != OPC_META_INPUT &&
          instr->opc != OPC_META_TEX_PREFETCH) {
         after = instr;
         break;
      }
   }

   struct ir3_instruction *mov = create_immed(start, 0);

   if (after)
      ir3_instr_move_before(mov, after);

   ctx->base_reg = mov->dsts[0];

   /* We don't create an interval, etc. for the base reg, so just lower the
    * register pressure limit to account for it. We assume it's always
    * available for simplicity.
    */
   ctx->limit_pressure.full -= reg_size(ctx->base_reg);
}

/* Compute the number of cycles per instruction used for next-use-distance
 * analysis. This is just approximate, obviously.
 */
static unsigned
instr_cycles(struct ir3_instruction *instr)
{
   if (instr->opc == OPC_META_PARALLEL_COPY) {
      unsigned cycles = 0;
      for (unsigned i = 0; i < instr->dsts_count; i++) {
         if (!instr->srcs[i]->def ||
             instr->srcs[i]->def->merge_set != instr->dsts[i]->merge_set) {
            cycles += reg_elems(instr->srcs[i]);
         }
      }

      return cycles;
   }

   if (instr->opc == OPC_META_COLLECT) {
      unsigned cycles = 0;
      for (unsigned i = 0; i < instr->srcs_count; i++) {
         if (!instr->srcs[i]->def ||
             instr->srcs[i]->def->merge_set != instr->dsts[0]->merge_set) {
            cycles++;
         }
      }

      return cycles;
   }

   if (is_meta(instr))
      return 0;

   return 1 + instr->repeat;
}
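/* For example, a 4-element collect where one source already belongs to the
 * destination's merge set counts as 3 cycles (one mov per unmerged source),
 * while a fully coalesced collect counts as 0, mirroring the copies RA will
 * actually end up inserting.
 */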
static bool
compute_block_next_distance(struct ra_spill_ctx *ctx, struct ir3_block *block,
                            unsigned *tmp_next_use)
{
   struct ra_spill_block_state *state = &ctx->blocks[block->index];
   memcpy(tmp_next_use, state->next_use_end,
          ctx->live->definitions_count * sizeof(*tmp_next_use));

   unsigned cycle = state->cycles;
   foreach_instr_rev (instr, &block->instr_list) {
      ra_foreach_dst (dst, instr) {
         dst->next_use = tmp_next_use[dst->name];
      }

      ra_foreach_src (src, instr) {
         src->next_use = tmp_next_use[src->def->name];
      }

      cycle -= instr_cycles(instr);

      if (instr->opc == OPC_META_PARALLEL_COPY) {
         ra_foreach_src_n (src, i, instr) {
            if (src->def->merge_set == instr->dsts[i]->merge_set &&
                src->def->merge_set_offset == instr->dsts[i]->merge_set_offset) {
               tmp_next_use[src->def->name] =
                  tmp_next_use[instr->dsts[i]->name];
            } else {
               tmp_next_use[src->def->name] = cycle;
            }
         }
      } else if (instr->opc != OPC_META_PHI) {
         ra_foreach_src (src, instr) {
            tmp_next_use[src->def->name] = cycle;
         }
      }

      ra_foreach_dst (dst, instr) {
         tmp_next_use[dst->name] = UINT_MAX;
      }
   }

   memcpy(state->next_use_start, tmp_next_use,
          ctx->live->definitions_count * sizeof(*tmp_next_use));

   bool progress = false;
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      const struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *pred_state = &ctx->blocks[pred->index];

      /* Add a large-enough distance in front of edges exiting the loop so that
       * variables that are live-through the loop but not used inside it are
       * prioritized for spilling, as per the paper. This just needs to be
       * larger than the longest path through the loop.
       */
      bool loop_exit = pred->loop_depth < block->loop_depth;
      unsigned block_distance = pred_state->cycles + (loop_exit ? 100000 : 0);

      for (unsigned j = 0; j < ctx->live->definitions_count; j++) {
         if (state->next_use_start[j] < UINT_MAX &&
             state->next_use_start[j] + block_distance <
             pred_state->next_use_end[j]) {
            pred_state->next_use_end[j] = state->next_use_start[j] +
               block_distance;
            progress = true;
         }
      }

      foreach_instr (phi, &block->instr_list) {
         if (phi->opc != OPC_META_PHI)
            break;
         if (!phi->srcs[i]->def)
            continue;
         unsigned src = phi->srcs[i]->def->name;
         if (phi->dsts[0]->next_use < UINT_MAX &&
             phi->dsts[0]->next_use + block_distance <
             pred_state->next_use_end[src]) {
            pred_state->next_use_end[src] = phi->dsts[0]->next_use +
               block_distance;
            progress = true;
         }
      }
   }

   return progress;
}
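/* Next-use distances form a backwards dataflow problem: a block's
 * start-of-block distances are pushed into its predecessors' end-of-block
 * distances (plus the predecessor's own length), so we iterate over the
 * blocks in reverse until nothing shrinks any further.
 */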
static void
compute_next_distance(struct ra_spill_ctx *ctx, struct ir3 *ir)
{
   for (unsigned i = 0; i < ctx->live->block_count; i++) {
      ctx->blocks[i].next_use_start =
         ralloc_array(ctx, unsigned, ctx->live->definitions_count);
      ctx->blocks[i].next_use_end =
         ralloc_array(ctx, unsigned, ctx->live->definitions_count);

      for (unsigned j = 0; j < ctx->live->definitions_count; j++) {
         ctx->blocks[i].next_use_start[j] = UINT_MAX;
         ctx->blocks[i].next_use_end[j] = UINT_MAX;
      }
   }

   foreach_block (block, &ir->block_list) {
      struct ra_spill_block_state *state = &ctx->blocks[block->index];
      state->cycles = 0;
      foreach_instr (instr, &block->instr_list) {
         state->cycles += instr_cycles(instr);
         foreach_dst (dst, instr) {
            dst->spill_slot = ~0;
         }
      }
   }

   unsigned *tmp_next_use =
      ralloc_array(ctx, unsigned, ctx->live->definitions_count);

   bool progress = true;
   while (progress) {
      progress = false;
      foreach_block_rev (block, &ir->block_list) {
         progress |= compute_block_next_distance(ctx, block, tmp_next_use);
      }
   }
}

static void
ra_spill_interval_init(struct ra_spill_interval *interval,
                       struct ir3_register *reg)
{
   ir3_reg_interval_init(&interval->interval, reg);
   interval->dst.flags = reg->flags;
   interval->dst.def = reg;
   interval->already_spilled = false;
   interval->needs_reload = false;
   interval->cant_spill = false;
}

static struct ra_spill_interval *
ir3_reg_interval_to_interval(struct ir3_reg_interval *interval)
{
   return rb_node_data(struct ra_spill_interval, interval, interval);
}

static struct ra_spill_interval *
ra_spill_interval_root(struct ra_spill_interval *interval)
{
   struct ir3_reg_interval *ir3_interval = &interval->interval;
   while (ir3_interval->parent)
      ir3_interval = ir3_interval->parent;
   return ir3_reg_interval_to_interval(ir3_interval);
}

static struct ra_spill_ctx *
ir3_reg_ctx_to_ctx(struct ir3_reg_ctx *ctx)
{
   return rb_node_data(struct ra_spill_ctx, ctx, reg_ctx);
}

static int
ra_spill_interval_cmp(const struct rb_node *_a, const struct rb_node *_b)
{
   const struct ra_spill_interval *a =
      rb_node_data(const struct ra_spill_interval, _a, node);
   const struct ra_spill_interval *b =
      rb_node_data(const struct ra_spill_interval, _b, node);
   return a->next_use_distance - b->next_use_distance;
}

static int
ra_spill_interval_half_cmp(const struct rb_node *_a, const struct rb_node *_b)
{
   const struct ra_spill_interval *a =
      rb_node_data(const struct ra_spill_interval, _a, half_node);
   const struct ra_spill_interval *b =
      rb_node_data(const struct ra_spill_interval, _b, half_node);
   return a->next_use_distance - b->next_use_distance;
}
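/* Pressure is accounted in units of half-registers: reg_size() of a full
 * register is twice its element count. Note that with merged registers a
 * half register also takes up space in the full file, which is why half
 * intervals are counted against both half and full pressure in that mode.
 */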
static void
interval_add(struct ir3_reg_ctx *_ctx, struct ir3_reg_interval *_interval)
{
   struct ra_spill_interval *interval = ir3_reg_interval_to_interval(_interval);
   struct ra_spill_ctx *ctx = ir3_reg_ctx_to_ctx(_ctx);

   unsigned size = reg_size(interval->interval.reg);
   if (interval->interval.reg->flags & IR3_REG_SHARED) {
      ctx->cur_pressure.shared += size;
   } else {
      if (interval->interval.reg->flags & IR3_REG_HALF) {
         ctx->cur_pressure.half += size;
         if (ctx->spilling) {
            rb_tree_insert(&ctx->half_live_intervals, &interval->half_node,
                           ra_spill_interval_half_cmp);
         }
      }
      if (ctx->merged_regs || !(interval->interval.reg->flags & IR3_REG_HALF)) {
         ctx->cur_pressure.full += size;
         if (ctx->spilling) {
            rb_tree_insert(&ctx->full_live_intervals, &interval->node,
                           ra_spill_interval_cmp);
         }
      }
   }
}

static void
interval_delete(struct ir3_reg_ctx *_ctx, struct ir3_reg_interval *_interval)
{
   struct ra_spill_interval *interval = ir3_reg_interval_to_interval(_interval);
   struct ra_spill_ctx *ctx = ir3_reg_ctx_to_ctx(_ctx);

   unsigned size = reg_size(interval->interval.reg);
   if (interval->interval.reg->flags & IR3_REG_SHARED) {
      ctx->cur_pressure.shared -= size;
   } else {
      if (interval->interval.reg->flags & IR3_REG_HALF) {
         ctx->cur_pressure.half -= size;
         if (ctx->spilling) {
            rb_tree_remove(&ctx->half_live_intervals, &interval->half_node);
         }
      }
      if (ctx->merged_regs || !(interval->interval.reg->flags & IR3_REG_HALF)) {
         ctx->cur_pressure.full -= size;
         if (ctx->spilling) {
            rb_tree_remove(&ctx->full_live_intervals, &interval->node);
         }
      }
   }
}

static void
interval_readd(struct ir3_reg_ctx *_ctx, struct ir3_reg_interval *_parent,
               struct ir3_reg_interval *_child)
{
   interval_add(_ctx, _child);
}

static void
spill_ctx_init(struct ra_spill_ctx *ctx, struct ir3_shader_variant *v,
               struct ir3_liveness *live)
{
   ctx->live = live;
   ctx->intervals = ralloc_array(ctx, struct ra_spill_interval *,
                                 ctx->live->definitions_count);
   struct ra_spill_interval *intervals =
      rzalloc_array(ctx, struct ra_spill_interval,
                    ctx->live->definitions_count);
   for (unsigned i = 0; i < ctx->live->definitions_count; i++)
      ctx->intervals[i] = &intervals[i];

   ctx->intervals_count = ctx->live->definitions_count;
   ctx->compiler = v->shader->compiler;
   ctx->merged_regs = v->mergedregs;

   rb_tree_init(&ctx->reg_ctx.intervals);
   ctx->reg_ctx.interval_add = interval_add;
   ctx->reg_ctx.interval_delete = interval_delete;
   ctx->reg_ctx.interval_readd = interval_readd;
}

static void
ra_spill_ctx_insert(struct ra_spill_ctx *ctx,
                    struct ra_spill_interval *interval)
{
   ir3_reg_interval_insert(&ctx->reg_ctx, &interval->interval);
}

static void
ra_spill_ctx_remove(struct ra_spill_ctx *ctx,
                    struct ra_spill_interval *interval)
{
   ir3_reg_interval_remove(&ctx->reg_ctx, &interval->interval);
}
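/* The helpers below implement the per-instruction lifecycle that
 * handle_instr() walks through: init_dst()/insert_dst() make a destination
 * live and temporarily unspillable, insert_src() pins sources and marks them
 * for lazy reloading via needs_reload, and the finish/remove helpers drop
 * the cant_spill pins and kill dead intervals once the instruction has been
 * processed.
 */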
static void
init_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst)
{
   struct ra_spill_interval *interval = ctx->intervals[dst->name];
   ra_spill_interval_init(interval, dst);
   if (ctx->spilling)
      interval->next_use_distance = dst->next_use;
}

static void
insert_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst)
{
   struct ra_spill_interval *interval = ctx->intervals[dst->name];
   if (interval->interval.inserted)
      return;

   ra_spill_ctx_insert(ctx, interval);
   interval->cant_spill = true;

   /* For precolored inputs, make sure we leave enough registers to allow for
    * holes in the inputs. It can happen that the binning shader has a lower
    * register pressure than the main shader, but the main shader decided to
    * add holes between the inputs which means that the binning shader has a
    * higher register demand.
    */
   if (dst->instr->opc == OPC_META_INPUT && dst->num != INVALID_REG) {
      physreg_t physreg = ra_reg_get_physreg(dst);
      physreg_t max = physreg + reg_size(dst);

      if (interval->interval.reg->flags & IR3_REG_SHARED)
         ctx->max_pressure.shared = MAX2(ctx->max_pressure.shared, max);
      else if (interval->interval.reg->flags & IR3_REG_HALF)
         ctx->max_pressure.half = MAX2(ctx->max_pressure.half, max);
      else
         ctx->max_pressure.full = MAX2(ctx->max_pressure.full, max);
   }
}

static void
insert_src(struct ra_spill_ctx *ctx, struct ir3_register *src)
{
   struct ra_spill_interval *interval = ctx->intervals[src->def->name];

   if (!interval->interval.inserted) {
      ra_spill_ctx_insert(ctx, interval);
      interval->needs_reload = true;
      interval->already_spilled = true;
   }

   ra_spill_interval_root(interval)->cant_spill = true;
}

static void
remove_src_early(struct ra_spill_ctx *ctx, struct ir3_instruction *instr,
                 struct ir3_register *src)
{
   struct ra_spill_interval *interval = ctx->intervals[src->def->name];

   if (!interval->interval.inserted || interval->interval.parent ||
       !rb_tree_is_empty(&interval->interval.children))
      return;

   ra_spill_ctx_remove(ctx, interval);
}

static void
remove_src(struct ra_spill_ctx *ctx, struct ir3_instruction *instr,
           struct ir3_register *src)
{
   struct ra_spill_interval *interval = ctx->intervals[src->def->name];

   if (!interval->interval.inserted)
      return;

   ra_spill_ctx_remove(ctx, interval);
}

static void
finish_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst)
{
   struct ra_spill_interval *interval = ctx->intervals[dst->name];
   interval->cant_spill = false;
}
static void
remove_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst)
{
   struct ra_spill_interval *interval = ctx->intervals[dst->name];

   if (!interval->interval.inserted)
      return;

   ra_spill_ctx_remove(ctx, interval);
}

static void
update_src_next_use(struct ra_spill_ctx *ctx, struct ir3_register *src)
{
   struct ra_spill_interval *interval = ctx->intervals[src->def->name];

   assert(interval->interval.inserted);

   interval->next_use_distance = src->next_use;

   /* If this node is inserted in one of the trees, then it needs to be resorted
    * as its key has changed.
    */
   if (!interval->interval.parent && !(src->flags & IR3_REG_SHARED)) {
      if (src->flags & IR3_REG_HALF) {
         rb_tree_remove(&ctx->half_live_intervals, &interval->half_node);
         rb_tree_insert(&ctx->half_live_intervals, &interval->half_node,
                        ra_spill_interval_half_cmp);
      }
      if (ctx->merged_regs || !(src->flags & IR3_REG_HALF)) {
         rb_tree_remove(&ctx->full_live_intervals, &interval->node);
         rb_tree_insert(&ctx->full_live_intervals, &interval->node,
                        ra_spill_interval_cmp);
      }
   }
}
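/* Spill slots are measured in bytes, while merge-set offsets and reg_size()
 * are in half-register (16-bit) units, hence the "* 2" conversions below.
 * For example, a def at merge_set_offset 2 in a merge set whose slot starts
 * at byte 16 gets spill slot 16 + 2 * 2 = 20.
 */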
static unsigned
get_spill_slot(struct ra_spill_ctx *ctx, struct ir3_register *reg)
{
   if (reg->merge_set) {
      if (reg->merge_set->spill_slot == ~0) {
         reg->merge_set->spill_slot = ALIGN_POT(ctx->spill_slot,
                                                reg->merge_set->alignment);
         ctx->spill_slot = reg->merge_set->spill_slot + reg->merge_set->size * 2;
      }
      return reg->merge_set->spill_slot + reg->merge_set_offset * 2;
   } else {
      if (reg->spill_slot == ~0) {
         reg->spill_slot = ALIGN_POT(ctx->spill_slot, reg_elem_size(reg));
         ctx->spill_slot = reg->spill_slot + reg_size(reg) * 2;
      }
      return reg->spill_slot;
   }
}

static void
set_src_val(struct ir3_register *src, const struct reg_or_immed *val)
{
   if (val->flags & IR3_REG_IMMED) {
      src->flags = IR3_REG_IMMED | (val->flags & IR3_REG_HALF);
      src->uim_val = val->uimm;
      src->def = NULL;
   } else if (val->flags & IR3_REG_CONST) {
      src->flags = IR3_REG_CONST | (val->flags & IR3_REG_HALF);
      src->num = val->const_num;
      src->def = NULL;
   } else {
      src->def = val->def;
   }
}

static struct ir3_register *
materialize_pcopy_src(const struct reg_or_immed *src,
                      struct ir3_instruction *instr,
                      struct ir3_block *block)
{
   struct ir3_instruction *mov = ir3_instr_create(block, OPC_MOV, 1, 1);
   struct ir3_register *dst = __ssa_dst(mov);
   dst->flags |= src->flags & IR3_REG_HALF;
   struct ir3_register *mov_src = ir3_src_create(mov, INVALID_REG, src->flags);
   set_src_val(mov_src, src);
   mov->cat1.src_type = mov->cat1.dst_type =
      (src->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;

   if (instr)
      ir3_instr_move_before(mov, instr);
   return dst;
}

static void
spill(struct ra_spill_ctx *ctx, const struct reg_or_immed *val,
      unsigned spill_slot, struct ir3_instruction *instr, struct ir3_block *block)
{
   struct ir3_register *reg;

   /* If spilling an immed/const pcopy src, we need to actually materialize it
    * first with a mov.
    */
   if (val->flags & (IR3_REG_CONST | IR3_REG_IMMED)) {
      reg = materialize_pcopy_src(val, instr, block);
   } else {
      reg = val->def;
   }

   d("spilling ssa_%u:%u to %u", reg->instr->serialno, reg->name,
     spill_slot);

   unsigned elems = reg_elems(reg);
   struct ir3_instruction *spill =
      ir3_instr_create(block, OPC_SPILL_MACRO, 0, 3);
   ir3_src_create(spill, INVALID_REG, ctx->base_reg->flags)->def = ctx->base_reg;
   unsigned src_flags = reg->flags & (IR3_REG_HALF | IR3_REG_IMMED |
                                      IR3_REG_CONST | IR3_REG_SSA |
                                      IR3_REG_ARRAY);
   struct ir3_register *src = ir3_src_create(spill, INVALID_REG, src_flags);
   ir3_src_create(spill, INVALID_REG, IR3_REG_IMMED)->uim_val = elems;
   spill->cat6.dst_offset = spill_slot;
   spill->cat6.type = (reg->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;

   src->def = reg;
   if (reg->flags & IR3_REG_ARRAY) {
      src->size = reg->size;
      src->array.id = reg->array.id;
      src->array.offset = 0;
   } else {
      src->wrmask = reg->wrmask;
   }

   if (instr)
      ir3_instr_move_before(spill, instr);
}

static void
spill_interval(struct ra_spill_ctx *ctx, struct ra_spill_interval *interval,
               struct ir3_instruction *instr, struct ir3_block *block)
{
   spill(ctx, &interval->dst, get_spill_slot(ctx, interval->interval.reg),
         instr, block);
}
/* This is similar to "limit" in the paper. */
static void
limit(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
{
   if (ctx->cur_pressure.half > ctx->limit_pressure.half) {
      d("cur half pressure %u exceeds %u", ctx->cur_pressure.half,
        ctx->limit_pressure.half);
      rb_tree_foreach_safe (struct ra_spill_interval, interval,
                            &ctx->half_live_intervals, half_node) {
         d("trying ssa_%u:%u", interval->interval.reg->instr->serialno,
           interval->interval.reg->name);
         if (!interval->cant_spill) {
            if (!interval->already_spilled)
               spill_interval(ctx, interval, instr, instr->block);
            ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
            if (ctx->cur_pressure.half <= ctx->limit_pressure.half)
               break;
         }
      }

      assert(ctx->cur_pressure.half <= ctx->limit_pressure.half);
   }

   if (ctx->cur_pressure.full > ctx->limit_pressure.full) {
      d("cur full pressure %u exceeds %u", ctx->cur_pressure.full,
        ctx->limit_pressure.full);
      rb_tree_foreach_safe (struct ra_spill_interval, interval,
                            &ctx->full_live_intervals, node) {
         d("trying ssa_%u:%u", interval->interval.reg->instr->serialno,
           interval->interval.reg->name);
         if (!interval->cant_spill) {
            if (!interval->already_spilled)
               spill_interval(ctx, interval, instr, instr->block);
            ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
            if (ctx->cur_pressure.full <= ctx->limit_pressure.full)
               break;
         } else {
            d("can't spill");
         }
      }

      assert(ctx->cur_pressure.full <= ctx->limit_pressure.full);
   }
}

/* There's a corner case where we reload a value which has overlapping live
 * values already reloaded, either because it's the child of some other interval
 * that was already reloaded or some of its children have already been
 * reloaded. Because RA only expects overlapping source/dest intervals for meta
 * instructions (split/collect), and we don't want to add register pressure by
 * creating an entirely separate value, we need to add splits and collects to
 * deal with this case. These splits/collects have to also have correct merge
 * set information, so that it doesn't result in any actual code or register
 * pressure in practice.
 */
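/* For example, if a vec2 extracted from the start of a vec4 was already
 * reloaded and is still live when we reload the whole vec4, the two defs
 * overlap. We therefore rewrite the vec2 to be a split of the freshly
 * reloaded vec4 (see rewrite_src_interval() below); because both defs sit in
 * the same merge set at the right offsets, RA coalesces the split away.
 */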
static void
add_to_merge_set(struct ir3_merge_set *set, struct ir3_register *def,
                 unsigned offset)
{
   def->merge_set = set;
   def->merge_set_offset = offset;
   def->interval_start = set->interval_start + offset;
   def->interval_end = set->interval_start + offset + reg_size(def);
}

static struct ir3_register *
split(struct ir3_register *def, unsigned offset,
      struct ir3_instruction *after, struct ir3_block *block)
{
   if (reg_elems(def) == 1) {
      assert(offset == 0);
      return def;
   }

   assert(!(def->flags & IR3_REG_ARRAY));
   assert(def->merge_set);
   /* Note: use "block" rather than "after->block" here, since "after" may be
    * NULL when splitting live-ins at the end of a predecessor.
    */
   struct ir3_instruction *split =
      ir3_instr_create(block, OPC_META_SPLIT, 1, 1);
   struct ir3_register *dst = __ssa_dst(split);
   dst->flags |= def->flags & IR3_REG_HALF;
   struct ir3_register *src = ir3_src_create(split, INVALID_REG, def->flags);
   src->wrmask = def->wrmask;
   src->def = def;
   add_to_merge_set(def->merge_set, dst,
                    def->merge_set_offset + offset * reg_elem_size(def));
   if (after)
      ir3_instr_move_before(split, after);
   return dst;
}

static struct ir3_register *
extract(struct ir3_register *parent_def, unsigned offset, unsigned elems,
        struct ir3_instruction *after, struct ir3_block *block)
{
   if (offset == 0 && elems == reg_elems(parent_def))
      return parent_def;

   struct ir3_instruction *collect =
      ir3_instr_create(block, OPC_META_COLLECT, 1, elems);
   struct ir3_register *dst = __ssa_dst(collect);
   dst->flags |= parent_def->flags & IR3_REG_HALF;
   dst->wrmask = MASK(elems);
   add_to_merge_set(parent_def->merge_set, dst, parent_def->merge_set_offset);

   for (unsigned i = 0; i < elems; i++) {
      ir3_src_create(collect, INVALID_REG, parent_def->flags)->def =
         split(parent_def, offset + i, after, block);
   }

   if (after)
      ir3_instr_move_before(collect, after);
   return dst;
}

static struct ir3_register *
reload(struct ra_spill_ctx *ctx, struct ir3_register *reg,
       struct ir3_instruction *after, struct ir3_block *block)
{
   unsigned spill_slot = get_spill_slot(ctx, reg);

   d("reloading ssa_%u:%u from %u", reg->instr->serialno, reg->name,
     spill_slot);

   unsigned elems = reg_elems(reg);
   struct ir3_instruction *reload =
      ir3_instr_create(block, OPC_RELOAD_MACRO, 1, 3);
   struct ir3_register *dst = __ssa_dst(reload);
   dst->flags |= reg->flags & (IR3_REG_HALF | IR3_REG_ARRAY);
   ir3_src_create(reload, INVALID_REG, ctx->base_reg->flags)->def = ctx->base_reg;
   struct ir3_register *offset_reg =
      ir3_src_create(reload, INVALID_REG, IR3_REG_IMMED);
   offset_reg->uim_val = spill_slot;
   ir3_src_create(reload, INVALID_REG, IR3_REG_IMMED)->uim_val = elems;
   reload->cat6.type = (reg->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;

   if (reg->flags & IR3_REG_ARRAY) {
      dst->array.offset = 0;
      dst->array.id = reg->array.id;
      dst->size = reg->size;
   } else {
      dst->wrmask = MASK(elems);
   }

   dst->merge_set = reg->merge_set;
   dst->merge_set_offset = reg->merge_set_offset;
   dst->interval_start = reg->interval_start;
   dst->interval_end = reg->interval_end;

   if (after)
      ir3_instr_move_before(reload, after);

   return dst;
}
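/* Once a parent value has been reloaded, any of its child intervals that are
 * still live have to be rewritten, recursively, to extract()s of the fresh
 * def so that later uses of the children pick up the reloaded value.
 */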
static void
rewrite_src_interval(struct ra_spill_ctx *ctx,
                     struct ra_spill_interval *interval,
                     struct ir3_register *def,
                     struct ir3_instruction *instr,
                     struct ir3_block *block)
{
   interval->dst.flags = def->flags;
   interval->dst.def = def;
   interval->needs_reload = false;

   rb_tree_foreach (struct ra_spill_interval, child,
                    &interval->interval.children, interval.node) {
      struct ir3_register *child_reg = child->interval.reg;
      struct ir3_register *child_def =
         extract(def, (child_reg->interval_start -
                       interval->interval.reg->interval_start) / reg_elem_size(def),
                 reg_elems(child_reg), instr, block);
      rewrite_src_interval(ctx, child, child_def, instr, block);
   }
}

static void
reload_def(struct ra_spill_ctx *ctx, struct ir3_register *def,
           struct ir3_instruction *instr, struct ir3_block *block)
{
   unsigned elems = reg_elems(def);
   struct ra_spill_interval *interval = ctx->intervals[def->name];

   struct ir3_reg_interval *ir3_parent = interval->interval.parent;

   if (ir3_parent) {
      struct ra_spill_interval *parent =
         ir3_reg_interval_to_interval(ir3_parent);
      if (!parent->needs_reload) {
         interval->dst.flags = def->flags;
         interval->dst.def = extract(
            parent->dst.def, (def->interval_start - parent->dst.def->interval_start) /
            reg_elem_size(def), elems, instr, block);
         return;
      }
   }

   struct ir3_register *dst = reload(ctx, def, instr, block);

   rewrite_src_interval(ctx, interval, dst, instr, block);
}
static void
reload_src(struct ra_spill_ctx *ctx, struct ir3_instruction *instr,
           struct ir3_register *src)
{
   struct ra_spill_interval *interval = ctx->intervals[src->def->name];

   if (interval->needs_reload) {
      reload_def(ctx, src->def, instr, instr->block);
   }

   ra_spill_interval_root(interval)->cant_spill = false;
}

static void
rewrite_src(struct ra_spill_ctx *ctx, struct ir3_instruction *instr,
            struct ir3_register *src)
{
   struct ra_spill_interval *interval = ctx->intervals[src->def->name];

   set_src_val(src, &interval->dst);
}

static void
update_max_pressure(struct ra_spill_ctx *ctx)
{
   d("pressure:");
   d("\tfull: %u", ctx->cur_pressure.full);
   d("\thalf: %u", ctx->cur_pressure.half);
   d("\tshared: %u", ctx->cur_pressure.shared);

   ctx->max_pressure.full =
      MAX2(ctx->max_pressure.full, ctx->cur_pressure.full);
   ctx->max_pressure.half =
      MAX2(ctx->max_pressure.half, ctx->cur_pressure.half);
   ctx->max_pressure.shared =
      MAX2(ctx->max_pressure.shared, ctx->cur_pressure.shared);
}
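/* Process a single instruction: make the sources live (reloading them if
 * needed), then the destinations, calling limit() before each step so that
 * register pressure never exceeds the limit at any point RA will see. In the
 * non-spilling pass the same walk merely records the maximum pressure.
 */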
static void
handle_instr(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
{
   ra_foreach_dst (dst, instr) {
      init_dst(ctx, dst);
   }

   if (ctx->spilling) {
      ra_foreach_src (src, instr)
         insert_src(ctx, src);
   }

   /* Handle tied destinations. If a destination is tied to a source and that
    * source is live-through, then we need to allocate a new register for the
    * destination which is live-through itself and cannot overlap the
    * sources.
    */

   ra_foreach_dst (dst, instr) {
      struct ir3_register *tied_src = dst->tied;
      if (tied_src && !(tied_src->flags & IR3_REG_FIRST_KILL))
         insert_dst(ctx, dst);
   }

   if (ctx->spilling)
      limit(ctx, instr);
   else
      update_max_pressure(ctx);

   if (ctx->spilling) {
      ra_foreach_src (src, instr) {
         reload_src(ctx, instr, src);
         update_src_next_use(ctx, src);
      }
   }

   ra_foreach_src (src, instr) {
      if (src->flags & IR3_REG_FIRST_KILL)
         remove_src_early(ctx, instr, src);
   }

   ra_foreach_dst (dst, instr) {
      insert_dst(ctx, dst);
   }

   if (ctx->spilling)
      limit(ctx, instr);
   else
      update_max_pressure(ctx);

   /* We have to remove sources before rewriting them so that we can lookup the
    * interval to remove before the source itself is changed.
    */
   ra_foreach_src (src, instr) {
      if (src->flags & IR3_REG_FIRST_KILL)
         remove_src(ctx, instr, src);
   }

   if (ctx->spilling) {
      ra_foreach_src (src, instr) {
         rewrite_src(ctx, instr, src);
      }
   }

   ra_foreach_dst (dst, instr) {
      finish_dst(ctx, dst);
   }

   for (unsigned i = 0; i < instr->dsts_count; i++) {
      if (ra_reg_is_dst(instr->dsts[i]) &&
          (instr->dsts[i]->flags & IR3_REG_UNUSED))
         remove_dst(ctx, instr->dsts[i]);
   }
}

static struct ra_spill_interval *
create_temp_interval(struct ra_spill_ctx *ctx, struct ir3_register *def)
{
   unsigned name = ctx->intervals_count++;
   unsigned offset = ctx->live->interval_offset;

   /* This is kinda hacky, but we need to create a fake SSA def here that is
    * only used as part of the pcopy accounting. See below.
    */
   struct ir3_register *reg = rzalloc(ctx, struct ir3_register);
   *reg = *def;
   reg->name = name;
   reg->interval_start = offset;
   reg->interval_end = offset + reg_size(def);
   reg->merge_set = NULL;

   ctx->intervals = reralloc(ctx, ctx->intervals, struct ra_spill_interval *,
                             ctx->intervals_count);
   struct ra_spill_interval *interval = rzalloc(ctx, struct ra_spill_interval);
   ra_spill_interval_init(interval, reg);
   ctx->intervals[name] = interval;
   ctx->live->interval_offset += reg_size(def);
   return interval;
}

/* In the sequence of copies generated (see below), would this source be killed?
 */
static bool
is_last_pcopy_src(struct ir3_instruction *pcopy, unsigned src_n)
{
   struct ir3_register *src = pcopy->srcs[src_n];
   if (!(src->flags & IR3_REG_KILL))
      return false;
   for (unsigned j = src_n + 1; j < pcopy->srcs_count; j++) {
      if (pcopy->srcs[j]->def == src->def)
         return false;
   }
   return true;
}
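/* Note that a parallel copy may read the same def through several sources,
 * e.g. "a, b = c, c", in which case only the last read of c counts as the
 * kill.
 */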
/* Parallel copies are different from normal instructions. The sources together
 * may be larger than the entire register file, so we cannot just reload every
 * source like normal, and indeed that probably wouldn't be a great idea.
 * Instead we essentially need to lower the parallel copy to "copies," just like
 * in the normal CSSA construction, although we implement the copies by
 * reloading and then possibly spilling values. We essentially just shuffle
 * around the sources until each source either (a) is live or (b) has the same
 * spill slot as its corresponding destination. We do this by decomposing the
 * copy into a series of copies, so:
 *
 * a, b, c = d, e, f
 *
 * becomes:
 *
 * d' = d
 * e' = e
 * f' = f
 * a = d'
 * b = e'
 * c = f'
 *
 * the temporary SSA values d', e', and f' never actually show up in the result.
 * They are only used for our internal accounting. They may, however, have their
 * own spill slot created for them. Similarly, we don't actually emit any copy
 * instructions, although we emit the spills/reloads that *would've* been
 * required if those copies were there.
 *
 * TODO: in order to reduce the number of temporaries and therefore spill slots,
 * we could instead do a more complicated analysis that considers the location
 * transfer graph.
 *
 * In addition, we actually remove the parallel copy and rewrite all its uses
 * (in the phi nodes) rather than rewrite its sources at the end. Recreating it
 * later turns out to be easier than keeping it up-to-date throughout this pass,
 * since we may have to remove entries for phi sources that are spilled and add
 * entries for live-outs that are spilled and reloaded, which can happen here
 * and then possibly be undone or done again when processing live-ins of the
 * successor block.
 */
static void
handle_pcopy(struct ra_spill_ctx *ctx, struct ir3_instruction *pcopy)
{
   foreach_dst (dst, pcopy) {
      struct ra_spill_interval *dst_interval = ctx->intervals[dst->name];
      ra_spill_interval_init(dst_interval, dst);
   }

   foreach_src_n (src, i, pcopy) {
      d("processing src %u", i);
      struct ir3_register *dst = pcopy->dsts[i];

      /* Skip the intermediate copy for cases where the source is merged with
       * the destination. Crucially this means that we also don't reload/spill
       * it if it's been spilled, because it shares the same spill slot.
       */
      if (src->def && src->def->merge_set &&
          src->def->merge_set == dst->merge_set &&
          src->def->merge_set_offset == dst->merge_set_offset) {
         struct ra_spill_interval *src_interval = ctx->intervals[src->def->name];
         struct ra_spill_interval *dst_interval = ctx->intervals[dst->name];
         if (src_interval->interval.inserted) {
            update_src_next_use(ctx, src);
            if (is_last_pcopy_src(pcopy, i))
               ra_spill_ctx_remove(ctx, src_interval);
            dst_interval->cant_spill = true;
            ra_spill_ctx_insert(ctx, dst_interval);
            limit(ctx, pcopy);
            dst_interval->cant_spill = false;
            dst_interval->dst = src_interval->dst;
         }
      } else if (src->def) {
         struct ra_spill_interval *temp_interval =
            create_temp_interval(ctx, dst);
         struct ir3_register *temp = temp_interval->interval.reg;
         temp_interval->next_use_distance = src->next_use;

         insert_src(ctx, src);
         limit(ctx, pcopy);
         reload_src(ctx, pcopy, src);
         update_src_next_use(ctx, src);
         if (is_last_pcopy_src(pcopy, i))
            remove_src(ctx, pcopy, src);
         struct ra_spill_interval *src_interval =
            ctx->intervals[src->def->name];
         temp_interval->dst = src_interval->dst;

         temp_interval->cant_spill = true;
         ra_spill_ctx_insert(ctx, temp_interval);
         limit(ctx, pcopy);
         temp_interval->cant_spill = false;

         src->flags = temp->flags;
         src->def = temp;
      }
   }

   d("done with pcopy srcs");

   foreach_src_n (src, i, pcopy) {
      struct ir3_register *dst = pcopy->dsts[i];

      if (src->def && src->def->merge_set &&
          src->def->merge_set == dst->merge_set &&
          src->def->merge_set_offset == dst->merge_set_offset)
         continue;

      struct ra_spill_interval *dst_interval = ctx->intervals[dst->name];

      if (!src->def) {
         dst_interval->cant_spill = true;
         ra_spill_ctx_insert(ctx, dst_interval);
         limit(ctx, pcopy);
         dst_interval->cant_spill = false;

         assert(src->flags & (IR3_REG_CONST | IR3_REG_IMMED));
         if (src->flags & IR3_REG_CONST) {
            dst_interval->dst.flags = src->flags;
            dst_interval->dst.const_num = src->num;
         } else {
            dst_interval->dst.flags = src->flags;
            dst_interval->dst.uimm = src->uim_val;
         }
      } else {
         struct ra_spill_interval *temp_interval = ctx->intervals[src->def->name];

         insert_src(ctx, src);
         limit(ctx, pcopy);
         reload_src(ctx, pcopy, src);
         remove_src(ctx, pcopy, src);

         dst_interval->dst = temp_interval->dst;
         ra_spill_ctx_insert(ctx, dst_interval);
      }
   }

   pcopy->flags |= IR3_INSTR_UNUSED;
}
static void
handle_input_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
{
   init_dst(ctx, instr->dsts[0]);
   insert_dst(ctx, instr->dsts[0]);
   finish_dst(ctx, instr->dsts[0]);
}

static void
remove_input_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
{
   if (instr->opc == OPC_META_TEX_PREFETCH) {
      ra_foreach_src (src, instr)
         remove_src(ctx, instr, src);
   }
   if (instr->dsts[0]->flags & IR3_REG_UNUSED)
      remove_dst(ctx, instr->dsts[0]);
}

static void
handle_live_in(struct ra_spill_ctx *ctx, struct ir3_block *block,
               struct ir3_register *def)
{
   struct ra_spill_interval *interval = ctx->intervals[def->name];
   ra_spill_interval_init(interval, def);
   if (ctx->spilling) {
      interval->next_use_distance =
         ctx->blocks[block->index].next_use_start[def->name];
   }

   ra_spill_ctx_insert(ctx, interval);
}

static bool
is_live_in_phi(struct ir3_register *def, struct ir3_block *block)
{
   return def->instr->opc == OPC_META_PHI && def->instr->block == block;
}

static bool
is_live_in_pred(struct ra_spill_ctx *ctx, struct ir3_register *def,
                struct ir3_block *block, unsigned pred_idx)
{
   struct ir3_block *pred = block->predecessors[pred_idx];
   struct ra_spill_block_state *state = &ctx->blocks[pred->index];
   if (is_live_in_phi(def, block)) {
      def = def->instr->srcs[pred_idx]->def;
      if (!def)
         return false;
   }

   return _mesa_hash_table_search(state->remap, def);
}

static bool
is_live_in_undef(struct ir3_register *def,
                 struct ir3_block *block, unsigned pred_idx)
{
   if (!is_live_in_phi(def, block))
      return false;

   return !def->instr->srcs[pred_idx]->def;
}

static struct reg_or_immed *
read_live_in(struct ra_spill_ctx *ctx, struct ir3_register *def,
             struct ir3_block *block, unsigned pred_idx)
{
   struct ir3_block *pred = block->predecessors[pred_idx];
   struct ra_spill_block_state *state = &ctx->blocks[pred->index];

   if (is_live_in_phi(def, block)) {
      def = def->instr->srcs[pred_idx]->def;
      if (!def)
         return NULL;
   }

   struct hash_entry *entry = _mesa_hash_table_search(state->remap, def);
   if (entry)
      return entry->data;
   else
      return NULL;
}
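/* Live-ins are resolved per predecessor edge: a live-in value must either be
 * present in every visited predecessor's remap, i.e. still in a register, or
 * be spilled at the end of the predecessors where it is live and reloaded at
 * the top of this block. For phi destinations we first translate to the
 * corresponding source def in each predecessor.
 */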
static bool
is_live_in_all_preds(struct ra_spill_ctx *ctx, struct ir3_register *def,
                     struct ir3_block *block)
{
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      if (!is_live_in_pred(ctx, def, block, i))
         return false;
   }

   return true;
}

static void
spill_live_in(struct ra_spill_ctx *ctx, struct ir3_register *def,
              struct ir3_block *block)
{
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];

      if (!state->visited)
         continue;

      struct reg_or_immed *pred_def = read_live_in(ctx, def, block, i);
      if (pred_def) {
         spill(ctx, pred_def, get_spill_slot(ctx, def), NULL, pred);
      }
   }
}

static void
spill_live_ins(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   bool all_preds_visited = true;
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];
      if (!state->visited) {
         all_preds_visited = false;
         break;
      }
   }

   /* Note: in the paper they explicitly spill live-through values first, but
    * we should be doing that automatically by virtue of picking the largest
    * distance due to the extra distance added to edges out of loops.
    *
    * TODO: Keep track of pressure in each block and preemptively spill
    * live-through values as described in the paper to avoid spilling them
    * inside the loop.
    */
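   /* The two loops below are this pass's version of the paper's "limit"
    * step at block entry: while the pressure of a register class is above
    * its limit, evict the candidate whose next use is furthest away.
    * Schematically (hypothetical helper name, ignoring the half/full
    * split):
    *
    *    while (cur_pressure > limit_pressure) {
    *       interval = furthest_next_use(live_intervals);
    *       spill_live_in(ctx, interval->interval.reg, block);
    *       ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
    *    }
    */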
   if (ctx->cur_pressure.half > ctx->limit_pressure.half) {
      rb_tree_foreach_safe (struct ra_spill_interval, interval,
                            &ctx->half_live_intervals, half_node) {
         if (all_preds_visited &&
             is_live_in_all_preds(ctx, interval->interval.reg, block))
            continue;
         spill_live_in(ctx, interval->interval.reg, block);
         ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
         if (ctx->cur_pressure.half <= ctx->limit_pressure.half)
            break;
      }
   }

   if (ctx->cur_pressure.full > ctx->limit_pressure.full) {
      rb_tree_foreach_safe (struct ra_spill_interval, interval,
                            &ctx->full_live_intervals, node) {
         if (all_preds_visited &&
             is_live_in_all_preds(ctx, interval->interval.reg, block))
            continue;
         spill_live_in(ctx, interval->interval.reg, block);
         ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
         if (ctx->cur_pressure.full <= ctx->limit_pressure.full)
            break;
      }
   }
}

static void
live_in_rewrite(struct ra_spill_ctx *ctx,
                struct ra_spill_interval *interval,
                struct reg_or_immed *new_val,
                struct ir3_block *block, unsigned pred_idx)
{
   struct ir3_block *pred = block->predecessors[pred_idx];
   struct ra_spill_block_state *state = &ctx->blocks[pred->index];
   struct ir3_register *def = interval->interval.reg;
   if (is_live_in_phi(def, block)) {
      def = def->instr->srcs[pred_idx]->def;
   }

   if (def)
      _mesa_hash_table_insert(state->remap, def, new_val);

   rb_tree_foreach (struct ra_spill_interval, child,
                    &interval->interval.children, interval.node) {
      assert(new_val->flags & IR3_REG_SSA);
      struct ir3_register *child_def =
         extract(new_val->def,
                 (child->interval.reg->interval_start - def->interval_start) /
                    reg_elem_size(def),
                 reg_elems(child->interval.reg), NULL, pred);
      struct reg_or_immed *child_val = ralloc(ctx, struct reg_or_immed);
      child_val->def = child_def;
      child_val->flags = child_def->flags;
      live_in_rewrite(ctx, child, child_val, block, pred_idx);
   }
}
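/* A worked example for the extract() call above (intervals invented, and
 * assuming interval units of half-registers so that reg_elem_size() is 2
 * for a full-precision register): a full parent def covering [8, 16) has
 * four elements; a child interval covering [12, 16) starts at element
 * (12 - 8) / 2 == 2 and spans reg_elems(child) == 2 elements, so the child
 * is rewritten to an extract of elements 2..3 of the parent's new value.
 */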
static void
reload_live_in(struct ra_spill_ctx *ctx, struct ir3_register *def,
               struct ir3_block *block)
{
   struct ra_spill_interval *interval = ctx->intervals[def->name];
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];
      if (!state->visited)
         continue;

      if (is_live_in_undef(def, block, i))
         continue;

      struct reg_or_immed *new_val = read_live_in(ctx, def, block, i);

      if (!new_val) {
         new_val = ralloc(ctx, struct reg_or_immed);
         new_val->def = reload(ctx, def, NULL, pred);
         new_val->flags = new_val->def->flags;
      }
      live_in_rewrite(ctx, interval, new_val, block, i);
   }
}

static void
reload_live_ins(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   rb_tree_foreach (struct ra_spill_interval, interval, &ctx->reg_ctx.intervals,
                    interval.node) {
      reload_live_in(ctx, interval->interval.reg, block);
   }
}
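/* add_live_in_phi() below decides whether a live-in still needs a phi after
 * spilling. A hypothetical example with predecessors P0 and P1: if both
 * remap a value v to the same SSA def v', the interval simply points at v'
 * and no phi is built. If the predecessors disagree, if one of them turned
 * v into a const/immed, or if a predecessor (e.g. a loop back edge) hasn't
 * been visited yet, we build
 *
 *    v'' = phi(remap_P0(v), remap_P1(v))
 *
 * where a not-yet-visited predecessor's source temporarily keeps the old
 * def and is patched later.
 */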
static void
add_live_in_phi(struct ra_spill_ctx *ctx, struct ir3_register *def,
                struct ir3_block *block)
{
   struct ra_spill_interval *interval = ctx->intervals[def->name];
   if (!interval->interval.inserted)
      return;

   bool needs_phi = false;
   struct ir3_register *cur_def = NULL;
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];

      if (!state->visited) {
         needs_phi = true;
         break;
      }

      struct hash_entry *entry =
         _mesa_hash_table_search(state->remap, def);
      assert(entry);
      struct reg_or_immed *pred_val = entry->data;
      if ((pred_val->flags & (IR3_REG_IMMED | IR3_REG_CONST)) ||
          !pred_val->def ||
          (cur_def && cur_def != pred_val->def)) {
         needs_phi = true;
         break;
      }
      cur_def = pred_val->def;
   }

   if (!needs_phi) {
      interval->dst.def = cur_def;
      interval->dst.flags = cur_def->flags;
      return;
   }

   struct ir3_instruction *phi =
      ir3_instr_create(block, OPC_META_PHI, 1, block->predecessors_count);
   struct ir3_register *dst = __ssa_dst(phi);
   dst->flags |= def->flags & (IR3_REG_HALF | IR3_REG_ARRAY);
   dst->size = def->size;
   dst->wrmask = def->wrmask;

   dst->interval_start = def->interval_start;
   dst->interval_end = def->interval_end;
   dst->merge_set = def->merge_set;
   dst->merge_set_offset = def->merge_set_offset;

   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];
      struct ir3_register *src = ir3_src_create(phi, INVALID_REG, dst->flags);
      src->size = def->size;
      src->wrmask = def->wrmask;

      if (state->visited) {
         struct hash_entry *entry =
            _mesa_hash_table_search(state->remap, def);
         assert(entry);
         struct reg_or_immed *new_val = entry->data;
         set_src_val(src, new_val);
      } else {
         src->def = def;
      }
   }

   interval->dst.def = dst;
   interval->dst.flags = dst->flags;

   ir3_instr_move_before_block(phi, block);
}

/* When spilling a block with a single predecessor, the pred may have other
 * successors, so we can't choose what's live in and we can't spill/restore
 * anything. Just make the inserted intervals exactly match the predecessor.
 * If a value wasn't live in the predecessor then it must've already been
 * spilled. Also, there are no phi nodes and no live-ins.
 */
static void
spill_single_pred_live_in(struct ra_spill_ctx *ctx,
                          struct ir3_block *block)
{
   unsigned name;
   BITSET_FOREACH_SET (name, ctx->live->live_in[block->index],
                       ctx->live->definitions_count) {
      struct ir3_register *reg = ctx->live->definitions[name];
      struct ra_spill_interval *interval = ctx->intervals[reg->name];
      struct reg_or_immed *val = read_live_in(ctx, reg, block, 0);
      if (val)
         interval->dst = *val;
      else
         ra_spill_ctx_remove(ctx, interval);
   }
}

static void
rewrite_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *phi,
            struct ir3_block *block)
{
   if (!ctx->intervals[phi->dsts[0]->name]->interval.inserted) {
      phi->flags |= IR3_INSTR_UNUSED;
      return;
   }

   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];

      if (!state->visited)
         continue;

      struct ir3_register *src = phi->srcs[i];
      if (!src->def)
         continue;

      struct hash_entry *entry =
         _mesa_hash_table_search(state->remap, src->def);
      assert(entry);
      struct reg_or_immed *new_val = entry->data;
      set_src_val(src, new_val);
   }
}
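/* rewrite_phi() example (invented names): with predecessors P0 (visited)
 * and P1 (a not-yet-visited back edge),
 *
 *    x = phi(a, b)
 *
 * becomes x = phi(remap_P0(a), b). Only sources coming from visited
 * predecessors are rewritten here; back-edge sources are patched once the
 * predecessor is processed, in update_live_out_phis().
 */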
static void
spill_live_out(struct ra_spill_ctx *ctx, struct ra_spill_interval *interval,
               struct ir3_block *block)
{
   struct ir3_register *def = interval->interval.reg;

   spill(ctx, &interval->dst, get_spill_slot(ctx, def), NULL, block);
   ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
}

static void
spill_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   struct ra_spill_block_state *state = &ctx->blocks[block->index];
   rb_tree_foreach_safe (struct ra_spill_interval, interval,
                         &ctx->reg_ctx.intervals, interval.node) {
      if (!BITSET_TEST(state->live_out, interval->interval.reg->name)) {
         spill_live_out(ctx, interval, block);
      }
   }
}

static void
reload_live_out(struct ra_spill_ctx *ctx, struct ir3_register *def,
                struct ir3_block *block)
{
   struct ra_spill_interval *interval = ctx->intervals[def->name];
   ir3_reg_interval_insert(&ctx->reg_ctx, &interval->interval);

   reload_def(ctx, def, NULL, block);
}

static void
reload_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   struct ra_spill_block_state *state = &ctx->blocks[block->index];
   unsigned name;
   BITSET_FOREACH_SET (name, state->live_out, ctx->live->definitions_count) {
      struct ir3_register *reg = ctx->live->definitions[name];
      struct ra_spill_interval *interval = ctx->intervals[name];
      if (!interval->interval.inserted)
         reload_live_out(ctx, reg, block);
   }
}

static void
update_live_out_phis(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   assert(!block->successors[1]);
   struct ir3_block *succ = block->successors[0];
   unsigned pred_idx = ir3_block_get_pred_index(succ, block);

   foreach_instr (instr, &succ->instr_list) {
      if (instr->opc != OPC_META_PHI)
         break;

      struct ir3_register *def = instr->srcs[pred_idx]->def;
      if (!def)
         continue;

      struct ra_spill_interval *interval = ctx->intervals[def->name];
      if (!interval->interval.inserted)
         continue;
      set_src_val(instr->srcs[pred_idx], &interval->dst);
   }
}
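/* The three functions above implement the back-edge contract (sketch with a
 * hypothetical loop): the header H is visited before its back-edge
 * predecessor B, and records in B's block state which defs it expects to be
 * live across the edge. When B is finally processed, everything else still
 * in registers is spilled (spill_live_outs), anything expected but not
 * resident is reloaded (reload_live_outs), and H's phi sources for the back
 * edge are pointed at B's current values (update_live_out_phis).
 */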
static void
record_pred_live_out(struct ra_spill_ctx *ctx,
                     struct ra_spill_interval *interval,
                     struct ir3_block *block, unsigned pred_idx)
{
   struct ir3_block *pred = block->predecessors[pred_idx];
   struct ra_spill_block_state *state = &ctx->blocks[pred->index];

   struct ir3_register *def = interval->interval.reg;
   if (is_live_in_phi(def, block)) {
      def = def->instr->srcs[pred_idx]->def;
   }
   BITSET_SET(state->live_out, def->name);

   rb_tree_foreach (struct ra_spill_interval, child,
                    &interval->interval.children, interval.node) {
      record_pred_live_out(ctx, child, block, pred_idx);
   }
}

static void
record_pred_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];
      if (state->visited)
         continue;

      state->live_out = rzalloc_array(ctx, BITSET_WORD,
                                      BITSET_WORDS(ctx->live->definitions_count));

      rb_tree_foreach (struct ra_spill_interval, interval,
                       &ctx->reg_ctx.intervals, interval.node) {
         record_pred_live_out(ctx, interval, block, i);
      }
   }
}

static void
record_live_out(struct ra_spill_ctx *ctx,
                struct ra_spill_block_state *state,
                struct ra_spill_interval *interval)
{
   if (!(interval->dst.flags & IR3_REG_SSA) ||
       interval->dst.def) {
      struct reg_or_immed *val = ralloc(ctx, struct reg_or_immed);
      *val = interval->dst;
      _mesa_hash_table_insert(state->remap, interval->interval.reg, val);
   }
   rb_tree_foreach (struct ra_spill_interval, child,
                    &interval->interval.children, interval.node) {
      record_live_out(ctx, state, child);
   }
}

static void
record_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   struct ra_spill_block_state *state = &ctx->blocks[block->index];
   state->remap = _mesa_pointer_hash_table_create(ctx);

   rb_tree_foreach (struct ra_spill_interval, interval, &ctx->reg_ctx.intervals,
                    interval.node) {
      record_live_out(ctx, state, interval);
   }
}
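/* What record_live_outs() leaves behind for a block (hypothetical entries):
 * for every interval still live at the block's end, remap[old def] points
 * at its current location, e.g.
 *
 *    remap[x] = { .flags = IR3_REG_SSA,   .def = x' }  // renamed SSA value
 *    remap[c] = { .flags = IR3_REG_CONST, .const_num = 4 }
 *
 * Successor blocks consult this table through read_live_in() and friends.
 */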
static void
handle_block(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   memset(&ctx->cur_pressure, 0, sizeof(ctx->cur_pressure));
   rb_tree_init(&ctx->reg_ctx.intervals);
   rb_tree_init(&ctx->full_live_intervals);
   rb_tree_init(&ctx->half_live_intervals);

   unsigned name;
   BITSET_FOREACH_SET (name, ctx->live->live_in[block->index],
                       ctx->live->definitions_count) {
      struct ir3_register *reg = ctx->live->definitions[name];
      handle_live_in(ctx, block, reg);
   }

   foreach_instr (instr, &block->instr_list) {
      if (instr->opc != OPC_META_PHI && instr->opc != OPC_META_INPUT &&
          instr->opc != OPC_META_TEX_PREFETCH)
         break;
      handle_input_phi(ctx, instr);
   }

   if (ctx->spilling) {
      if (block->predecessors_count == 1) {
         spill_single_pred_live_in(ctx, block);
      } else {
         spill_live_ins(ctx, block);
         reload_live_ins(ctx, block);
         record_pred_live_outs(ctx, block);
         foreach_instr (instr, &block->instr_list) {
            if (instr->opc != OPC_META_PHI)
               break;
            rewrite_phi(ctx, instr, block);
         }
         BITSET_FOREACH_SET (name, ctx->live->live_in[block->index],
                             ctx->live->definitions_count) {
            struct ir3_register *reg = ctx->live->definitions[name];
            add_live_in_phi(ctx, reg, block);
         }
      }
   } else {
      update_max_pressure(ctx);
   }

   foreach_instr (instr, &block->instr_list) {
      di(instr, "processing");

      if (instr->opc == OPC_META_PHI || instr->opc == OPC_META_INPUT ||
          instr->opc == OPC_META_TEX_PREFETCH)
         remove_input_phi(ctx, instr);
      else if (ctx->spilling && instr->opc == OPC_META_PARALLEL_COPY)
         handle_pcopy(ctx, instr);
      else if (ctx->spilling && instr->opc == OPC_MOV &&
               instr->dsts[0] == ctx->base_reg)
         /* skip */;
      else
         handle_instr(ctx, instr);
   }

   if (ctx->spilling && block->successors[0]) {
      struct ra_spill_block_state *state =
         &ctx->blocks[block->successors[0]->index];
      if (state->visited) {
         assert(!block->successors[1]);

         spill_live_outs(ctx, block);
         reload_live_outs(ctx, block);
         update_live_out_phis(ctx, block);
      }
   }

   if (ctx->spilling) {
      record_live_outs(ctx, block);
      ctx->blocks[block->index].visited = true;
   }
}

static bool
simplify_phi_node(struct ir3_instruction *phi)
{
   struct ir3_register *def = NULL;
   foreach_src (src, phi) {
      /* Ignore phi sources which point to the phi itself. */
      if (src->def == phi->dsts[0])
         continue;
      /* If it's undef or it doesn't match the previous sources, bail. */
      if (!src->def || (def && def != src->def))
         return false;
      def = src->def;
   }

   phi->data = def;
   phi->flags |= IR3_INSTR_UNUSED;
   return true;
}

static struct ir3_register *
simplify_phi_def(struct ir3_register *def)
{
   if (def->instr->opc == OPC_META_PHI) {
      struct ir3_instruction *phi = def->instr;

      /* Note: this function is always called at least once after visiting
       * the phi, so either a phi has been simplified in the meantime, in
       * which case we will set progress=true and visit the definition again,
       * or phi->data already has the most up-to-date value. Therefore we
       * don't have to recursively check phi->data.
       */
      if (phi->data)
         return phi->data;
   }

   return def;
}

static void
simplify_phi_srcs(struct ir3_instruction *instr)
{
   foreach_src (src, instr) {
      if (src->def)
         src->def = simplify_phi_def(src->def);
   }
}
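/* Example of a phi that simplify_phi_node() removes (values invented):
 * after spilling, a loop header can be left with
 *
 *    x2 = phi(x1, x2)   // the back-edge source is the phi itself
 *
 * Every non-self source is x1, so the phi records phi->data = x1 and is
 * marked IR3_INSTR_UNUSED; subsequent simplify_phi_srcs() calls then
 * rewrite users of x2 to x1.
 */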
/* We insert phi nodes for all live-ins of loops in case we need to split the
 * live range. This pass cleans that up for the case where the live range
 * didn't actually need to be split.
 */
static void
simplify_phi_nodes(struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         if (instr->opc != OPC_META_PHI)
            break;
         instr->data = NULL;
      }
   }

   bool progress;
   do {
      progress = false;
      foreach_block (block, &ir->block_list) {
         foreach_instr (instr, &block->instr_list) {
            if (instr->opc == OPC_META_PHI || (instr->flags & IR3_INSTR_UNUSED))
               continue;

            simplify_phi_srcs(instr);
         }

         /* Visit phi nodes in the successors to make sure that phi sources
          * are always visited at least once after visiting the definition
          * they point to. See note in simplify_phi_def() for why this is
          * necessary.
          */
         for (unsigned i = 0; i < 2; i++) {
            struct ir3_block *succ = block->successors[i];
            if (!succ)
               continue;
            foreach_instr (instr, &succ->instr_list) {
               if (instr->opc != OPC_META_PHI)
                  break;
               if (instr->flags & IR3_INSTR_UNUSED) {
                  if (instr->data)
                     instr->data = simplify_phi_def(instr->data);
               } else {
                  simplify_phi_srcs(instr);
                  progress |= simplify_phi_node(instr);
               }
            }
         }
      }
   } while (progress);
}

static void
unmark_dead(struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         instr->flags &= ~IR3_INSTR_UNUSED;
      }
   }
}

/* Simple pass to remove now-dead phi nodes and pcopy instructions. We mark
 * which ones are dead along the way, so there's nothing to compute here.
 */
static void
cleanup_dead(struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr_safe (instr, &block->instr_list) {
         if (instr->flags & IR3_INSTR_UNUSED)
            list_delinit(&instr->node);
      }
   }
}
/* Deal with merge sets after spilling. Spilling generally leaves the merge
 * sets in a mess, and even if we properly cleaned up after ourselves, we
 * would want to recompute the merge sets afterward anyway. That's because
 * spilling/reloading can "break up" phi webs and split/collect webs so that
 * allocating them to the same register no longer gives any benefit. For
 * example, imagine we have this:
 *
 * if (...) {
 *    foo = ...
 * } else {
 *    bar = ...
 * }
 * baz = phi(foo, bar)
 *
 * and we spill "baz":
 *
 * if (...) {
 *    foo = ...
 *    spill(foo)
 * } else {
 *    bar = ...
 *    spill(bar)
 * }
 * baz = reload()
 *
 * now foo, bar, and baz don't have to be allocated to the same register. How
 * exactly the merge sets change can be complicated, so it's easier just to
 * recompute them.
 *
 * However, there's a wrinkle in this: those same merge sets determine the
 * register pressure, due to multiple values inhabiting the same register! And
 * we assume that this sharing happens when spilling. Therefore we need a
 * three-step procedure:
 *
 * 1. Drop the original merge sets.
 * 2. Calculate which values *must* be merged, being careful to only use the
 *    interval information which isn't trashed by spilling, and forcibly merge
 *    them.
 * 3. Let ir3_merge_regs() finish the job, including recalculating the
 *    intervals.
 */

static void
fixup_merge_sets(struct ir3_liveness *live, struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         ra_foreach_dst (dst, instr) {
            dst->merge_set = NULL;
            dst->merge_set_offset = 0;
         }
      }
   }

   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         if (instr->opc != OPC_META_SPLIT &&
             instr->opc != OPC_META_COLLECT)
            continue;

         struct ir3_register *dst = instr->dsts[0];
         ra_foreach_src (src, instr) {
            if (!(src->flags & IR3_REG_KILL) &&
                src->def->interval_start < dst->interval_end &&
                dst->interval_start < src->def->interval_end) {
               ir3_force_merge(dst, src->def,
                               src->def->interval_start - dst->interval_start);
            }
         }
      }
   }

   ir3_merge_regs(live, ir);
}
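/* A worked instance of the overlap test above (intervals invented): for a
 * collect with dst covering [4, 10) and a source def covering [6, 12), we
 * have 6 < 10 and 4 < 12, so the old intervals intersect and the two values
 * were assumed to share a register when pressure was accounted; they are
 * force-merged at offset 6 - 4 = 2. A killed source, or one whose interval
 * is disjoint from the dst's, is left for ir3_merge_regs() to handle
 * normally.
 */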
void
ir3_calc_pressure(struct ir3_shader_variant *v, struct ir3_liveness *live,
                  struct ir3_pressure *max_pressure)
{
   struct ra_spill_ctx *ctx = rzalloc(NULL, struct ra_spill_ctx);
   spill_ctx_init(ctx, v, live);

   foreach_block (block, &v->ir->block_list) {
      handle_block(ctx, block);
   }

   assert(ctx->cur_pressure.full == 0);
   assert(ctx->cur_pressure.half == 0);
   assert(ctx->cur_pressure.shared == 0);

   *max_pressure = ctx->max_pressure;
   ralloc_free(ctx);
}

bool
ir3_spill(struct ir3 *ir, struct ir3_shader_variant *v,
          struct ir3_liveness **live,
          const struct ir3_pressure *limit_pressure)
{
   void *mem_ctx = ralloc_parent(*live);
   struct ra_spill_ctx *ctx = rzalloc(mem_ctx, struct ra_spill_ctx);
   spill_ctx_init(ctx, v, *live);

   ctx->spilling = true;

   ctx->blocks = rzalloc_array(ctx, struct ra_spill_block_state,
                               ctx->live->block_count);
   rb_tree_init(&ctx->full_live_intervals);
   rb_tree_init(&ctx->half_live_intervals);

   ctx->limit_pressure = *limit_pressure;
   ctx->spill_slot = v->pvtmem_size;

   add_base_reg(ctx, ir);
   compute_next_distance(ctx, ir);

   unmark_dead(ir);

   foreach_block (block, &ir->block_list) {
      handle_block(ctx, block);
   }

   simplify_phi_nodes(ir);

   cleanup_dead(ir);

   ir3_create_parallel_copies(ir);

   /* After this point, we're done mutating the IR. Liveness has been trashed,
    * so recalculate it. We'll need it for recalculating the merge sets.
    */
   ralloc_free(ctx->live);
   *live = ir3_calc_liveness(mem_ctx, ir);

   fixup_merge_sets(*live, ir);

   v->pvtmem_size = ctx->spill_slot;
   ralloc_free(ctx);

   return true;
}
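/* Typical flow around the two entry points above (a sketch only; the real
 * call site lives in the ir3 RA code and derives the limit from the target's
 * register file):
 *
 *    struct ir3_pressure max, limit;
 *    ir3_calc_pressure(v, live, &max);
 *    // ... RA computes `limit` from the register file size ...
 *    if (max.full > limit.full || max.half > limit.half)
 *       ir3_spill(ir, v, &live, &limit);
 *
 * Note that ir3_spill() replaces *live with freshly computed liveness, since
 * spilling rewrites the IR.
 */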