17ec681f3Smrg/* 27ec681f3Smrg * Copyright (C) 2020 Collabora Ltd. 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 207ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 217ec681f3Smrg * SOFTWARE. 227ec681f3Smrg * 237ec681f3Smrg * Authors (Collabora): 247ec681f3Smrg * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> 257ec681f3Smrg */ 267ec681f3Smrg 277ec681f3Smrg#include "compiler.h" 287ec681f3Smrg#include "bi_builder.h" 297ec681f3Smrg#include "util/u_memory.h" 307ec681f3Smrg 317ec681f3Smrgstruct lcra_state { 327ec681f3Smrg unsigned node_count; 337ec681f3Smrg uint64_t *affinity; 347ec681f3Smrg 357ec681f3Smrg /* Linear constraints imposed. Nested array sized upfront, organized as 367ec681f3Smrg * linear[node_left][node_right]. That is, calculate indices as: 377ec681f3Smrg * 387ec681f3Smrg * Each element is itself a bit field denoting whether (c_j - c_i) bias 397ec681f3Smrg * is present or not, including negative biases. 407ec681f3Smrg * 417ec681f3Smrg * Note for Bifrost, there are 4 components so the bias is in range 427ec681f3Smrg * [-3, 3] so encoded by 8-bit field. */ 437ec681f3Smrg 447ec681f3Smrg uint8_t *linear; 457ec681f3Smrg 467ec681f3Smrg /* Before solving, forced registers; after solving, solutions. */ 477ec681f3Smrg unsigned *solutions; 487ec681f3Smrg 497ec681f3Smrg /** Node which caused register allocation to fail */ 507ec681f3Smrg unsigned spill_node; 517ec681f3Smrg}; 527ec681f3Smrg 537ec681f3Smrg/* This module is an implementation of "Linearly Constrained 547ec681f3Smrg * Register Allocation". The paper is available in PDF form 557ec681f3Smrg * (https://people.collabora.com/~alyssa/LCRA.pdf) as well as Markdown+LaTeX 567ec681f3Smrg * (https://gitlab.freedesktop.org/alyssa/lcra/blob/master/LCRA.md) 577ec681f3Smrg */ 587ec681f3Smrg 597ec681f3Smrgstatic struct lcra_state * 607ec681f3Smrglcra_alloc_equations(unsigned node_count) 617ec681f3Smrg{ 627ec681f3Smrg struct lcra_state *l = calloc(1, sizeof(*l)); 637ec681f3Smrg 647ec681f3Smrg l->node_count = node_count; 657ec681f3Smrg 667ec681f3Smrg l->linear = calloc(sizeof(l->linear[0]), node_count * node_count); 677ec681f3Smrg l->solutions = calloc(sizeof(l->solutions[0]), node_count); 687ec681f3Smrg l->affinity = calloc(sizeof(l->affinity[0]), node_count); 697ec681f3Smrg 707ec681f3Smrg memset(l->solutions, ~0, sizeof(l->solutions[0]) * node_count); 717ec681f3Smrg 727ec681f3Smrg return l; 737ec681f3Smrg} 747ec681f3Smrg 757ec681f3Smrgstatic void 767ec681f3Smrglcra_free(struct lcra_state *l) 777ec681f3Smrg{ 787ec681f3Smrg free(l->linear); 797ec681f3Smrg free(l->affinity); 807ec681f3Smrg free(l->solutions); 817ec681f3Smrg free(l); 827ec681f3Smrg} 837ec681f3Smrg 847ec681f3Smrgstatic void 857ec681f3Smrglcra_add_node_interference(struct lcra_state *l, unsigned i, unsigned cmask_i, unsigned j, unsigned cmask_j) 867ec681f3Smrg{ 877ec681f3Smrg if (i == j) 887ec681f3Smrg return; 897ec681f3Smrg 907ec681f3Smrg uint8_t constraint_fw = 0; 917ec681f3Smrg uint8_t constraint_bw = 0; 927ec681f3Smrg 937ec681f3Smrg for (unsigned D = 0; D < 4; ++D) { 947ec681f3Smrg if (cmask_i & (cmask_j << D)) { 957ec681f3Smrg constraint_bw |= (1 << (3 + D)); 967ec681f3Smrg constraint_fw |= (1 << (3 - D)); 977ec681f3Smrg } 987ec681f3Smrg 997ec681f3Smrg if (cmask_i & (cmask_j >> D)) { 1007ec681f3Smrg constraint_fw |= (1 << (3 + D)); 1017ec681f3Smrg constraint_bw |= (1 << (3 - D)); 1027ec681f3Smrg } 1037ec681f3Smrg } 1047ec681f3Smrg 1057ec681f3Smrg l->linear[j * l->node_count + i] |= constraint_fw; 1067ec681f3Smrg l->linear[i * l->node_count + j] |= constraint_bw; 1077ec681f3Smrg} 1087ec681f3Smrg 1097ec681f3Smrgstatic bool 1107ec681f3Smrglcra_test_linear(struct lcra_state *l, unsigned *solutions, unsigned i) 1117ec681f3Smrg{ 1127ec681f3Smrg uint8_t *row = &l->linear[i * l->node_count]; 1137ec681f3Smrg signed constant = solutions[i]; 1147ec681f3Smrg 1157ec681f3Smrg for (unsigned j = 0; j < l->node_count; ++j) { 1167ec681f3Smrg if (solutions[j] == ~0) continue; 1177ec681f3Smrg 1187ec681f3Smrg signed lhs = solutions[j] - constant; 1197ec681f3Smrg 1207ec681f3Smrg if (lhs < -3 || lhs > 3) 1217ec681f3Smrg continue; 1227ec681f3Smrg 1237ec681f3Smrg if (row[j] & (1 << (lhs + 3))) 1247ec681f3Smrg return false; 1257ec681f3Smrg } 1267ec681f3Smrg 1277ec681f3Smrg return true; 1287ec681f3Smrg} 1297ec681f3Smrg 1307ec681f3Smrgstatic bool 1317ec681f3Smrglcra_solve(struct lcra_state *l) 1327ec681f3Smrg{ 1337ec681f3Smrg for (unsigned step = 0; step < l->node_count; ++step) { 1347ec681f3Smrg if (l->solutions[step] != ~0) continue; 1357ec681f3Smrg if (l->affinity[step] == 0) continue; 1367ec681f3Smrg 1377ec681f3Smrg bool succ = false; 1387ec681f3Smrg 1397ec681f3Smrg u_foreach_bit64(r, l->affinity[step]) { 1407ec681f3Smrg l->solutions[step] = r; 1417ec681f3Smrg 1427ec681f3Smrg if (lcra_test_linear(l, l->solutions, step)) { 1437ec681f3Smrg succ = true; 1447ec681f3Smrg break; 1457ec681f3Smrg } 1467ec681f3Smrg } 1477ec681f3Smrg 1487ec681f3Smrg /* Out of registers - prepare to spill */ 1497ec681f3Smrg if (!succ) { 1507ec681f3Smrg l->spill_node = step; 1517ec681f3Smrg return false; 1527ec681f3Smrg } 1537ec681f3Smrg } 1547ec681f3Smrg 1557ec681f3Smrg return true; 1567ec681f3Smrg} 1577ec681f3Smrg 1587ec681f3Smrg/* Register spilling is implemented with a cost-benefit system. Costs are set 1597ec681f3Smrg * by the user. Benefits are calculated from the constraints. */ 1607ec681f3Smrg 1617ec681f3Smrgstatic unsigned 1627ec681f3Smrglcra_count_constraints(struct lcra_state *l, unsigned i) 1637ec681f3Smrg{ 1647ec681f3Smrg unsigned count = 0; 1657ec681f3Smrg uint8_t *constraints = &l->linear[i * l->node_count]; 1667ec681f3Smrg 1677ec681f3Smrg for (unsigned j = 0; j < l->node_count; ++j) 1687ec681f3Smrg count += util_bitcount(constraints[j]); 1697ec681f3Smrg 1707ec681f3Smrg return count; 1717ec681f3Smrg} 1727ec681f3Smrg 1737ec681f3Smrg/* Construct an affinity mask such that the vector with `count` elements does 1747ec681f3Smrg * not intersect any of the registers in the bitset `clobber`. In other words, 1757ec681f3Smrg * an allocated register r needs to satisfy for each i < count: a + i != b. 1767ec681f3Smrg * Equivalently that's a != b - i, so we need a \ne { b - i : i < n }. For the 1777ec681f3Smrg * entire clobber set B, we need a \ne union b \in B { b - i : i < n }, where 1787ec681f3Smrg * that union is the desired clobber set. That may be written equivalently as 1797ec681f3Smrg * the union over i < n of (B - i), where subtraction is defined elementwise 1807ec681f3Smrg * and corresponds to a shift of the entire bitset. 1817ec681f3Smrg * 1827ec681f3Smrg * EVEN_BITS_MASK is an affinity mask for aligned register pairs. Interpreted 1837ec681f3Smrg * as a bit set, it is { x : 0 <= x < 64 if x is even } 1847ec681f3Smrg */ 1857ec681f3Smrg 1867ec681f3Smrg#define EVEN_BITS_MASK (0x5555555555555555ull) 1877ec681f3Smrg 1887ec681f3Smrgstatic uint64_t 1897ec681f3Smrgbi_make_affinity(uint64_t clobber, unsigned count, bool split_file) 1907ec681f3Smrg{ 1917ec681f3Smrg uint64_t clobbered = 0; 1927ec681f3Smrg 1937ec681f3Smrg for (unsigned i = 0; i < count; ++i) 1947ec681f3Smrg clobbered |= (clobber >> i); 1957ec681f3Smrg 1967ec681f3Smrg /* Don't allocate past the end of the register file */ 1977ec681f3Smrg if (count > 1) { 1987ec681f3Smrg unsigned excess = count - 1; 1997ec681f3Smrg uint64_t mask = BITFIELD_MASK(excess); 2007ec681f3Smrg clobbered |= mask << (64 - excess); 2017ec681f3Smrg 2027ec681f3Smrg if (split_file) 2037ec681f3Smrg clobbered |= mask << (16 - excess); 2047ec681f3Smrg } 2057ec681f3Smrg 2067ec681f3Smrg /* Don't allocate the middle if we split out the middle */ 2077ec681f3Smrg if (split_file) 2087ec681f3Smrg clobbered |= BITFIELD64_MASK(32) << 16; 2097ec681f3Smrg 2107ec681f3Smrg /* We can use a register iff it's not clobberred */ 2117ec681f3Smrg return ~clobbered; 2127ec681f3Smrg} 2137ec681f3Smrg 2147ec681f3Smrgstatic void 2157ec681f3Smrgbi_mark_interference(bi_block *block, struct lcra_state *l, uint8_t *live, uint64_t preload_live, unsigned node_count, bool is_blend, bool split_file, bool aligned_sr) 2167ec681f3Smrg{ 2177ec681f3Smrg bi_foreach_instr_in_block_rev(block, ins) { 2187ec681f3Smrg /* Mark all registers live after the instruction as 2197ec681f3Smrg * interfering with the destination */ 2207ec681f3Smrg 2217ec681f3Smrg bi_foreach_dest(ins, d) { 2227ec681f3Smrg unsigned node = bi_get_node(ins->dest[d]); 2237ec681f3Smrg 2247ec681f3Smrg if (node >= node_count) 2257ec681f3Smrg continue; 2267ec681f3Smrg 2277ec681f3Smrg /* Don't allocate to anything that's read later as a 2287ec681f3Smrg * preloaded register. The affinity is the intersection 2297ec681f3Smrg * of affinity masks for each write. Since writes have 2307ec681f3Smrg * offsets, but the affinity is for the whole node, we 2317ec681f3Smrg * need to offset the affinity opposite the write 2327ec681f3Smrg * offset, so we shift right. */ 2337ec681f3Smrg unsigned count = bi_count_write_registers(ins, d); 2347ec681f3Smrg unsigned offset = ins->dest[d].offset; 2357ec681f3Smrg uint64_t affinity = bi_make_affinity(preload_live, count, split_file); 2367ec681f3Smrg 2377ec681f3Smrg /* Valhall needs >= 64-bit staging writes to be pair-aligned */ 2387ec681f3Smrg if (aligned_sr && count >= 2) 2397ec681f3Smrg affinity &= EVEN_BITS_MASK; 2407ec681f3Smrg 2417ec681f3Smrg l->affinity[node] &= (affinity >> offset); 2427ec681f3Smrg 2437ec681f3Smrg for (unsigned i = 0; i < node_count; ++i) { 2447ec681f3Smrg if (live[i]) { 2457ec681f3Smrg lcra_add_node_interference(l, node, 2467ec681f3Smrg bi_writemask(ins, d), i, live[i]); 2477ec681f3Smrg } 2487ec681f3Smrg } 2497ec681f3Smrg } 2507ec681f3Smrg 2517ec681f3Smrg /* Valhall needs >= 64-bit staging reads to be pair-aligned */ 2527ec681f3Smrg if (aligned_sr && bi_count_read_registers(ins, 0) >= 2) { 2537ec681f3Smrg unsigned node = bi_get_node(ins->src[0]); 2547ec681f3Smrg 2557ec681f3Smrg if (node < node_count) 2567ec681f3Smrg l->affinity[node] &= EVEN_BITS_MASK; 2577ec681f3Smrg } 2587ec681f3Smrg 2597ec681f3Smrg if (!is_blend && ins->op == BI_OPCODE_BLEND) { 2607ec681f3Smrg /* Blend shaders might clobber r0-r15, r48. */ 2617ec681f3Smrg uint64_t clobber = BITFIELD64_MASK(16) | BITFIELD64_BIT(48); 2627ec681f3Smrg 2637ec681f3Smrg for (unsigned i = 0; i < node_count; ++i) { 2647ec681f3Smrg if (live[i]) 2657ec681f3Smrg l->affinity[i] &= ~clobber; 2667ec681f3Smrg } 2677ec681f3Smrg } 2687ec681f3Smrg 2697ec681f3Smrg /* Update live_in */ 2707ec681f3Smrg preload_live = bi_postra_liveness_ins(preload_live, ins); 2717ec681f3Smrg bi_liveness_ins_update(live, ins, node_count); 2727ec681f3Smrg } 2737ec681f3Smrg 2747ec681f3Smrg block->reg_live_in = preload_live; 2757ec681f3Smrg} 2767ec681f3Smrg 2777ec681f3Smrgstatic void 2787ec681f3Smrgbi_compute_interference(bi_context *ctx, struct lcra_state *l, bool full_regs) 2797ec681f3Smrg{ 2807ec681f3Smrg unsigned node_count = bi_max_temp(ctx); 2817ec681f3Smrg 2827ec681f3Smrg bi_compute_liveness(ctx); 2837ec681f3Smrg bi_postra_liveness(ctx); 2847ec681f3Smrg 2857ec681f3Smrg bi_foreach_block_rev(ctx, blk) { 2867ec681f3Smrg uint8_t *live = mem_dup(blk->live_out, node_count); 2877ec681f3Smrg 2887ec681f3Smrg bi_mark_interference(blk, l, live, blk->reg_live_out, 2897ec681f3Smrg node_count, ctx->inputs->is_blend, !full_regs, 2907ec681f3Smrg ctx->arch >= 9); 2917ec681f3Smrg 2927ec681f3Smrg free(live); 2937ec681f3Smrg } 2947ec681f3Smrg} 2957ec681f3Smrg 2967ec681f3Smrgstatic struct lcra_state * 2977ec681f3Smrgbi_allocate_registers(bi_context *ctx, bool *success, bool full_regs) 2987ec681f3Smrg{ 2997ec681f3Smrg unsigned node_count = bi_max_temp(ctx); 3007ec681f3Smrg struct lcra_state *l = lcra_alloc_equations(node_count); 3017ec681f3Smrg 3027ec681f3Smrg /* Blend shaders are restricted to R0-R15. Other shaders at full 3037ec681f3Smrg * occupancy also can access R48-R63. At half occupancy they can access 3047ec681f3Smrg * the whole file. */ 3057ec681f3Smrg 3067ec681f3Smrg uint64_t default_affinity = 3077ec681f3Smrg ctx->inputs->is_blend ? BITFIELD64_MASK(16) : 3087ec681f3Smrg full_regs ? BITFIELD64_MASK(64) : 3097ec681f3Smrg (BITFIELD64_MASK(16) | (BITFIELD64_MASK(16) << 48)); 3107ec681f3Smrg 3117ec681f3Smrg bi_foreach_instr_global(ctx, ins) { 3127ec681f3Smrg bi_foreach_dest(ins, d) { 3137ec681f3Smrg unsigned dest = bi_get_node(ins->dest[d]); 3147ec681f3Smrg 3157ec681f3Smrg /* Blend shaders expect the src colour to be in r0-r3 */ 3167ec681f3Smrg if (ins->op == BI_OPCODE_BLEND && 3177ec681f3Smrg !ctx->inputs->is_blend) { 3187ec681f3Smrg unsigned node = bi_get_node(ins->src[0]); 3197ec681f3Smrg assert(node < node_count); 3207ec681f3Smrg l->solutions[node] = 0; 3217ec681f3Smrg } 3227ec681f3Smrg 3237ec681f3Smrg if (dest < node_count) 3247ec681f3Smrg l->affinity[dest] = default_affinity; 3257ec681f3Smrg } 3267ec681f3Smrg 3277ec681f3Smrg } 3287ec681f3Smrg 3297ec681f3Smrg bi_compute_interference(ctx, l, full_regs); 3307ec681f3Smrg 3317ec681f3Smrg *success = lcra_solve(l); 3327ec681f3Smrg 3337ec681f3Smrg return l; 3347ec681f3Smrg} 3357ec681f3Smrg 3367ec681f3Smrgstatic bi_index 3377ec681f3Smrgbi_reg_from_index(bi_context *ctx, struct lcra_state *l, bi_index index) 3387ec681f3Smrg{ 3397ec681f3Smrg /* Offsets can only be applied when we register allocated an index, or 3407ec681f3Smrg * alternatively for FAU's encoding */ 3417ec681f3Smrg 3427ec681f3Smrg ASSERTED bool is_offset = (index.offset > 0) && 3437ec681f3Smrg (index.type != BI_INDEX_FAU); 3447ec681f3Smrg unsigned node_count = bi_max_temp(ctx); 3457ec681f3Smrg 3467ec681f3Smrg /* Did we run RA for this index at all */ 3477ec681f3Smrg if (bi_get_node(index) >= node_count) { 3487ec681f3Smrg assert(!is_offset); 3497ec681f3Smrg return index; 3507ec681f3Smrg } 3517ec681f3Smrg 3527ec681f3Smrg /* LCRA didn't bother solving this index (how lazy!) */ 3537ec681f3Smrg signed solution = l->solutions[bi_get_node(index)]; 3547ec681f3Smrg if (solution < 0) { 3557ec681f3Smrg assert(!is_offset); 3567ec681f3Smrg return index; 3577ec681f3Smrg } 3587ec681f3Smrg 3597ec681f3Smrg /* todo: do we want to compose with the subword swizzle? */ 3607ec681f3Smrg bi_index new_index = bi_register(solution + index.offset); 3617ec681f3Smrg new_index.swizzle = index.swizzle; 3627ec681f3Smrg new_index.abs = index.abs; 3637ec681f3Smrg new_index.neg = index.neg; 3647ec681f3Smrg return new_index; 3657ec681f3Smrg} 3667ec681f3Smrg 3677ec681f3Smrgstatic void 3687ec681f3Smrgbi_install_registers(bi_context *ctx, struct lcra_state *l) 3697ec681f3Smrg{ 3707ec681f3Smrg bi_foreach_instr_global(ctx, ins) { 3717ec681f3Smrg bi_foreach_dest(ins, d) 3727ec681f3Smrg ins->dest[d] = bi_reg_from_index(ctx, l, ins->dest[d]); 3737ec681f3Smrg 3747ec681f3Smrg bi_foreach_src(ins, s) 3757ec681f3Smrg ins->src[s] = bi_reg_from_index(ctx, l, ins->src[s]); 3767ec681f3Smrg } 3777ec681f3Smrg} 3787ec681f3Smrg 3797ec681f3Smrgstatic void 3807ec681f3Smrgbi_rewrite_index_src_single(bi_instr *ins, bi_index old, bi_index new) 3817ec681f3Smrg{ 3827ec681f3Smrg bi_foreach_src(ins, i) { 3837ec681f3Smrg if (bi_is_equiv(ins->src[i], old)) { 3847ec681f3Smrg ins->src[i].type = new.type; 3857ec681f3Smrg ins->src[i].reg = new.reg; 3867ec681f3Smrg ins->src[i].value = new.value; 3877ec681f3Smrg } 3887ec681f3Smrg } 3897ec681f3Smrg} 3907ec681f3Smrg 3917ec681f3Smrg/* If register allocation fails, find the best spill node */ 3927ec681f3Smrg 3937ec681f3Smrgstatic signed 3947ec681f3Smrgbi_choose_spill_node(bi_context *ctx, struct lcra_state *l) 3957ec681f3Smrg{ 3967ec681f3Smrg /* Pick a node satisfying bi_spill_register's preconditions */ 3977ec681f3Smrg BITSET_WORD *no_spill = calloc(sizeof(BITSET_WORD), BITSET_WORDS(l->node_count)); 3987ec681f3Smrg 3997ec681f3Smrg bi_foreach_instr_global(ctx, ins) { 4007ec681f3Smrg bi_foreach_dest(ins, d) { 4017ec681f3Smrg unsigned node = bi_get_node(ins->dest[d]); 4027ec681f3Smrg 4037ec681f3Smrg if (node < l->node_count && ins->no_spill) 4047ec681f3Smrg BITSET_SET(no_spill, node); 4057ec681f3Smrg } 4067ec681f3Smrg } 4077ec681f3Smrg 4087ec681f3Smrg unsigned best_benefit = 0.0; 4097ec681f3Smrg signed best_node = -1; 4107ec681f3Smrg 4117ec681f3Smrg for (unsigned i = 0; i < l->node_count; ++i) { 4127ec681f3Smrg if (BITSET_TEST(no_spill, i)) continue; 4137ec681f3Smrg 4147ec681f3Smrg /* Only spill nodes that interfere with the node failing 4157ec681f3Smrg * register allocation. It's pointless to spill anything else */ 4167ec681f3Smrg if (!l->linear[(l->spill_node * l->node_count) + i]) continue; 4177ec681f3Smrg 4187ec681f3Smrg unsigned benefit = lcra_count_constraints(l, i); 4197ec681f3Smrg 4207ec681f3Smrg if (benefit > best_benefit) { 4217ec681f3Smrg best_benefit = benefit; 4227ec681f3Smrg best_node = i; 4237ec681f3Smrg } 4247ec681f3Smrg } 4257ec681f3Smrg 4267ec681f3Smrg free(no_spill); 4277ec681f3Smrg return best_node; 4287ec681f3Smrg} 4297ec681f3Smrg 4307ec681f3Smrgstatic unsigned 4317ec681f3Smrgbi_count_read_index(bi_instr *I, bi_index index) 4327ec681f3Smrg{ 4337ec681f3Smrg unsigned max = 0; 4347ec681f3Smrg 4357ec681f3Smrg bi_foreach_src(I, s) { 4367ec681f3Smrg if (bi_is_equiv(I->src[s], index)) { 4377ec681f3Smrg unsigned count = bi_count_read_registers(I, s); 4387ec681f3Smrg max = MAX2(max, count + I->src[s].offset); 4397ec681f3Smrg } 4407ec681f3Smrg } 4417ec681f3Smrg 4427ec681f3Smrg return max; 4437ec681f3Smrg} 4447ec681f3Smrg 4457ec681f3Smrg/* Once we've chosen a spill node, spill it and returns bytes spilled */ 4467ec681f3Smrg 4477ec681f3Smrgstatic unsigned 4487ec681f3Smrgbi_spill_register(bi_context *ctx, bi_index index, uint32_t offset) 4497ec681f3Smrg{ 4507ec681f3Smrg bi_builder b = { .shader = ctx }; 4517ec681f3Smrg unsigned channels = 0; 4527ec681f3Smrg 4537ec681f3Smrg /* Spill after every store, fill before every load */ 4547ec681f3Smrg bi_foreach_instr_global_safe(ctx, I) { 4557ec681f3Smrg bi_foreach_dest(I, d) { 4567ec681f3Smrg if (!bi_is_equiv(I->dest[d], index)) continue; 4577ec681f3Smrg 4587ec681f3Smrg unsigned extra = I->dest[d].offset; 4597ec681f3Smrg bi_index tmp = bi_temp(ctx); 4607ec681f3Smrg 4617ec681f3Smrg I->dest[d] = bi_replace_index(I->dest[d], tmp); 4627ec681f3Smrg I->no_spill = true; 4637ec681f3Smrg 4647ec681f3Smrg unsigned count = bi_count_write_registers(I, d); 4657ec681f3Smrg unsigned bits = count * 32; 4667ec681f3Smrg 4677ec681f3Smrg b.cursor = bi_after_instr(I); 4687ec681f3Smrg bi_index loc = bi_imm_u32(offset + 4 * extra); 4697ec681f3Smrg bi_store(&b, bits, tmp, loc, bi_zero(), BI_SEG_TL); 4707ec681f3Smrg 4717ec681f3Smrg ctx->spills++; 4727ec681f3Smrg channels = MAX2(channels, extra + count); 4737ec681f3Smrg } 4747ec681f3Smrg 4757ec681f3Smrg if (bi_has_arg(I, index)) { 4767ec681f3Smrg b.cursor = bi_before_instr(I); 4777ec681f3Smrg bi_index tmp = bi_temp(ctx); 4787ec681f3Smrg 4797ec681f3Smrg unsigned bits = bi_count_read_index(I, index) * 32; 4807ec681f3Smrg bi_rewrite_index_src_single(I, index, tmp); 4817ec681f3Smrg 4827ec681f3Smrg bi_instr *ld = bi_load_to(&b, bits, tmp, 4837ec681f3Smrg bi_imm_u32(offset), bi_zero(), BI_SEG_TL); 4847ec681f3Smrg ld->no_spill = true; 4857ec681f3Smrg ctx->fills++; 4867ec681f3Smrg } 4877ec681f3Smrg } 4887ec681f3Smrg 4897ec681f3Smrg return (channels * 4); 4907ec681f3Smrg} 4917ec681f3Smrg 4927ec681f3Smrgvoid 4937ec681f3Smrgbi_register_allocate(bi_context *ctx) 4947ec681f3Smrg{ 4957ec681f3Smrg struct lcra_state *l = NULL; 4967ec681f3Smrg bool success = false; 4977ec681f3Smrg 4987ec681f3Smrg unsigned iter_count = 1000; /* max iterations */ 4997ec681f3Smrg 5007ec681f3Smrg /* Number of bytes of memory we've spilled into */ 5017ec681f3Smrg unsigned spill_count = ctx->info->tls_size; 5027ec681f3Smrg 5037ec681f3Smrg /* Try with reduced register pressure to improve thread count on v7 */ 5047ec681f3Smrg if (ctx->arch == 7) { 5057ec681f3Smrg bi_invalidate_liveness(ctx); 5067ec681f3Smrg l = bi_allocate_registers(ctx, &success, false); 5077ec681f3Smrg 5087ec681f3Smrg if (success) { 5097ec681f3Smrg ctx->info->work_reg_count = 32; 5107ec681f3Smrg } else { 5117ec681f3Smrg lcra_free(l); 5127ec681f3Smrg l = NULL; 5137ec681f3Smrg } 5147ec681f3Smrg } 5157ec681f3Smrg 5167ec681f3Smrg /* Otherwise, use the register file and spill until we succeed */ 5177ec681f3Smrg while (!success && ((iter_count--) > 0)) { 5187ec681f3Smrg bi_invalidate_liveness(ctx); 5197ec681f3Smrg l = bi_allocate_registers(ctx, &success, true); 5207ec681f3Smrg 5217ec681f3Smrg if (success) { 5227ec681f3Smrg ctx->info->work_reg_count = 64; 5237ec681f3Smrg } else { 5247ec681f3Smrg signed spill_node = bi_choose_spill_node(ctx, l); 5257ec681f3Smrg lcra_free(l); 5267ec681f3Smrg l = NULL; 5277ec681f3Smrg 5287ec681f3Smrg if (spill_node == -1) 5297ec681f3Smrg unreachable("Failed to choose spill node\n"); 5307ec681f3Smrg 5317ec681f3Smrg spill_count += bi_spill_register(ctx, 5327ec681f3Smrg bi_node_to_index(spill_node, bi_max_temp(ctx)), 5337ec681f3Smrg spill_count); 5347ec681f3Smrg } 5357ec681f3Smrg } 5367ec681f3Smrg 5377ec681f3Smrg assert(success); 5387ec681f3Smrg assert(l != NULL); 5397ec681f3Smrg 5407ec681f3Smrg ctx->info->tls_size = spill_count; 5417ec681f3Smrg bi_install_registers(ctx, l); 5427ec681f3Smrg 5437ec681f3Smrg lcra_free(l); 5447ec681f3Smrg} 545