17ec681f3Smrg/* 27ec681f3Smrg * Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io> 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 207ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 217ec681f3Smrg * SOFTWARE. 227ec681f3Smrg */ 237ec681f3Smrg 247ec681f3Smrg#include "agx_compiler.h" 257ec681f3Smrg#include "agx_builder.h" 267ec681f3Smrg 277ec681f3Smrg/* Trivial register allocator that never frees anything. 287ec681f3Smrg * 297ec681f3Smrg * TODO: Write a real register allocator. 307ec681f3Smrg * TODO: Handle phi nodes. 317ec681f3Smrg */ 327ec681f3Smrg 337ec681f3Smrg/** Returns number of registers written by an instruction */ 347ec681f3Smrgstatic unsigned 357ec681f3Smrgagx_write_registers(agx_instr *I, unsigned d) 367ec681f3Smrg{ 377ec681f3Smrg unsigned size = I->dest[d].size == AGX_SIZE_32 ? 2 : 1; 387ec681f3Smrg 397ec681f3Smrg switch (I->op) { 407ec681f3Smrg case AGX_OPCODE_LD_VARY: 417ec681f3Smrg case AGX_OPCODE_DEVICE_LOAD: 427ec681f3Smrg case AGX_OPCODE_TEXTURE_SAMPLE: 437ec681f3Smrg case AGX_OPCODE_LD_TILE: 447ec681f3Smrg return 8; 457ec681f3Smrg case AGX_OPCODE_LD_VARY_FLAT: 467ec681f3Smrg return 6; 477ec681f3Smrg case AGX_OPCODE_P_COMBINE: 487ec681f3Smrg { 497ec681f3Smrg unsigned components = 0; 507ec681f3Smrg 517ec681f3Smrg for (unsigned i = 0; i < 4; ++i) { 527ec681f3Smrg if (!agx_is_null(I->src[i])) 537ec681f3Smrg components = i + 1; 547ec681f3Smrg } 557ec681f3Smrg 567ec681f3Smrg return components * size; 577ec681f3Smrg } 587ec681f3Smrg default: 597ec681f3Smrg return size; 607ec681f3Smrg } 617ec681f3Smrg} 627ec681f3Smrg 637ec681f3Smrgstatic unsigned 647ec681f3Smrgagx_assign_regs(BITSET_WORD *used_regs, unsigned count, unsigned align, unsigned max) 657ec681f3Smrg{ 667ec681f3Smrg for (unsigned reg = 0; reg < max; reg += align) { 677ec681f3Smrg bool conflict = false; 687ec681f3Smrg 697ec681f3Smrg for (unsigned j = 0; j < count; ++j) 707ec681f3Smrg conflict |= BITSET_TEST(used_regs, reg + j); 717ec681f3Smrg 727ec681f3Smrg if (!conflict) { 737ec681f3Smrg for (unsigned j = 0; j < count; ++j) 747ec681f3Smrg BITSET_SET(used_regs, reg + j); 757ec681f3Smrg 767ec681f3Smrg return reg; 777ec681f3Smrg } 787ec681f3Smrg } 797ec681f3Smrg 807ec681f3Smrg /* Couldn't find a free register, dump the state of the register file */ 817ec681f3Smrg fprintf(stderr, "Failed to find register of size %u aligned %u max %u.\n", 827ec681f3Smrg count, align, max); 837ec681f3Smrg 847ec681f3Smrg fprintf(stderr, "Register file:\n"); 857ec681f3Smrg for (unsigned i = 0; i < BITSET_WORDS(max); ++i) 867ec681f3Smrg fprintf(stderr, " %08X\n", used_regs[i]); 877ec681f3Smrg 887ec681f3Smrg unreachable("Could not find a free register"); 897ec681f3Smrg} 907ec681f3Smrg 917ec681f3Smrg/** Assign registers to SSA values in a block. */ 927ec681f3Smrg 937ec681f3Smrgstatic void 947ec681f3Smrgagx_ra_assign_local(agx_block *block, uint8_t *ssa_to_reg, uint8_t *ncomps, unsigned max_reg) 957ec681f3Smrg{ 967ec681f3Smrg BITSET_DECLARE(used_regs, AGX_NUM_REGS) = { 0 }; 977ec681f3Smrg 987ec681f3Smrg agx_foreach_predecessor(block, pred) { 997ec681f3Smrg for (unsigned i = 0; i < BITSET_WORDS(AGX_NUM_REGS); ++i) 1007ec681f3Smrg used_regs[i] |= pred->regs_out[i]; 1017ec681f3Smrg } 1027ec681f3Smrg 1037ec681f3Smrg BITSET_SET(used_regs, 0); // control flow writes r0l 1047ec681f3Smrg BITSET_SET(used_regs, 5*2); // TODO: precolouring, don't overwrite vertex ID 1057ec681f3Smrg BITSET_SET(used_regs, (5*2 + 1)); 1067ec681f3Smrg BITSET_SET(used_regs, (6*2 + 0)); 1077ec681f3Smrg BITSET_SET(used_regs, (6*2 + 1)); 1087ec681f3Smrg 1097ec681f3Smrg agx_foreach_instr_in_block(block, I) { 1107ec681f3Smrg /* First, free killed sources */ 1117ec681f3Smrg agx_foreach_src(I, s) { 1127ec681f3Smrg if (I->src[s].type == AGX_INDEX_NORMAL && I->src[s].kill) { 1137ec681f3Smrg unsigned reg = ssa_to_reg[I->src[s].value]; 1147ec681f3Smrg unsigned count = ncomps[I->src[s].value]; 1157ec681f3Smrg 1167ec681f3Smrg for (unsigned i = 0; i < count; ++i) 1177ec681f3Smrg BITSET_CLEAR(used_regs, reg + i); 1187ec681f3Smrg } 1197ec681f3Smrg } 1207ec681f3Smrg 1217ec681f3Smrg /* Next, assign destinations. Always legal in SSA form. */ 1227ec681f3Smrg agx_foreach_dest(I, d) { 1237ec681f3Smrg if (I->dest[d].type == AGX_INDEX_NORMAL) { 1247ec681f3Smrg unsigned count = agx_write_registers(I, d); 1257ec681f3Smrg unsigned align = (I->dest[d].size == AGX_SIZE_16) ? 1 : 2; 1267ec681f3Smrg unsigned reg = agx_assign_regs(used_regs, count, align, max_reg); 1277ec681f3Smrg 1287ec681f3Smrg ssa_to_reg[I->dest[d].value] = reg; 1297ec681f3Smrg } 1307ec681f3Smrg } 1317ec681f3Smrg } 1327ec681f3Smrg 1337ec681f3Smrg STATIC_ASSERT(sizeof(block->regs_out) == sizeof(used_regs)); 1347ec681f3Smrg memcpy(block->regs_out, used_regs, sizeof(used_regs)); 1357ec681f3Smrg} 1367ec681f3Smrg 1377ec681f3Smrgvoid 1387ec681f3Smrgagx_ra(agx_context *ctx) 1397ec681f3Smrg{ 1407ec681f3Smrg unsigned *alloc = calloc(ctx->alloc, sizeof(unsigned)); 1417ec681f3Smrg 1427ec681f3Smrg agx_compute_liveness(ctx); 1437ec681f3Smrg uint8_t *ssa_to_reg = calloc(ctx->alloc, sizeof(uint8_t)); 1447ec681f3Smrg uint8_t *ncomps = calloc(ctx->alloc, sizeof(uint8_t)); 1457ec681f3Smrg 1467ec681f3Smrg agx_foreach_instr_global(ctx, I) { 1477ec681f3Smrg agx_foreach_dest(I, d) { 1487ec681f3Smrg if (I->dest[d].type != AGX_INDEX_NORMAL) continue; 1497ec681f3Smrg 1507ec681f3Smrg unsigned v = I->dest[d].value; 1517ec681f3Smrg assert(ncomps[v] == 0 && "broken SSA"); 1527ec681f3Smrg ncomps[v] = agx_write_registers(I, d); 1537ec681f3Smrg } 1547ec681f3Smrg } 1557ec681f3Smrg 1567ec681f3Smrg agx_foreach_block(ctx, block) 1577ec681f3Smrg agx_ra_assign_local(block, ssa_to_reg, ncomps, ctx->max_register); 1587ec681f3Smrg 1597ec681f3Smrg /* TODO: Coalesce combines */ 1607ec681f3Smrg 1617ec681f3Smrg agx_foreach_instr_global_safe(ctx, ins) { 1627ec681f3Smrg /* Lower away RA pseudo-instructions */ 1637ec681f3Smrg if (ins->op == AGX_OPCODE_P_COMBINE) { 1647ec681f3Smrg /* TODO: Optimize out the moves! */ 1657ec681f3Smrg assert(ins->dest[0].type == AGX_INDEX_NORMAL); 1667ec681f3Smrg enum agx_size common_size = ins->dest[0].size; 1677ec681f3Smrg unsigned base = ssa_to_reg[ins->dest[0].value]; 1687ec681f3Smrg unsigned size = common_size == AGX_SIZE_32 ? 2 : 1; 1697ec681f3Smrg 1707ec681f3Smrg /* Move the sources */ 1717ec681f3Smrg agx_builder b = agx_init_builder(ctx, agx_after_instr(ins)); 1727ec681f3Smrg 1737ec681f3Smrg /* TODO: Eliminate the intermediate copy by handling parallel copies */ 1747ec681f3Smrg for (unsigned i = 0; i < 4; ++i) { 1757ec681f3Smrg if (agx_is_null(ins->src[i])) continue; 1767ec681f3Smrg unsigned base = ins->src[i].value; 1777ec681f3Smrg if (ins->src[i].type == AGX_INDEX_NORMAL) 1787ec681f3Smrg base = ssa_to_reg[base]; 1797ec681f3Smrg else 1807ec681f3Smrg assert(ins->src[i].type == AGX_INDEX_REGISTER); 1817ec681f3Smrg 1827ec681f3Smrg assert(ins->src[i].size == common_size); 1837ec681f3Smrg 1847ec681f3Smrg agx_mov_to(&b, agx_register(124*2 + (i * size), common_size), 1857ec681f3Smrg agx_register(base, common_size)); 1867ec681f3Smrg } 1877ec681f3Smrg 1887ec681f3Smrg for (unsigned i = 0; i < 4; ++i) { 1897ec681f3Smrg if (agx_is_null(ins->src[i])) continue; 1907ec681f3Smrg agx_index src = ins->src[i]; 1917ec681f3Smrg 1927ec681f3Smrg if (src.type == AGX_INDEX_NORMAL) 1937ec681f3Smrg src = agx_register(alloc[src.value], src.size); 1947ec681f3Smrg 1957ec681f3Smrg agx_mov_to(&b, agx_register(base + (i * size), common_size), 1967ec681f3Smrg agx_register(124*2 + (i * size), common_size)); 1977ec681f3Smrg } 1987ec681f3Smrg 1997ec681f3Smrg /* We've lowered away, delete the old */ 2007ec681f3Smrg agx_remove_instruction(ins); 2017ec681f3Smrg continue; 2027ec681f3Smrg } else if (ins->op == AGX_OPCODE_P_EXTRACT) { 2037ec681f3Smrg /* Uses the destination size */ 2047ec681f3Smrg assert(ins->dest[0].type == AGX_INDEX_NORMAL); 2057ec681f3Smrg unsigned base = ins->src[0].value; 2067ec681f3Smrg 2077ec681f3Smrg if (ins->src[0].type != AGX_INDEX_REGISTER) { 2087ec681f3Smrg assert(ins->src[0].type == AGX_INDEX_NORMAL); 2097ec681f3Smrg base = alloc[base]; 2107ec681f3Smrg } 2117ec681f3Smrg 2127ec681f3Smrg unsigned size = ins->dest[0].size == AGX_SIZE_64 ? 4 : ins->dest[0].size == AGX_SIZE_32 ? 2 : 1; 2137ec681f3Smrg unsigned left = ssa_to_reg[ins->dest[0].value]; 2147ec681f3Smrg unsigned right = ssa_to_reg[ins->src[0].value] + (size * ins->imm); 2157ec681f3Smrg 2167ec681f3Smrg if (left != right) { 2177ec681f3Smrg agx_builder b = agx_init_builder(ctx, agx_after_instr(ins)); 2187ec681f3Smrg agx_mov_to(&b, agx_register(left, ins->dest[0].size), 2197ec681f3Smrg agx_register(right, ins->src[0].size)); 2207ec681f3Smrg } 2217ec681f3Smrg 2227ec681f3Smrg agx_remove_instruction(ins); 2237ec681f3Smrg continue; 2247ec681f3Smrg } 2257ec681f3Smrg 2267ec681f3Smrg agx_foreach_src(ins, s) { 2277ec681f3Smrg if (ins->src[s].type == AGX_INDEX_NORMAL) { 2287ec681f3Smrg unsigned v = ssa_to_reg[ins->src[s].value]; 2297ec681f3Smrg ins->src[s] = agx_replace_index(ins->src[s], agx_register(v, ins->src[s].size)); 2307ec681f3Smrg } 2317ec681f3Smrg } 2327ec681f3Smrg 2337ec681f3Smrg agx_foreach_dest(ins, d) { 2347ec681f3Smrg if (ins->dest[d].type == AGX_INDEX_NORMAL) { 2357ec681f3Smrg unsigned v = ssa_to_reg[ins->dest[d].value]; 2367ec681f3Smrg ins->dest[d] = agx_replace_index(ins->dest[d], agx_register(v, ins->dest[d].size)); 2377ec681f3Smrg } 2387ec681f3Smrg } 2397ec681f3Smrg } 2407ec681f3Smrg 2417ec681f3Smrg free(ssa_to_reg); 2427ec681f3Smrg free(ncomps); 2437ec681f3Smrg free(alloc); 2447ec681f3Smrg} 245