/*
 * Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "agx_compiler.h"
#include "agx_builder.h"

/* Trivial register allocator: a single forward pass over each block, giving
 * each SSA destination the first free aligned span of registers and freeing a
 * value's registers at its last use. No coalescing, no spilling.
 *
 * TODO: Write a real register allocator.
 * TODO: Handle phi nodes.
 */

/** Returns the number of 16-bit register halves written to destination d of
 * an instruction */
static unsigned
agx_write_registers(agx_instr *I, unsigned d)
{
   unsigned size = I->dest[d].size == AGX_SIZE_32 ? 2 : 1;

   switch (I->op) {
   case AGX_OPCODE_LD_VARY:
   case AGX_OPCODE_DEVICE_LOAD:
   case AGX_OPCODE_TEXTURE_SAMPLE:
   case AGX_OPCODE_LD_TILE:
      return 8;
   case AGX_OPCODE_LD_VARY_FLAT:
      return 6;
   case AGX_OPCODE_P_COMBINE:
   {
      unsigned components = 0;

      for (unsigned i = 0; i < 4; ++i) {
         if (!agx_is_null(I->src[i]))
            components = i + 1;
      }

      return components * size;
   }
   default:
      return size;
   }
}

/** Finds and claims the first free, suitably aligned span of count register
 * halves */
static unsigned
agx_assign_regs(BITSET_WORD *used_regs, unsigned count, unsigned align, unsigned max)
{
   for (unsigned reg = 0; reg + count <= max; reg += align) {
      bool conflict = false;

      for (unsigned j = 0; j < count; ++j)
         conflict |= BITSET_TEST(used_regs, reg + j);

      if (!conflict) {
         for (unsigned j = 0; j < count; ++j)
            BITSET_SET(used_regs, reg + j);

         return reg;
      }
   }

   /* Couldn't find a free register, dump the state of the register file */
   fprintf(stderr, "Failed to find register of size %u aligned %u max %u.\n",
           count, align, max);

   fprintf(stderr, "Register file:\n");
   for (unsigned i = 0; i < BITSET_WORDS(max); ++i)
      fprintf(stderr, " %08X\n", used_regs[i]);

   unreachable("Could not find a free register");
}
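
/* Example of the first-fit scan above: registers are tracked in 16-bit
 * halves. With r0l (bit 0) in use, a 32-bit request (count = 2, align = 2)
 * conflicts at halfword 0 and skips to halfword 2, returning r1. A later
 * 16-bit request (count = 1, align = 1) returns halfword 1 (r0h),
 * backfilling the hole left by the alignment constraint.
 */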

/** Assigns registers to SSA values in a block. */
static void
agx_ra_assign_local(agx_block *block, uint8_t *ssa_to_reg, uint8_t *ncomps, unsigned max_reg)
{
   BITSET_DECLARE(used_regs, AGX_NUM_REGS) = { 0 };

   agx_foreach_predecessor(block, pred) {
      for (unsigned i = 0; i < BITSET_WORDS(AGX_NUM_REGS); ++i)
         used_regs[i] |= pred->regs_out[i];
   }

   BITSET_SET(used_regs, 0); /* control flow writes r0l */
   BITSET_SET(used_regs, 5*2); /* TODO: precolouring, don't overwrite vertex ID */
   BITSET_SET(used_regs, (5*2 + 1));
   BITSET_SET(used_regs, (6*2 + 0));
   BITSET_SET(used_regs, (6*2 + 1));

   agx_foreach_instr_in_block(block, I) {
      /* First, free killed sources */
      agx_foreach_src(I, s) {
         if (I->src[s].type == AGX_INDEX_NORMAL && I->src[s].kill) {
            unsigned reg = ssa_to_reg[I->src[s].value];
            unsigned count = ncomps[I->src[s].value];

            for (unsigned i = 0; i < count; ++i)
               BITSET_CLEAR(used_regs, reg + i);
         }
      }

      /* Next, assign destinations. Always legal in SSA form. */
      agx_foreach_dest(I, d) {
         if (I->dest[d].type == AGX_INDEX_NORMAL) {
            unsigned count = agx_write_registers(I, d);
            unsigned align = (I->dest[d].size == AGX_SIZE_16) ? 1 : 2;
            unsigned reg = agx_assign_regs(used_regs, count, align, max_reg);

            ssa_to_reg[I->dest[d].value] = reg;
         }
      }
   }

   STATIC_ASSERT(sizeof(block->regs_out) == sizeof(used_regs));
   memcpy(block->regs_out, used_regs, sizeof(used_regs));
}

void
agx_ra(agx_context *ctx)
{
   agx_compute_liveness(ctx);
   uint8_t *ssa_to_reg = calloc(ctx->alloc, sizeof(uint8_t));
   uint8_t *ncomps = calloc(ctx->alloc, sizeof(uint8_t));

   /* Record how many register halves each SSA value occupies, so killed
    * sources can be freed in full during local assignment */
   agx_foreach_instr_global(ctx, I) {
      agx_foreach_dest(I, d) {
         if (I->dest[d].type != AGX_INDEX_NORMAL) continue;

         unsigned v = I->dest[d].value;
         assert(ncomps[v] == 0 && "broken SSA");
         ncomps[v] = agx_write_registers(I, d);
      }
   }

   agx_foreach_block(ctx, block)
      agx_ra_assign_local(block, ssa_to_reg, ncomps, ctx->max_register);

   /* TODO: Coalesce combines */

   agx_foreach_instr_global_safe(ctx, ins) {
      /* Lower away RA pseudo-instructions */
      if (ins->op == AGX_OPCODE_P_COMBINE) {
         /* TODO: Optimize out the moves! */
         assert(ins->dest[0].type == AGX_INDEX_NORMAL);
         enum agx_size common_size = ins->dest[0].size;
         unsigned base = ssa_to_reg[ins->dest[0].value];
         unsigned size = (common_size == AGX_SIZE_32) ? 2 : 1;
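
         /* Stage every source through the high scratch registers (halfword
          * index 124*2) before writing the destination, so a source that
          * overlaps the destination range is not clobbered before it is
          * read. This costs an extra mov per component; a real parallel-copy
          * lowering would avoid it (see TODO below).
          */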
         /* Move the sources */
         agx_builder b = agx_init_builder(ctx, agx_after_instr(ins));

         /* TODO: Eliminate the intermediate copy by handling parallel copies */
         for (unsigned i = 0; i < 4; ++i) {
            if (agx_is_null(ins->src[i])) continue;
            unsigned src_reg = ins->src[i].value;
            if (ins->src[i].type == AGX_INDEX_NORMAL)
               src_reg = ssa_to_reg[src_reg];
            else
               assert(ins->src[i].type == AGX_INDEX_REGISTER);

            assert(ins->src[i].size == common_size);

            agx_mov_to(&b, agx_register(124*2 + (i * size), common_size),
                       agx_register(src_reg, common_size));
         }

         /* Then move the staged copies into place at the destination */
         for (unsigned i = 0; i < 4; ++i) {
            if (agx_is_null(ins->src[i])) continue;

            agx_mov_to(&b, agx_register(base + (i * size), common_size),
                       agx_register(124*2 + (i * size), common_size));
         }

         /* We've lowered away, delete the old */
         agx_remove_instruction(ins);
         continue;
      } else if (ins->op == AGX_OPCODE_P_EXTRACT) {
         /* Uses the destination size */
         assert(ins->dest[0].type == AGX_INDEX_NORMAL);
         unsigned base = ins->src[0].value;

         if (ins->src[0].type != AGX_INDEX_REGISTER) {
            assert(ins->src[0].type == AGX_INDEX_NORMAL);
            base = ssa_to_reg[base];
         }

         unsigned size = ins->dest[0].size == AGX_SIZE_64 ? 4 :
                         ins->dest[0].size == AGX_SIZE_32 ? 2 : 1;
         unsigned left = ssa_to_reg[ins->dest[0].value];
         unsigned right = base + (size * ins->imm);

         if (left != right) {
            agx_builder b = agx_init_builder(ctx, agx_after_instr(ins));
            agx_mov_to(&b, agx_register(left, ins->dest[0].size),
                       agx_register(right, ins->src[0].size));
         }

         agx_remove_instruction(ins);
         continue;
      }

      agx_foreach_src(ins, s) {
         if (ins->src[s].type == AGX_INDEX_NORMAL) {
            unsigned v = ssa_to_reg[ins->src[s].value];
            ins->src[s] = agx_replace_index(ins->src[s], agx_register(v, ins->src[s].size));
         }
      }

      agx_foreach_dest(ins, d) {
         if (ins->dest[d].type == AGX_INDEX_NORMAL) {
            unsigned v = ssa_to_reg[ins->dest[d].value];
            ins->dest[d] = agx_replace_index(ins->dest[d], agx_register(v, ins->dest[d].size));
         }
      }
   }

   free(ssa_to_reg);
   free(ncomps);
}