17ec681f3Smrg/*
27ec681f3Smrg * Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
37ec681f3Smrg *
47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg * to deal in the Software without restriction, including without limitation
77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg *
117ec681f3Smrg * The above copyright notice and this permission notice (including the next
127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
137ec681f3Smrg * Software.
147ec681f3Smrg *
157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
207ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
217ec681f3Smrg * SOFTWARE.
227ec681f3Smrg */
237ec681f3Smrg
247ec681f3Smrg#include "agx_compiler.h"
257ec681f3Smrg#include "agx_builder.h"
267ec681f3Smrg
277ec681f3Smrg/* Trivial register allocator that never frees anything.
287ec681f3Smrg *
297ec681f3Smrg * TODO: Write a real register allocator.
307ec681f3Smrg * TODO: Handle phi nodes.
317ec681f3Smrg */
327ec681f3Smrg
337ec681f3Smrg/** Returns number of registers written by an instruction */
347ec681f3Smrgstatic unsigned
357ec681f3Smrgagx_write_registers(agx_instr *I, unsigned d)
367ec681f3Smrg{
377ec681f3Smrg   unsigned size = I->dest[d].size == AGX_SIZE_32 ? 2 : 1;
387ec681f3Smrg
397ec681f3Smrg   switch (I->op) {
407ec681f3Smrg   case AGX_OPCODE_LD_VARY:
417ec681f3Smrg   case AGX_OPCODE_DEVICE_LOAD:
427ec681f3Smrg   case AGX_OPCODE_TEXTURE_SAMPLE:
437ec681f3Smrg   case AGX_OPCODE_LD_TILE:
447ec681f3Smrg      return 8;
457ec681f3Smrg   case AGX_OPCODE_LD_VARY_FLAT:
467ec681f3Smrg      return 6;
477ec681f3Smrg   case AGX_OPCODE_P_COMBINE:
487ec681f3Smrg   {
497ec681f3Smrg      unsigned components = 0;
507ec681f3Smrg
517ec681f3Smrg      for (unsigned i = 0; i < 4; ++i) {
527ec681f3Smrg         if (!agx_is_null(I->src[i]))
537ec681f3Smrg            components = i + 1;
547ec681f3Smrg      }
557ec681f3Smrg
567ec681f3Smrg      return components * size;
577ec681f3Smrg   }
587ec681f3Smrg   default:
597ec681f3Smrg      return size;
607ec681f3Smrg   }
617ec681f3Smrg}
627ec681f3Smrg
637ec681f3Smrgstatic unsigned
647ec681f3Smrgagx_assign_regs(BITSET_WORD *used_regs, unsigned count, unsigned align, unsigned max)
657ec681f3Smrg{
667ec681f3Smrg   for (unsigned reg = 0; reg < max; reg += align) {
677ec681f3Smrg      bool conflict = false;
687ec681f3Smrg
697ec681f3Smrg      for (unsigned j = 0; j < count; ++j)
707ec681f3Smrg         conflict |= BITSET_TEST(used_regs, reg + j);
717ec681f3Smrg
727ec681f3Smrg      if (!conflict) {
737ec681f3Smrg         for (unsigned j = 0; j < count; ++j)
747ec681f3Smrg            BITSET_SET(used_regs, reg + j);
757ec681f3Smrg
767ec681f3Smrg         return reg;
777ec681f3Smrg      }
787ec681f3Smrg   }
797ec681f3Smrg
807ec681f3Smrg   /* Couldn't find a free register, dump the state of the register file */
817ec681f3Smrg   fprintf(stderr, "Failed to find register of size %u aligned %u max %u.\n",
827ec681f3Smrg           count, align, max);
837ec681f3Smrg
847ec681f3Smrg   fprintf(stderr, "Register file:\n");
857ec681f3Smrg   for (unsigned i = 0; i < BITSET_WORDS(max); ++i)
867ec681f3Smrg      fprintf(stderr, "    %08X\n", used_regs[i]);
877ec681f3Smrg
887ec681f3Smrg   unreachable("Could not find a free register");
897ec681f3Smrg}
907ec681f3Smrg
917ec681f3Smrg/** Assign registers to SSA values in a block. */
927ec681f3Smrg
937ec681f3Smrgstatic void
947ec681f3Smrgagx_ra_assign_local(agx_block *block, uint8_t *ssa_to_reg, uint8_t *ncomps, unsigned max_reg)
957ec681f3Smrg{
967ec681f3Smrg   BITSET_DECLARE(used_regs, AGX_NUM_REGS) = { 0 };
977ec681f3Smrg
987ec681f3Smrg   agx_foreach_predecessor(block, pred) {
997ec681f3Smrg      for (unsigned i = 0; i < BITSET_WORDS(AGX_NUM_REGS); ++i)
1007ec681f3Smrg         used_regs[i] |= pred->regs_out[i];
1017ec681f3Smrg   }
1027ec681f3Smrg
1037ec681f3Smrg   BITSET_SET(used_regs, 0); // control flow writes r0l
1047ec681f3Smrg   BITSET_SET(used_regs, 5*2); // TODO: precolouring, don't overwrite vertex ID
1057ec681f3Smrg   BITSET_SET(used_regs, (5*2 + 1));
1067ec681f3Smrg   BITSET_SET(used_regs, (6*2 + 0));
1077ec681f3Smrg   BITSET_SET(used_regs, (6*2 + 1));
1087ec681f3Smrg
1097ec681f3Smrg   agx_foreach_instr_in_block(block, I) {
1107ec681f3Smrg      /* First, free killed sources */
1117ec681f3Smrg      agx_foreach_src(I, s) {
1127ec681f3Smrg         if (I->src[s].type == AGX_INDEX_NORMAL && I->src[s].kill) {
1137ec681f3Smrg            unsigned reg = ssa_to_reg[I->src[s].value];
1147ec681f3Smrg            unsigned count = ncomps[I->src[s].value];
1157ec681f3Smrg
1167ec681f3Smrg            for (unsigned i = 0; i < count; ++i)
1177ec681f3Smrg               BITSET_CLEAR(used_regs, reg + i);
1187ec681f3Smrg         }
1197ec681f3Smrg      }
1207ec681f3Smrg
1217ec681f3Smrg      /* Next, assign destinations. Always legal in SSA form. */
1227ec681f3Smrg      agx_foreach_dest(I, d) {
1237ec681f3Smrg         if (I->dest[d].type == AGX_INDEX_NORMAL) {
1247ec681f3Smrg            unsigned count = agx_write_registers(I, d);
1257ec681f3Smrg            unsigned align = (I->dest[d].size == AGX_SIZE_16) ? 1 : 2;
1267ec681f3Smrg            unsigned reg = agx_assign_regs(used_regs, count, align, max_reg);
1277ec681f3Smrg
1287ec681f3Smrg            ssa_to_reg[I->dest[d].value] = reg;
1297ec681f3Smrg         }
1307ec681f3Smrg      }
1317ec681f3Smrg   }
1327ec681f3Smrg
1337ec681f3Smrg   STATIC_ASSERT(sizeof(block->regs_out) == sizeof(used_regs));
1347ec681f3Smrg   memcpy(block->regs_out, used_regs, sizeof(used_regs));
1357ec681f3Smrg}
1367ec681f3Smrg
1377ec681f3Smrgvoid
1387ec681f3Smrgagx_ra(agx_context *ctx)
1397ec681f3Smrg{
1407ec681f3Smrg   unsigned *alloc = calloc(ctx->alloc, sizeof(unsigned));
1417ec681f3Smrg
1427ec681f3Smrg   agx_compute_liveness(ctx);
1437ec681f3Smrg   uint8_t *ssa_to_reg = calloc(ctx->alloc, sizeof(uint8_t));
1447ec681f3Smrg   uint8_t *ncomps = calloc(ctx->alloc, sizeof(uint8_t));
1457ec681f3Smrg
1467ec681f3Smrg   agx_foreach_instr_global(ctx, I) {
1477ec681f3Smrg      agx_foreach_dest(I, d) {
1487ec681f3Smrg         if (I->dest[d].type != AGX_INDEX_NORMAL) continue;
1497ec681f3Smrg
1507ec681f3Smrg         unsigned v = I->dest[d].value;
1517ec681f3Smrg         assert(ncomps[v] == 0 && "broken SSA");
1527ec681f3Smrg         ncomps[v] = agx_write_registers(I, d);
1537ec681f3Smrg      }
1547ec681f3Smrg   }
1557ec681f3Smrg
1567ec681f3Smrg   agx_foreach_block(ctx, block)
1577ec681f3Smrg      agx_ra_assign_local(block, ssa_to_reg, ncomps, ctx->max_register);
1587ec681f3Smrg
1597ec681f3Smrg   /* TODO: Coalesce combines */
1607ec681f3Smrg
1617ec681f3Smrg   agx_foreach_instr_global_safe(ctx, ins) {
1627ec681f3Smrg      /* Lower away RA pseudo-instructions */
1637ec681f3Smrg      if (ins->op == AGX_OPCODE_P_COMBINE) {
1647ec681f3Smrg         /* TODO: Optimize out the moves! */
1657ec681f3Smrg         assert(ins->dest[0].type == AGX_INDEX_NORMAL);
1667ec681f3Smrg         enum agx_size common_size = ins->dest[0].size;
1677ec681f3Smrg         unsigned base = ssa_to_reg[ins->dest[0].value];
1687ec681f3Smrg         unsigned size = common_size == AGX_SIZE_32 ? 2 : 1;
1697ec681f3Smrg
1707ec681f3Smrg         /* Move the sources */
1717ec681f3Smrg         agx_builder b = agx_init_builder(ctx, agx_after_instr(ins));
1727ec681f3Smrg
1737ec681f3Smrg         /* TODO: Eliminate the intermediate copy by handling parallel copies */
1747ec681f3Smrg         for (unsigned i = 0; i < 4; ++i) {
1757ec681f3Smrg            if (agx_is_null(ins->src[i])) continue;
1767ec681f3Smrg            unsigned base = ins->src[i].value;
1777ec681f3Smrg            if (ins->src[i].type == AGX_INDEX_NORMAL)
1787ec681f3Smrg               base = ssa_to_reg[base];
1797ec681f3Smrg            else
1807ec681f3Smrg               assert(ins->src[i].type == AGX_INDEX_REGISTER);
1817ec681f3Smrg
1827ec681f3Smrg            assert(ins->src[i].size == common_size);
1837ec681f3Smrg
1847ec681f3Smrg            agx_mov_to(&b, agx_register(124*2 + (i * size), common_size),
1857ec681f3Smrg                  agx_register(base, common_size));
1867ec681f3Smrg         }
1877ec681f3Smrg
1887ec681f3Smrg         for (unsigned i = 0; i < 4; ++i) {
1897ec681f3Smrg            if (agx_is_null(ins->src[i])) continue;
1907ec681f3Smrg            agx_index src = ins->src[i];
1917ec681f3Smrg
1927ec681f3Smrg            if (src.type == AGX_INDEX_NORMAL)
1937ec681f3Smrg               src = agx_register(alloc[src.value], src.size);
1947ec681f3Smrg
1957ec681f3Smrg            agx_mov_to(&b, agx_register(base + (i * size), common_size),
1967ec681f3Smrg                  agx_register(124*2 + (i * size), common_size));
1977ec681f3Smrg         }
1987ec681f3Smrg
1997ec681f3Smrg         /* We've lowered away, delete the old */
2007ec681f3Smrg         agx_remove_instruction(ins);
2017ec681f3Smrg         continue;
2027ec681f3Smrg      } else if (ins->op == AGX_OPCODE_P_EXTRACT) {
2037ec681f3Smrg         /* Uses the destination size */
2047ec681f3Smrg         assert(ins->dest[0].type == AGX_INDEX_NORMAL);
2057ec681f3Smrg         unsigned base = ins->src[0].value;
2067ec681f3Smrg
2077ec681f3Smrg         if (ins->src[0].type != AGX_INDEX_REGISTER) {
2087ec681f3Smrg            assert(ins->src[0].type == AGX_INDEX_NORMAL);
2097ec681f3Smrg            base = alloc[base];
2107ec681f3Smrg         }
2117ec681f3Smrg
2127ec681f3Smrg         unsigned size = ins->dest[0].size == AGX_SIZE_64 ? 4 : ins->dest[0].size == AGX_SIZE_32 ? 2 : 1;
2137ec681f3Smrg         unsigned left = ssa_to_reg[ins->dest[0].value];
2147ec681f3Smrg         unsigned right = ssa_to_reg[ins->src[0].value] + (size * ins->imm);
2157ec681f3Smrg
2167ec681f3Smrg         if (left != right) {
2177ec681f3Smrg            agx_builder b = agx_init_builder(ctx, agx_after_instr(ins));
2187ec681f3Smrg            agx_mov_to(&b, agx_register(left, ins->dest[0].size),
2197ec681f3Smrg                  agx_register(right, ins->src[0].size));
2207ec681f3Smrg         }
2217ec681f3Smrg
2227ec681f3Smrg         agx_remove_instruction(ins);
2237ec681f3Smrg         continue;
2247ec681f3Smrg      }
2257ec681f3Smrg
2267ec681f3Smrg      agx_foreach_src(ins, s) {
2277ec681f3Smrg         if (ins->src[s].type == AGX_INDEX_NORMAL) {
2287ec681f3Smrg            unsigned v = ssa_to_reg[ins->src[s].value];
2297ec681f3Smrg            ins->src[s] = agx_replace_index(ins->src[s], agx_register(v, ins->src[s].size));
2307ec681f3Smrg         }
2317ec681f3Smrg      }
2327ec681f3Smrg
2337ec681f3Smrg      agx_foreach_dest(ins, d) {
2347ec681f3Smrg         if (ins->dest[d].type == AGX_INDEX_NORMAL) {
2357ec681f3Smrg            unsigned v = ssa_to_reg[ins->dest[d].value];
2367ec681f3Smrg            ins->dest[d] = agx_replace_index(ins->dest[d], agx_register(v, ins->dest[d].size));
2377ec681f3Smrg         }
2387ec681f3Smrg      }
2397ec681f3Smrg   }
2407ec681f3Smrg
2417ec681f3Smrg   free(ssa_to_reg);
2427ec681f3Smrg   free(ncomps);
2437ec681f3Smrg   free(alloc);
2447ec681f3Smrg}
245