1/*
2 * Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24#include "agx_compiler.h"
25#include "agx_builder.h"
26
27/* Trivial register allocator that never frees anything.
28 *
29 * TODO: Write a real register allocator.
30 * TODO: Handle phi nodes.
31 */
32
33/** Returns number of registers written by an instruction */
34static unsigned
35agx_write_registers(agx_instr *I, unsigned d)
36{
37   unsigned size = I->dest[d].size == AGX_SIZE_32 ? 2 : 1;
38
39   switch (I->op) {
40   case AGX_OPCODE_LD_VARY:
41   case AGX_OPCODE_DEVICE_LOAD:
42   case AGX_OPCODE_TEXTURE_SAMPLE:
43   case AGX_OPCODE_LD_TILE:
44      return 8;
45   case AGX_OPCODE_LD_VARY_FLAT:
46      return 6;
47   case AGX_OPCODE_P_COMBINE:
48   {
49      unsigned components = 0;
50
51      for (unsigned i = 0; i < 4; ++i) {
52         if (!agx_is_null(I->src[i]))
53            components = i + 1;
54      }
55
56      return components * size;
57   }
58   default:
59      return size;
60   }
61}
62
63static unsigned
64agx_assign_regs(BITSET_WORD *used_regs, unsigned count, unsigned align, unsigned max)
65{
66   for (unsigned reg = 0; reg < max; reg += align) {
67      bool conflict = false;
68
69      for (unsigned j = 0; j < count; ++j)
70         conflict |= BITSET_TEST(used_regs, reg + j);
71
72      if (!conflict) {
73         for (unsigned j = 0; j < count; ++j)
74            BITSET_SET(used_regs, reg + j);
75
76         return reg;
77      }
78   }
79
80   /* Couldn't find a free register, dump the state of the register file */
81   fprintf(stderr, "Failed to find register of size %u aligned %u max %u.\n",
82           count, align, max);
83
84   fprintf(stderr, "Register file:\n");
85   for (unsigned i = 0; i < BITSET_WORDS(max); ++i)
86      fprintf(stderr, "    %08X\n", used_regs[i]);
87
88   unreachable("Could not find a free register");
89}
90
91/** Assign registers to SSA values in a block. */
92
93static void
94agx_ra_assign_local(agx_block *block, uint8_t *ssa_to_reg, uint8_t *ncomps, unsigned max_reg)
95{
96   BITSET_DECLARE(used_regs, AGX_NUM_REGS) = { 0 };
97
98   agx_foreach_predecessor(block, pred) {
99      for (unsigned i = 0; i < BITSET_WORDS(AGX_NUM_REGS); ++i)
100         used_regs[i] |= pred->regs_out[i];
101   }
102
103   BITSET_SET(used_regs, 0); // control flow writes r0l
104   BITSET_SET(used_regs, 5*2); // TODO: precolouring, don't overwrite vertex ID
105   BITSET_SET(used_regs, (5*2 + 1));
106   BITSET_SET(used_regs, (6*2 + 0));
107   BITSET_SET(used_regs, (6*2 + 1));
108
109   agx_foreach_instr_in_block(block, I) {
110      /* First, free killed sources */
111      agx_foreach_src(I, s) {
112         if (I->src[s].type == AGX_INDEX_NORMAL && I->src[s].kill) {
113            unsigned reg = ssa_to_reg[I->src[s].value];
114            unsigned count = ncomps[I->src[s].value];
115
116            for (unsigned i = 0; i < count; ++i)
117               BITSET_CLEAR(used_regs, reg + i);
118         }
119      }
120
121      /* Next, assign destinations. Always legal in SSA form. */
122      agx_foreach_dest(I, d) {
123         if (I->dest[d].type == AGX_INDEX_NORMAL) {
124            unsigned count = agx_write_registers(I, d);
125            unsigned align = (I->dest[d].size == AGX_SIZE_16) ? 1 : 2;
126            unsigned reg = agx_assign_regs(used_regs, count, align, max_reg);
127
128            ssa_to_reg[I->dest[d].value] = reg;
129         }
130      }
131   }
132
133   STATIC_ASSERT(sizeof(block->regs_out) == sizeof(used_regs));
134   memcpy(block->regs_out, used_regs, sizeof(used_regs));
135}
136
137void
138agx_ra(agx_context *ctx)
139{
140   unsigned *alloc = calloc(ctx->alloc, sizeof(unsigned));
141
142   agx_compute_liveness(ctx);
143   uint8_t *ssa_to_reg = calloc(ctx->alloc, sizeof(uint8_t));
144   uint8_t *ncomps = calloc(ctx->alloc, sizeof(uint8_t));
145
146   agx_foreach_instr_global(ctx, I) {
147      agx_foreach_dest(I, d) {
148         if (I->dest[d].type != AGX_INDEX_NORMAL) continue;
149
150         unsigned v = I->dest[d].value;
151         assert(ncomps[v] == 0 && "broken SSA");
152         ncomps[v] = agx_write_registers(I, d);
153      }
154   }
155
156   agx_foreach_block(ctx, block)
157      agx_ra_assign_local(block, ssa_to_reg, ncomps, ctx->max_register);
158
159   /* TODO: Coalesce combines */
160
161   agx_foreach_instr_global_safe(ctx, ins) {
162      /* Lower away RA pseudo-instructions */
163      if (ins->op == AGX_OPCODE_P_COMBINE) {
164         /* TODO: Optimize out the moves! */
165         assert(ins->dest[0].type == AGX_INDEX_NORMAL);
166         enum agx_size common_size = ins->dest[0].size;
167         unsigned base = ssa_to_reg[ins->dest[0].value];
168         unsigned size = common_size == AGX_SIZE_32 ? 2 : 1;
169
170         /* Move the sources */
171         agx_builder b = agx_init_builder(ctx, agx_after_instr(ins));
172
173         /* TODO: Eliminate the intermediate copy by handling parallel copies */
174         for (unsigned i = 0; i < 4; ++i) {
175            if (agx_is_null(ins->src[i])) continue;
176            unsigned base = ins->src[i].value;
177            if (ins->src[i].type == AGX_INDEX_NORMAL)
178               base = ssa_to_reg[base];
179            else
180               assert(ins->src[i].type == AGX_INDEX_REGISTER);
181
182            assert(ins->src[i].size == common_size);
183
184            agx_mov_to(&b, agx_register(124*2 + (i * size), common_size),
185                  agx_register(base, common_size));
186         }
187
188         for (unsigned i = 0; i < 4; ++i) {
189            if (agx_is_null(ins->src[i])) continue;
190            agx_index src = ins->src[i];
191
192            if (src.type == AGX_INDEX_NORMAL)
193               src = agx_register(alloc[src.value], src.size);
194
195            agx_mov_to(&b, agx_register(base + (i * size), common_size),
196                  agx_register(124*2 + (i * size), common_size));
197         }
198
199         /* We've lowered away, delete the old */
200         agx_remove_instruction(ins);
201         continue;
202      } else if (ins->op == AGX_OPCODE_P_EXTRACT) {
203         /* Uses the destination size */
204         assert(ins->dest[0].type == AGX_INDEX_NORMAL);
205         unsigned base = ins->src[0].value;
206
207         if (ins->src[0].type != AGX_INDEX_REGISTER) {
208            assert(ins->src[0].type == AGX_INDEX_NORMAL);
209            base = alloc[base];
210         }
211
212         unsigned size = ins->dest[0].size == AGX_SIZE_64 ? 4 : ins->dest[0].size == AGX_SIZE_32 ? 2 : 1;
213         unsigned left = ssa_to_reg[ins->dest[0].value];
214         unsigned right = ssa_to_reg[ins->src[0].value] + (size * ins->imm);
215
216         if (left != right) {
217            agx_builder b = agx_init_builder(ctx, agx_after_instr(ins));
218            agx_mov_to(&b, agx_register(left, ins->dest[0].size),
219                  agx_register(right, ins->src[0].size));
220         }
221
222         agx_remove_instruction(ins);
223         continue;
224      }
225
226      agx_foreach_src(ins, s) {
227         if (ins->src[s].type == AGX_INDEX_NORMAL) {
228            unsigned v = ssa_to_reg[ins->src[s].value];
229            ins->src[s] = agx_replace_index(ins->src[s], agx_register(v, ins->src[s].size));
230         }
231      }
232
233      agx_foreach_dest(ins, d) {
234         if (ins->dest[d].type == AGX_INDEX_NORMAL) {
235            unsigned v = ssa_to_reg[ins->dest[d].value];
236            ins->dest[d] = agx_replace_index(ins->dest[d], agx_register(v, ins->dest[d].size));
237         }
238      }
239   }
240
241   free(ssa_to_reg);
242   free(ncomps);
243   free(alloc);
244}
245