1/*
2 * Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24#include "agx_compiler.h"
25
26/* Binary patches needed for branch offsets */
27struct agx_branch_fixup {
28   /* Offset into the binary to patch */
29   off_t offset;
30
31   /* Value to patch with will be block->offset */
32   agx_block *block;
33};
34
35/* Texturing has its own operands */
36static unsigned
37agx_pack_sample_coords(agx_index index, bool *flag)
38{
39   /* TODO: how to encode 16-bit coords? */
40   assert(index.size == AGX_SIZE_32);
41   assert(index.value < 0x100);
42
43   *flag = index.discard;
44   return index.value;
45}
46
47static unsigned
48agx_pack_texture(agx_index index, unsigned *flag)
49{
50   /* TODO: indirection */
51   assert(index.type == AGX_INDEX_IMMEDIATE);
52   *flag = 0;
53   return index.value;
54}
55
56static unsigned
57agx_pack_sampler(agx_index index, bool *flag)
58{
59   /* TODO: indirection */
60   assert(index.type == AGX_INDEX_IMMEDIATE);
61   *flag = 0;
62   return index.value;
63}
64
65static unsigned
66agx_pack_sample_offset(agx_index index, bool *flag)
67{
68   /* TODO: offsets */
69   assert(index.type == AGX_INDEX_NULL);
70   *flag = 0;
71   return 0;
72}
73
74static unsigned
75agx_pack_lod(agx_index index)
76{
77   /* Immediate zero */
78   if (index.type == AGX_INDEX_IMMEDIATE && index.value == 0)
79      return 0;
80
81   /* Otherwise must be a 16-bit float immediate */
82   assert(index.type == AGX_INDEX_REGISTER);
83   assert(index.size == AGX_SIZE_16);
84   assert(index.value < 0x100);
85
86   return index.value;
87}
88
89/* Load/stores have their own operands */
90
91static unsigned
92agx_pack_memory_reg(agx_index index, bool *flag)
93{
94   assert(index.size == AGX_SIZE_16 || index.size == AGX_SIZE_32);
95   assert(index.size == AGX_SIZE_16 || (index.value & 1) == 0);
96   assert(index.value < 0x100);
97
98   *flag = (index.size == AGX_SIZE_32);
99   return index.value;
100}
101
102static unsigned
103agx_pack_memory_base(agx_index index, bool *flag)
104{
105   assert(index.size == AGX_SIZE_64);
106   assert((index.value & 1) == 0);
107
108   if (index.type == AGX_INDEX_UNIFORM) {
109      assert(index.value < 0x200);
110      *flag = 1;
111      return index.value;
112   } else {
113      assert(index.value < 0x100);
114      *flag = 0;
115      return index.value;
116   }
117}
118
119static unsigned
120agx_pack_memory_index(agx_index index, bool *flag)
121{
122   if (index.type == AGX_INDEX_IMMEDIATE) {
123      assert(index.value < 0x10000);
124      *flag = 1;
125
126      return index.value;
127   } else {
128      assert(index.type == AGX_INDEX_REGISTER);
129      assert((index.value & 1) == 0);
130      assert(index.value < 0x100);
131
132      *flag = 0;
133      return index.value;
134   }
135}
136
137/* ALU goes through a common path */
138
139static unsigned
140agx_pack_alu_dst(agx_index dest)
141{
142   assert(dest.type == AGX_INDEX_REGISTER);
143   unsigned reg = dest.value;
144   enum agx_size size = dest.size;
145   assert(reg < 0x100);
146
147   /* RA invariant: alignment of half-reg */
148   if (size >= AGX_SIZE_32)
149      assert((reg & 1) == 0);
150
151   return
152      (dest.cache ? (1 << 0) : 0) |
153      ((size >= AGX_SIZE_32) ? (1 << 1) : 0) |
154      ((size == AGX_SIZE_64) ? (1 << 2) : 0) |
155      ((reg << 2));
156}
157
158static unsigned
159agx_pack_alu_src(agx_index src)
160{
161   unsigned value = src.value;
162   enum agx_size size = src.size;
163
164   if (src.type == AGX_INDEX_IMMEDIATE) {
165      /* Flags 0 for an 8-bit immediate */
166      assert(value < 0x100);
167
168      return
169         (value & BITFIELD_MASK(6)) |
170         ((value >> 6) << 10);
171   } else if (src.type == AGX_INDEX_UNIFORM) {
172      assert(size == AGX_SIZE_16 || size == AGX_SIZE_32);
173      assert(value < 0x200);
174
175      return
176         (value & BITFIELD_MASK(6)) |
177         ((value >> 8) << 6) |
178         ((size == AGX_SIZE_32) ? (1 << 7) : 0) |
179         (0x1 << 8) |
180         (((value >> 6) & BITFIELD_MASK(2)) << 10);
181   } else {
182      assert(src.type == AGX_INDEX_REGISTER);
183      assert(!(src.cache && src.discard));
184
185      unsigned hint = src.discard ? 0x3 : src.cache ? 0x2 : 0x1;
186      unsigned size_flag =
187         (size == AGX_SIZE_64) ? 0x3 :
188         (size == AGX_SIZE_32) ? 0x2 :
189         (size == AGX_SIZE_16) ? 0x0 : 0x0;
190
191      return
192         (value & BITFIELD_MASK(6)) |
193         (hint << 6) |
194         (size_flag << 8) |
195         (((value >> 6) & BITFIELD_MASK(2)) << 10);
196   }
197}
198
199static unsigned
200agx_pack_cmpsel_src(agx_index src, enum agx_size dest_size)
201{
202   unsigned value = src.value;
203   ASSERTED enum agx_size size = src.size;
204
205   if (src.type == AGX_INDEX_IMMEDIATE) {
206      /* Flags 0x4 for an 8-bit immediate */
207      assert(value < 0x100);
208
209      return
210         (value & BITFIELD_MASK(6)) |
211         (0x4 << 6) |
212         ((value >> 6) << 10);
213   } else if (src.type == AGX_INDEX_UNIFORM) {
214      assert(size == AGX_SIZE_16 || size == AGX_SIZE_32);
215      assert(size == dest_size);
216      assert(value < 0x200);
217
218      return
219         (value & BITFIELD_MASK(6)) |
220         ((value >> 8) << 6) |
221         (0x3 << 7) |
222         (((value >> 6) & BITFIELD_MASK(2)) << 10);
223   } else {
224      assert(src.type == AGX_INDEX_REGISTER);
225      assert(!(src.cache && src.discard));
226      assert(size == AGX_SIZE_16 || size == AGX_SIZE_32);
227      assert(size == dest_size);
228
229      unsigned hint = src.discard ? 0x3 : src.cache ? 0x2 : 0x1;
230
231      return
232         (value & BITFIELD_MASK(6)) |
233         (hint << 6) |
234         (((value >> 6) & BITFIELD_MASK(2)) << 10);
235   }
236}
237
238static unsigned
239agx_pack_float_mod(agx_index src)
240{
241   return (src.abs ? (1 << 0) : 0)
242        | (src.neg ? (1 << 1) : 0);
243}
244
245static bool
246agx_all_16(agx_instr *I)
247{
248   agx_foreach_dest(I, d) {
249      if (!agx_is_null(I->dest[d]) && I->dest[d].size != AGX_SIZE_16)
250         return false;
251   }
252
253   agx_foreach_src(I, s) {
254      if (!agx_is_null(I->src[s]) && I->src[s].size != AGX_SIZE_16)
255         return false;
256   }
257
258   return true;
259}
260
261/* Generic pack for ALU instructions, which are quite regular */
262
263static void
264agx_pack_alu(struct util_dynarray *emission, agx_instr *I)
265{
266   struct agx_opcode_info info = agx_opcodes_info[I->op];
267   bool is_16 = agx_all_16(I) && info.encoding_16.exact;
268   struct agx_encoding encoding = is_16 ?
269                                     info.encoding_16 : info.encoding;
270
271   assert(encoding.exact && "invalid encoding");
272
273   uint64_t raw = encoding.exact;
274   uint16_t extend = 0;
275
276   // TODO: assert saturable
277   if (I->saturate)
278      raw |= (1 << 6);
279
280   if (info.nr_dests) {
281      assert(info.nr_dests == 1);
282      unsigned D = agx_pack_alu_dst(I->dest[0]);
283      unsigned extend_offset = (sizeof(extend)*8) - 4;
284
285      raw |= (D & BITFIELD_MASK(8)) << 7;
286      extend |= ((D >> 8) << extend_offset);
287   } else if (info.immediates & AGX_IMMEDIATE_NEST) {
288      raw |= (I->invert_cond << 8);
289      raw |= (I->nest << 11);
290      raw |= (I->icond << 13);
291   }
292
293   for (unsigned s = 0; s < info.nr_srcs; ++s) {
294      bool is_cmpsel = (s >= 2) &&
295         (I->op == AGX_OPCODE_ICMPSEL || I->op == AGX_OPCODE_FCMPSEL);
296
297      unsigned src = is_cmpsel ?
298         agx_pack_cmpsel_src(I->src[s], I->dest[0].size) :
299         agx_pack_alu_src(I->src[s]);
300
301      unsigned src_short = (src & BITFIELD_MASK(10));
302      unsigned src_extend = (src >> 10);
303
304      /* Size bit always zero and so omitted for 16-bit */
305      if (is_16 && !is_cmpsel)
306         assert((src_short & (1 << 9)) == 0);
307
308      if (info.is_float) {
309         unsigned fmod = agx_pack_float_mod(I->src[s]);
310         unsigned fmod_offset = is_16 ? 9 : 10;
311         src_short |= (fmod << fmod_offset);
312      } else if (I->op == AGX_OPCODE_IMAD || I->op == AGX_OPCODE_IADD) {
313         bool zext = I->src[s].abs;
314         bool extends = I->src[s].size < AGX_SIZE_64;
315
316         unsigned sxt = (extends && !zext) ? (1 << 10) : 0;
317
318         assert(!I->src[s].neg || s == 1);
319         src_short |= sxt;
320      }
321
322      /* Sources come at predictable offsets */
323      unsigned offset = 16 + (12 * s);
324      raw |= (((uint64_t) src_short) << offset);
325
326      /* Destination and each source get extended in reverse order */
327      unsigned extend_offset = (sizeof(extend)*8) - ((s + 3) * 2);
328      extend |= (src_extend << extend_offset);
329   }
330
331   if ((I->op == AGX_OPCODE_IMAD || I->op == AGX_OPCODE_IADD) && I->src[1].neg)
332      raw |= (1 << 27);
333
334   if (info.immediates & AGX_IMMEDIATE_TRUTH_TABLE) {
335      raw |= (I->truth_table & 0x3) << 26;
336      raw |= (uint64_t) (I->truth_table >> 2)  << 38;
337   } else if (info.immediates & AGX_IMMEDIATE_SHIFT) {
338      raw |= (uint64_t) (I->shift & 1) << 39;
339      raw |= (uint64_t) (I->shift >> 2) << 52;
340   } else if (info.immediates & AGX_IMMEDIATE_BFI_MASK) {
341      raw |= (uint64_t) (I->mask & 0x3) << 38;
342      raw |= (uint64_t) ((I->mask >> 2) & 0x3) << 50;
343      raw |= (uint64_t) ((I->mask >> 4) & 0x1) << 63;
344   } else if (info.immediates & AGX_IMMEDIATE_SR) {
345      raw |= (uint64_t) (I->sr & 0x3F) << 16;
346      raw |= (uint64_t) (I->sr >> 6) << 26;
347   } else if (info.immediates & AGX_IMMEDIATE_WRITEOUT)
348      raw |= (uint64_t) (I->imm) << 8;
349   else if (info.immediates & AGX_IMMEDIATE_IMM)
350      raw |= (uint64_t) (I->imm) << 16;
351   else if (info.immediates & AGX_IMMEDIATE_ROUND)
352      raw |= (uint64_t) (I->imm) << 26;
353   else if (info.immediates & (AGX_IMMEDIATE_FCOND | AGX_IMMEDIATE_ICOND))
354      raw |= (uint64_t) (I->fcond) << 61;
355
356   /* Determine length bit */
357   unsigned length = encoding.length_short;
358   unsigned short_mask = (1 << length) - 1;
359   bool length_bit = (extend || (raw & ~short_mask));
360
361   if (encoding.extensible && length_bit) {
362      raw |= (1 << 15);
363      length += (length > 8) ? 4 : 2;
364   }
365
366   /* Pack! */
367   if (length <= sizeof(uint64_t)) {
368      unsigned extend_offset = ((length - sizeof(extend)) * 8);
369
370      /* XXX: This is a weird special case */
371      if (I->op == AGX_OPCODE_IADD)
372         extend_offset -= 16;
373
374      raw |= (uint64_t) extend << extend_offset;
375      memcpy(util_dynarray_grow_bytes(emission, 1, length), &raw, length);
376   } else {
377      /* So far, >8 byte ALU is only to store the extend bits */
378      unsigned extend_offset = (((length - sizeof(extend)) * 8) - 64);
379      unsigned hi = ((uint64_t) extend) << extend_offset;
380
381      memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
382      memcpy(util_dynarray_grow_bytes(emission, 1, length - 8), &hi, length - 8);
383   }
384}
385
386static void
387agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, agx_instr *I)
388{
389   switch (I->op) {
390   case AGX_OPCODE_LD_TILE:
391   case AGX_OPCODE_ST_TILE:
392   {
393      bool load = (I->op == AGX_OPCODE_LD_TILE);
394      unsigned D = agx_pack_alu_dst(load ? I->dest[0] : I->src[0]);
395      unsigned rt = 0; /* TODO */
396      unsigned mask = I->mask ?: 0xF;
397      assert(mask < 0x10);
398
399      uint64_t raw =
400         0x09 |
401         (load ? (1 << 6) : 0) |
402         ((uint64_t) (D & BITFIELD_MASK(8)) << 7) |
403         ((uint64_t) (I->format) << 24) |
404         ((uint64_t) (rt) << 32) |
405         (load ? (1ull << 35) : 0) |
406         ((uint64_t) (mask) << 36) |
407         ((uint64_t) 0x0380FC << 40) |
408         (((uint64_t) (D >> 8)) << 60);
409
410      unsigned size = 8;
411      memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
412      break;
413   }
414
415   case AGX_OPCODE_LD_VARY:
416   case AGX_OPCODE_LD_VARY_FLAT:
417   {
418      bool flat = (I->op == AGX_OPCODE_LD_VARY_FLAT);
419      unsigned D = agx_pack_alu_dst(I->dest[0]);
420      unsigned channels = (I->channels & 0x3);
421      assert(I->mask < 0xF); /* 0 indicates full mask */
422      agx_index index_src = I->src[0];
423      assert(index_src.type == AGX_INDEX_IMMEDIATE);
424      assert(!(flat && I->perspective));
425      unsigned index = index_src.value;
426
427      uint64_t raw =
428            0x21 | (flat ? (1 << 7) : 0) |
429            (I->perspective ? (1 << 6) : 0) |
430            ((D & 0xFF) << 7) |
431            (1ull << 15) | /* XXX */
432            (((uint64_t) index) << 16) |
433            (((uint64_t) channels) << 30) |
434            (!flat ? (1ull << 46) : 0) | /* XXX */
435            (!flat ? (1ull << 52) : 0) | /* XXX */
436            (((uint64_t) (D >> 8)) << 56);
437
438      unsigned size = 8;
439      memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
440      break;
441   }
442
443   case AGX_OPCODE_ST_VARY:
444   {
445      agx_index index_src = I->src[0];
446      agx_index value = I->src[1];
447
448      assert(index_src.type == AGX_INDEX_IMMEDIATE);
449      assert(value.type == AGX_INDEX_REGISTER);
450      assert(value.size == AGX_SIZE_32);
451
452      uint64_t raw =
453            0x11 |
454            (I->last ? (1 << 7) : 0) |
455            ((value.value & 0x3F) << 9) |
456            (((uint64_t) index_src.value) << 16) |
457            (0x80 << 16) | /* XXX */
458            ((value.value >> 6) << 24) |
459            (0x8 << 28); /* XXX */
460
461      unsigned size = 4;
462      memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
463      break;
464   }
465
466   case AGX_OPCODE_DEVICE_LOAD:
467   {
468      assert(I->mask != 0);
469      assert(I->format <= 0x10);
470
471      bool Rt, At, Ot;
472      unsigned R = agx_pack_memory_reg(I->dest[0], &Rt);
473      unsigned A = agx_pack_memory_base(I->src[0], &At);
474      unsigned O = agx_pack_memory_index(I->src[1], &Ot);
475      unsigned u1 = 1; // XXX
476      unsigned u3 = 0;
477      unsigned u4 = 4; // XXX
478      unsigned u5 = 0;
479      bool L = true; /* TODO: when would you want short? */
480
481      uint64_t raw =
482            0x05 |
483            ((I->format & BITFIELD_MASK(3)) << 7) |
484            ((R & BITFIELD_MASK(6)) << 10) |
485            ((A & BITFIELD_MASK(4)) << 16) |
486            ((O & BITFIELD_MASK(4)) << 20) |
487            (Ot ? (1 << 24) : 0) |
488            (I->src[1].abs ? (1 << 25) : 0) |
489            (u1 << 26) |
490            (At << 27) |
491            (u3 << 28) |
492            (I->scoreboard << 30) |
493            (((uint64_t) ((O >> 4) & BITFIELD_MASK(4))) << 32) |
494            (((uint64_t) ((A >> 4) & BITFIELD_MASK(4))) << 36) |
495            (((uint64_t) ((R >> 6) & BITFIELD_MASK(2))) << 40) |
496            (((uint64_t) I->shift) << 42) |
497            (((uint64_t) u4) << 44) |
498            (L ? (1ull << 47) : 0) |
499            (((uint64_t) (I->format >> 3)) << 48) |
500            (((uint64_t) Rt) << 49) |
501            (((uint64_t) u5) << 50) |
502            (((uint64_t) I->mask) << 52) |
503            (((uint64_t) (O >> 8)) << 56);
504
505      unsigned size = L ? 8 : 6;
506      memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
507      break;
508   }
509
510   case AGX_OPCODE_TEXTURE_SAMPLE:
511   {
512      assert(I->mask != 0);
513      assert(I->format <= 0x10);
514
515      bool Rt, Ot, Ct, St;
516      unsigned Tt;
517
518      unsigned R = agx_pack_memory_reg(I->dest[0], &Rt);
519      unsigned C = agx_pack_sample_coords(I->src[0], &Ct);
520      unsigned T = agx_pack_texture(I->src[2], &Tt);
521      unsigned S = agx_pack_sampler(I->src[3], &St);
522      unsigned O = agx_pack_sample_offset(I->src[4], &Ot);
523      unsigned D = agx_pack_lod(I->src[1]);
524
525      unsigned U = 0; // TODO: what is sampler ureg?
526      unsigned q1 = 0; // XXX
527      unsigned q2 = 0; // XXX
528      unsigned q3 = 12; // XXX
529      unsigned kill = 0; // helper invocation kill bit
530      unsigned q5 = 0; // XXX
531      unsigned q6 = 0; // XXX
532
533      uint32_t extend =
534            ((U & BITFIELD_MASK(5)) << 0) |
535            (kill << 5) |
536            ((R >> 6) << 8) |
537            ((C >> 6) << 10) |
538            ((D >> 6) << 12) |
539            ((T >> 6) << 14) |
540            ((O & BITFIELD_MASK(6)) << 16) |
541            (q6 << 22) |
542            (Ot << 27) |
543            ((S >> 6) << 28) |
544            ((O >> 6) << 30);
545
546      bool L = (extend != 0);
547      assert(I->scoreboard == 0 && "todo");
548
549      uint64_t raw =
550            0x31 |
551            (Rt ? (1 << 8) : 0) |
552            ((R & BITFIELD_MASK(6)) << 9) |
553            (L ? (1 << 15) : 0) |
554            ((C & BITFIELD_MASK(6)) << 16) |
555            (Ct ? (1 << 22) : 0) |
556            (q1 << 23) |
557            ((D & BITFIELD_MASK(6)) << 24) |
558            (q2 << 30) |
559            (((uint64_t) (T & BITFIELD_MASK(6))) << 32) |
560            (((uint64_t) Tt) << 38) |
561            (((uint64_t) I->dim) << 40) |
562            (((uint64_t) q3) << 43) |
563            (((uint64_t) I->mask) << 48) |
564            (((uint64_t) I->lod_mode) << 52) |
565            (((uint64_t) (S & BITFIELD_MASK(6))) << 32) |
566            (((uint64_t) St) << 62) |
567            (((uint64_t) q5) << 63);
568
569      memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
570      if (L)
571         memcpy(util_dynarray_grow_bytes(emission, 1, 4), &extend, 4);
572
573      break;
574   }
575
576   case AGX_OPCODE_JMP_EXEC_ANY:
577   case AGX_OPCODE_JMP_EXEC_NONE:
578   {
579      /* We don't implement indirect branches */
580      assert(I->target != NULL);
581
582      /* We'll fix the offset later. */
583      struct agx_branch_fixup fixup = {
584         .block = I->target,
585         .offset = emission->size
586      };
587
588      util_dynarray_append(fixups, struct agx_branch_fixup, fixup);
589
590      /* The rest of the instruction is fixed */
591      struct agx_opcode_info info = agx_opcodes_info[I->op];
592      uint64_t raw = info.encoding.exact;
593      memcpy(util_dynarray_grow_bytes(emission, 1, 6), &raw, 6);
594      break;
595   }
596
597   default:
598      agx_pack_alu(emission, I);
599      return;
600   }
601}
602
603/* Relative branches may be emitted before their targets, so we patch the
604 * binary to fix up the branch offsets after the main emit */
605
606static void
607agx_fixup_branch(struct util_dynarray *emission, struct agx_branch_fixup fix)
608{
609   /* Branch offset is 2 bytes into the jump instruction */
610   uint8_t *location = ((uint8_t *) emission->data) + fix.offset + 2;
611
612   /* Offsets are relative to the jump instruction */
613   int32_t patch = (int32_t) fix.block->offset - (int32_t) fix.offset;
614
615   /* Patch the binary */
616   memcpy(location, &patch, sizeof(patch));
617}
618
619void
620agx_pack_binary(agx_context *ctx, struct util_dynarray *emission)
621{
622   struct util_dynarray fixups;
623   util_dynarray_init(&fixups, ctx);
624
625   agx_foreach_block(ctx, block) {
626      /* Relative to the start of the binary, the block begins at the current
627       * number of bytes emitted */
628      block->offset = emission->size;
629
630      agx_foreach_instr_in_block(block, ins) {
631         agx_pack_instr(emission, &fixups, ins);
632      }
633   }
634
635   util_dynarray_foreach(&fixups, struct agx_branch_fixup, fixup)
636      agx_fixup_branch(emission, *fixup);
637
638   util_dynarray_fini(&fixups);
639}
640