agx_compiler.h revision 7ec681f3
1/*
2 * Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
3 * Copyright (C) 2020 Collabora Ltd.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25#ifndef __AGX_COMPILER_H
26#define __AGX_COMPILER_H
27
28#include "compiler/nir/nir.h"
29#include "util/u_math.h"
30#include "util/half_float.h"
31#include "util/u_dynarray.h"
32#include "agx_compile.h"
33#include "agx_opcodes.h"
34#include "agx_minifloat.h"
35
/* Debug flags controlling compiler output, read from the agx_debug bitmask
 * below (names suggest: debug messages, shader dumps, shader-db stats,
 * verbose disassembly, internal shader dumps — confirm against the driver's
 * debug-option parsing) */
enum agx_dbg {
   AGX_DBG_MSGS        = BITFIELD_BIT(0),
   AGX_DBG_SHADERS     = BITFIELD_BIT(1),
   AGX_DBG_SHADERDB    = BITFIELD_BIT(2),
   AGX_DBG_VERBOSE     = BITFIELD_BIT(3),
   AGX_DBG_INTERNAL    = BITFIELD_BIT(4),
};
43
44extern int agx_debug;
45
46/* r0-r127 inclusive, as pairs of 16-bits, gives 256 registers */
47#define AGX_NUM_REGS (256)
48
/* What an agx_index operand refers to (stored in agx_index::type) */
enum agx_index_type {
   AGX_INDEX_NULL = 0,          /* no operand (see agx_null) */
   AGX_INDEX_NORMAL = 1,        /* SSA value numbered by agx_context::alloc */
   AGX_INDEX_IMMEDIATE = 2,     /* inline 16-bit immediate */
   AGX_INDEX_UNIFORM = 3,       /* uniform file, indexed in half-words */
   AGX_INDEX_REGISTER = 4,      /* hardware register, indexed in half-words */
   AGX_INDEX_NIR_REGISTER = 5,  /* nir_register index, pre-RA */
};

/* Operand size (16-, 32-, or 64-bit), stored in agx_index::size */
enum agx_size {
   AGX_SIZE_16 = 0,
   AGX_SIZE_32 = 1,
   AGX_SIZE_64 = 2
};
63
/* An operand (source or destination) of an instruction, packed into 32 bits */
typedef struct {
   /* Sufficient for as many SSA values as we need. Immediates and uniforms fit in 16-bits */
   unsigned value : 22;

   /* Indicates that this source kills the referenced value (because it is the
    * last use in a block and the source is not live after the block). Set by
    * liveness analysis. */
   bool kill : 1;

   /* Cache hints */
   bool cache : 1;
   bool discard : 1;

   /* src - float modifiers */
   bool abs : 1;
   bool neg : 1;

   /* Size of the referenced value (enum agx_size) */
   enum agx_size size : 2;

   /* What `value` refers to (enum agx_index_type) */
   enum agx_index_type type : 3;
} agx_index;
84
85static inline agx_index
86agx_get_index(unsigned value, enum agx_size size)
87{
88   return (agx_index) {
89      .type = AGX_INDEX_NORMAL,
90      .value = value,
91      .size = size
92   };
93}
94
95static inline agx_index
96agx_immediate(uint16_t imm)
97{
98   return (agx_index) {
99      .type = AGX_INDEX_IMMEDIATE,
100      .value = imm,
101      .size = AGX_SIZE_32
102   };
103}
104
105static inline agx_index
106agx_immediate_f(float f)
107{
108   assert(agx_minifloat_exact(f));
109   return agx_immediate(agx_minifloat_encode(f));
110}
111
112/* in half-words, specify r0h as 1, r1 as 2... */
113static inline agx_index
114agx_register(uint8_t imm, enum agx_size size)
115{
116   return (agx_index) {
117      .type = AGX_INDEX_REGISTER,
118      .value = imm,
119      .size = size
120   };
121}
122
123static inline agx_index
124agx_nir_register(unsigned imm, enum agx_size size)
125{
126   return (agx_index) {
127      .type = AGX_INDEX_NIR_REGISTER,
128      .value = imm,
129      .size = size
130   };
131}
132
133/* Also in half-words */
134static inline agx_index
135agx_uniform(uint8_t imm, enum agx_size size)
136{
137   return (agx_index) {
138      .type = AGX_INDEX_UNIFORM,
139      .value = imm,
140      .size = size
141   };
142}
143
144static inline agx_index
145agx_null()
146{
147   return (agx_index) { .type = AGX_INDEX_NULL };
148}
149
150static inline agx_index
151agx_zero()
152{
153   return agx_immediate(0);
154}
155
156/* IEEE 754 additive identity -0.0, stored as an 8-bit AGX minifloat: mantissa
157 * = exponent = 0, sign bit set */
158
159static inline agx_index
160agx_negzero()
161{
162   return agx_immediate(0x80);
163}
164
165static inline agx_index
166agx_abs(agx_index idx)
167{
168   idx.abs = true;
169   idx.neg = false;
170   return idx;
171}
172
173static inline agx_index
174agx_neg(agx_index idx)
175{
176   idx.neg ^= true;
177   return idx;
178}
179
180/* Replaces an index, preserving any modifiers */
181
182static inline agx_index
183agx_replace_index(agx_index old, agx_index replacement)
184{
185   replacement.abs = old.abs;
186   replacement.neg = old.neg;
187   return replacement;
188}
189
190static inline bool
191agx_is_null(agx_index idx)
192{
193   return idx.type == AGX_INDEX_NULL;
194}
195
196/* Compares equivalence as references */
197
198static inline bool
199agx_is_equiv(agx_index left, agx_index right)
200{
201   return (left.type == right.type) && (left.value == right.value);
202}
203
204#define AGX_MAX_DESTS 1
205#define AGX_MAX_SRCS 5
206
/* Integer comparison conditions: U* compare as unsigned, S* as signed.
 * Gaps marked unknown are hardware encodings not yet identified. */
enum agx_icond {
   AGX_ICOND_UEQ = 0,
   AGX_ICOND_ULT = 1,
   AGX_ICOND_UGT = 2,
   /* unknown */
   AGX_ICOND_SEQ = 4,
   AGX_ICOND_SLT = 5,
   AGX_ICOND_SGT = 6,
   /* unknown */
};

/* Floating-point comparison conditions. LTN/GTN presumably differ from
 * LT/GT in NaN handling — TODO confirm against hardware docs. */
enum agx_fcond {
   AGX_FCOND_EQ = 0,
   AGX_FCOND_LT = 1,
   AGX_FCOND_GT = 2,
   AGX_FCOND_LTN = 3,
   /* unknown */
   AGX_FCOND_GE = 5,
   AGX_FCOND_LE = 6,
   AGX_FCOND_GTN = 7,
};

/* Rounding modes: round-towards-zero and round-to-nearest-even */
enum agx_round {
   AGX_ROUND_RTZ = 0,
   AGX_ROUND_RTE = 1,
};

/* Source/destination type pairs for the convert instruction */
enum agx_convert {
   AGX_CONVERT_U8_TO_F = 0,
   AGX_CONVERT_S8_TO_F = 1,
   AGX_CONVERT_F_TO_U16 = 4,
   AGX_CONVERT_F_TO_S16 = 5,
   AGX_CONVERT_U16_TO_F = 6,
   AGX_CONVERT_S16_TO_F = 7,
   AGX_CONVERT_F_TO_U32 = 8,
   AGX_CONVERT_F_TO_S32 = 9,
   AGX_CONVERT_U32_TO_F = 10,
   AGX_CONVERT_S32_TO_F = 11
};

/* Level-of-detail selection mode for texture instructions */
enum agx_lod_mode {
   AGX_LOD_MODE_AUTO_LOD = 0,
   AGX_LOD_MODE_LOD_MIN = 6,
   AGX_LOD_GRAD = 8,
   AGX_LOD_GRAD_MIN = 12
};

/* Texture dimensionality for texture instructions */
enum agx_dim {
   AGX_DIM_TEX_1D = 0,
   AGX_DIM_TEX_1D_ARRAY = 1,
   AGX_DIM_TEX_2D = 2,
   AGX_DIM_TEX_2D_ARRAY = 3,
   AGX_DIM_TEX_2D_MS = 4,
   AGX_DIM_TEX_3D = 5,
   AGX_DIM_TEX_CUBE = 6,
   AGX_DIM_TEX_CUBE_ARRAY = 7
};
264
265/* Forward declare for branch target */
266struct agx_block;
267
/* A single IR instruction: opcode, operands, and per-opcode modifiers */
typedef struct {
   /* Must be first */
   struct list_head link;

   enum agx_opcode op;

   /* Data flow */
   agx_index dest[AGX_MAX_DESTS];
   agx_index src[AGX_MAX_SRCS];

   /* Per-opcode payload; which member is active depends on `op` */
   union {
      uint32_t imm;
      uint32_t writeout;
      uint32_t truth_table;
      uint32_t component;
      uint32_t channels;
      uint32_t bfi_mask;
      enum agx_sr sr;
      enum agx_icond icond;
      enum agx_fcond fcond;
      enum agx_format format;
      enum agx_round round;
      enum agx_lod_mode lod_mode;
      struct agx_block *target;
   };

   /* For load varying */
   bool perspective : 1;

   /* Invert icond/fcond */
   bool invert_cond : 1;

   /* TODO: Handle tex ops more efficient */
   enum agx_dim dim : 3;

   /* Final st_vary op */
   bool last : 1;

   /* Shift for a bitwise or memory op (conflicts with format for memory ops) */
   unsigned shift : 4;

   /* Scoreboard index, 0 or 1. Leave as 0 for instructions that do not require
    * scoreboarding (everything but memory load/store and texturing). */
   unsigned scoreboard : 1;

   /* Number of nested control flow layers to jump by */
   unsigned nest : 2;

   /* Output modifiers */
   bool saturate : 1;
   unsigned mask : 4;
} agx_instr;
320
321struct agx_block;
322
/* A basic block: a list of instructions plus CFG and analysis metadata */
typedef struct agx_block {
   /* Link to next block. Must be first */
   struct list_head link;

   /* List of instructions emitted for the current block */
   struct list_head instructions;

   /* Index of the block in source order */
   unsigned name;

   /* Control flow graph: at most two successors, arbitrary predecessors */
   struct agx_block *successors[2];
   struct set *predecessors;
   bool unconditional_jumps;

   /* Liveness analysis results, bitsets indexed by value */
   BITSET_WORD *live_in;
   BITSET_WORD *live_out;

   /* Register allocation: registers live at the end of the block */
   BITSET_DECLARE(regs_out, AGX_NUM_REGS);

   /* Offset of the block in the emitted binary */
   off_t offset;

   /** Available for passes to use for metadata */
   uint8_t pass_flags;
} agx_block;
351
/* Per-shader compilation state, threaded through every pass */
typedef struct {
   nir_shader *nir;
   gl_shader_stage stage;
   struct list_head blocks; /* list of agx_block */
   struct agx_shader_info *out;
   struct agx_shader_key *key;

   /* Remapping table for varyings indexed by driver_location */
   unsigned varyings[AGX_MAX_VARYINGS];

   /* Handling phi nodes is still TODO while we bring up other parts of the
    * driver. YOLO the mapping of nir_register to fixed hardware registers */
   unsigned *nir_regalloc;

   /* We reserve the top (XXX: that hurts thread count) */
   unsigned max_register;

   /* Place to start pushing new values */
   unsigned push_base;

   /* For creating temporaries: next unused SSA value index */
   unsigned alloc;

   /* I don't really understand how writeout ops work yet */
   bool did_writeout;

   /* Has r0l been zeroed yet due to control flow? */
   bool any_cf;

   /** Computed metadata */
   bool has_liveness;

   /* Number of nested control flow structures within the innermost loop. Since
    * NIR is just loop and if-else, this is the number of nested if-else
    * statements in the loop */
   unsigned loop_nesting;

   /* During instruction selection, for inserting control flow */
   agx_block *current_block;
   agx_block *continue_block;
   agx_block *break_block;
   agx_block *after_block;

   /* Stats for shader-db */
   unsigned loop_count;
   unsigned spills;
   unsigned fills;
} agx_context;
400
/* Unlink an instruction from its block's instruction list. The memory is
 * not freed here — presumably reclaimed with the allocation context. */
static inline void
agx_remove_instruction(agx_instr *ins)
{
   list_del(&ins->link);
}
406
407static inline agx_index
408agx_temp(agx_context *ctx, enum agx_size size)
409{
410   return agx_get_index(ctx->alloc++, size);
411}
412
413static enum agx_size
414agx_size_for_bits(unsigned bits)
415{
416   switch (bits) {
417   case 1:
418   case 16: return AGX_SIZE_16;
419   case 32: return AGX_SIZE_32;
420   case 64: return AGX_SIZE_64;
421   default: unreachable("Invalid bitsize");
422   }
423}
424
425static inline agx_index
426agx_src_index(nir_src *src)
427{
428   if (!src->is_ssa) {
429      return agx_nir_register(src->reg.reg->index,
430            agx_size_for_bits(nir_src_bit_size(*src)));
431   }
432
433   return agx_get_index(src->ssa->index,
434         agx_size_for_bits(nir_src_bit_size(*src)));
435}
436
437static inline agx_index
438agx_dest_index(nir_dest *dst)
439{
440   if (!dst->is_ssa) {
441      return agx_nir_register(dst->reg.reg->index,
442            agx_size_for_bits(nir_dest_bit_size(*dst)));
443   }
444
445   return agx_get_index(dst->ssa.index,
446         agx_size_for_bits(nir_dest_bit_size(*dst)));
447}
448
/* Iterators for AGX IR.
 *
 * The _safe variants tolerate removal of the current element; _rev walk in
 * reverse; _from start at a given element. agx_foreach_instr_global* expand
 * to a nested loop and introduce a hidden `v_block` variable, so they cannot
 * be nested with themselves in one scope. */

#define agx_foreach_block(ctx, v) \
   list_for_each_entry(agx_block, v, &ctx->blocks, link)

#define agx_foreach_block_rev(ctx, v) \
   list_for_each_entry_rev(agx_block, v, &ctx->blocks, link)

#define agx_foreach_block_from(ctx, from, v) \
   list_for_each_entry_from(agx_block, v, from, &ctx->blocks, link)

#define agx_foreach_block_from_rev(ctx, from, v) \
   list_for_each_entry_from_rev(agx_block, v, from, &ctx->blocks, link)

#define agx_foreach_instr_in_block(block, v) \
   list_for_each_entry(agx_instr, v, &(block)->instructions, link)

#define agx_foreach_instr_in_block_rev(block, v) \
   list_for_each_entry_rev(agx_instr, v, &(block)->instructions, link)

#define agx_foreach_instr_in_block_safe(block, v) \
   list_for_each_entry_safe(agx_instr, v, &(block)->instructions, link)

#define agx_foreach_instr_in_block_safe_rev(block, v) \
   list_for_each_entry_safe_rev(agx_instr, v, &(block)->instructions, link)

#define agx_foreach_instr_in_block_from(block, v, from) \
   list_for_each_entry_from(agx_instr, v, from, &(block)->instructions, link)

#define agx_foreach_instr_in_block_from_rev(block, v, from) \
   list_for_each_entry_from_rev(agx_instr, v, from, &(block)->instructions, link)

#define agx_foreach_instr_global(ctx, v) \
   agx_foreach_block(ctx, v_block) \
      agx_foreach_instr_in_block(v_block, v)

#define agx_foreach_instr_global_rev(ctx, v) \
   agx_foreach_block_rev(ctx, v_block) \
      agx_foreach_instr_in_block_rev(v_block, v)

#define agx_foreach_instr_global_safe(ctx, v) \
   agx_foreach_block(ctx, v_block) \
      agx_foreach_instr_in_block_safe(v_block, v)

#define agx_foreach_instr_global_safe_rev(ctx, v) \
   agx_foreach_block_rev(ctx, v_block) \
      agx_foreach_instr_in_block_safe_rev(v_block, v)

/* Based on set_foreach, expanded with automatic type casts.
 *
 * NOTE(review): these two declare `v` (and helpers) in the enclosing scope
 * rather than inside the for-statement, so each can be used at most once
 * per scope. Iteration stops at the first NULL successor. */

#define agx_foreach_successor(blk, v) \
   agx_block *v; \
   agx_block **_v; \
   for (_v = (agx_block **) &blk->successors[0], \
         v = *_v; \
         v != NULL && _v < (agx_block **) &blk->successors[2]; \
         _v++, v = *_v) \

#define agx_foreach_predecessor(blk, v) \
   struct set_entry *_entry_##v; \
   agx_block *v; \
   for (_entry_##v = _mesa_set_next_entry(blk->predecessors, NULL), \
         v = (agx_block *) (_entry_##v ? _entry_##v->key : NULL);  \
         _entry_##v != NULL; \
         _entry_##v = _mesa_set_next_entry(blk->predecessors, _entry_##v), \
         v = (agx_block *) (_entry_##v ? _entry_##v->key : NULL))

/* Iterate source/destination slots of an instruction by index */
#define agx_foreach_src(ins, v) \
   for (unsigned v = 0; v < ARRAY_SIZE(ins->src); ++v)

#define agx_foreach_dest(ins, v) \
   for (unsigned v = 0; v < ARRAY_SIZE(ins->dest); ++v)
521
/* Instruction preceding `ins` in its block. If `ins` is first, this yields
 * the list head cast to an agx_instr — not a real instruction — so callers
 * must know a predecessor exists. */
static inline agx_instr *
agx_prev_op(agx_instr *ins)
{
   return list_last_entry(&(ins->link), agx_instr, link);
}
527
/* Instruction following `ins` in its block. If `ins` is last, this yields
 * the list head cast to an agx_instr — callers must know a successor
 * exists. */
static inline agx_instr *
agx_next_op(agx_instr *ins)
{
   return list_first_entry(&(ins->link), agx_instr, link);
}
533
/* Block following `block` in source order; same caveat as above when
 * `block` is the last block of the shader. */
static inline agx_block *
agx_next_block(agx_block *block)
{
   return list_first_entry(&(block->link), agx_block, link);
}
539
540static inline agx_block *
541agx_exit_block(agx_context *ctx)
542{
543   agx_block *last = list_last_entry(&ctx->blocks, agx_block, link);
544   assert(!last->successors[0] && !last->successors[1]);
545   return last;
546}
547
548/* Like in NIR, for use with the builder */
549
/* Where a cursor points relative to existing IR */
enum agx_cursor_option {
   agx_cursor_after_block,
   agx_cursor_before_instr,
   agx_cursor_after_instr
};

/* Insertion point for the builder; `option` selects the active union member
 * (block for after_block, instr otherwise) */
typedef struct {
   enum agx_cursor_option option;

   union {
      agx_block *block;
      agx_instr *instr;
   };
} agx_cursor;
564
565static inline agx_cursor
566agx_after_block(agx_block *block)
567{
568   return (agx_cursor) {
569      .option = agx_cursor_after_block,
570      .block = block
571   };
572}
573
574static inline agx_cursor
575agx_before_instr(agx_instr *instr)
576{
577   return (agx_cursor) {
578      .option = agx_cursor_before_instr,
579      .instr = instr
580   };
581}
582
583static inline agx_cursor
584agx_after_instr(agx_instr *instr)
585{
586   return (agx_cursor) {
587      .option = agx_cursor_after_instr,
588      .instr = instr
589   };
590}
591
592/* IR builder in terms of cursor infrastructure */
593
/* Builder: the shader being built plus the current insertion point */
typedef struct {
   agx_context *shader;
   agx_cursor cursor;
} agx_builder;
598
599static inline agx_builder
600agx_init_builder(agx_context *ctx, agx_cursor cursor)
601{
602   return (agx_builder) {
603      .shader = ctx,
604      .cursor = cursor
605   };
606}
607
608/* Insert an instruction at the cursor and move the cursor */
609
610static inline void
611agx_builder_insert(agx_cursor *cursor, agx_instr *I)
612{
613   switch (cursor->option) {
614   case agx_cursor_after_instr:
615      list_add(&I->link, &cursor->instr->link);
616      cursor->instr = I;
617      return;
618
619   case agx_cursor_after_block:
620      list_addtail(&I->link, &cursor->block->instructions);
621      cursor->option = agx_cursor_after_instr;
622      cursor->instr = I;
623      return;
624
625   case agx_cursor_before_instr:
626      list_addtail(&I->link, &cursor->instr->link);
627      cursor->option = agx_cursor_after_instr;
628      cursor->instr = I;
629      return;
630   }
631
632   unreachable("Invalid cursor option");
633}
634
635/* Uniform file management */
636
637agx_index
638agx_indexed_sysval(agx_context *ctx, enum agx_push_type type, enum agx_size size,
639      unsigned index, unsigned length);
640
641/* Routines defined for AIR */
642
643void agx_print_instr(agx_instr *I, FILE *fp);
644void agx_print_block(agx_block *block, FILE *fp);
645void agx_print_shader(agx_context *ctx, FILE *fp);
646void agx_optimizer(agx_context *ctx);
647void agx_dce(agx_context *ctx);
648void agx_ra(agx_context *ctx);
649void agx_pack_binary(agx_context *ctx, struct util_dynarray *emission);
650
651void agx_compute_liveness(agx_context *ctx);
652void agx_liveness_ins_update(BITSET_WORD *live, agx_instr *I);
653
654#endif
655