1/*
2 * Copyright (c) 2020 Etnaviv Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 *    Jonathan Marek <jonathan@marek.ca>
25 */
26
27#ifndef H_ETNAVIV_COMPILER_NIR
28#define H_ETNAVIV_COMPILER_NIR
29
30#include "compiler/nir/nir.h"
31#include "etnaviv_asm.h"
32#include "etnaviv_compiler.h"
33#include "util/compiler.h"
34
/* Per-shader compilation state, carried through codegen, liveness and RA. */
struct etna_compile {
   nir_shader *nir;
   nir_function_impl *impl;
#define is_fs(c) ((c)->nir->info.stage == MESA_SHADER_FRAGMENT)
   const struct etna_specs *specs;
   struct etna_shader_variant *variant;

   /* block # to instr index */
   unsigned *block_ptr;

   /* Code generation */
   int inst_ptr; /* current instruction pointer */
   struct etna_inst code[ETNA_MAX_INSTRUCTIONS * ETNA_INST_SIZE];

   /* constants */
   uint64_t consts[ETNA_MAX_IMM];
   unsigned const_count;

   /* ra state */
   struct ra_graph *g;        /* interference graph */
   unsigned *live_map;        /* filled by etna_live_defs() */
   unsigned num_nodes;

   /* There was an error during compilation */
   bool error;
};
61
/* Report a compilation error: print the message, mark the compile as failed,
 * and trip an assert in debug builds so the failure is caught at the point
 * of origin.  The ctx argument is parenthesized so any expression yielding
 * a struct etna_compile * can be passed safely.
 */
#define compile_error(ctx, args...) ({ \
   printf(args); \
   (ctx)->error = true; \
   assert(0); \
})
67
/* Values or'ed into nir_instr::pass_flags by real_dest() to record that an
 * instruction's value is folded (bypassed) instead of getting a register.
 */
enum {
   BYPASS_DST = 1, /* dest is forwarded into a consuming vecN/mov dest */
   BYPASS_SRC = 2, /* mov whose source is consumed directly (tex src case) */
};
72
73static inline bool is_sysval(nir_instr *instr)
74{
75   if (instr->type != nir_instr_type_intrinsic)
76      return false;
77
78   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
79   return intr->intrinsic == nir_intrinsic_load_front_face ||
80          intr->intrinsic == nir_intrinsic_load_frag_coord;
81}
82
83/* get unique ssa/reg index for nir_src */
84static inline unsigned
85src_index(nir_function_impl *impl, nir_src *src)
86{
87   return src->is_ssa ? src->ssa->index : (src->reg.reg->index + impl->ssa_alloc);
88}
89
90/* get unique ssa/reg index for nir_dest */
91static inline unsigned
92dest_index(nir_function_impl *impl, nir_dest *dest)
93{
94   return dest->is_ssa ? dest->ssa.index : (dest->reg.reg->index + impl->ssa_alloc);
95}
96
97static inline void
98update_swiz_mask(nir_alu_instr *alu, nir_dest *dest, unsigned *swiz, unsigned *mask)
99{
100   if (!swiz)
101      return;
102
103   bool is_vec = dest != NULL;
104   unsigned swizzle = 0, write_mask = 0;
105   for (unsigned i = 0; i < 4; i++) {
106      /* channel not written */
107      if (!(alu->dest.write_mask & (1 << i)))
108         continue;
109      /* src is different (only check for vecN) */
110      if (is_vec && alu->src[i].src.ssa != &dest->ssa)
111         continue;
112
113      unsigned src_swiz = is_vec ? alu->src[i].swizzle[0] : alu->src[0].swizzle[i];
114      swizzle |= (*swiz >> src_swiz * 2 & 3) << i * 2;
115      /* this channel isn't written through this chain */
116      if (*mask & (1 << src_swiz))
117         write_mask |= 1 << i;
118   }
119   *swiz = swizzle;
120   *mask = write_mask;
121}
122
/* Resolve the destination that should actually receive a register for a
 * value, following chains of consuming vecN/mov instructions and flagging
 * bypassed instructions via pass_flags (BYPASS_DST/BYPASS_SRC).
 *
 * dest: candidate destination; NULL or non-SSA dests are returned as-is.
 * swiz/mask: if non-NULL, updated with the channel remapping accumulated
 *            along the chain (see update_swiz_mask).
 *
 * Returns NULL when the value needs no register at all (a mov bypassed as a
 * tex source), otherwise the final destination to allocate.
 */
static nir_dest *
real_dest(nir_dest *dest, unsigned *swiz, unsigned *mask)
{
   if (!dest || !dest->is_ssa)
      return dest;

   /* a value consumed by an if-condition can never be source-bypassed */
   bool can_bypass_src = !list_length(&dest->ssa.if_uses);
   nir_instr *p_instr = dest->ssa.parent_instr;

   /* if used by a vecN, the "real" destination becomes the vecN destination
    * lower_alu guarantees that values used by a vecN are only used by that vecN
    * we can apply the same logic to movs in some cases too
    */
   nir_foreach_use(use_src, &dest->ssa) {
      nir_instr *instr = use_src->parent_instr;

      /* src bypass check: for now only deal with tex src mov case
       * note: for alu don't bypass mov for multiple uniform sources
       */
      switch (instr->type) {
      case nir_instr_type_tex:
         if (p_instr->type == nir_instr_type_alu &&
             nir_instr_as_alu(p_instr)->op == nir_op_mov) {
            break;
         }
         FALLTHROUGH;
      default:
         can_bypass_src = false;
         break;
      }

      if (instr->type != nir_instr_type_alu)
         continue;

      nir_alu_instr *alu = nir_instr_as_alu(instr);

      switch (alu->op) {
      case nir_op_vec2:
      case nir_op_vec3:
      case nir_op_vec4:
         /* per lower_alu, the vecN must be the sole user of this value */
         assert(list_length(&dest->ssa.if_uses) == 0);
         nir_foreach_use(use_src, &dest->ssa)
            assert(use_src->parent_instr == instr);

         update_swiz_mask(alu, dest, swiz, mask);
         break;
      case nir_op_mov: {
         /* only fold a mov of an alu/tex result that is the single use */
         switch (dest->ssa.parent_instr->type) {
         case nir_instr_type_alu:
         case nir_instr_type_tex:
            break;
         default:
            continue;
         }
         if (list_length(&dest->ssa.if_uses) || list_length(&dest->ssa.uses) > 1)
            continue;

         update_swiz_mask(alu, NULL, swiz, mask);
         break;
      };
      default:
         continue;
      }

      /* this instruction's dest is forwarded into the vecN/mov;
       * recurse to find the chain's final destination
       */
      assert(!(instr->pass_flags & BYPASS_SRC));
      instr->pass_flags |= BYPASS_DST;
      return real_dest(&alu->dest.dest, swiz, mask);
   }

   if (can_bypass_src && !(p_instr->pass_flags & BYPASS_DST)) {
      p_instr->pass_flags |= BYPASS_SRC;
      return NULL;
   }

   return dest;
}
199
200/* if instruction dest needs a register, return nir_dest for it */
201static inline nir_dest *
202dest_for_instr(nir_instr *instr)
203{
204   nir_dest *dest = NULL;
205
206   switch (instr->type) {
207   case nir_instr_type_alu:
208      dest = &nir_instr_as_alu(instr)->dest.dest;
209      break;
210   case nir_instr_type_tex:
211      dest = &nir_instr_as_tex(instr)->dest;
212      break;
213   case nir_instr_type_intrinsic: {
214      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
215      if (intr->intrinsic == nir_intrinsic_load_uniform ||
216          intr->intrinsic == nir_intrinsic_load_ubo ||
217          intr->intrinsic == nir_intrinsic_load_input ||
218          intr->intrinsic == nir_intrinsic_load_instance_id ||
219          intr->intrinsic == nir_intrinsic_load_texture_rect_scaling)
220         dest = &intr->dest;
221   } break;
222   case nir_instr_type_deref:
223      return NULL;
224   default:
225      break;
226   }
227   return real_dest(dest, NULL, NULL);
228}
229
/* Liveness record for one register-needing definition,
 * produced by etna_live_defs().
 */
struct live_def {
   nir_instr *instr;
   nir_dest *dest; /* cached dest_for_instr */
   unsigned live_start, live_end; /* live range */
};
235
/* Compute live ranges for impl, filling defs and live_map; presumably
 * returns the number of live_def entries written.
 * NOTE(review): defs/live_map sizing and index semantics are defined by the
 * liveness pass implementation - confirm there before relying on them.
 */
unsigned
etna_live_defs(nir_function_impl *impl, struct live_def *defs, unsigned *live_map);
238
/* Swizzles and write masks can be used to layer virtual non-interfering
 * registers on top of the real VEC4 registers. For example, the virtual
 * VEC3_XYZ register and the virtual SCALAR_W register that use the same
 * physical VEC4 base register do not interfere.
 */
enum reg_class {
   REG_CLASS_VIRT_SCALAR, /* any single component */
   REG_CLASS_VIRT_VEC2,   /* any two components */
   REG_CLASS_VIRT_VEC3,   /* any three components */
   REG_CLASS_VEC4,        /* full physical register */
   /* special vec2 class for fast transcendentals, limited to XY or ZW */
   REG_CLASS_VIRT_VEC2T,
   /* special classes for LOAD - contiguous components */
   REG_CLASS_VIRT_VEC2C,
   REG_CLASS_VIRT_VEC3C,
   NUM_REG_CLASSES,
};
256
/* Concrete register types: a reg_class narrowed to the exact components it
 * occupies (the suffix), e.g. REG_TYPE_VIRT_VEC2_XZ is a two-component
 * virtual register living in the X and Z channels of its base VEC4.
 * Also used as the index into reg_writemask below.
 */
enum reg_type {
   REG_TYPE_VEC4,
   REG_TYPE_VIRT_VEC3_XYZ,
   REG_TYPE_VIRT_VEC3_XYW,
   REG_TYPE_VIRT_VEC3_XZW,
   REG_TYPE_VIRT_VEC3_YZW,
   REG_TYPE_VIRT_VEC2_XY,
   REG_TYPE_VIRT_VEC2_XZ,
   REG_TYPE_VIRT_VEC2_XW,
   REG_TYPE_VIRT_VEC2_YZ,
   REG_TYPE_VIRT_VEC2_YW,
   REG_TYPE_VIRT_VEC2_ZW,
   REG_TYPE_VIRT_SCALAR_X,
   REG_TYPE_VIRT_SCALAR_Y,
   REG_TYPE_VIRT_SCALAR_Z,
   REG_TYPE_VIRT_SCALAR_W,
   REG_TYPE_VIRT_VEC2T_XY,
   REG_TYPE_VIRT_VEC2T_ZW,
   REG_TYPE_VIRT_VEC2C_XY,
   REG_TYPE_VIRT_VEC2C_YZ,
   REG_TYPE_VIRT_VEC2C_ZW,
   REG_TYPE_VIRT_VEC3C_XYZ,
   REG_TYPE_VIRT_VEC3C_YZW,
   NUM_REG_TYPES,
};
282
/* writemask when used as dest
 * bit n corresponds to component n: X = 0x1, Y = 0x2, Z = 0x4, W = 0x8
 * (entries are grouped by mask value, not enum order)
 */
static const uint8_t
reg_writemask[NUM_REG_TYPES] = {
   [REG_TYPE_VEC4] = 0xf,
   [REG_TYPE_VIRT_SCALAR_X] = 0x1,
   [REG_TYPE_VIRT_SCALAR_Y] = 0x2,
   [REG_TYPE_VIRT_VEC2_XY] = 0x3,
   [REG_TYPE_VIRT_VEC2T_XY] = 0x3,
   [REG_TYPE_VIRT_VEC2C_XY] = 0x3,
   [REG_TYPE_VIRT_SCALAR_Z] = 0x4,
   [REG_TYPE_VIRT_VEC2_XZ] = 0x5,
   [REG_TYPE_VIRT_VEC2_YZ] = 0x6,
   [REG_TYPE_VIRT_VEC2C_YZ] = 0x6,
   [REG_TYPE_VIRT_VEC3_XYZ] = 0x7,
   [REG_TYPE_VIRT_VEC3C_XYZ] = 0x7,
   [REG_TYPE_VIRT_SCALAR_W] = 0x8,
   [REG_TYPE_VIRT_VEC2_XW] = 0x9,
   [REG_TYPE_VIRT_VEC2_YW] = 0xa,
   [REG_TYPE_VIRT_VEC3_XYW] = 0xb,
   [REG_TYPE_VIRT_VEC2_ZW] = 0xc,
   [REG_TYPE_VIRT_VEC2T_ZW] = 0xc,
   [REG_TYPE_VIRT_VEC2C_ZW] = 0xc,
   [REG_TYPE_VIRT_VEC3_XZW] = 0xd,
   [REG_TYPE_VIRT_VEC3_YZW] = 0xe,
   [REG_TYPE_VIRT_VEC3C_YZW] = 0xe,
};
309
310static inline int reg_get_type(int virt_reg)
311{
312   return virt_reg % NUM_REG_TYPES;
313}
314
315static inline int reg_get_base(struct etna_compile *c, int virt_reg)
316{
317   /* offset by 1 to avoid reserved position register */
318   if (c->nir->info.stage == MESA_SHADER_FRAGMENT)
319      return (virt_reg / NUM_REG_TYPES + 1) % ETNA_MAX_TEMPS;
320   return virt_reg / NUM_REG_TYPES;
321}
322
/* Register-allocation entry points, implemented in the RA pass source file.
 * NOTE(review): contracts inferred from names/signatures - confirm against
 * the implementation.
 */

/* Build the ra_regs set describing the register classes above. */
struct ra_regs *
etna_ra_setup(void *mem_ctx);

/* Run register allocation for shader, storing results in c (c->g, etc.). */
void
etna_ra_assign(struct etna_compile *c, nir_shader *shader);

/* Finalize allocation; presumably returns the number of temps used. */
unsigned
etna_ra_finish(struct etna_compile *c);
331
332static inline void
333emit_inst(struct etna_compile *c, struct etna_inst *inst)
334{
335   c->code[c->inst_ptr++] = *inst;
336}
337
/* Instruction-emission entry points, implemented in the codegen source file.
 * Each appends hardware instruction(s) to c->code via emit_inst().
 */

/* Emit the ALU instruction(s) for NIR op `op`. */
void
etna_emit_alu(struct etna_compile *c, nir_op op, struct etna_inst_dst dst,
              struct etna_inst_src src[3], bool saturate);

/* Emit a texture sample for texop `op` on sampler `texid`. */
void
etna_emit_tex(struct etna_compile *c, nir_texop op, unsigned texid, unsigned dst_swiz,
              struct etna_inst_dst dst, struct etna_inst_src coord,
              struct etna_inst_src lod_bias, struct etna_inst_src compare);

/* Emit a (conditional) branch to `block`; block->instr mapping is resolved
 * through c->block_ptr.
 */
void
etna_emit_jump(struct etna_compile *c, unsigned block, struct etna_inst_src condition);

/* Emit a fragment discard gated on `condition`. */
void
etna_emit_discard(struct etna_compile *c, struct etna_inst_src condition);
352
353#endif
354