1/*
2 * Copyright (c) 2012-2015 Etnaviv Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 *    Wladimir J. van der Laan <laanwj@gmail.com>
25 */
26
27/* TGSI->Vivante shader ISA conversion */
28
29/* What does the compiler return (see etna_shader_object)?
30 *  1) instruction data
31 *  2) input-to-temporary mapping (fixed for ps)
32 *      *) in case of ps, semantic -> varying id mapping
33 *      *) for each varying: number of components used (r, rg, rgb, rgba)
34 *  3) temporary-to-output mapping (in case of vs, fixed for ps)
35 *  4) for each input/output: possible semantic (position, color, glpointcoord, ...)
36 *  5) immediates base offset, immediates data
37 *  6) used texture units (and possibly the TGSI_TEXTURE_* type); not needed to
38 *     configure the hw, but useful for error checking
39 *  7) enough information to add the z=(z+w)/2.0 necessary for older chips
40 *     (output reg id is enough)
41 *
42 *  Empty shaders are not allowed, should always at least generate a NOP. Also
43 *  if there is a label at the end of the shader, an extra NOP should be
44 *  generated as jump target.
45 *
46 * TODO
47 * * Use an instruction scheduler
48 * * Indirect access to uniforms / temporaries using amode
49 */
50
51#include "etnaviv_compiler.h"
52
53#include "etnaviv_asm.h"
54#include "etnaviv_context.h"
55#include "etnaviv_debug.h"
56#include "etnaviv_uniforms.h"
57#include "etnaviv_util.h"
58
59#include "nir/tgsi_to_nir.h"
60#include "pipe/p_shader_tokens.h"
61#include "tgsi/tgsi_info.h"
62#include "tgsi/tgsi_iterate.h"
63#include "tgsi/tgsi_lowering.h"
64#include "tgsi/tgsi_strings.h"
65#include "tgsi/tgsi_util.h"
66#include "util/u_math.h"
67#include "util/u_memory.h"
68
69#include <fcntl.h>
70#include <stdio.h>
71#include <sys/stat.h>
72#include <sys/types.h>
73
/* Capacity of etna_compile::inner_temp, i.e. the maximum number of inner
 * temporaries etna_compile_get_inner_temp() can hand out. */
#define ETNA_MAX_INNER_TEMPS 2
75
/* Two vec4 rows of float constants; the first component of the second row
 * is 1/(2*pi).
 * NOTE(review): presumably consumed by the SIN/COS lowering -- the user is
 * not visible in this part of the file, confirm before changing values. */
static const float sincos_const[2][4] = {
   {
      2., -1., 4., -4.,
   },
   {
      1. / (2. * M_PI), 0.75, 0.5, 0.0,
   },
};
84
/* Native register description structure */
struct etna_native_reg {
   unsigned valid : 1; /* set once a native register has been assigned */
   unsigned is_tex : 1; /* is texture unit, overrides rgroup */
   unsigned rgroup : 3; /* register group (INST_RGROUP_*) */
   unsigned id : 9; /* register number within the group */
};
92
/* Register description: one entry per TGSI register, filled in by the
 * usage pass and consulted by register assignment. */
struct etna_reg_desc {
   enum tgsi_file_type file; /* IN, OUT, TEMP, ... */
   int idx; /* index into file */
   bool active; /* used in program */
   int first_use; /* instruction id of first use (scope begin), -1 if unused */
   int last_use; /* instruction id of last use (scope end, inclusive), -1 if unused */

   struct etna_native_reg native; /* native register to map to */
   unsigned usage_mask : 4; /* usage, per channel; accumulated from source operands */
   bool has_semantic; /* register has associated TGSI semantic */
   struct tgsi_declaration_semantic semantic; /* TGSI semantic */
   struct tgsi_declaration_interp interp; /* Interpolation type */
};
107
/* Label information structure: a jump target expressed as an instruction
 * index. */
struct etna_compile_label {
   int inst_idx; /* Instruction id that label points to */
};
112
/* Kind of control-flow construct a nesting frame describes */
enum etna_compile_frame_type {
   ETNA_COMPILE_FRAME_IF, /* IF/ELSE/ENDIF */
   ETNA_COMPILE_FRAME_LOOP, /* loop construct */
};
117
/* nesting scope frame (LOOP, IF, ...) during compilation
 *
 * NOTE(review): the lbl_*_idx fields are presumably indices into
 * etna_compile::labels, with the else/endif pair used by IF frames and the
 * loop pair by LOOP frames -- their users are not visible here, confirm.
 */
struct etna_compile_frame {
   enum etna_compile_frame_type type;
   int lbl_else_idx;
   int lbl_endif_idx;
   int lbl_loop_bgn_idx;
   int lbl_loop_end_idx;
};
127
/* Per-TGSI-file view onto the shared etna_compile::decl array */
struct etna_compile_file {
   /* Number of registers in each TGSI file (max register+1) */
   size_t reg_size;
   /* Register descriptions, per register index; points into
    * etna_compile::decl (set up by etna_allocate_decls) */
   struct etna_reg_desc *reg;
};
134
/* Append val to the growable array arr.
 *
 * Relies on two companion variables being in scope next to arr:
 * arr_count (elements in use) and arr_sz (allocated capacity). When the
 * array is full the capacity is doubled, with a minimum of 16 elements.
 *
 * NOTE(review): the realloc() result is stored straight back into arr and
 * is not checked, so an allocation failure loses the old pointer and the
 * following store dereferences NULL.
 */
#define array_insert(arr, val)                          \
   do {                                                 \
      if (arr##_count == arr##_sz) {                    \
         arr##_sz = MAX2(2 * arr##_sz, 16);             \
         arr = realloc(arr, arr##_sz * sizeof(arr[0])); \
      }                                                 \
      arr[arr##_count++] = val;                         \
   } while (0)
143
144
/* scratch area for compiling shader, freed after compilation finishes */
struct etna_compile {
   /* TGSI token stream; re-parsed from the start by each pass */
   const struct tgsi_token *tokens;
   /* NOTE(review): presumably tells the owner whether tokens must be freed
    * when compilation is done -- not used in this part of the file */
   bool free_tokens;

   struct tgsi_shader_info info;

   /* Register descriptions, per TGSI file, per register index */
   struct etna_compile_file file[TGSI_FILE_COUNT];

   /* Keep track of TGSI register declarations */
   struct etna_reg_desc decl[ETNA_MAX_DECL];
   uint total_decls;

   /* Per-instruction flags (indexed by TGSI instruction index) marking dead
    * instructions which are removed in a separate pass */
   bool dead_inst[ETNA_MAX_TOKENS];

   /* Immediate data */
   enum etna_uniform_contents imm_contents[ETNA_MAX_IMM];
   uint32_t imm_data[ETNA_MAX_IMM];
   uint32_t imm_base; /* base of immediates (in 32 bit units) */
   uint32_t imm_size; /* size of immediates (in 32 bit units) */

   /* Next free native register, for register allocation */
   uint32_t next_free_native;

   /* Temporary register for use within translated TGSI instruction,
    * only allocated when needed.
    */
   int inner_temps; /* number of inner temps handed out so far; at most
                       ETNA_MAX_INNER_TEMPS are available */
   struct etna_native_reg inner_temp[ETNA_MAX_INNER_TEMPS];

   /* Fields for handling nested conditionals */
   struct etna_compile_frame frame_stack[ETNA_MAX_DEPTH];
   int frame_sp;
   int lbl_usage[ETNA_MAX_INSTRUCTIONS];

   /* growable label array, see array_insert() for the _count/_sz naming */
   unsigned labels_count, labels_sz;
   struct etna_compile_label *labels;

   unsigned num_loops;

   /* Code generation */
   int inst_ptr; /* current instruction pointer */
   uint32_t code[ETNA_MAX_INSTRUCTIONS * ETNA_INST_SIZE];

   /* I/O */

   /* Number of varyings (PS only) */
   int num_varyings;

   /* GPU hardware specs */
   const struct etna_specs *specs;

   const struct etna_shader_key *key;
};
202
203static struct etna_reg_desc *
204etna_get_dst_reg(struct etna_compile *c, struct tgsi_dst_register dst)
205{
206   return &c->file[dst.File].reg[dst.Index];
207}
208
209static struct etna_reg_desc *
210etna_get_src_reg(struct etna_compile *c, struct tgsi_src_register src)
211{
212   return &c->file[src.File].reg[src.Index];
213}
214
215static struct etna_native_reg
216etna_native_temp(unsigned reg)
217{
218   return (struct etna_native_reg) {
219      .valid = 1,
220      .rgroup = INST_RGROUP_TEMP,
221      .id = reg
222   };
223}
224
225static struct etna_native_reg
226etna_native_internal(unsigned reg)
227{
228   return (struct etna_native_reg) {
229      .valid = 1,
230      .rgroup = INST_RGROUP_INTERNAL,
231      .id = reg
232   };
233}
234
/** Register allocation **/

/* Sort criteria understood by sort_registers() */
enum reg_sort_order {
   FIRST_USE_ASC, /* ascending first_use */
   FIRST_USE_DESC, /* descending first_use */
   LAST_USE_ASC, /* ascending last_use */
   LAST_USE_DESC /* descending last_use */
};
242
/* Augmented register description for sorting */
struct sort_rec {
   struct etna_reg_desc *ptr; /* register being sorted */
   int key; /* sort key; entries are ordered by ascending key */
};
248
249static int
250sort_rec_compar(const struct sort_rec *a, const struct sort_rec *b)
251{
252   if (a->key < b->key)
253      return -1;
254
255   if (a->key > b->key)
256      return 1;
257
258   return 0;
259}
260
261/* create an index on a register set based on certain criteria. */
262static int
263sort_registers(struct sort_rec *sorted, struct etna_compile_file *file,
264               enum reg_sort_order so)
265{
266   struct etna_reg_desc *regs = file->reg;
267   int ptr = 0;
268
269   /* pre-populate keys from active registers */
270   for (int idx = 0; idx < file->reg_size; ++idx) {
271      /* only interested in active registers now; will only assign inactive ones
272       * if no space in active ones */
273      if (regs[idx].active) {
274         sorted[ptr].ptr = &regs[idx];
275
276         switch (so) {
277         case FIRST_USE_ASC:
278            sorted[ptr].key = regs[idx].first_use;
279            break;
280         case LAST_USE_ASC:
281            sorted[ptr].key = regs[idx].last_use;
282            break;
283         case FIRST_USE_DESC:
284            sorted[ptr].key = -regs[idx].first_use;
285            break;
286         case LAST_USE_DESC:
287            sorted[ptr].key = -regs[idx].last_use;
288            break;
289         }
290         ptr++;
291      }
292   }
293
294   /* sort index by key */
295   qsort(sorted, ptr, sizeof(struct sort_rec),
296         (int (*)(const void *, const void *))sort_rec_compar);
297
298   return ptr;
299}
300
301/* Allocate a new, unused, native temp register */
302static struct etna_native_reg
303alloc_new_native_reg(struct etna_compile *c)
304{
305   assert(c->next_free_native < ETNA_MAX_TEMPS);
306   return etna_native_temp(c->next_free_native++);
307}
308
309/* assign TEMPs to native registers */
310static void
311assign_temporaries_to_native(struct etna_compile *c,
312                             struct etna_compile_file *file)
313{
314   struct etna_reg_desc *temps = file->reg;
315
316   for (int idx = 0; idx < file->reg_size; ++idx)
317      temps[idx].native = alloc_new_native_reg(c);
318}
319
320/* assign inputs and outputs to temporaries
321 * Gallium assumes that the hardware has separate registers for taking input and
322 * output, however Vivante GPUs use temporaries both for passing in inputs and
323 * passing back outputs.
324 * Try to re-use temporary registers where possible. */
325static void
326assign_inouts_to_temporaries(struct etna_compile *c, uint file)
327{
328   bool mode_inputs = (file == TGSI_FILE_INPUT);
329   int inout_ptr = 0, num_inouts;
330   int temp_ptr = 0, num_temps;
331   struct sort_rec inout_order[ETNA_MAX_TEMPS];
332   struct sort_rec temps_order[ETNA_MAX_TEMPS];
333   num_inouts = sort_registers(inout_order, &c->file[file],
334                               mode_inputs ? LAST_USE_ASC : FIRST_USE_ASC);
335   num_temps = sort_registers(temps_order, &c->file[TGSI_FILE_TEMPORARY],
336                              mode_inputs ? FIRST_USE_ASC : LAST_USE_ASC);
337
338   while (inout_ptr < num_inouts && temp_ptr < num_temps) {
339      struct etna_reg_desc *inout = inout_order[inout_ptr].ptr;
340      struct etna_reg_desc *temp = temps_order[temp_ptr].ptr;
341
342      if (!inout->active || inout->native.valid) { /* Skip if already a native register assigned */
343         inout_ptr++;
344         continue;
345      }
346
347      /* last usage of this input is before or in same instruction of first use
348       * of temporary? */
349      if (mode_inputs ? (inout->last_use <= temp->first_use)
350                      : (inout->first_use >= temp->last_use)) {
351         /* assign it and advance to next input */
352         inout->native = temp->native;
353         inout_ptr++;
354      }
355
356      temp_ptr++;
357   }
358
359   /* if we couldn't reuse current ones, allocate new temporaries */
360   for (inout_ptr = 0; inout_ptr < num_inouts; ++inout_ptr) {
361      struct etna_reg_desc *inout = inout_order[inout_ptr].ptr;
362
363      if (inout->active && !inout->native.valid)
364         inout->native = alloc_new_native_reg(c);
365   }
366}
367
368/* Allocate an immediate with a certain value and return the index. If
369 * there is already an immediate with that value, return that.
370 */
371static struct etna_inst_src
372alloc_imm(struct etna_compile *c, enum etna_uniform_contents contents,
373          uint32_t value)
374{
375   int idx;
376
377   /* Could use a hash table to speed this up */
378   for (idx = 0; idx < c->imm_size; ++idx) {
379      if (c->imm_contents[idx] == contents && c->imm_data[idx] == value)
380         break;
381   }
382
383   /* look if there is an unused slot */
384   if (idx == c->imm_size) {
385      for (idx = 0; idx < c->imm_size; ++idx) {
386         if (c->imm_contents[idx] == ETNA_UNIFORM_UNUSED)
387            break;
388      }
389   }
390
391   /* allocate new immediate */
392   if (idx == c->imm_size) {
393      assert(c->imm_size < ETNA_MAX_IMM);
394      idx = c->imm_size++;
395      c->imm_data[idx] = value;
396      c->imm_contents[idx] = contents;
397   }
398
399   /* swizzle so that component with value is returned in all components */
400   idx += c->imm_base;
401   struct etna_inst_src imm_src = {
402      .use = 1,
403      .rgroup = INST_RGROUP_UNIFORM_0,
404      .reg = idx / 4,
405      .swiz = INST_SWIZ_BROADCAST(idx & 3)
406   };
407
408   return imm_src;
409}
410
411static struct etna_inst_src
412alloc_imm_u32(struct etna_compile *c, uint32_t value)
413{
414   return alloc_imm(c, ETNA_UNIFORM_CONSTANT, value);
415}
416
417static struct etna_inst_src
418alloc_imm_vec4u(struct etna_compile *c, enum etna_uniform_contents contents,
419                const uint32_t *values)
420{
421   struct etna_inst_src imm_src = { };
422   int idx, i;
423
424   for (idx = 0; idx + 3 < c->imm_size; idx += 4) {
425      /* What if we can use a uniform with a different swizzle? */
426      for (i = 0; i < 4; i++)
427         if (c->imm_contents[idx + i] != contents || c->imm_data[idx + i] != values[i])
428            break;
429      if (i == 4)
430         break;
431   }
432
433   if (idx + 3 >= c->imm_size) {
434      idx = align(c->imm_size, 4);
435      assert(idx + 4 <= ETNA_MAX_IMM);
436
437      for (i = 0; i < 4; i++) {
438         c->imm_data[idx + i] = values[i];
439         c->imm_contents[idx + i] = contents;
440      }
441
442      c->imm_size = idx + 4;
443   }
444
445   assert((c->imm_base & 3) == 0);
446   idx += c->imm_base;
447   imm_src.use = 1;
448   imm_src.rgroup = INST_RGROUP_UNIFORM_0;
449   imm_src.reg = idx / 4;
450   imm_src.swiz = INST_SWIZ_IDENTITY;
451
452   return imm_src;
453}
454
455static uint32_t
456get_imm_u32(struct etna_compile *c, const struct etna_inst_src *imm,
457            unsigned swiz_idx)
458{
459   assert(imm->use == 1 && imm->rgroup == INST_RGROUP_UNIFORM_0);
460   unsigned int idx = imm->reg * 4 + ((imm->swiz >> (swiz_idx * 2)) & 3);
461
462   return c->imm_data[idx];
463}
464
465/* Allocate immediate with a certain float value. If there is already an
466 * immediate with that value, return that.
467 */
468static struct etna_inst_src
469alloc_imm_f32(struct etna_compile *c, float value)
470{
471   return alloc_imm_u32(c, fui(value));
472}
473
474static struct etna_inst_src
475etna_imm_vec4f(struct etna_compile *c, const float *vec4)
476{
477   uint32_t val[4];
478
479   for (int i = 0; i < 4; i++)
480      val[i] = fui(vec4[i]);
481
482   return alloc_imm_vec4u(c, ETNA_UNIFORM_CONSTANT, val);
483}
484
485/* Pass -- check register file declarations and immediates */
486static void
487etna_compile_parse_declarations(struct etna_compile *c)
488{
489   struct tgsi_parse_context ctx = { };
490   ASSERTED unsigned status = tgsi_parse_init(&ctx, c->tokens);
491   assert(status == TGSI_PARSE_OK);
492
493   while (!tgsi_parse_end_of_tokens(&ctx)) {
494      tgsi_parse_token(&ctx);
495
496      switch (ctx.FullToken.Token.Type) {
497      case TGSI_TOKEN_TYPE_IMMEDIATE: {
498         /* immediates are handled differently from other files; they are
499          * not declared explicitly, and always add four components */
500         const struct tgsi_full_immediate *imm = &ctx.FullToken.FullImmediate;
501         assert(c->imm_size <= (ETNA_MAX_IMM - 4));
502
503         for (int i = 0; i < 4; ++i) {
504            unsigned idx = c->imm_size++;
505
506            c->imm_data[idx] = imm->u[i].Uint;
507            c->imm_contents[idx] = ETNA_UNIFORM_CONSTANT;
508         }
509      }
510      break;
511      }
512   }
513
514   tgsi_parse_free(&ctx);
515}
516
517/* Allocate register declarations for the registers in all register files */
518static void
519etna_allocate_decls(struct etna_compile *c)
520{
521   uint idx = 0;
522
523   for (int x = 0; x < TGSI_FILE_COUNT; ++x) {
524      c->file[x].reg = &c->decl[idx];
525      c->file[x].reg_size = c->info.file_max[x] + 1;
526
527      for (int sub = 0; sub < c->file[x].reg_size; ++sub) {
528         c->decl[idx].file = x;
529         c->decl[idx].idx = sub;
530         idx++;
531      }
532   }
533
534   c->total_decls = idx;
535}
536
537/* Pass -- check and record usage of temporaries, inputs, outputs */
538static void
539etna_compile_pass_check_usage(struct etna_compile *c)
540{
541   struct tgsi_parse_context ctx = { };
542   ASSERTED unsigned status = tgsi_parse_init(&ctx, c->tokens);
543   assert(status == TGSI_PARSE_OK);
544
545   for (int idx = 0; idx < c->total_decls; ++idx) {
546      c->decl[idx].active = false;
547      c->decl[idx].first_use = c->decl[idx].last_use = -1;
548   }
549
550   int inst_idx = 0;
551   while (!tgsi_parse_end_of_tokens(&ctx)) {
552      tgsi_parse_token(&ctx);
553      /* find out max register #s used
554       * For every register mark first and last instruction index where it's
555       * used this allows finding ranges where the temporary can be borrowed
556       * as input and/or output register
557       *
558       * XXX in the case of loops this needs special care, or even be completely
559       * disabled, as
560       * the last usage of a register inside a loop means it can still be used
561       * on next loop
562       * iteration (execution is no longer * chronological). The register can
563       * only be
564       * declared "free" after the loop finishes.
565       *
566       * Same for inputs: the first usage of a register inside a loop doesn't
567       * mean that the register
568       * won't have been overwritten in previous iteration. The register can
569       * only be declared free before the loop
570       * starts.
571       * The proper way would be to do full dominator / post-dominator analysis
572       * (especially with more complicated
573       * control flow such as direct branch instructions) but not for now...
574       */
575      switch (ctx.FullToken.Token.Type) {
576      case TGSI_TOKEN_TYPE_DECLARATION: {
577         /* Declaration: fill in file details */
578         const struct tgsi_full_declaration *decl = &ctx.FullToken.FullDeclaration;
579         struct etna_compile_file *file = &c->file[decl->Declaration.File];
580
581         for (int idx = decl->Range.First; idx <= decl->Range.Last; ++idx) {
582            file->reg[idx].usage_mask = 0; // we'll compute this ourselves
583            file->reg[idx].has_semantic = decl->Declaration.Semantic;
584            file->reg[idx].semantic = decl->Semantic;
585            file->reg[idx].interp = decl->Interp;
586         }
587      } break;
588      case TGSI_TOKEN_TYPE_INSTRUCTION: {
589         /* Instruction: iterate over operands of instruction */
590         const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction;
591
592         /* iterate over destination registers */
593         for (int idx = 0; idx < inst->Instruction.NumDstRegs; ++idx) {
594            struct etna_reg_desc *reg_desc = &c->file[inst->Dst[idx].Register.File].reg[inst->Dst[idx].Register.Index];
595
596            if (reg_desc->first_use == -1)
597               reg_desc->first_use = inst_idx;
598
599            reg_desc->last_use = inst_idx;
600            reg_desc->active = true;
601         }
602
603         /* iterate over source registers */
604         for (int idx = 0; idx < inst->Instruction.NumSrcRegs; ++idx) {
605            struct etna_reg_desc *reg_desc = &c->file[inst->Src[idx].Register.File].reg[inst->Src[idx].Register.Index];
606
607            if (reg_desc->first_use == -1)
608               reg_desc->first_use = inst_idx;
609
610            reg_desc->last_use = inst_idx;
611            reg_desc->active = true;
612            /* accumulate usage mask for register, this is used to determine how
613             * many slots for varyings
614             * should be allocated */
615            reg_desc->usage_mask |= tgsi_util_get_inst_usage_mask(inst, idx);
616         }
617         inst_idx += 1;
618      } break;
619      default:
620         break;
621      }
622   }
623
624   tgsi_parse_free(&ctx);
625}
626
627/* assign inputs that need to be assigned to specific registers */
628static void
629assign_special_inputs(struct etna_compile *c)
630{
631   if (c->info.processor == PIPE_SHADER_FRAGMENT) {
632      /* never assign t0 as it is the position output, start assigning at t1 */
633      c->next_free_native = 1;
634
635      for (int idx = 0; idx < c->total_decls; ++idx) {
636         struct etna_reg_desc *reg = &c->decl[idx];
637
638         if (!reg->active)
639            continue;
640
641         /* hardwire TGSI_SEMANTIC_POSITION (input and output) to t0 */
642         if (reg->semantic.Name == TGSI_SEMANTIC_POSITION)
643            reg->native = etna_native_temp(0);
644
645         /* hardwire TGSI_SEMANTIC_FACE to i0 */
646         if (reg->semantic.Name == TGSI_SEMANTIC_FACE)
647            reg->native = etna_native_internal(0);
648      }
649   }
650}
651
652/* Check that a move instruction does not swizzle any of the components
653 * that it writes.
654 */
655static bool
656etna_mov_check_no_swizzle(const struct tgsi_dst_register dst,
657                          const struct tgsi_src_register src)
658{
659   return (!(dst.WriteMask & TGSI_WRITEMASK_X) || src.SwizzleX == TGSI_SWIZZLE_X) &&
660          (!(dst.WriteMask & TGSI_WRITEMASK_Y) || src.SwizzleY == TGSI_SWIZZLE_Y) &&
661          (!(dst.WriteMask & TGSI_WRITEMASK_Z) || src.SwizzleZ == TGSI_SWIZZLE_Z) &&
662          (!(dst.WriteMask & TGSI_WRITEMASK_W) || src.SwizzleW == TGSI_SWIZZLE_W);
663}
664
665/* Pass -- optimize outputs
666 * Mesa tends to generate code like this at the end if their shaders
667 *   MOV OUT[1], TEMP[2]
668 *   MOV OUT[0], TEMP[0]
669 *   MOV OUT[2], TEMP[1]
670 * Recognize if
671 * a) there is only a single assignment to an output register and
672 * b) the temporary is not used after that
673 * Also recognize direct assignment of IN to OUT (passthrough)
674 **/
675static void
676etna_compile_pass_optimize_outputs(struct etna_compile *c)
677{
678   struct tgsi_parse_context ctx = { };
679   int inst_idx = 0;
680   ASSERTED unsigned status = tgsi_parse_init(&ctx, c->tokens);
681   assert(status == TGSI_PARSE_OK);
682
683   while (!tgsi_parse_end_of_tokens(&ctx)) {
684      tgsi_parse_token(&ctx);
685
686      switch (ctx.FullToken.Token.Type) {
687      case TGSI_TOKEN_TYPE_INSTRUCTION: {
688         const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction;
689
690         /* iterate over operands */
691         switch (inst->Instruction.Opcode) {
692         case TGSI_OPCODE_MOV: {
693            /* We are only interested in eliminating MOVs which write to
694             * the shader outputs. Test for this early. */
695            if (inst->Dst[0].Register.File != TGSI_FILE_OUTPUT)
696               break;
697            /* Elimination of a MOV must have no visible effect on the
698             * resulting shader: this means the MOV must not swizzle or
699             * saturate, and its source must not have the negate or
700             * absolute modifiers. */
701            if (!etna_mov_check_no_swizzle(inst->Dst[0].Register, inst->Src[0].Register) ||
702                inst->Instruction.Saturate || inst->Src[0].Register.Negate ||
703                inst->Src[0].Register.Absolute)
704               break;
705
706            uint out_idx = inst->Dst[0].Register.Index;
707            uint in_idx = inst->Src[0].Register.Index;
708            /* assignment of temporary to output --
709             * and the output doesn't yet have a native register assigned
710             * and the last use of the temporary is this instruction
711             * and the MOV does not do a swizzle
712             */
713            if (inst->Src[0].Register.File == TGSI_FILE_TEMPORARY &&
714                !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid &&
715                c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use == inst_idx) {
716               c->file[TGSI_FILE_OUTPUT].reg[out_idx].native =
717                  c->file[TGSI_FILE_TEMPORARY].reg[in_idx].native;
718               /* prevent temp from being re-used for the rest of the shader */
719               c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use = ETNA_MAX_TOKENS;
720               /* mark this MOV instruction as a no-op */
721               c->dead_inst[inst_idx] = true;
722            }
723            /* direct assignment of input to output --
724             * and the input or output doesn't yet have a native register
725             * assigned
726             * and the output is only used in this instruction,
727             * allocate a new register, and associate both input and output to
728             * it
729             * and the MOV does not do a swizzle
730             */
731            if (inst->Src[0].Register.File == TGSI_FILE_INPUT &&
732                !c->file[TGSI_FILE_INPUT].reg[in_idx].native.valid &&
733                !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid &&
734                c->file[TGSI_FILE_OUTPUT].reg[out_idx].last_use == inst_idx &&
735                c->file[TGSI_FILE_OUTPUT].reg[out_idx].first_use == inst_idx) {
736               c->file[TGSI_FILE_OUTPUT].reg[out_idx].native =
737                  c->file[TGSI_FILE_INPUT].reg[in_idx].native =
738                     alloc_new_native_reg(c);
739               /* mark this MOV instruction as a no-op */
740               c->dead_inst[inst_idx] = true;
741            }
742         } break;
743         default:;
744         }
745         inst_idx += 1;
746      } break;
747      }
748   }
749
750   tgsi_parse_free(&ctx);
751}
752
753/* Get a temporary to be used within one TGSI instruction.
754 * The first time that this function is called the temporary will be allocated.
755 * Each call to this function will return the same temporary.
756 */
757static struct etna_native_reg
758etna_compile_get_inner_temp(struct etna_compile *c)
759{
760   int inner_temp = c->inner_temps;
761
762   if (inner_temp < ETNA_MAX_INNER_TEMPS) {
763      if (!c->inner_temp[inner_temp].valid)
764         c->inner_temp[inner_temp] = alloc_new_native_reg(c);
765
766      /* alloc_new_native_reg() handles lack of registers */
767      c->inner_temps += 1;
768   } else {
769      BUG("Too many inner temporaries (%i) requested in one instruction",
770          inner_temp + 1);
771   }
772
773   return c->inner_temp[inner_temp];
774}
775
776static struct etna_inst_dst
777etna_native_to_dst(struct etna_native_reg native, unsigned comps)
778{
779   /* Can only assign to temporaries */
780   assert(native.valid && !native.is_tex && native.rgroup == INST_RGROUP_TEMP);
781
782   struct etna_inst_dst rv = {
783      .write_mask = comps,
784      .use = 1,
785      .reg = native.id,
786   };
787
788   return rv;
789}
790
791static struct etna_inst_src
792etna_native_to_src(struct etna_native_reg native, uint32_t swizzle)
793{
794   assert(native.valid && !native.is_tex);
795
796   struct etna_inst_src rv = {
797      .use = 1,
798      .swiz = swizzle,
799      .rgroup = native.rgroup,
800      .reg = native.id,
801      .amode = INST_AMODE_DIRECT,
802   };
803
804   return rv;
805}
806
807static inline struct etna_inst_src
808negate(struct etna_inst_src src)
809{
810   src.neg = !src.neg;
811
812   return src;
813}
814
815static inline struct etna_inst_src
816absolute(struct etna_inst_src src)
817{
818   src.abs = 1;
819
820   return src;
821}
822
823static inline struct etna_inst_src
824swizzle(struct etna_inst_src src, unsigned swizzle)
825{
826   src.swiz = inst_swiz_compose(src.swiz, swizzle);
827
828   return src;
829}
830
831/* Emit instruction and append it to program */
832static void
833emit_inst(struct etna_compile *c, struct etna_inst *inst)
834{
835   assert(c->inst_ptr <= ETNA_MAX_INSTRUCTIONS);
836
837   /* Check for uniform conflicts (each instruction can only access one
838    * uniform),
839    * if detected, use an intermediate temporary */
840   unsigned uni_rgroup = -1;
841   unsigned uni_reg = -1;
842
843   for (int src = 0; src < ETNA_NUM_SRC; ++src) {
844      if (inst->src[src].rgroup == INST_RGROUP_INTERNAL &&
845          c->info.processor == PIPE_SHADER_FRAGMENT &&
846          c->key->front_ccw) {
847         struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c);
848
849         /*
850          * Set temporary register to 0.0 or 1.0 based on the gl_FrontFacing
851          * configuration (CW or CCW).
852          */
853         etna_assemble(&c->code[c->inst_ptr * 4], &(struct etna_inst) {
854            .opcode = INST_OPCODE_SET,
855            .cond = INST_CONDITION_NE,
856            .dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y |
857                                                  INST_COMPS_Z | INST_COMPS_W),
858            .src[0] = inst->src[src],
859            .src[1] = alloc_imm_f32(c, 1.0f)
860         });
861         c->inst_ptr++;
862
863         /* Modify instruction to use temp register instead of uniform */
864         inst->src[src].use = 1;
865         inst->src[src].rgroup = INST_RGROUP_TEMP;
866         inst->src[src].reg = inner_temp.id;
867         inst->src[src].swiz = INST_SWIZ_IDENTITY; /* swizzling happens on MOV */
868         inst->src[src].neg = 0; /* negation happens on MOV */
869         inst->src[src].abs = 0; /* abs happens on MOV */
870         inst->src[src].amode = 0; /* amode effects happen on MOV */
871      } else if (etna_rgroup_is_uniform(inst->src[src].rgroup)) {
872         if (uni_reg == -1) { /* first unique uniform used */
873            uni_rgroup = inst->src[src].rgroup;
874            uni_reg = inst->src[src].reg;
875         } else { /* second or later; check that it is a re-use */
876            if (uni_rgroup != inst->src[src].rgroup ||
877                uni_reg != inst->src[src].reg) {
878               DBG_F(ETNA_DBG_COMPILER_MSGS, "perf warning: instruction that "
879                                             "accesses different uniforms, "
880                                             "need to generate extra MOV");
881               struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c);
882
883               /* Generate move instruction to temporary */
884               etna_assemble(&c->code[c->inst_ptr * 4], &(struct etna_inst) {
885                  .opcode = INST_OPCODE_MOV,
886                  .dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y |
887                                                        INST_COMPS_Z | INST_COMPS_W),
888                  .src[2] = inst->src[src]
889               });
890
891               c->inst_ptr++;
892
893               /* Modify instruction to use temp register instead of uniform */
894               inst->src[src].use = 1;
895               inst->src[src].rgroup = INST_RGROUP_TEMP;
896               inst->src[src].reg = inner_temp.id;
897               inst->src[src].swiz = INST_SWIZ_IDENTITY; /* swizzling happens on MOV */
898               inst->src[src].neg = 0; /* negation happens on MOV */
899               inst->src[src].abs = 0; /* abs happens on MOV */
900               inst->src[src].amode = 0; /* amode effects happen on MOV */
901            }
902         }
903      }
904   }
905
906   /* Finally assemble the actual instruction */
907   etna_assemble(&c->code[c->inst_ptr * 4], inst);
908   c->inst_ptr++;
909}
910
911static unsigned int
912etna_amode(struct tgsi_ind_register indirect)
913{
914   assert(indirect.File == TGSI_FILE_ADDRESS);
915   assert(indirect.Index == 0);
916
917   switch (indirect.Swizzle) {
918   case TGSI_SWIZZLE_X:
919      return INST_AMODE_ADD_A_X;
920   case TGSI_SWIZZLE_Y:
921      return INST_AMODE_ADD_A_Y;
922   case TGSI_SWIZZLE_Z:
923      return INST_AMODE_ADD_A_Z;
924   case TGSI_SWIZZLE_W:
925      return INST_AMODE_ADD_A_W;
926   default:
927      assert(!"Invalid swizzle");
928   }
929
930   unreachable("bad swizzle");
931}
932
933/* convert destination operand */
934static struct etna_inst_dst
935convert_dst(struct etna_compile *c, const struct tgsi_full_dst_register *in)
936{
937   struct etna_inst_dst rv = {
938      /// XXX .amode
939      .write_mask = in->Register.WriteMask,
940   };
941
942   if (in->Register.File == TGSI_FILE_ADDRESS) {
943      assert(in->Register.Index == 0);
944      rv.reg = in->Register.Index;
945      rv.use = 0;
946   } else {
947      rv = etna_native_to_dst(etna_get_dst_reg(c, in->Register)->native,
948                              in->Register.WriteMask);
949   }
950
951   if (in->Register.Indirect)
952      rv.amode = etna_amode(in->Indirect);
953
954   return rv;
955}
956
957/* convert texture operand */
958static struct etna_inst_tex
959convert_tex(struct etna_compile *c, const struct tgsi_full_src_register *in,
960            const struct tgsi_instruction_texture *tex)
961{
962   struct etna_native_reg native_reg = etna_get_src_reg(c, in->Register)->native;
963   struct etna_inst_tex rv = {
964      // XXX .amode (to allow for an array of samplers?)
965      .swiz = INST_SWIZ_IDENTITY
966   };
967
968   assert(native_reg.is_tex && native_reg.valid);
969   rv.id = native_reg.id;
970
971   return rv;
972}
973
974/* convert source operand */
975static struct etna_inst_src
976etna_create_src(const struct tgsi_full_src_register *tgsi,
977                const struct etna_native_reg *native)
978{
979   const struct tgsi_src_register *reg = &tgsi->Register;
980   struct etna_inst_src rv = {
981      .use = 1,
982      .swiz = INST_SWIZ(reg->SwizzleX, reg->SwizzleY, reg->SwizzleZ, reg->SwizzleW),
983      .neg = reg->Negate,
984      .abs = reg->Absolute,
985      .rgroup = native->rgroup,
986      .reg = native->id,
987      .amode = INST_AMODE_DIRECT,
988   };
989
990   assert(native->valid && !native->is_tex);
991
992   if (reg->Indirect)
993      rv.amode = etna_amode(tgsi->Indirect);
994
995   return rv;
996}
997
998static struct etna_inst_src
999etna_mov_src_to_temp(struct etna_compile *c, struct etna_inst_src src,
1000                     struct etna_native_reg temp)
1001{
1002   struct etna_inst mov = { };
1003
1004   mov.opcode = INST_OPCODE_MOV;
1005   mov.sat = 0;
1006   mov.dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
1007                                      INST_COMPS_Z | INST_COMPS_W);
1008   mov.src[2] = src;
1009   emit_inst(c, &mov);
1010
1011   src.swiz = INST_SWIZ_IDENTITY;
1012   src.neg = src.abs = 0;
1013   src.rgroup = temp.rgroup;
1014   src.reg = temp.id;
1015
1016   return src;
1017}
1018
1019static struct etna_inst_src
1020etna_mov_src(struct etna_compile *c, struct etna_inst_src src)
1021{
1022   struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1023
1024   return etna_mov_src_to_temp(c, src, temp);
1025}
1026
1027static bool
1028etna_src_uniforms_conflict(struct etna_inst_src a, struct etna_inst_src b)
1029{
1030   return etna_rgroup_is_uniform(a.rgroup) &&
1031          etna_rgroup_is_uniform(b.rgroup) &&
1032          (a.rgroup != b.rgroup || a.reg != b.reg);
1033}
1034
1035/* create a new label */
1036static unsigned int
1037alloc_new_label(struct etna_compile *c)
1038{
1039   struct etna_compile_label label = {
1040      .inst_idx = -1, /* start by point to no specific instruction */
1041   };
1042
1043   array_insert(c->labels, label);
1044
1045   return c->labels_count - 1;
1046}
1047
1048/* place label at current instruction pointer */
1049static void
1050label_place(struct etna_compile *c, struct etna_compile_label *label)
1051{
1052   label->inst_idx = c->inst_ptr;
1053}
1054
1055/* mark label use at current instruction.
1056 * target of the label will be filled in in the marked instruction's src2.imm
1057 * slot as soon
1058 * as the value becomes known.
1059 */
1060static void
1061label_mark_use(struct etna_compile *c, int lbl_idx)
1062{
1063   assert(c->inst_ptr < ETNA_MAX_INSTRUCTIONS);
1064   c->lbl_usage[c->inst_ptr] = lbl_idx;
1065}
1066
1067/* walk the frame stack and return first frame with matching type */
1068static struct etna_compile_frame *
1069find_frame(struct etna_compile *c, enum etna_compile_frame_type type)
1070{
1071   for (int sp = c->frame_sp; sp >= 0; sp--)
1072      if (c->frame_stack[sp].type == type)
1073         return &c->frame_stack[sp];
1074
1075   assert(0);
1076   return NULL;
1077}
1078
/* One entry of the TGSI opcode translation table. */
struct instr_translater {
   /* handler that emits the native instruction(s) for one TGSI instruction;
    * "src" holds the already converted source operands */
   void (*fxn)(const struct instr_translater *t, struct etna_compile *c,
               const struct tgsi_full_instruction *inst,
               struct etna_inst_src *src);
   /* TGSI_OPCODE_* value this entry is registered under */
   unsigned int tgsi_opc;
   /* native opcode used by handlers that emit t->opc */
   uint8_t opc;

   /* tgsi src -> etna src mapping: src[i] is the native source slot that
    * TGSI source i is stored in; trans_instr requires it to differ from -1
    * for every source the opcode has */
   int src[3];

   /* native condition code copied into the emitted instruction */
   unsigned int cond;
};
1091
1092static void
1093trans_instr(const struct instr_translater *t, struct etna_compile *c,
1094            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1095{
1096   const struct tgsi_opcode_info *info = tgsi_get_opcode_info(inst->Instruction.Opcode);
1097   struct etna_inst instr = { };
1098
1099   instr.opcode = t->opc;
1100   instr.cond = t->cond;
1101   instr.sat = inst->Instruction.Saturate;
1102
1103   assert(info->num_dst <= 1);
1104   if (info->num_dst)
1105      instr.dst = convert_dst(c, &inst->Dst[0]);
1106
1107   assert(info->num_src <= ETNA_NUM_SRC);
1108
1109   for (unsigned i = 0; i < info->num_src; i++) {
1110      int swizzle = t->src[i];
1111
1112      assert(swizzle != -1);
1113      instr.src[swizzle] = src[i];
1114   }
1115
1116   emit_inst(c, &instr);
1117}
1118
1119static void
1120trans_min_max(const struct instr_translater *t, struct etna_compile *c,
1121              const struct tgsi_full_instruction *inst,
1122              struct etna_inst_src *src)
1123{
1124   emit_inst(c, &(struct etna_inst) {
1125      .opcode = INST_OPCODE_SELECT,
1126       .cond = t->cond,
1127       .sat = inst->Instruction.Saturate,
1128       .dst = convert_dst(c, &inst->Dst[0]),
1129       .src[0] = src[0],
1130       .src[1] = src[1],
1131       .src[2] = src[0],
1132    });
1133}
1134
1135static void
1136trans_if(const struct instr_translater *t, struct etna_compile *c,
1137         const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1138{
1139   struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++];
1140   struct etna_inst_src imm_0 = alloc_imm_f32(c, 0.0f);
1141
1142   /* push IF to stack */
1143   f->type = ETNA_COMPILE_FRAME_IF;
1144   /* create "else" label */
1145   f->lbl_else_idx = alloc_new_label(c);
1146   f->lbl_endif_idx = -1;
1147
1148   /* We need to avoid the emit_inst() below becoming two instructions */
1149   if (etna_src_uniforms_conflict(src[0], imm_0))
1150      src[0] = etna_mov_src(c, src[0]);
1151
1152   /* mark position in instruction stream of label reference so that it can be
1153    * filled in in next pass */
1154   label_mark_use(c, f->lbl_else_idx);
1155
1156   /* create conditional branch to label if src0 EQ 0 */
1157   emit_inst(c, &(struct etna_inst){
1158      .opcode = INST_OPCODE_BRANCH,
1159      .cond = INST_CONDITION_EQ,
1160      .src[0] = src[0],
1161      .src[1] = imm_0,
1162    /* imm is filled in later */
1163   });
1164}
1165
1166static void
1167trans_else(const struct instr_translater *t, struct etna_compile *c,
1168           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1169{
1170   assert(c->frame_sp > 0);
1171   struct etna_compile_frame *f = &c->frame_stack[c->frame_sp - 1];
1172   assert(f->type == ETNA_COMPILE_FRAME_IF);
1173
1174   /* create "endif" label, and branch to endif label */
1175   f->lbl_endif_idx = alloc_new_label(c);
1176   label_mark_use(c, f->lbl_endif_idx);
1177   emit_inst(c, &(struct etna_inst) {
1178      .opcode = INST_OPCODE_BRANCH,
1179      .cond = INST_CONDITION_TRUE,
1180      /* imm is filled in later */
1181   });
1182
1183   /* mark "else" label at this position in instruction stream */
1184   label_place(c, &c->labels[f->lbl_else_idx]);
1185}
1186
1187static void
1188trans_endif(const struct instr_translater *t, struct etna_compile *c,
1189            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1190{
1191   assert(c->frame_sp > 0);
1192   struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp];
1193   assert(f->type == ETNA_COMPILE_FRAME_IF);
1194
1195   /* assign "endif" or "else" (if no ELSE) label to current position in
1196    * instruction stream, pop IF */
1197   if (f->lbl_endif_idx != -1)
1198      label_place(c, &c->labels[f->lbl_endif_idx]);
1199   else
1200      label_place(c, &c->labels[f->lbl_else_idx]);
1201}
1202
1203static void
1204trans_loop_bgn(const struct instr_translater *t, struct etna_compile *c,
1205               const struct tgsi_full_instruction *inst,
1206               struct etna_inst_src *src)
1207{
1208   struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++];
1209
1210   /* push LOOP to stack */
1211   f->type = ETNA_COMPILE_FRAME_LOOP;
1212   f->lbl_loop_bgn_idx = alloc_new_label(c);
1213   f->lbl_loop_end_idx = alloc_new_label(c);
1214
1215   label_place(c, &c->labels[f->lbl_loop_bgn_idx]);
1216
1217   c->num_loops++;
1218}
1219
1220static void
1221trans_loop_end(const struct instr_translater *t, struct etna_compile *c,
1222               const struct tgsi_full_instruction *inst,
1223               struct etna_inst_src *src)
1224{
1225   assert(c->frame_sp > 0);
1226   struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp];
1227   assert(f->type == ETNA_COMPILE_FRAME_LOOP);
1228
1229   /* mark position in instruction stream of label reference so that it can be
1230    * filled in in next pass */
1231   label_mark_use(c, f->lbl_loop_bgn_idx);
1232
1233   /* create branch to loop_bgn label */
1234   emit_inst(c, &(struct etna_inst) {
1235      .opcode = INST_OPCODE_BRANCH,
1236      .cond = INST_CONDITION_TRUE,
1237      .src[0] = src[0],
1238      /* imm is filled in later */
1239   });
1240
1241   label_place(c, &c->labels[f->lbl_loop_end_idx]);
1242}
1243
1244static void
1245trans_brk(const struct instr_translater *t, struct etna_compile *c,
1246          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1247{
1248   assert(c->frame_sp > 0);
1249   struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP);
1250
1251   /* mark position in instruction stream of label reference so that it can be
1252    * filled in in next pass */
1253   label_mark_use(c, f->lbl_loop_end_idx);
1254
1255   /* create branch to loop_end label */
1256   emit_inst(c, &(struct etna_inst) {
1257      .opcode = INST_OPCODE_BRANCH,
1258      .cond = INST_CONDITION_TRUE,
1259      .src[0] = src[0],
1260      /* imm is filled in later */
1261   });
1262}
1263
1264static void
1265trans_cont(const struct instr_translater *t, struct etna_compile *c,
1266           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1267{
1268   assert(c->frame_sp > 0);
1269   struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP);
1270
1271   /* mark position in instruction stream of label reference so that it can be
1272    * filled in in next pass */
1273   label_mark_use(c, f->lbl_loop_bgn_idx);
1274
1275   /* create branch to loop_end label */
1276   emit_inst(c, &(struct etna_inst) {
1277      .opcode = INST_OPCODE_BRANCH,
1278      .cond = INST_CONDITION_TRUE,
1279      .src[0] = src[0],
1280      /* imm is filled in later */
1281   });
1282}
1283
1284static void
1285trans_deriv(const struct instr_translater *t, struct etna_compile *c,
1286            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1287{
1288   emit_inst(c, &(struct etna_inst) {
1289      .opcode = t->opc,
1290      .sat = inst->Instruction.Saturate,
1291      .dst = convert_dst(c, &inst->Dst[0]),
1292      .src[0] = src[0],
1293      .src[2] = src[0],
1294   });
1295}
1296
1297static void
1298trans_arl(const struct instr_translater *t, struct etna_compile *c,
1299          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1300{
1301   struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1302   struct etna_inst arl = { };
1303   struct etna_inst_dst dst;
1304
1305   dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z |
1306                                  INST_COMPS_W);
1307
1308   if (c->specs->has_sign_floor_ceil) {
1309      struct etna_inst floor = { };
1310
1311      floor.opcode = INST_OPCODE_FLOOR;
1312      floor.src[2] = src[0];
1313      floor.dst = dst;
1314
1315      emit_inst(c, &floor);
1316   } else {
1317      struct etna_inst floor[2] = { };
1318
1319      floor[0].opcode = INST_OPCODE_FRC;
1320      floor[0].sat = inst->Instruction.Saturate;
1321      floor[0].dst = dst;
1322      floor[0].src[2] = src[0];
1323
1324      floor[1].opcode = INST_OPCODE_ADD;
1325      floor[1].sat = inst->Instruction.Saturate;
1326      floor[1].dst = dst;
1327      floor[1].src[0] = src[0];
1328      floor[1].src[2].use = 1;
1329      floor[1].src[2].swiz = INST_SWIZ_IDENTITY;
1330      floor[1].src[2].neg = 1;
1331      floor[1].src[2].rgroup = temp.rgroup;
1332      floor[1].src[2].reg = temp.id;
1333
1334      emit_inst(c, &floor[0]);
1335      emit_inst(c, &floor[1]);
1336   }
1337
1338   arl.opcode = INST_OPCODE_MOVAR;
1339   arl.sat = inst->Instruction.Saturate;
1340   arl.dst = convert_dst(c, &inst->Dst[0]);
1341   arl.src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);
1342
1343   emit_inst(c, &arl);
1344}
1345
1346static void
1347trans_lrp(const struct instr_translater *t, struct etna_compile *c,
1348          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1349{
1350   /* dst = src0 * src1 + (1 - src0) * src2
1351    *     => src0 * src1 - (src0 - 1) * src2
1352    *     => src0 * src1 - (src0 * src2 - src2)
1353    * MAD tTEMP.xyzw, tSRC0.xyzw, tSRC2.xyzw, -tSRC2.xyzw
1354    * MAD tDST.xyzw, tSRC0.xyzw, tSRC1.xyzw, -tTEMP.xyzw
1355    */
1356   struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1357   if (etna_src_uniforms_conflict(src[0], src[1]) ||
1358       etna_src_uniforms_conflict(src[0], src[2])) {
1359      src[0] = etna_mov_src(c, src[0]);
1360   }
1361
1362   struct etna_inst mad[2] = { };
1363   mad[0].opcode = INST_OPCODE_MAD;
1364   mad[0].sat = 0;
1365   mad[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
1366                                         INST_COMPS_Z | INST_COMPS_W);
1367   mad[0].src[0] = src[0];
1368   mad[0].src[1] = src[2];
1369   mad[0].src[2] = negate(src[2]);
1370   mad[1].opcode = INST_OPCODE_MAD;
1371   mad[1].sat = inst->Instruction.Saturate;
1372   mad[1].dst = convert_dst(c, &inst->Dst[0]), mad[1].src[0] = src[0];
1373   mad[1].src[1] = src[1];
1374   mad[1].src[2] = negate(etna_native_to_src(temp, INST_SWIZ_IDENTITY));
1375
1376   emit_inst(c, &mad[0]);
1377   emit_inst(c, &mad[1]);
1378}
1379
1380static void
1381trans_lit(const struct instr_translater *t, struct etna_compile *c,
1382          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1383{
1384   /* SELECT.LT tmp._y__, 0, src.yyyy, 0
1385    *  - can be eliminated if src.y is a uniform and >= 0
1386    * SELECT.GT tmp.___w, 128, src.wwww, 128
1387    * SELECT.LT tmp.___w, -128, tmp.wwww, -128
1388    *  - can be eliminated if src.w is a uniform and fits clamp
1389    * LOG tmp.x, void, void, tmp.yyyy
1390    * MUL tmp.x, tmp.xxxx, tmp.wwww, void
1391    * LITP dst, undef, src.xxxx, tmp.xxxx
1392    */
1393   struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c);
1394   struct etna_inst_src src_y = { };
1395
1396   if (!etna_rgroup_is_uniform(src[0].rgroup)) {
1397      src_y = etna_native_to_src(inner_temp, SWIZZLE(Y, Y, Y, Y));
1398
1399      struct etna_inst ins = { };
1400      ins.opcode = INST_OPCODE_SELECT;
1401      ins.cond = INST_CONDITION_LT;
1402      ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_Y);
1403      ins.src[0] = ins.src[2] = alloc_imm_f32(c, 0.0);
1404      ins.src[1] = swizzle(src[0], SWIZZLE(Y, Y, Y, Y));
1405      emit_inst(c, &ins);
1406   } else if (uif(get_imm_u32(c, &src[0], 1)) < 0)
1407      src_y = alloc_imm_f32(c, 0.0);
1408   else
1409      src_y = swizzle(src[0], SWIZZLE(Y, Y, Y, Y));
1410
1411   struct etna_inst_src src_w = { };
1412
1413   if (!etna_rgroup_is_uniform(src[0].rgroup)) {
1414      src_w = etna_native_to_src(inner_temp, SWIZZLE(W, W, W, W));
1415
1416      struct etna_inst ins = { };
1417      ins.opcode = INST_OPCODE_SELECT;
1418      ins.cond = INST_CONDITION_GT;
1419      ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_W);
1420      ins.src[0] = ins.src[2] = alloc_imm_f32(c, 128.);
1421      ins.src[1] = swizzle(src[0], SWIZZLE(W, W, W, W));
1422      emit_inst(c, &ins);
1423      ins.cond = INST_CONDITION_LT;
1424      ins.src[0].neg = !ins.src[0].neg;
1425      ins.src[2].neg = !ins.src[2].neg;
1426      ins.src[1] = src_w;
1427      emit_inst(c, &ins);
1428   } else if (uif(get_imm_u32(c, &src[0], 3)) < -128.)
1429      src_w = alloc_imm_f32(c, -128.);
1430   else if (uif(get_imm_u32(c, &src[0], 3)) > 128.)
1431      src_w = alloc_imm_f32(c, 128.);
1432   else
1433      src_w = swizzle(src[0], SWIZZLE(W, W, W, W));
1434
1435   if (c->specs->has_new_transcendentals) { /* Alternative LOG sequence */
1436      emit_inst(c, &(struct etna_inst) {
1437         .opcode = INST_OPCODE_LOG,
1438         .dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y),
1439         .src[2] = src_y,
1440         .tex = { .amode=1 }, /* Unknown bit needs to be set */
1441      });
1442      emit_inst(c, &(struct etna_inst) {
1443         .opcode = INST_OPCODE_MUL,
1444         .dst = etna_native_to_dst(inner_temp, INST_COMPS_X),
1445         .src[0] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
1446         .src[1] = etna_native_to_src(inner_temp, SWIZZLE(Y, Y, Y, Y)),
1447      });
1448   } else {
1449      struct etna_inst ins[3] = { };
1450      ins[0].opcode = INST_OPCODE_LOG;
1451      ins[0].dst = etna_native_to_dst(inner_temp, INST_COMPS_X);
1452      ins[0].src[2] = src_y;
1453
1454      emit_inst(c, &ins[0]);
1455   }
1456   emit_inst(c, &(struct etna_inst) {
1457      .opcode = INST_OPCODE_MUL,
1458      .sat = 0,
1459      .dst = etna_native_to_dst(inner_temp, INST_COMPS_X),
1460      .src[0] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
1461      .src[1] = src_w,
1462   });
1463   emit_inst(c, &(struct etna_inst) {
1464      .opcode = INST_OPCODE_LITP,
1465      .sat = 0,
1466      .dst = convert_dst(c, &inst->Dst[0]),
1467      .src[0] = swizzle(src[0], SWIZZLE(X, X, X, X)),
1468      .src[1] = swizzle(src[0], SWIZZLE(X, X, X, X)),
1469      .src[2] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
1470   });
1471}
1472
1473static void
1474trans_ssg(const struct instr_translater *t, struct etna_compile *c,
1475          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1476{
1477   if (c->specs->has_sign_floor_ceil) {
1478      emit_inst(c, &(struct etna_inst){
1479         .opcode = INST_OPCODE_SIGN,
1480         .sat = inst->Instruction.Saturate,
1481         .dst = convert_dst(c, &inst->Dst[0]),
1482         .src[2] = src[0],
1483      });
1484   } else {
1485      struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1486      struct etna_inst ins[2] = { };
1487
1488      ins[0].opcode = INST_OPCODE_SET;
1489      ins[0].cond = INST_CONDITION_NZ;
1490      ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
1491                                            INST_COMPS_Z | INST_COMPS_W);
1492      ins[0].src[0] = src[0];
1493
1494      ins[1].opcode = INST_OPCODE_SELECT;
1495      ins[1].cond = INST_CONDITION_LZ;
1496      ins[1].sat = inst->Instruction.Saturate;
1497      ins[1].dst = convert_dst(c, &inst->Dst[0]);
1498      ins[1].src[0] = src[0];
1499      ins[1].src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);
1500      ins[1].src[1] = negate(ins[1].src[2]);
1501
1502      emit_inst(c, &ins[0]);
1503      emit_inst(c, &ins[1]);
1504   }
1505}
1506
1507static void
1508trans_trig(const struct instr_translater *t, struct etna_compile *c,
1509           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1510{
1511   if (c->specs->has_new_transcendentals) { /* Alternative SIN/COS */
1512      /* On newer chips alternative SIN/COS instructions are implemented,
1513       * which:
1514       * - Need their input scaled by 1/pi instead of 2/pi
1515       * - Output an x and y component, which need to be multiplied to
1516       *   get the result
1517       */
1518      struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xyz */
1519      emit_inst(c, &(struct etna_inst) {
1520         .opcode = INST_OPCODE_MUL,
1521         .sat = 0,
1522         .dst = etna_native_to_dst(temp, INST_COMPS_Z),
1523         .src[0] = src[0], /* any swizzling happens here */
1524         .src[1] = alloc_imm_f32(c, 1.0f / M_PI),
1525      });
1526      emit_inst(c, &(struct etna_inst) {
1527         .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
1528                    ? INST_OPCODE_COS
1529                    : INST_OPCODE_SIN,
1530         .sat = 0,
1531         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y),
1532         .src[2] = etna_native_to_src(temp, SWIZZLE(Z, Z, Z, Z)),
1533         .tex = { .amode=1 }, /* Unknown bit needs to be set */
1534      });
1535      emit_inst(c, &(struct etna_inst) {
1536         .opcode = INST_OPCODE_MUL,
1537         .sat = inst->Instruction.Saturate,
1538         .dst = convert_dst(c, &inst->Dst[0]),
1539         .src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),
1540         .src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),
1541      });
1542
1543   } else if (c->specs->has_sin_cos_sqrt) {
1544      struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1545      /* add divide by PI/2, using a temp register. GC2000
1546       * fails with src==dst for the trig instruction. */
1547      emit_inst(c, &(struct etna_inst) {
1548         .opcode = INST_OPCODE_MUL,
1549         .sat = 0,
1550         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
1551                                         INST_COMPS_Z | INST_COMPS_W),
1552         .src[0] = src[0], /* any swizzling happens here */
1553         .src[1] = alloc_imm_f32(c, 2.0f / M_PI),
1554      });
1555      emit_inst(c, &(struct etna_inst) {
1556         .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
1557                    ? INST_OPCODE_COS
1558                    : INST_OPCODE_SIN,
1559         .sat = inst->Instruction.Saturate,
1560         .dst = convert_dst(c, &inst->Dst[0]),
1561         .src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY),
1562      });
1563   } else {
1564      /* Implement Nick's fast sine/cosine. Taken from:
1565       * http://forum.devmaster.net/t/fast-and-accurate-sine-cosine/9648
1566       * A=(1/2*PI 0 1/2*PI 0) B=(0.75 0 0.5 0) C=(-4 4 X X)
1567       *  MAD t.x_zw, src.xxxx, A, B
1568       *  FRC t.x_z_, void, void, t.xwzw
1569       *  MAD t.x_z_, t.xwzw, 2, -1
1570       *  MUL t._y__, t.wzww, |t.wzww|, void  (for sin/scs)
1571       *  DP3 t.x_z_, t.zyww, C, void         (for sin)
1572       *  DP3 t.__z_, t.zyww, C, void         (for scs)
1573       *  MUL t._y__, t.wxww, |t.wxww|, void  (for cos/scs)
1574       *  DP3 t.x_z_, t.xyww, C, void         (for cos)
1575       *  DP3 t.x___, t.xyww, C, void         (for scs)
1576       *  MAD t._y_w, t,xxzz, |t.xxzz|, -t.xxzz
1577       *  MAD dst, t.ywyw, .2225, t.xzxz
1578       */
1579      struct etna_inst *p, ins[9] = { };
1580      struct etna_native_reg t0 = etna_compile_get_inner_temp(c);
1581      struct etna_inst_src t0s = etna_native_to_src(t0, INST_SWIZ_IDENTITY);
1582      struct etna_inst_src sincos[3], in = src[0];
1583      sincos[0] = etna_imm_vec4f(c, sincos_const[0]);
1584      sincos[1] = etna_imm_vec4f(c, sincos_const[1]);
1585
1586      /* A uniform source will cause the inner temp limit to
1587       * be exceeded.  Explicitly deal with that scenario.
1588       */
1589      if (etna_rgroup_is_uniform(src[0].rgroup)) {
1590         struct etna_inst ins = { };
1591         ins.opcode = INST_OPCODE_MOV;
1592         ins.dst = etna_native_to_dst(t0, INST_COMPS_X);
1593         ins.src[2] = in;
1594         emit_inst(c, &ins);
1595         in = t0s;
1596      }
1597
1598      ins[0].opcode = INST_OPCODE_MAD;
1599      ins[0].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z | INST_COMPS_W);
1600      ins[0].src[0] = swizzle(in, SWIZZLE(X, X, X, X));
1601      ins[0].src[1] = swizzle(sincos[1], SWIZZLE(X, W, X, W)); /* 1/2*PI */
1602      ins[0].src[2] = swizzle(sincos[1], SWIZZLE(Y, W, Z, W)); /* 0.75, 0, 0.5, 0 */
1603
1604      ins[1].opcode = INST_OPCODE_FRC;
1605      ins[1].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
1606      ins[1].src[2] = swizzle(t0s, SWIZZLE(X, W, Z, W));
1607
1608      ins[2].opcode = INST_OPCODE_MAD;
1609      ins[2].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
1610      ins[2].src[0] = swizzle(t0s, SWIZZLE(X, W, Z, W));
1611      ins[2].src[1] = swizzle(sincos[0], SWIZZLE(X, X, X, X)); /* 2 */
1612      ins[2].src[2] = swizzle(sincos[0], SWIZZLE(Y, Y, Y, Y)); /* -1 */
1613
1614      unsigned mul_swiz, dp3_swiz;
1615      if (inst->Instruction.Opcode == TGSI_OPCODE_SIN) {
1616         mul_swiz = SWIZZLE(W, Z, W, W);
1617         dp3_swiz = SWIZZLE(Z, Y, W, W);
1618      } else {
1619         mul_swiz = SWIZZLE(W, X, W, W);
1620         dp3_swiz = SWIZZLE(X, Y, W, W);
1621      }
1622
1623      ins[3].opcode = INST_OPCODE_MUL;
1624      ins[3].dst = etna_native_to_dst(t0, INST_COMPS_Y);
1625      ins[3].src[0] = swizzle(t0s, mul_swiz);
1626      ins[3].src[1] = absolute(ins[3].src[0]);
1627
1628      ins[4].opcode = INST_OPCODE_DP3;
1629      ins[4].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
1630      ins[4].src[0] = swizzle(t0s, dp3_swiz);
1631      ins[4].src[1] = swizzle(sincos[0], SWIZZLE(Z, W, W, W));
1632
1633      p = &ins[5];
1634      p->opcode = INST_OPCODE_MAD;
1635      p->dst = etna_native_to_dst(t0, INST_COMPS_Y | INST_COMPS_W);
1636      p->src[0] = swizzle(t0s, SWIZZLE(X, X, Z, Z));
1637      p->src[1] = absolute(p->src[0]);
1638      p->src[2] = negate(p->src[0]);
1639
1640      p++;
1641      p->opcode = INST_OPCODE_MAD;
1642      p->sat = inst->Instruction.Saturate;
1643      p->dst = convert_dst(c, &inst->Dst[0]),
1644      p->src[0] = swizzle(t0s, SWIZZLE(Y, W, Y, W));
1645      p->src[1] = alloc_imm_f32(c, 0.2225);
1646      p->src[2] = swizzle(t0s, SWIZZLE(X, Z, X, Z));
1647
1648      for (int i = 0; &ins[i] <= p; i++)
1649         emit_inst(c, &ins[i]);
1650   }
1651}
1652
1653static void
1654trans_lg2(const struct instr_translater *t, struct etna_compile *c,
1655            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1656{
1657   if (c->specs->has_new_transcendentals) {
1658      /* On newer chips alternative LOG instruction is implemented,
1659       * which outputs an x and y component, which need to be multiplied to
1660       * get the result.
1661       */
1662      struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xy */
1663      emit_inst(c, &(struct etna_inst) {
1664         .opcode = INST_OPCODE_LOG,
1665         .sat = 0,
1666         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y),
1667         .src[2] = src[0],
1668         .tex = { .amode=1 }, /* Unknown bit needs to be set */
1669      });
1670      emit_inst(c, &(struct etna_inst) {
1671         .opcode = INST_OPCODE_MUL,
1672         .sat = inst->Instruction.Saturate,
1673         .dst = convert_dst(c, &inst->Dst[0]),
1674         .src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),
1675         .src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),
1676      });
1677   } else {
1678      emit_inst(c, &(struct etna_inst) {
1679         .opcode = INST_OPCODE_LOG,
1680         .sat = inst->Instruction.Saturate,
1681         .dst = convert_dst(c, &inst->Dst[0]),
1682         .src[2] = src[0],
1683      });
1684   }
1685}
1686
1687static void
1688trans_sampler(const struct instr_translater *t, struct etna_compile *c,
1689              const struct tgsi_full_instruction *inst,
1690              struct etna_inst_src *src)
1691{
1692   /* There is no native support for GL texture rectangle coordinates, so
1693    * we have to rescale from ([0, width], [0, height]) to ([0, 1], [0, 1]). */
1694   if (inst->Texture.Texture == TGSI_TEXTURE_RECT) {
1695      uint32_t unit = inst->Src[1].Register.Index;
1696      struct etna_inst ins[2] = { };
1697      struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1698
1699      ins[0].opcode = INST_OPCODE_MUL;
1700      ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X);
1701      ins[0].src[0] = src[0];
1702      ins[0].src[1] = alloc_imm(c, ETNA_UNIFORM_TEXRECT_SCALE_X, unit);
1703
1704      ins[1].opcode = INST_OPCODE_MUL;
1705      ins[1].dst = etna_native_to_dst(temp, INST_COMPS_Y);
1706      ins[1].src[0] = src[0];
1707      ins[1].src[1] = alloc_imm(c, ETNA_UNIFORM_TEXRECT_SCALE_Y, unit);
1708
1709      emit_inst(c, &ins[0]);
1710      emit_inst(c, &ins[1]);
1711
1712      src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY); /* temp.xyzw */
1713   }
1714
1715   switch (inst->Instruction.Opcode) {
1716   case TGSI_OPCODE_TEX:
1717      emit_inst(c, &(struct etna_inst) {
1718         .opcode = INST_OPCODE_TEXLD,
1719         .sat = 0,
1720         .dst = convert_dst(c, &inst->Dst[0]),
1721         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
1722         .src[0] = src[0],
1723      });
1724      break;
1725
1726   case TGSI_OPCODE_TXB:
1727      emit_inst(c, &(struct etna_inst) {
1728         .opcode = INST_OPCODE_TEXLDB,
1729         .sat = 0,
1730         .dst = convert_dst(c, &inst->Dst[0]),
1731         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
1732         .src[0] = src[0],
1733      });
1734      break;
1735
1736   case TGSI_OPCODE_TXL:
1737      emit_inst(c, &(struct etna_inst) {
1738         .opcode = INST_OPCODE_TEXLDL,
1739         .sat = 0,
1740         .dst = convert_dst(c, &inst->Dst[0]),
1741         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
1742         .src[0] = src[0],
1743      });
1744      break;
1745
1746   case TGSI_OPCODE_TXP: { /* divide src.xyz by src.w */
1747      struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1748
1749      emit_inst(c, &(struct etna_inst) {
1750         .opcode = INST_OPCODE_RCP,
1751         .sat = 0,
1752         .dst = etna_native_to_dst(temp, INST_COMPS_W), /* tmp.w */
1753         .src[2] = swizzle(src[0], SWIZZLE(W, W, W, W)),
1754      });
1755      emit_inst(c, &(struct etna_inst) {
1756         .opcode = INST_OPCODE_MUL,
1757         .sat = 0,
1758         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
1759                                         INST_COMPS_Z), /* tmp.xyz */
1760         .src[0] = etna_native_to_src(temp, SWIZZLE(W, W, W, W)),
1761         .src[1] = src[0], /* src.xyzw */
1762      });
1763      emit_inst(c, &(struct etna_inst) {
1764         .opcode = INST_OPCODE_TEXLD,
1765         .sat = 0,
1766         .dst = convert_dst(c, &inst->Dst[0]),
1767         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
1768         .src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY), /* tmp.xyzw */
1769      });
1770   } break;
1771
1772   default:
1773      BUG("Unhandled instruction %s",
1774          tgsi_get_opcode_name(inst->Instruction.Opcode));
1775      assert(0);
1776      break;
1777   }
1778}
1779
/* Translation handler for TGSI opcodes that emit no native instructions.
 * All arguments are ignored.
 */
static void
trans_dummy(const struct instr_translater *t, struct etna_compile *c,
            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   /* Deliberately empty: nothing is emitted for these opcodes. */
   (void)t;
   (void)c;
   (void)inst;
   (void)src;
}
1786
1787static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
1788#define INSTR(n, f, ...) \
1789   [TGSI_OPCODE_##n] = {.fxn = (f), .tgsi_opc = TGSI_OPCODE_##n, ##__VA_ARGS__}
1790
1791   INSTR(MOV, trans_instr, .opc = INST_OPCODE_MOV, .src = {2, -1, -1}),
1792   INSTR(RCP, trans_instr, .opc = INST_OPCODE_RCP, .src = {2, -1, -1}),
1793   INSTR(RSQ, trans_instr, .opc = INST_OPCODE_RSQ, .src = {2, -1, -1}),
1794   INSTR(MUL, trans_instr, .opc = INST_OPCODE_MUL, .src = {0, 1, -1}),
1795   INSTR(ADD, trans_instr, .opc = INST_OPCODE_ADD, .src = {0, 2, -1}),
1796   INSTR(DP2, trans_instr, .opc = INST_OPCODE_DP2, .src = {0, 1, -1}),
1797   INSTR(DP3, trans_instr, .opc = INST_OPCODE_DP3, .src = {0, 1, -1}),
1798   INSTR(DP4, trans_instr, .opc = INST_OPCODE_DP4, .src = {0, 1, -1}),
1799   INSTR(DST, trans_instr, .opc = INST_OPCODE_DST, .src = {0, 1, -1}),
1800   INSTR(MAD, trans_instr, .opc = INST_OPCODE_MAD, .src = {0, 1, 2}),
1801   INSTR(EX2, trans_instr, .opc = INST_OPCODE_EXP, .src = {2, -1, -1}),
1802   INSTR(LG2, trans_lg2),
1803   INSTR(SQRT, trans_instr, .opc = INST_OPCODE_SQRT, .src = {2, -1, -1}),
1804   INSTR(FRC, trans_instr, .opc = INST_OPCODE_FRC, .src = {2, -1, -1}),
1805   INSTR(CEIL, trans_instr, .opc = INST_OPCODE_CEIL, .src = {2, -1, -1}),
1806   INSTR(FLR, trans_instr, .opc = INST_OPCODE_FLOOR, .src = {2, -1, -1}),
1807   INSTR(CMP, trans_instr, .opc = INST_OPCODE_SELECT, .src = {0, 1, 2}, .cond = INST_CONDITION_LZ),
1808
1809   INSTR(KILL, trans_instr, .opc = INST_OPCODE_TEXKILL),
1810   INSTR(KILL_IF, trans_instr, .opc = INST_OPCODE_TEXKILL, .src = {0, -1, -1}, .cond = INST_CONDITION_LZ),
1811
1812   INSTR(DDX, trans_deriv, .opc = INST_OPCODE_DSX),
1813   INSTR(DDY, trans_deriv, .opc = INST_OPCODE_DSY),
1814
1815   INSTR(IF, trans_if),
1816   INSTR(ELSE, trans_else),
1817   INSTR(ENDIF, trans_endif),
1818
1819   INSTR(BGNLOOP, trans_loop_bgn),
1820   INSTR(ENDLOOP, trans_loop_end),
1821   INSTR(BRK, trans_brk),
1822   INSTR(CONT, trans_cont),
1823
1824   INSTR(MIN, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_GT),
1825   INSTR(MAX, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_LT),
1826
1827   INSTR(ARL, trans_arl),
1828   INSTR(LRP, trans_lrp),
1829   INSTR(LIT, trans_lit),
1830   INSTR(SSG, trans_ssg),
1831
1832   INSTR(SIN, trans_trig),
1833   INSTR(COS, trans_trig),
1834
1835   INSTR(SLT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LT),
1836   INSTR(SGE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GE),
1837   INSTR(SEQ, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_EQ),
1838   INSTR(SGT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GT),
1839   INSTR(SLE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LE),
1840   INSTR(SNE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_NE),
1841
1842   INSTR(TEX, trans_sampler),
1843   INSTR(TXB, trans_sampler),
1844   INSTR(TXL, trans_sampler),
1845   INSTR(TXP, trans_sampler),
1846
1847   INSTR(NOP, trans_dummy),
1848   INSTR(END, trans_dummy),
1849};
1850
1851/* Pass -- compile instructions */
1852static void
1853etna_compile_pass_generate_code(struct etna_compile *c)
1854{
1855   struct tgsi_parse_context ctx = { };
1856   ASSERTED unsigned status = tgsi_parse_init(&ctx, c->tokens);
1857   assert(status == TGSI_PARSE_OK);
1858
1859   int inst_idx = 0;
1860   while (!tgsi_parse_end_of_tokens(&ctx)) {
1861      const struct tgsi_full_instruction *inst = 0;
1862
1863      /* No inner temps used yet for this instruction, clear counter */
1864      c->inner_temps = 0;
1865
1866      tgsi_parse_token(&ctx);
1867
1868      switch (ctx.FullToken.Token.Type) {
1869      case TGSI_TOKEN_TYPE_INSTRUCTION:
1870         /* iterate over operands */
1871         inst = &ctx.FullToken.FullInstruction;
1872         if (c->dead_inst[inst_idx]) { /* skip dead instructions */
1873            inst_idx++;
1874            continue;
1875         }
1876
1877         /* Lookup the TGSI information and generate the source arguments */
1878         struct etna_inst_src src[ETNA_NUM_SRC];
1879         memset(src, 0, sizeof(src));
1880
1881         const struct tgsi_opcode_info *tgsi = tgsi_get_opcode_info(inst->Instruction.Opcode);
1882
1883         for (int i = 0; i < tgsi->num_src && i < ETNA_NUM_SRC; i++) {
1884            const struct tgsi_full_src_register *reg = &inst->Src[i];
1885            const struct etna_reg_desc *srcreg = etna_get_src_reg(c, reg->Register);
1886            const struct etna_native_reg *n = &srcreg->native;
1887
1888            if (!n->valid || n->is_tex)
1889               continue;
1890
1891            src[i] = etna_create_src(reg, n);
1892
1893            /*
1894	     * Replace W=1.0 for point sprite coordinates, since hardware
1895	     * can only replace X,Y and leaves Z,W=0,0 instead of Z,W=0,1
1896	     */
1897            if (srcreg && srcreg->has_semantic &&
1898                srcreg->semantic.Name == TGSI_SEMANTIC_TEXCOORD &&
1899                (c->key->sprite_coord_enable & BITFIELD_BIT(srcreg->semantic.Index))) {
1900               emit_inst(c, &(struct etna_inst) {
1901                  .opcode = INST_OPCODE_SET,
1902                  .cond = INST_CONDITION_TRUE,
1903                  .dst = etna_native_to_dst(srcreg->native, INST_COMPS_W),
1904               });
1905            }
1906         }
1907
1908         const unsigned opc = inst->Instruction.Opcode;
1909         const struct instr_translater *t = &translaters[opc];
1910
1911         if (t->fxn) {
1912            t->fxn(t, c, inst, src);
1913
1914            inst_idx += 1;
1915         } else {
1916            BUG("Unhandled instruction %s", tgsi_get_opcode_name(opc));
1917            assert(0);
1918         }
1919         break;
1920      }
1921   }
1922   tgsi_parse_free(&ctx);
1923}
1924
1925/* Look up register by semantic */
1926static struct etna_reg_desc *
1927find_decl_by_semantic(struct etna_compile *c, uint file, uint name, uint index)
1928{
1929   for (int idx = 0; idx < c->file[file].reg_size; ++idx) {
1930      struct etna_reg_desc *reg = &c->file[file].reg[idx];
1931
1932      if (reg->semantic.Name == name && reg->semantic.Index == index)
1933         return reg;
1934   }
1935
1936   return NULL; /* not found */
1937}
1938
1939/** Add ADD and MUL instruction to bring Z/W to 0..1 if -1..1 if needed:
1940 * - this is a vertex shader
1941 * - and this is an older GPU
1942 */
1943static void
1944etna_compile_add_z_div_if_needed(struct etna_compile *c)
1945{
1946   if (c->info.processor == PIPE_SHADER_VERTEX && c->specs->vs_need_z_div) {
1947      /* find position out */
1948      struct etna_reg_desc *pos_reg =
1949         find_decl_by_semantic(c, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_POSITION, 0);
1950
1951      if (pos_reg != NULL) {
1952         /*
1953          * ADD tX.__z_, tX.zzzz, void, tX.wwww
1954          * MUL tX.__z_, tX.zzzz, 0.5, void
1955         */
1956         emit_inst(c, &(struct etna_inst) {
1957            .opcode = INST_OPCODE_ADD,
1958            .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z),
1959            .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)),
1960            .src[2] = etna_native_to_src(pos_reg->native, SWIZZLE(W, W, W, W)),
1961         });
1962         emit_inst(c, &(struct etna_inst) {
1963            .opcode = INST_OPCODE_MUL,
1964            .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z),
1965            .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)),
1966            .src[1] = alloc_imm_f32(c, 0.5f),
1967         });
1968      }
1969   }
1970}
1971
1972static void
1973etna_compile_frag_rb_swap(struct etna_compile *c)
1974{
1975   if (c->info.processor == PIPE_SHADER_FRAGMENT && c->key->frag_rb_swap) {
1976      /* find color out */
1977      struct etna_reg_desc *color_reg =
1978         find_decl_by_semantic(c, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_COLOR, 0);
1979
1980      emit_inst(c, &(struct etna_inst) {
1981         .opcode = INST_OPCODE_MOV,
1982         .dst = etna_native_to_dst(color_reg->native, INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z | INST_COMPS_W),
1983         .src[2] = etna_native_to_src(color_reg->native, SWIZZLE(Z, Y, X, W)),
1984      });
1985   }
1986}
1987
1988/** add a NOP to the shader if
1989 * a) the shader is empty
1990 * or
1991 * b) there is a label at the end of the shader
1992 */
1993static void
1994etna_compile_add_nop_if_needed(struct etna_compile *c)
1995{
1996   bool label_at_last_inst = false;
1997
1998   for (int idx = 0; idx < c->labels_count; ++idx) {
1999      if (c->labels[idx].inst_idx == c->inst_ptr)
2000         label_at_last_inst = true;
2001
2002   }
2003
2004   if (c->inst_ptr == 0 || label_at_last_inst)
2005      emit_inst(c, &(struct etna_inst){.opcode = INST_OPCODE_NOP});
2006}
2007
2008static void
2009assign_uniforms(struct etna_compile_file *file, unsigned base)
2010{
2011   for (int idx = 0; idx < file->reg_size; ++idx) {
2012      file->reg[idx].native.valid = 1;
2013      file->reg[idx].native.rgroup = INST_RGROUP_UNIFORM_0;
2014      file->reg[idx].native.id = base + idx;
2015   }
2016}
2017
2018/* Allocate CONST and IMM to native ETNA_RGROUP_UNIFORM(x).
2019 * CONST must be consecutive as const buffers are supposed to be consecutive,
2020 * and before IMM, as this is
2021 * more convenient because is possible for the compilation process itself to
2022 * generate extra
2023 * immediates for constants such as pi, one, zero.
2024 */
2025static void
2026assign_constants_and_immediates(struct etna_compile *c)
2027{
2028   assign_uniforms(&c->file[TGSI_FILE_CONSTANT], 0);
2029   /* immediates start after the constants */
2030   c->imm_base = c->file[TGSI_FILE_CONSTANT].reg_size * 4;
2031   assign_uniforms(&c->file[TGSI_FILE_IMMEDIATE], c->imm_base / 4);
2032   DBG_F(ETNA_DBG_COMPILER_MSGS, "imm base: %i size: %i", c->imm_base,
2033         c->imm_size);
2034}
2035
2036/* Assign declared samplers to native texture units */
2037static void
2038assign_texture_units(struct etna_compile *c)
2039{
2040   uint tex_base = 0;
2041
2042   if (c->info.processor == PIPE_SHADER_VERTEX)
2043      tex_base = c->specs->vertex_sampler_offset;
2044
2045   for (int idx = 0; idx < c->file[TGSI_FILE_SAMPLER].reg_size; ++idx) {
2046      c->file[TGSI_FILE_SAMPLER].reg[idx].native.valid = 1;
2047      c->file[TGSI_FILE_SAMPLER].reg[idx].native.is_tex = 1; // overrides rgroup
2048      c->file[TGSI_FILE_SAMPLER].reg[idx].native.id = tex_base + idx;
2049   }
2050}
2051
2052/* Additional pass to fill in branch targets. This pass should be last
2053 * as no instruction reordering or removing/addition can be done anymore
2054 * once the branch targets are computed.
2055 */
2056static void
2057etna_compile_fill_in_labels(struct etna_compile *c)
2058{
2059   for (int idx = 0; idx < c->inst_ptr; ++idx) {
2060      if (c->lbl_usage[idx] != -1)
2061         etna_assemble_set_imm(&c->code[idx * 4],
2062                               c->labels[c->lbl_usage[idx]].inst_idx);
2063   }
2064}
2065
2066/* compare two etna_native_reg structures, return true if equal */
2067static bool
2068cmp_etna_native_reg(const struct etna_native_reg to,
2069                    const struct etna_native_reg from)
2070{
2071   return to.valid == from.valid && to.is_tex == from.is_tex &&
2072          to.rgroup == from.rgroup && to.id == from.id;
2073}
2074
2075/* go through all declarations and swap native registers *to* and *from* */
2076static void
2077swap_native_registers(struct etna_compile *c, const struct etna_native_reg to,
2078                      const struct etna_native_reg from)
2079{
2080   if (cmp_etna_native_reg(from, to))
2081      return; /* Nothing to do */
2082
2083   for (int idx = 0; idx < c->total_decls; ++idx) {
2084      if (cmp_etna_native_reg(c->decl[idx].native, from)) {
2085         c->decl[idx].native = to;
2086      } else if (cmp_etna_native_reg(c->decl[idx].native, to)) {
2087         c->decl[idx].native = from;
2088      }
2089   }
2090}
2091
2092/* For PS we need to permute so that inputs are always in temporary 0..N-1.
2093 * Semantic POS is always t0. If that semantic is not used, avoid t0.
2094 */
2095static void
2096permute_ps_inputs(struct etna_compile *c)
2097{
2098   /* Special inputs:
2099    * gl_FragCoord   VARYING_SLOT_POS   TGSI_SEMANTIC_POSITION
2100    * gl_FrontFacing VARYING_SLOT_FACE  TGSI_SEMANTIC_FACE
2101    * gl_PointCoord  VARYING_SLOT_PNTC  TGSI_SEMANTIC_PCOORD
2102    * gl_TexCoord    VARYING_SLOT_TEX   TGSI_SEMANTIC_TEXCOORD
2103    */
2104   uint native_idx = 1;
2105
2106   for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
2107      struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
2108      uint input_id;
2109      assert(reg->has_semantic);
2110
2111      if (!reg->active ||
2112          reg->semantic.Name == TGSI_SEMANTIC_POSITION ||
2113          reg->semantic.Name == TGSI_SEMANTIC_FACE)
2114         continue;
2115
2116      input_id = native_idx++;
2117      swap_native_registers(c, etna_native_temp(input_id),
2118                            c->file[TGSI_FILE_INPUT].reg[idx].native);
2119   }
2120
2121   c->num_varyings = native_idx - 1;
2122
2123   if (native_idx > c->next_free_native)
2124      c->next_free_native = native_idx;
2125}
2126
2127static inline int sem2slot(const struct tgsi_declaration_semantic *semantic)
2128{
2129   return tgsi_varying_semantic_to_slot(semantic->Name, semantic->Index);
2130}
2131
2132/* fill in ps inputs into shader object */
2133static void
2134fill_in_ps_inputs(struct etna_shader_variant *sobj, struct etna_compile *c)
2135{
2136   struct etna_shader_io_file *sf = &sobj->infile;
2137
2138   sf->num_reg = 0;
2139
2140   for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
2141      struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
2142
2143      if (reg->native.id > 0) {
2144         assert(sf->num_reg < ETNA_NUM_INPUTS);
2145         sf->reg[sf->num_reg].reg = reg->native.id;
2146         sf->reg[sf->num_reg].slot = sem2slot(&reg->semantic);
2147         /* convert usage mask to number of components (*=wildcard)
2148          *   .r    (0..1)  -> 1 component
2149          *   .*g   (2..3)  -> 2 component
2150          *   .**b  (4..7)  -> 3 components
2151          *   .***a (8..15) -> 4 components
2152          */
2153         sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask);
2154         sf->num_reg++;
2155      }
2156   }
2157
2158   assert(sf->num_reg == c->num_varyings);
2159   sobj->input_count_unk8 = 31; /* XXX what is this */
2160}
2161
2162/* fill in output mapping for ps into shader object */
2163static void
2164fill_in_ps_outputs(struct etna_shader_variant *sobj, struct etna_compile *c)
2165{
2166   sobj->outfile.num_reg = 0;
2167
2168   for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) {
2169      struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx];
2170
2171      switch (reg->semantic.Name) {
2172      case TGSI_SEMANTIC_COLOR: /* FRAG_RESULT_COLOR */
2173         sobj->ps_color_out_reg = reg->native.id;
2174         break;
2175      case TGSI_SEMANTIC_POSITION: /* FRAG_RESULT_DEPTH */
2176         sobj->ps_depth_out_reg = reg->native.id; /* =always native reg 0, only z component should be assigned */
2177         break;
2178      default:
2179         assert(0); /* only outputs supported are COLOR and POSITION at the moment */
2180      }
2181   }
2182}
2183
2184/* fill in inputs for vs into shader object */
2185static void
2186fill_in_vs_inputs(struct etna_shader_variant *sobj, struct etna_compile *c)
2187{
2188   struct etna_shader_io_file *sf = &sobj->infile;
2189
2190   sf->num_reg = 0;
2191   for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
2192      struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
2193      assert(sf->num_reg < ETNA_NUM_INPUTS);
2194
2195      if (!reg->native.valid)
2196         continue;
2197
2198      /* XXX exclude inputs with special semantics such as gl_frontFacing */
2199      sf->reg[sf->num_reg].reg = reg->native.id;
2200      sf->reg[sf->num_reg].slot = sem2slot(&reg->semantic);
2201      sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask);
2202      sf->num_reg++;
2203   }
2204
2205   sobj->input_count_unk8 = (sf->num_reg + 19) / 16; /* XXX what is this */
2206}
2207
2208/* fill in outputs for vs into shader object */
2209static void
2210fill_in_vs_outputs(struct etna_shader_variant *sobj, struct etna_compile *c)
2211{
2212   struct etna_shader_io_file *sf = &sobj->outfile;
2213
2214   sf->num_reg = 0;
2215   for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) {
2216      struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx];
2217      assert(sf->num_reg < ETNA_NUM_INPUTS);
2218
2219      switch (reg->semantic.Name) {
2220      case TGSI_SEMANTIC_POSITION:
2221         sobj->vs_pos_out_reg = reg->native.id;
2222         break;
2223      case TGSI_SEMANTIC_PSIZE:
2224         sobj->vs_pointsize_out_reg = reg->native.id;
2225         break;
2226      default:
2227         sf->reg[sf->num_reg].reg = reg->native.id;
2228         sf->reg[sf->num_reg].slot = sem2slot(&reg->semantic);
2229         sf->reg[sf->num_reg].num_components = 4; // XXX reg->num_components;
2230         sf->num_reg++;
2231      }
2232   }
2233
2234   /* fill in "mystery meat" load balancing value. This value determines how
2235    * work is scheduled between VS and PS
2236    * in the unified shader architecture. More precisely, it is determined from
2237    * the number of VS outputs, as well as chip-specific
2238    * vertex output buffer size, vertex cache size, and the number of shader
2239    * cores.
2240    *
2241    * XXX this is a conservative estimate, the "optimal" value is only known for
2242    * sure at link time because some
2243    * outputs may be unused and thus unmapped. Then again, in the general use
2244    * case with GLSL the vertex and fragment
2245    * shaders are linked already before submitting to Gallium, thus all outputs
2246    * are used.
2247    */
2248   int half_out = (c->file[TGSI_FILE_OUTPUT].reg_size + 1) / 2;
2249   assert(half_out);
2250
2251   uint32_t b = ((20480 / (c->specs->vertex_output_buffer_size -
2252                           2 * half_out * c->specs->vertex_cache_size)) +
2253                 9) /
2254                10;
2255   uint32_t a = (b + 256 / (c->specs->shader_core_count * half_out)) / 2;
2256   sobj->vs_load_balancing = VIVS_VS_LOAD_BALANCING_A(MIN2(a, 255)) |
2257                             VIVS_VS_LOAD_BALANCING_B(MIN2(b, 255)) |
2258                             VIVS_VS_LOAD_BALANCING_C(0x3f) |
2259                             VIVS_VS_LOAD_BALANCING_D(0x0f);
2260}
2261
2262static bool
2263etna_compile_check_limits(struct etna_compile *c)
2264{
2265   int max_uniforms = (c->info.processor == PIPE_SHADER_VERTEX)
2266                         ? c->specs->max_vs_uniforms
2267                         : c->specs->max_ps_uniforms;
2268   /* round up number of uniforms, including immediates, in units of four */
2269   int num_uniforms = c->imm_base / 4 + (c->imm_size + 3) / 4;
2270
2271   if (!c->specs->has_icache && c->inst_ptr > c->specs->max_instructions) {
2272      DBG("Number of instructions (%d) exceeds maximum %d", c->inst_ptr,
2273          c->specs->max_instructions);
2274      return false;
2275   }
2276
2277   if (c->next_free_native > c->specs->max_registers) {
2278      DBG("Number of registers (%d) exceeds maximum %d", c->next_free_native,
2279          c->specs->max_registers);
2280      return false;
2281   }
2282
2283   if (num_uniforms > max_uniforms) {
2284      DBG("Number of uniforms (%d) exceeds maximum %d", num_uniforms,
2285          max_uniforms);
2286      return false;
2287   }
2288
2289   if (c->num_varyings > c->specs->max_varyings) {
2290      DBG("Number of varyings (%d) exceeds maximum %d", c->num_varyings,
2291          c->specs->max_varyings);
2292      return false;
2293   }
2294
2295   if (c->imm_base > c->specs->num_constants) {
2296      DBG("Number of constants (%d) exceeds maximum %d", c->imm_base,
2297          c->specs->num_constants);
2298   }
2299
2300   return true;
2301}
2302
2303static void
2304copy_uniform_state_to_shader(struct etna_compile *c, struct etna_shader_variant *sobj)
2305{
2306   uint32_t count = c->imm_base + c->imm_size;
2307   struct etna_shader_uniform_info *uinfo = &sobj->uniforms;
2308
2309   uinfo->count = count;
2310
2311   uinfo->data = malloc(count * sizeof(*c->imm_data));
2312   for (unsigned i = 0; i < c->imm_base; i++)
2313      uinfo->data[i] = i;
2314   memcpy(&uinfo->data[c->imm_base], c->imm_data, c->imm_size * sizeof(*c->imm_data));
2315
2316   uinfo->contents = malloc(count * sizeof(*c->imm_contents));
2317   for (unsigned i = 0; i < c->imm_base; i++)
2318      uinfo->contents[i] = ETNA_UNIFORM_UNIFORM;
2319   memcpy(&uinfo->contents[c->imm_base], c->imm_contents, c->imm_size * sizeof(*c->imm_contents));
2320
2321   etna_set_shader_uniforms_dirty_flags(sobj);
2322}
2323
2324bool
2325etna_compile_shader(struct etna_shader_variant *v)
2326{
2327   if (DBG_ENABLED(ETNA_DBG_NIR))
2328      return etna_compile_shader_nir(v);
2329
2330   /* Create scratch space that may be too large to fit on stack
2331    */
2332   bool ret;
2333   struct etna_compile *c;
2334
2335   if (unlikely(!v))
2336      return false;
2337
2338   const struct etna_specs *specs = v->shader->specs;
2339
2340   struct tgsi_lowering_config lconfig = {
2341      .lower_FLR = !specs->has_sign_floor_ceil,
2342      .lower_CEIL = !specs->has_sign_floor_ceil,
2343      .lower_POW = true,
2344      .lower_EXP = true,
2345      .lower_LOG = true,
2346      .lower_DP2 = !specs->has_halti2_instructions,
2347      .lower_TRUNC = true,
2348   };
2349
2350   c = CALLOC_STRUCT(etna_compile);
2351   if (!c)
2352      return false;
2353
2354   memset(&c->lbl_usage, -1, sizeof(c->lbl_usage));
2355
2356   const struct tgsi_token *tokens = v->shader->tokens;
2357
2358   c->specs = specs;
2359   c->key = &v->key;
2360   c->tokens = tgsi_transform_lowering(&lconfig, tokens, &c->info);
2361   c->free_tokens = !!c->tokens;
2362   if (!c->tokens) {
2363      /* no lowering */
2364      c->tokens = tokens;
2365   }
2366
2367   /* Build a map from gallium register to native registers for files
2368    * CONST, SAMP, IMM, OUT, IN, TEMP.
2369    * SAMP will map as-is for fragment shaders, there will be a +8 offset for
2370    * vertex shaders.
2371    */
2372   /* Pass one -- check register file declarations and immediates */
2373   etna_compile_parse_declarations(c);
2374
2375   etna_allocate_decls(c);
2376
2377   /* Pass two -- check usage of temporaries, inputs, outputs */
2378   etna_compile_pass_check_usage(c);
2379
2380   assign_special_inputs(c);
2381
2382   /* Assign native temp register to TEMPs */
2383   assign_temporaries_to_native(c, &c->file[TGSI_FILE_TEMPORARY]);
2384
2385   /* optimize outputs */
2386   etna_compile_pass_optimize_outputs(c);
2387
2388   /* assign inputs: last usage of input should be <= first usage of temp */
2389   /*   potential optimization case:
2390    *     if single MOV TEMP[y], IN[x] before which temp y is not used, and
2391    * after which IN[x]
2392    *     is not read, temp[y] can be used as input register as-is
2393    */
2394   /*   sort temporaries by first use
2395    *   sort inputs by last usage
2396    *   iterate over inputs, temporaries
2397    *     if last usage of input <= first usage of temp:
2398    *       assign input to temp
2399    *       advance input, temporary pointer
2400    *     else
2401    *       advance temporary pointer
2402    *
2403    *   potential problem: instruction with multiple inputs of which one is the
2404    * temp and the other is the input;
2405    *      however, as the temp is not used before this, how would this make
2406    * sense? uninitialized temporaries have an undefined
2407    *      value, so this would be ok
2408    */
2409   assign_inouts_to_temporaries(c, TGSI_FILE_INPUT);
2410
2411   /* assign outputs: first usage of output should be >= last usage of temp */
2412   /*   potential optimization case:
2413    *      if single MOV OUT[x], TEMP[y] (with full write mask, or at least
2414    * writing all components that are used in
2415    *        the shader) after which temp y is no longer used temp[y] can be
2416    * used as output register as-is
2417    *
2418    *   potential problem: instruction with multiple outputs of which one is the
2419    * temp and the other is the output;
2420    *      however, as the temp is not used after this, how would this make
2421    * sense? could just discard the output value
2422    */
2423   /*   sort temporaries by last use
2424    *   sort outputs by first usage
2425    *   iterate over outputs, temporaries
2426    *     if first usage of output >= last usage of temp:
2427    *       assign output to temp
2428    *       advance output, temporary pointer
2429    *     else
2430    *       advance temporary pointer
2431    */
2432   assign_inouts_to_temporaries(c, TGSI_FILE_OUTPUT);
2433
2434   assign_constants_and_immediates(c);
2435   assign_texture_units(c);
2436
2437   /* list declarations */
2438   for (int x = 0; x < c->total_decls; ++x) {
2439      DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i "
2440                                    "last_use=%i native=%i usage_mask=%x "
2441                                    "has_semantic=%i",
2442            x, tgsi_file_name(c->decl[x].file), c->decl[x].idx,
2443            c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use,
2444            c->decl[x].native.valid ? c->decl[x].native.id : -1,
2445            c->decl[x].usage_mask, c->decl[x].has_semantic);
2446      if (c->decl[x].has_semantic)
2447         DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i",
2448               tgsi_semantic_names[c->decl[x].semantic.Name],
2449               c->decl[x].semantic.Index);
2450   }
2451   /* XXX for PS we need to permute so that inputs are always in temporary
2452    * 0..N-1.
2453    * There is no "switchboard" for varyings (AFAIK!). The output color,
2454    * however, can be routed
2455    * from an arbitrary temporary.
2456    */
2457   if (c->info.processor == PIPE_SHADER_FRAGMENT)
2458      permute_ps_inputs(c);
2459
2460
2461   /* list declarations */
2462   for (int x = 0; x < c->total_decls; ++x) {
2463      DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i "
2464                                    "last_use=%i native=%i usage_mask=%x "
2465                                    "has_semantic=%i",
2466            x, tgsi_file_name(c->decl[x].file), c->decl[x].idx,
2467            c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use,
2468            c->decl[x].native.valid ? c->decl[x].native.id : -1,
2469            c->decl[x].usage_mask, c->decl[x].has_semantic);
2470      if (c->decl[x].has_semantic)
2471         DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i",
2472               tgsi_semantic_names[c->decl[x].semantic.Name],
2473               c->decl[x].semantic.Index);
2474   }
2475
2476   /* pass 3: generate instructions */
2477   etna_compile_pass_generate_code(c);
2478   etna_compile_add_z_div_if_needed(c);
2479   etna_compile_frag_rb_swap(c);
2480   etna_compile_add_nop_if_needed(c);
2481
2482   ret = etna_compile_check_limits(c);
2483   if (!ret)
2484      goto out;
2485
2486   etna_compile_fill_in_labels(c);
2487
2488   /* fill in output structure */
2489   v->stage = c->info.processor == PIPE_SHADER_FRAGMENT ? MESA_SHADER_FRAGMENT : MESA_SHADER_VERTEX;
2490   v->uses_discard = c->info.uses_kill;
2491   v->code_size = c->inst_ptr * 4;
2492   v->code = mem_dup(c->code, c->inst_ptr * 16);
2493   v->num_loops = c->num_loops;
2494   v->num_temps = c->next_free_native;
2495   v->vs_id_in_reg = -1;
2496   v->vs_pos_out_reg = -1;
2497   v->vs_pointsize_out_reg = -1;
2498   v->ps_color_out_reg = -1;
2499   v->ps_depth_out_reg = -1;
2500   v->needs_icache = c->inst_ptr > c->specs->max_instructions;
2501   copy_uniform_state_to_shader(c, v);
2502
2503   if (c->info.processor == PIPE_SHADER_VERTEX) {
2504      fill_in_vs_inputs(v, c);
2505      fill_in_vs_outputs(v, c);
2506   } else if (c->info.processor == PIPE_SHADER_FRAGMENT) {
2507      fill_in_ps_inputs(v, c);
2508      fill_in_ps_outputs(v, c);
2509   }
2510
2511out:
2512   if (c->free_tokens)
2513      FREE((void *)c->tokens);
2514
2515   FREE(c->labels);
2516   FREE(c);
2517
2518   return ret;
2519}
2520
2521static const struct etna_shader_inout *
2522etna_shader_vs_lookup(const struct etna_shader_variant *sobj,
2523                      const struct etna_shader_inout *in)
2524{
2525   for (int i = 0; i < sobj->outfile.num_reg; i++)
2526      if (sobj->outfile.reg[i].slot == in->slot)
2527         return &sobj->outfile.reg[i];
2528
2529   return NULL;
2530}
2531
2532bool
2533etna_link_shader(struct etna_shader_link_info *info,
2534                 const struct etna_shader_variant *vs, const struct etna_shader_variant *fs)
2535{
2536   int comp_ofs = 0;
2537   /* For each fragment input we need to find the associated vertex shader
2538    * output, which can be found by matching on semantic name and index. A
2539    * binary search could be used because the vs outputs are sorted by their
2540    * semantic index and grouped by semantic type by fill_in_vs_outputs.
2541    */
2542   assert(fs->infile.num_reg < ETNA_NUM_INPUTS);
2543   info->pcoord_varying_comp_ofs = -1;
2544
2545   for (int idx = 0; idx < fs->infile.num_reg; ++idx) {
2546      const struct etna_shader_inout *fsio = &fs->infile.reg[idx];
2547      const struct etna_shader_inout *vsio = etna_shader_vs_lookup(vs, fsio);
2548      struct etna_varying *varying;
2549      bool interpolate_always = ((fsio->slot != VARYING_SLOT_COL0) &&
2550                                 (fsio->slot != VARYING_SLOT_COL1));
2551
2552      assert(fsio->reg > 0 && fsio->reg <= ARRAY_SIZE(info->varyings));
2553
2554      if (fsio->reg > info->num_varyings)
2555         info->num_varyings = fsio->reg;
2556
2557      varying = &info->varyings[fsio->reg - 1];
2558      varying->num_components = fsio->num_components;
2559
2560      if (!interpolate_always) /* colors affected by flat shading */
2561         varying->pa_attributes = 0x200;
2562      else /* texture coord or other bypasses flat shading */
2563         varying->pa_attributes = 0x2f1;
2564
2565      varying->use[0] = VARYING_COMPONENT_USE_UNUSED;
2566      varying->use[1] = VARYING_COMPONENT_USE_UNUSED;
2567      varying->use[2] = VARYING_COMPONENT_USE_UNUSED;
2568      varying->use[3] = VARYING_COMPONENT_USE_UNUSED;
2569
2570      /* point/tex coord is an input to the PS without matching VS output,
2571       * so it gets a varying slot without being assigned a VS register.
2572       */
2573      if (util_varying_is_point_coord(fsio->slot, fs->key.sprite_coord_enable)) {
2574         varying->use[0] = VARYING_COMPONENT_USE_POINTCOORD_X;
2575         varying->use[1] = VARYING_COMPONENT_USE_POINTCOORD_Y;
2576
2577         info->pcoord_varying_comp_ofs = comp_ofs;
2578      } else {
2579         if (vsio == NULL) { /* not found -- link error */
2580            BUG("Semantic value not found in vertex shader outputs\n");
2581            return true;
2582         }
2583
2584         varying->reg = vsio->reg;
2585      }
2586
2587      comp_ofs += varying->num_components;
2588   }
2589
2590   assert(info->num_varyings == fs->infile.num_reg);
2591
2592   return false;
2593}
2594