1/*
2 * Copyright (c) 2012-2015 Etnaviv Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 *    Wladimir J. van der Laan <laanwj@gmail.com>
25 */
26
27/* TGSI->Vivante shader ISA conversion */
28
29/* What does the compiler return (see etna_shader_object)?
30 *  1) instruction data
31 *  2) input-to-temporary mapping (fixed for ps)
32 *      *) in case of ps, semantic -> varying id mapping
33 *      *) for each varying: number of components used (r, rg, rgb, rgba)
34 *  3) temporary-to-output mapping (in case of vs, fixed for ps)
35 *  4) for each input/output: possible semantic (position, color, glpointcoord, ...)
36 *  5) immediates base offset, immediates data
37 *  6) used texture units (and possibly the TGSI_TEXTURE_* type); not needed to
38 *     configure the hw, but useful for error checking
39 *  7) enough information to add the z=(z+w)/2.0 necessary for older chips
40 *     (output reg id is enough)
41 *
42 *  Empty shaders are not allowed, should always at least generate a NOP. Also
43 *  if there is a label at the end of the shader, an extra NOP should be
44 *  generated as jump target.
45 *
46 * TODO
47 * * Use an instruction scheduler
48 * * Indirect access to uniforms / temporaries using amode
49 */
50
51#include "etnaviv_compiler.h"
52
53#include "etnaviv_asm.h"
54#include "etnaviv_context.h"
55#include "etnaviv_debug.h"
56#include "etnaviv_disasm.h"
57#include "etnaviv_uniforms.h"
58#include "etnaviv_util.h"
59
60#include "pipe/p_shader_tokens.h"
61#include "tgsi/tgsi_info.h"
62#include "tgsi/tgsi_iterate.h"
63#include "tgsi/tgsi_lowering.h"
64#include "tgsi/tgsi_strings.h"
65#include "tgsi/tgsi_util.h"
66#include "util/u_math.h"
67#include "util/u_memory.h"
68
69#include <fcntl.h>
70#include <stdio.h>
71#include <sys/stat.h>
72#include <sys/types.h>
73
74#define ETNA_MAX_INNER_TEMPS 2
75
/* Constant table loaded as immediates when lowering SIN/COS.
 * NOTE(review): exact consumer is outside this chunk — presumably the
 * sin/cos instruction lowering; confirm against the lowering code. */
static const float sincos_const[2][4] = {
   {
      2., -1., 4., -4.,
   },
   {
      1. / (2. * M_PI), 0.75, 0.5, 0.0,
   },
};
84
/* Native register description structure: identifies one hardware register
 * (or texture unit) that a TGSI register has been mapped to. */
struct etna_native_reg {
   unsigned valid : 1;  /* non-zero once a native register was assigned */
   unsigned is_tex : 1; /* is texture unit, overrides rgroup */
   unsigned rgroup : 3; /* INST_RGROUP_* register group */
   unsigned id : 9;     /* register index within the group */
};
92
/* Register description: per-TGSI-register bookkeeping, including the live
 * range (used for temp reuse) and the native register it maps to. */
struct etna_reg_desc {
   enum tgsi_file_type file; /* IN, OUT, TEMP, ... */
   int idx; /* index into file */
   bool active; /* used in program */
   int first_use; /* instruction id of first use (scope begin), -1 if unused */
   int last_use; /* instruction id of last use (scope end, inclusive), -1 if unused */

   struct etna_native_reg native; /* native register to map to */
   unsigned usage_mask : 4; /* usage, per channel */
   bool has_semantic; /* register has associated TGSI semantic */
   struct tgsi_declaration_semantic semantic; /* TGSI semantic */
   struct tgsi_declaration_interp interp; /* Interpolation type */
};
107
/* Label information structure (jump/branch targets) */
struct etna_compile_label {
   int inst_idx; /* Instruction id that label points to */
};
112
/* Kind of nesting scope tracked on the compile-time frame stack */
enum etna_compile_frame_type {
   ETNA_COMPILE_FRAME_IF, /* IF/ELSE/ENDIF */
   ETNA_COMPILE_FRAME_LOOP, /* BGNLOOP/ENDLOOP */
};
117
/* nesting scope frame (LOOP, IF, ...) during compilation.
 * Only the label-index fields relevant to the frame's type are meaningful.
 */
struct etna_compile_frame {
   enum etna_compile_frame_type type;
   int lbl_else_idx; /* ELSE target (IF frames) */
   int lbl_endif_idx; /* ENDIF target (IF frames) */
   int lbl_loop_bgn_idx; /* loop start (LOOP frames) */
   int lbl_loop_end_idx; /* loop end (LOOP frames) */
};
127
/* Per-TGSI-file slice of the register description array */
struct etna_compile_file {
   /* Number of registers in each TGSI file (max register+1) */
   size_t reg_size;
   /* Register descriptions, per register index */
   struct etna_reg_desc *reg;
};
134
/* Append `val` to growable array `arr`; requires companion variables
 * arr_count (elements in use) and arr_sz (allocated capacity).  Grows by
 * doubling, starting at 16 entries.
 * NOTE(review): the realloc result is assigned straight back to `arr`, so
 * an allocation failure leaks the old buffer — acceptable only if OOM is
 * treated as fatal here; confirm project policy. */
#define array_insert(arr, val)                          \
   do {                                                 \
      if (arr##_count == arr##_sz) {                    \
         arr##_sz = MAX2(2 * arr##_sz, 16);             \
         arr = realloc(arr, arr##_sz * sizeof(arr[0])); \
      }                                                 \
      arr[arr##_count++] = val;                         \
   } while (0)
143
144
/* scratch area for compiling shader, freed after compilation finishes */
struct etna_compile {
   const struct tgsi_token *tokens;
   bool free_tokens; /* whether we own (and must free) `tokens` */

   struct tgsi_shader_info info;

   /* Register descriptions, per TGSI file, per register index.
    * Each entry is a slice into decl[] below. */
   struct etna_compile_file file[TGSI_FILE_COUNT];

   /* Keep track of TGSI register declarations */
   struct etna_reg_desc decl[ETNA_MAX_DECL];
   uint total_decls;

   /* Bitmap of dead instructions which are removed in a separate pass */
   bool dead_inst[ETNA_MAX_TOKENS];

   /* Immediate data */
   enum etna_immediate_contents imm_contents[ETNA_MAX_IMM];
   uint32_t imm_data[ETNA_MAX_IMM];
   uint32_t imm_base; /* base of immediates (in 32 bit units) */
   uint32_t imm_size; /* size of immediates (in 32 bit units) */

   /* Next free native register, for register allocation */
   uint32_t next_free_native;

   /* Temporary registers for use within a single translated TGSI
    * instruction, only allocated when needed.
    */
   int inner_temps; /* number of inner temps handed out for the current
                       instruction (at most ETNA_MAX_INNER_TEMPS) */
   struct etna_native_reg inner_temp[ETNA_MAX_INNER_TEMPS];

   /* Fields for handling nested conditionals */
   struct etna_compile_frame frame_stack[ETNA_MAX_DEPTH];
   int frame_sp; /* frame stack pointer */
   int lbl_usage[ETNA_MAX_INSTRUCTIONS];

   /* growable array of labels (see array_insert) */
   unsigned labels_count, labels_sz;
   struct etna_compile_label *labels;

   unsigned num_loops;

   /* Code generation */
   int inst_ptr; /* current instruction pointer */
   uint32_t code[ETNA_MAX_INSTRUCTIONS * ETNA_INST_SIZE];

   /* I/O */

   /* Number of varyings (PS only) */
   int num_varyings;

   /* GPU hardware specs */
   const struct etna_specs *specs;

   const struct etna_shader_key *key;
};
202
203static struct etna_reg_desc *
204etna_get_dst_reg(struct etna_compile *c, struct tgsi_dst_register dst)
205{
206   return &c->file[dst.File].reg[dst.Index];
207}
208
209static struct etna_reg_desc *
210etna_get_src_reg(struct etna_compile *c, struct tgsi_src_register src)
211{
212   return &c->file[src.File].reg[src.Index];
213}
214
215static struct etna_native_reg
216etna_native_temp(unsigned reg)
217{
218   return (struct etna_native_reg) {
219      .valid = 1,
220      .rgroup = INST_RGROUP_TEMP,
221      .id = reg
222   };
223}
224
/** Register allocation **/
/* Sort keys for building live-range-ordered register indices */
enum reg_sort_order {
   FIRST_USE_ASC,  /* ascending first use */
   FIRST_USE_DESC, /* descending first use */
   LAST_USE_ASC,   /* ascending last use */
   LAST_USE_DESC   /* descending last use */
};
232
/* Augmented register description for sorting: pairs a register with the
 * precomputed sort key so qsort needs no extra context. */
struct sort_rec {
   struct etna_reg_desc *ptr;
   int key;
};
238
239static int
240sort_rec_compar(const struct sort_rec *a, const struct sort_rec *b)
241{
242   if (a->key < b->key)
243      return -1;
244
245   if (a->key > b->key)
246      return 1;
247
248   return 0;
249}
250
251/* create an index on a register set based on certain criteria. */
252static int
253sort_registers(struct sort_rec *sorted, struct etna_compile_file *file,
254               enum reg_sort_order so)
255{
256   struct etna_reg_desc *regs = file->reg;
257   int ptr = 0;
258
259   /* pre-populate keys from active registers */
260   for (int idx = 0; idx < file->reg_size; ++idx) {
261      /* only interested in active registers now; will only assign inactive ones
262       * if no space in active ones */
263      if (regs[idx].active) {
264         sorted[ptr].ptr = &regs[idx];
265
266         switch (so) {
267         case FIRST_USE_ASC:
268            sorted[ptr].key = regs[idx].first_use;
269            break;
270         case LAST_USE_ASC:
271            sorted[ptr].key = regs[idx].last_use;
272            break;
273         case FIRST_USE_DESC:
274            sorted[ptr].key = -regs[idx].first_use;
275            break;
276         case LAST_USE_DESC:
277            sorted[ptr].key = -regs[idx].last_use;
278            break;
279         }
280         ptr++;
281      }
282   }
283
284   /* sort index by key */
285   qsort(sorted, ptr, sizeof(struct sort_rec),
286         (int (*)(const void *, const void *))sort_rec_compar);
287
288   return ptr;
289}
290
291/* Allocate a new, unused, native temp register */
292static struct etna_native_reg
293alloc_new_native_reg(struct etna_compile *c)
294{
295   assert(c->next_free_native < ETNA_MAX_TEMPS);
296   return etna_native_temp(c->next_free_native++);
297}
298
299/* assign TEMPs to native registers */
300static void
301assign_temporaries_to_native(struct etna_compile *c,
302                             struct etna_compile_file *file)
303{
304   struct etna_reg_desc *temps = file->reg;
305
306   for (int idx = 0; idx < file->reg_size; ++idx)
307      temps[idx].native = alloc_new_native_reg(c);
308}
309
/* assign inputs and outputs to temporaries
 * Gallium assumes that the hardware has separate registers for taking input and
 * output, however Vivante GPUs use temporaries both for passing in inputs and
 * passing back outputs.
 * Try to re-use temporary registers where possible.
 *
 * The matching walks two live-range-sorted indices in lockstep:
 *  - inputs sorted by ascending last use against temps sorted by ascending
 *    first use: an input may share a temp that is first written at or after
 *    the input's last read;
 *  - outputs sorted by ascending first use against temps sorted by
 *    ascending last use: an output may share a temp whose last read is at
 *    or before the output's first write. */
static void
assign_inouts_to_temporaries(struct etna_compile *c, uint file)
{
   bool mode_inputs = (file == TGSI_FILE_INPUT);
   int inout_ptr = 0, num_inouts;
   int temp_ptr = 0, num_temps;
   struct sort_rec inout_order[ETNA_MAX_TEMPS];
   struct sort_rec temps_order[ETNA_MAX_TEMPS];
   num_inouts = sort_registers(inout_order, &c->file[file],
                               mode_inputs ? LAST_USE_ASC : FIRST_USE_ASC);
   num_temps = sort_registers(temps_order, &c->file[TGSI_FILE_TEMPORARY],
                              mode_inputs ? FIRST_USE_ASC : LAST_USE_ASC);

   while (inout_ptr < num_inouts && temp_ptr < num_temps) {
      struct etna_reg_desc *inout = inout_order[inout_ptr].ptr;
      struct etna_reg_desc *temp = temps_order[temp_ptr].ptr;

      if (!inout->active || inout->native.valid) { /* Skip if already a native register assigned */
         inout_ptr++;
         continue;
      }

      /* last usage of this input is before or in same instruction of first use
       * of temporary? */
      if (mode_inputs ? (inout->last_use <= temp->first_use)
                      : (inout->first_use >= temp->last_use)) {
         /* assign it and advance to next input */
         inout->native = temp->native;
         inout_ptr++;
      }

      temp_ptr++;
   }

   /* if we couldn't reuse current ones, allocate new temporaries */
   for (inout_ptr = 0; inout_ptr < num_inouts; ++inout_ptr) {
      struct etna_reg_desc *inout = inout_order[inout_ptr].ptr;

      if (inout->active && !inout->native.valid)
         inout->native = alloc_new_native_reg(c);
   }
}
357
/* Allocate an immediate with a certain value and return the index. If
 * there is already an immediate with that value, return that.
 * Search order matters: 1) exact match, 2) recycled unused slot,
 * 3) fresh slot at the end.
 */
static struct etna_inst_src
alloc_imm(struct etna_compile *c, enum etna_immediate_contents contents,
          uint32_t value)
{
   int idx;

   /* 1) exact match on contents tag and value.
    * Could use a hash table to speed this up */
   for (idx = 0; idx < c->imm_size; ++idx) {
      if (c->imm_contents[idx] == contents && c->imm_data[idx] == value)
         break;
   }

   /* 2) look if there is an unused slot */
   if (idx == c->imm_size) {
      for (idx = 0; idx < c->imm_size; ++idx) {
         if (c->imm_contents[idx] == ETNA_IMMEDIATE_UNUSED)
            break;
      }
   }

   /* 3) allocate new immediate */
   if (idx == c->imm_size) {
      assert(c->imm_size < ETNA_MAX_IMM);
      idx = c->imm_size++;
      c->imm_data[idx] = value;
      c->imm_contents[idx] = contents;
   }

   /* swizzle so that component with value is returned in all components */
   idx += c->imm_base;
   struct etna_inst_src imm_src = {
      .use = 1,
      .rgroup = INST_RGROUP_UNIFORM_0,
      .reg = idx / 4,
      .swiz = INST_SWIZ_BROADCAST(idx & 3)
   };

   return imm_src;
}
400
/* Allocate (or reuse) a scalar immediate holding the raw 32-bit pattern
 * `value`, tagged as a plain constant. */
static struct etna_inst_src
alloc_imm_u32(struct etna_compile *c, uint32_t value)
{
   return alloc_imm(c, ETNA_IMMEDIATE_CONSTANT, value);
}
406
/* Allocate four consecutive, vec4-aligned immediate slots holding `values`,
 * reusing an existing identical aligned group when possible, and return a
 * source operand with identity swizzle referencing it. */
static struct etna_inst_src
alloc_imm_vec4u(struct etna_compile *c, enum etna_immediate_contents contents,
                const uint32_t *values)
{
   struct etna_inst_src imm_src = { };
   int idx, i;

   /* scan existing aligned vec4 groups for an exact match */
   for (idx = 0; idx + 3 < c->imm_size; idx += 4) {
      /* What if we can use a uniform with a different swizzle? */
      for (i = 0; i < 4; i++)
         if (c->imm_contents[idx + i] != contents || c->imm_data[idx + i] != values[i])
            break;
      if (i == 4)
         break;
   }

   /* no match found: append a new aligned group */
   if (idx + 3 >= c->imm_size) {
      idx = align(c->imm_size, 4);
      assert(idx + 4 <= ETNA_MAX_IMM);

      for (i = 0; i < 4; i++) {
         c->imm_data[idx + i] = values[i];
         c->imm_contents[idx + i] = contents;
      }

      c->imm_size = idx + 4;
   }

   assert((c->imm_base & 3) == 0);
   idx += c->imm_base;
   imm_src.use = 1;
   imm_src.rgroup = INST_RGROUP_UNIFORM_0;
   imm_src.reg = idx / 4;
   imm_src.swiz = INST_SWIZ_IDENTITY;

   return imm_src;
}
444
445static uint32_t
446get_imm_u32(struct etna_compile *c, const struct etna_inst_src *imm,
447            unsigned swiz_idx)
448{
449   assert(imm->use == 1 && imm->rgroup == INST_RGROUP_UNIFORM_0);
450   unsigned int idx = imm->reg * 4 + ((imm->swiz >> (swiz_idx * 2)) & 3);
451
452   return c->imm_data[idx];
453}
454
/* Allocate immediate with a certain float value. If there is already an
 * immediate with that value, return that.
 */
static struct etna_inst_src
alloc_imm_f32(struct etna_compile *c, float value)
{
   /* fui() reinterprets the float's bit pattern as a uint32_t */
   return alloc_imm_u32(c, fui(value));
}
463
464static struct etna_inst_src
465etna_imm_vec4f(struct etna_compile *c, const float *vec4)
466{
467   uint32_t val[4];
468
469   for (int i = 0; i < 4; i++)
470      val[i] = fui(vec4[i]);
471
472   return alloc_imm_vec4u(c, ETNA_IMMEDIATE_CONSTANT, val);
473}
474
/* Pass -- check register file declarations and immediates.
 * Collects all TGSI immediates into c->imm_data / c->imm_contents; other
 * token types are ignored here. */
static void
etna_compile_parse_declarations(struct etna_compile *c)
{
   struct tgsi_parse_context ctx = { };
   MAYBE_UNUSED unsigned status = tgsi_parse_init(&ctx, c->tokens);
   assert(status == TGSI_PARSE_OK);

   while (!tgsi_parse_end_of_tokens(&ctx)) {
      tgsi_parse_token(&ctx);

      switch (ctx.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_IMMEDIATE: {
         /* immediates are handled differently from other files; they are
          * not declared explicitly, and always add four components */
         const struct tgsi_full_immediate *imm = &ctx.FullToken.FullImmediate;
         assert(c->imm_size <= (ETNA_MAX_IMM - 4));

         for (int i = 0; i < 4; ++i) {
            unsigned idx = c->imm_size++;

            c->imm_data[idx] = imm->u[i].Uint;
            c->imm_contents[idx] = ETNA_IMMEDIATE_CONSTANT;
         }
      }
      break;
      }
   }

   tgsi_parse_free(&ctx);
}
506
507/* Allocate register declarations for the registers in all register files */
508static void
509etna_allocate_decls(struct etna_compile *c)
510{
511   uint idx = 0;
512
513   for (int x = 0; x < TGSI_FILE_COUNT; ++x) {
514      c->file[x].reg = &c->decl[idx];
515      c->file[x].reg_size = c->info.file_max[x] + 1;
516
517      for (int sub = 0; sub < c->file[x].reg_size; ++sub) {
518         c->decl[idx].file = x;
519         c->decl[idx].idx = sub;
520         idx++;
521      }
522   }
523
524   c->total_decls = idx;
525}
526
/* Pass -- check and record usage of temporaries, inputs, outputs.
 * Records, for every register in every file, the first/last instruction
 * index where it is used; these live ranges drive temp reuse in
 * assign_inouts_to_temporaries(). */
static void
etna_compile_pass_check_usage(struct etna_compile *c)
{
   struct tgsi_parse_context ctx = { };
   MAYBE_UNUSED unsigned status = tgsi_parse_init(&ctx, c->tokens);
   assert(status == TGSI_PARSE_OK);

   /* reset all live ranges before the scan */
   for (int idx = 0; idx < c->total_decls; ++idx) {
      c->decl[idx].active = false;
      c->decl[idx].first_use = c->decl[idx].last_use = -1;
   }

   int inst_idx = 0;
   while (!tgsi_parse_end_of_tokens(&ctx)) {
      tgsi_parse_token(&ctx);
      /* For every register, mark the first and last instruction index where
       * it is used; this allows finding ranges where a temporary can be
       * borrowed as input and/or output register.
       *
       * XXX loops need special care here (or this must be disabled
       * entirely), as execution is no longer chronological: the last use of
       * a register inside a loop body does not make it dead -- it may be
       * read again on the next iteration, so it can only be declared free
       * after the loop finishes.  Likewise for inputs: the first use inside
       * a loop does not guarantee the register was not overwritten in a
       * previous iteration, so it is only free before the loop starts.
       * The proper way would be full dominator / post-dominator analysis
       * (especially with more complicated control flow such as direct
       * branch instructions), but not for now...
       */
      switch (ctx.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_DECLARATION: {
         /* Declaration: fill in file details */
         const struct tgsi_full_declaration *decl = &ctx.FullToken.FullDeclaration;
         struct etna_compile_file *file = &c->file[decl->Declaration.File];

         for (int idx = decl->Range.First; idx <= decl->Range.Last; ++idx) {
            file->reg[idx].usage_mask = 0; // we'll compute this ourselves
            file->reg[idx].has_semantic = decl->Declaration.Semantic;
            file->reg[idx].semantic = decl->Semantic;
            file->reg[idx].interp = decl->Interp;
         }
      } break;
      case TGSI_TOKEN_TYPE_INSTRUCTION: {
         /* Instruction: iterate over operands of instruction */
         const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction;

         /* iterate over destination registers */
         for (int idx = 0; idx < inst->Instruction.NumDstRegs; ++idx) {
            struct etna_reg_desc *reg_desc = &c->file[inst->Dst[idx].Register.File].reg[inst->Dst[idx].Register.Index];

            if (reg_desc->first_use == -1)
               reg_desc->first_use = inst_idx;

            reg_desc->last_use = inst_idx;
            reg_desc->active = true;
         }

         /* iterate over source registers */
         for (int idx = 0; idx < inst->Instruction.NumSrcRegs; ++idx) {
            struct etna_reg_desc *reg_desc = &c->file[inst->Src[idx].Register.File].reg[inst->Src[idx].Register.Index];

            if (reg_desc->first_use == -1)
               reg_desc->first_use = inst_idx;

            reg_desc->last_use = inst_idx;
            reg_desc->active = true;
            /* accumulate per-channel usage mask for the register; this is
             * used to determine how many varying slots to allocate */
            reg_desc->usage_mask |= tgsi_util_get_inst_usage_mask(inst, idx);
         }
         inst_idx += 1;
      } break;
      default:
         break;
      }
   }

   tgsi_parse_free(&ctx);
}
616
617/* assign inputs that need to be assigned to specific registers */
618static void
619assign_special_inputs(struct etna_compile *c)
620{
621   if (c->info.processor == PIPE_SHADER_FRAGMENT) {
622      /* never assign t0 as it is the position output, start assigning at t1 */
623      c->next_free_native = 1;
624
625      /* hardwire TGSI_SEMANTIC_POSITION (input and output) to t0 */
626      for (int idx = 0; idx < c->total_decls; ++idx) {
627         struct etna_reg_desc *reg = &c->decl[idx];
628
629         if (reg->active && reg->semantic.Name == TGSI_SEMANTIC_POSITION)
630            reg->native = etna_native_temp(0);
631      }
632   }
633}
634
635/* Check that a move instruction does not swizzle any of the components
636 * that it writes.
637 */
638static bool
639etna_mov_check_no_swizzle(const struct tgsi_dst_register dst,
640                          const struct tgsi_src_register src)
641{
642   return (!(dst.WriteMask & TGSI_WRITEMASK_X) || src.SwizzleX == TGSI_SWIZZLE_X) &&
643          (!(dst.WriteMask & TGSI_WRITEMASK_Y) || src.SwizzleY == TGSI_SWIZZLE_Y) &&
644          (!(dst.WriteMask & TGSI_WRITEMASK_Z) || src.SwizzleZ == TGSI_SWIZZLE_Z) &&
645          (!(dst.WriteMask & TGSI_WRITEMASK_W) || src.SwizzleW == TGSI_SWIZZLE_W);
646}
647
/* Pass -- optimize outputs
 * Mesa tends to generate code like this at the end of their shaders
 *   MOV OUT[1], TEMP[2]
 *   MOV OUT[0], TEMP[0]
 *   MOV OUT[2], TEMP[1]
 * Recognize if
 * a) there is only a single assignment to an output register and
 * b) the temporary is not used after that
 * Also recognize direct assignment of IN to OUT (passthrough).
 * Eliminated MOVs are marked dead in c->dead_inst and the output is aliased
 * onto the source's native register instead.
 **/
static void
etna_compile_pass_optimize_outputs(struct etna_compile *c)
{
   struct tgsi_parse_context ctx = { };
   int inst_idx = 0;
   MAYBE_UNUSED unsigned status = tgsi_parse_init(&ctx, c->tokens);
   assert(status == TGSI_PARSE_OK);

   while (!tgsi_parse_end_of_tokens(&ctx)) {
      tgsi_parse_token(&ctx);

      switch (ctx.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_INSTRUCTION: {
         const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction;

         /* iterate over operands */
         switch (inst->Instruction.Opcode) {
         case TGSI_OPCODE_MOV: {
            /* We are only interested in eliminating MOVs which write to
             * the shader outputs. Test for this early. */
            if (inst->Dst[0].Register.File != TGSI_FILE_OUTPUT)
               break;
            /* Elimination of a MOV must have no visible effect on the
             * resulting shader: this means the MOV must not swizzle or
             * saturate, and its source must not have the negate or
             * absolute modifiers. */
            if (!etna_mov_check_no_swizzle(inst->Dst[0].Register, inst->Src[0].Register) ||
                inst->Instruction.Saturate || inst->Src[0].Register.Negate ||
                inst->Src[0].Register.Absolute)
               break;

            uint out_idx = inst->Dst[0].Register.Index;
            uint in_idx = inst->Src[0].Register.Index;
            /* assignment of temporary to output --
             * and the output doesn't yet have a native register assigned
             * and the last use of the temporary is this instruction
             */
            if (inst->Src[0].Register.File == TGSI_FILE_TEMPORARY &&
                !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid &&
                c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use == inst_idx) {
               c->file[TGSI_FILE_OUTPUT].reg[out_idx].native =
                  c->file[TGSI_FILE_TEMPORARY].reg[in_idx].native;
               /* prevent temp from being re-used for the rest of the shader */
               c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use = ETNA_MAX_TOKENS;
               /* mark this MOV instruction as a no-op */
               c->dead_inst[inst_idx] = true;
            }
            /* direct assignment of input to output --
             * and the input or output doesn't yet have a native register
             * assigned
             * and the output is only used in this instruction:
             * allocate a new register, and associate both input and output
             * with it
             */
            if (inst->Src[0].Register.File == TGSI_FILE_INPUT &&
                !c->file[TGSI_FILE_INPUT].reg[in_idx].native.valid &&
                !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid &&
                c->file[TGSI_FILE_OUTPUT].reg[out_idx].last_use == inst_idx &&
                c->file[TGSI_FILE_OUTPUT].reg[out_idx].first_use == inst_idx) {
               c->file[TGSI_FILE_OUTPUT].reg[out_idx].native =
                  c->file[TGSI_FILE_INPUT].reg[in_idx].native =
                     alloc_new_native_reg(c);
               /* mark this MOV instruction as a no-op */
               c->dead_inst[inst_idx] = true;
            }
         } break;
         default:;
         }
         inst_idx += 1;
      } break;
      }
   }

   tgsi_parse_free(&ctx);
}
735
736/* Get a temporary to be used within one TGSI instruction.
737 * The first time that this function is called the temporary will be allocated.
738 * Each call to this function will return the same temporary.
739 */
740static struct etna_native_reg
741etna_compile_get_inner_temp(struct etna_compile *c)
742{
743   int inner_temp = c->inner_temps;
744
745   if (inner_temp < ETNA_MAX_INNER_TEMPS) {
746      if (!c->inner_temp[inner_temp].valid)
747         c->inner_temp[inner_temp] = alloc_new_native_reg(c);
748
749      /* alloc_new_native_reg() handles lack of registers */
750      c->inner_temps += 1;
751   } else {
752      BUG("Too many inner temporaries (%i) requested in one instruction",
753          inner_temp + 1);
754   }
755
756   return c->inner_temp[inner_temp];
757}
758
759static struct etna_inst_dst
760etna_native_to_dst(struct etna_native_reg native, unsigned comps)
761{
762   /* Can only assign to temporaries */
763   assert(native.valid && !native.is_tex && native.rgroup == INST_RGROUP_TEMP);
764
765   struct etna_inst_dst rv = {
766      .comps = comps,
767      .use = 1,
768      .reg = native.id,
769   };
770
771   return rv;
772}
773
774static struct etna_inst_src
775etna_native_to_src(struct etna_native_reg native, uint32_t swizzle)
776{
777   assert(native.valid && !native.is_tex);
778
779   struct etna_inst_src rv = {
780      .use = 1,
781      .swiz = swizzle,
782      .rgroup = native.rgroup,
783      .reg = native.id,
784      .amode = INST_AMODE_DIRECT,
785   };
786
787   return rv;
788}
789
790static inline struct etna_inst_src
791negate(struct etna_inst_src src)
792{
793   src.neg = !src.neg;
794
795   return src;
796}
797
/* Return `src` with the absolute-value modifier set. */
static inline struct etna_inst_src
absolute(struct etna_inst_src src)
{
   src.abs = 1;

   return src;
}
805
/* Return `src` with `swizzle` composed on top of its existing swizzle. */
static inline struct etna_inst_src
swizzle(struct etna_inst_src src, unsigned swizzle)
{
   src.swiz = inst_swiz_compose(src.swiz, swizzle);

   return src;
}
813
814/* Emit instruction and append it to program */
815static void
816emit_inst(struct etna_compile *c, struct etna_inst *inst)
817{
818   assert(c->inst_ptr <= ETNA_MAX_INSTRUCTIONS);
819
820   /* Check for uniform conflicts (each instruction can only access one
821    * uniform),
822    * if detected, use an intermediate temporary */
823   unsigned uni_rgroup = -1;
824   unsigned uni_reg = -1;
825
826   for (int src = 0; src < ETNA_NUM_SRC; ++src) {
827      if (etna_rgroup_is_uniform(inst->src[src].rgroup)) {
828         if (uni_reg == -1) { /* first unique uniform used */
829            uni_rgroup = inst->src[src].rgroup;
830            uni_reg = inst->src[src].reg;
831         } else { /* second or later; check that it is a re-use */
832            if (uni_rgroup != inst->src[src].rgroup ||
833                uni_reg != inst->src[src].reg) {
834               DBG_F(ETNA_DBG_COMPILER_MSGS, "perf warning: instruction that "
835                                             "accesses different uniforms, "
836                                             "need to generate extra MOV");
837               struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c);
838
839               /* Generate move instruction to temporary */
840               etna_assemble(&c->code[c->inst_ptr * 4], &(struct etna_inst) {
841                  .opcode = INST_OPCODE_MOV,
842                  .dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y |
843                                                        INST_COMPS_Z | INST_COMPS_W),
844                  .src[2] = inst->src[src]
845               });
846
847               c->inst_ptr++;
848
849               /* Modify instruction to use temp register instead of uniform */
850               inst->src[src].use = 1;
851               inst->src[src].rgroup = INST_RGROUP_TEMP;
852               inst->src[src].reg = inner_temp.id;
853               inst->src[src].swiz = INST_SWIZ_IDENTITY; /* swizzling happens on MOV */
854               inst->src[src].neg = 0; /* negation happens on MOV */
855               inst->src[src].abs = 0; /* abs happens on MOV */
856               inst->src[src].amode = 0; /* amode effects happen on MOV */
857            }
858         }
859      }
860   }
861
862   /* Finally assemble the actual instruction */
863   etna_assemble(&c->code[c->inst_ptr * 4], inst);
864   c->inst_ptr++;
865}
866
867static unsigned int
868etna_amode(struct tgsi_ind_register indirect)
869{
870   assert(indirect.File == TGSI_FILE_ADDRESS);
871   assert(indirect.Index == 0);
872
873   switch (indirect.Swizzle) {
874   case TGSI_SWIZZLE_X:
875      return INST_AMODE_ADD_A_X;
876   case TGSI_SWIZZLE_Y:
877      return INST_AMODE_ADD_A_Y;
878   case TGSI_SWIZZLE_Z:
879      return INST_AMODE_ADD_A_Z;
880   case TGSI_SWIZZLE_W:
881      return INST_AMODE_ADD_A_W;
882   default:
883      assert(!"Invalid swizzle");
884   }
885
886   unreachable("bad swizzle");
887}
888
/* convert destination operand.
 * Writes to the address register (TGSI_FILE_ADDRESS) are encoded with
 * use = 0; everything else must already have a native temp assigned. */
static struct etna_inst_dst
convert_dst(struct etna_compile *c, const struct tgsi_full_dst_register *in)
{
   struct etna_inst_dst rv = {
      /// XXX .amode
      .comps = in->Register.WriteMask,
   };

   if (in->Register.File == TGSI_FILE_ADDRESS) {
      /* only a single address register (index 0) is supported */
      assert(in->Register.Index == 0);
      rv.reg = in->Register.Index;
      rv.use = 0;
   } else {
      rv = etna_native_to_dst(etna_get_dst_reg(c, in->Register)->native,
                              in->Register.WriteMask);
   }

   if (in->Register.Indirect)
      rv.amode = etna_amode(in->Indirect);

   return rv;
}
912
/* convert texture operand: build the sampler operand of a TEX instruction
 * from the sampler source register, which must have been mapped to a
 * texture unit earlier.  The `tex` instruction-texture info is currently
 * unused. */
static struct etna_inst_tex
convert_tex(struct etna_compile *c, const struct tgsi_full_src_register *in,
            const struct tgsi_instruction_texture *tex)
{
   struct etna_native_reg native_reg = etna_get_src_reg(c, in->Register)->native;
   struct etna_inst_tex rv = {
      // XXX .amode (to allow for an array of samplers?)
      .swiz = INST_SWIZ_IDENTITY
   };

   assert(native_reg.is_tex && native_reg.valid);
   rv.id = native_reg.id;

   return rv;
}
929
930/* convert source operand */
931static struct etna_inst_src
932etna_create_src(const struct tgsi_full_src_register *tgsi,
933                const struct etna_native_reg *native)
934{
935   const struct tgsi_src_register *reg = &tgsi->Register;
936   struct etna_inst_src rv = {
937      .use = 1,
938      .swiz = INST_SWIZ(reg->SwizzleX, reg->SwizzleY, reg->SwizzleZ, reg->SwizzleW),
939      .neg = reg->Negate,
940      .abs = reg->Absolute,
941      .rgroup = native->rgroup,
942      .reg = native->id,
943      .amode = INST_AMODE_DIRECT,
944   };
945
946   assert(native->valid && !native->is_tex);
947
948   if (reg->Indirect)
949      rv.amode = etna_amode(tgsi->Indirect);
950
951   return rv;
952}
953
954static struct etna_inst_src
955etna_mov_src_to_temp(struct etna_compile *c, struct etna_inst_src src,
956                     struct etna_native_reg temp)
957{
958   struct etna_inst mov = { };
959
960   mov.opcode = INST_OPCODE_MOV;
961   mov.sat = 0;
962   mov.dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
963                                      INST_COMPS_Z | INST_COMPS_W);
964   mov.src[2] = src;
965   emit_inst(c, &mov);
966
967   src.swiz = INST_SWIZ_IDENTITY;
968   src.neg = src.abs = 0;
969   src.rgroup = temp.rgroup;
970   src.reg = temp.id;
971
972   return src;
973}
974
975static struct etna_inst_src
976etna_mov_src(struct etna_compile *c, struct etna_inst_src src)
977{
978   struct etna_native_reg temp = etna_compile_get_inner_temp(c);
979
980   return etna_mov_src_to_temp(c, src, temp);
981}
982
983static bool
984etna_src_uniforms_conflict(struct etna_inst_src a, struct etna_inst_src b)
985{
986   return etna_rgroup_is_uniform(a.rgroup) &&
987          etna_rgroup_is_uniform(b.rgroup) &&
988          (a.rgroup != b.rgroup || a.reg != b.reg);
989}
990
991/* create a new label */
992static unsigned int
993alloc_new_label(struct etna_compile *c)
994{
995   struct etna_compile_label label = {
996      .inst_idx = -1, /* start by point to no specific instruction */
997   };
998
999   array_insert(c->labels, label);
1000
1001   return c->labels_count - 1;
1002}
1003
/* place label at current instruction pointer */
static void
label_place(struct etna_compile *c, struct etna_compile_label *label)
{
   /* resolve the label: it now targets the next instruction to be emitted */
   label->inst_idx = c->inst_ptr;
}
1010
/* mark label use at current instruction.
 * target of the label will be filled in in the marked instruction's src2.imm
 * slot as soon
 * as the value becomes known.
 */
static void
label_mark_use(struct etna_compile *c, int lbl_idx)
{
   assert(c->inst_ptr < ETNA_MAX_INSTRUCTIONS);
   /* at most one pending label reference per instruction slot */
   c->lbl_usage[c->inst_ptr] = lbl_idx;
}
1022
1023/* walk the frame stack and return first frame with matching type */
1024static struct etna_compile_frame *
1025find_frame(struct etna_compile *c, enum etna_compile_frame_type type)
1026{
1027   for (int sp = c->frame_sp; sp >= 0; sp--)
1028      if (c->frame_stack[sp].type == type)
1029         return &c->frame_stack[sp];
1030
1031   assert(0);
1032   return NULL;
1033}
1034
/* One entry of the TGSI opcode translation table: describes how to lower a
 * single TGSI opcode to the native ISA. */
struct instr_translater {
   /* translation callback; receives the already-converted source operands */
   void (*fxn)(const struct instr_translater *t, struct etna_compile *c,
               const struct tgsi_full_instruction *inst,
               struct etna_inst_src *src);
   unsigned tgsi_opc; /* the TGSI_OPCODE_* this entry handles */
   uint8_t opc;       /* native INST_OPCODE_* used by generic translators */

   /* tgsi src -> etna src swizzle */
   int src[3];

   unsigned cond;     /* native INST_CONDITION_* modifier, if any */
};
1047
1048static void
1049trans_instr(const struct instr_translater *t, struct etna_compile *c,
1050            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1051{
1052   const struct tgsi_opcode_info *info = tgsi_get_opcode_info(inst->Instruction.Opcode);
1053   struct etna_inst instr = { };
1054
1055   instr.opcode = t->opc;
1056   instr.cond = t->cond;
1057   instr.sat = inst->Instruction.Saturate;
1058
1059   assert(info->num_dst <= 1);
1060   if (info->num_dst)
1061      instr.dst = convert_dst(c, &inst->Dst[0]);
1062
1063   assert(info->num_src <= ETNA_NUM_SRC);
1064
1065   for (unsigned i = 0; i < info->num_src; i++) {
1066      int swizzle = t->src[i];
1067
1068      assert(swizzle != -1);
1069      instr.src[swizzle] = src[i];
1070   }
1071
1072   emit_inst(c, &instr);
1073}
1074
1075static void
1076trans_min_max(const struct instr_translater *t, struct etna_compile *c,
1077              const struct tgsi_full_instruction *inst,
1078              struct etna_inst_src *src)
1079{
1080   emit_inst(c, &(struct etna_inst) {
1081      .opcode = INST_OPCODE_SELECT,
1082       .cond = t->cond,
1083       .sat = inst->Instruction.Saturate,
1084       .dst = convert_dst(c, &inst->Dst[0]),
1085       .src[0] = src[0],
1086       .src[1] = src[1],
1087       .src[2] = src[0],
1088    });
1089}
1090
1091static void
1092trans_if(const struct instr_translater *t, struct etna_compile *c,
1093         const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1094{
1095   struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++];
1096   struct etna_inst_src imm_0 = alloc_imm_f32(c, 0.0f);
1097
1098   /* push IF to stack */
1099   f->type = ETNA_COMPILE_FRAME_IF;
1100   /* create "else" label */
1101   f->lbl_else_idx = alloc_new_label(c);
1102   f->lbl_endif_idx = -1;
1103
1104   /* We need to avoid the emit_inst() below becoming two instructions */
1105   if (etna_src_uniforms_conflict(src[0], imm_0))
1106      src[0] = etna_mov_src(c, src[0]);
1107
1108   /* mark position in instruction stream of label reference so that it can be
1109    * filled in in next pass */
1110   label_mark_use(c, f->lbl_else_idx);
1111
1112   /* create conditional branch to label if src0 EQ 0 */
1113   emit_inst(c, &(struct etna_inst){
1114      .opcode = INST_OPCODE_BRANCH,
1115      .cond = INST_CONDITION_EQ,
1116      .src[0] = src[0],
1117      .src[1] = imm_0,
1118    /* imm is filled in later */
1119   });
1120}
1121
1122static void
1123trans_else(const struct instr_translater *t, struct etna_compile *c,
1124           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1125{
1126   assert(c->frame_sp > 0);
1127   struct etna_compile_frame *f = &c->frame_stack[c->frame_sp - 1];
1128   assert(f->type == ETNA_COMPILE_FRAME_IF);
1129
1130   /* create "endif" label, and branch to endif label */
1131   f->lbl_endif_idx = alloc_new_label(c);
1132   label_mark_use(c, f->lbl_endif_idx);
1133   emit_inst(c, &(struct etna_inst) {
1134      .opcode = INST_OPCODE_BRANCH,
1135      .cond = INST_CONDITION_TRUE,
1136      /* imm is filled in later */
1137   });
1138
1139   /* mark "else" label at this position in instruction stream */
1140   label_place(c, &c->labels[f->lbl_else_idx]);
1141}
1142
1143static void
1144trans_endif(const struct instr_translater *t, struct etna_compile *c,
1145            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1146{
1147   assert(c->frame_sp > 0);
1148   struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp];
1149   assert(f->type == ETNA_COMPILE_FRAME_IF);
1150
1151   /* assign "endif" or "else" (if no ELSE) label to current position in
1152    * instruction stream, pop IF */
1153   if (f->lbl_endif_idx != -1)
1154      label_place(c, &c->labels[f->lbl_endif_idx]);
1155   else
1156      label_place(c, &c->labels[f->lbl_else_idx]);
1157}
1158
1159static void
1160trans_loop_bgn(const struct instr_translater *t, struct etna_compile *c,
1161               const struct tgsi_full_instruction *inst,
1162               struct etna_inst_src *src)
1163{
1164   struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++];
1165
1166   /* push LOOP to stack */
1167   f->type = ETNA_COMPILE_FRAME_LOOP;
1168   f->lbl_loop_bgn_idx = alloc_new_label(c);
1169   f->lbl_loop_end_idx = alloc_new_label(c);
1170
1171   label_place(c, &c->labels[f->lbl_loop_bgn_idx]);
1172
1173   c->num_loops++;
1174}
1175
1176static void
1177trans_loop_end(const struct instr_translater *t, struct etna_compile *c,
1178               const struct tgsi_full_instruction *inst,
1179               struct etna_inst_src *src)
1180{
1181   assert(c->frame_sp > 0);
1182   struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp];
1183   assert(f->type == ETNA_COMPILE_FRAME_LOOP);
1184
1185   /* mark position in instruction stream of label reference so that it can be
1186    * filled in in next pass */
1187   label_mark_use(c, f->lbl_loop_bgn_idx);
1188
1189   /* create branch to loop_bgn label */
1190   emit_inst(c, &(struct etna_inst) {
1191      .opcode = INST_OPCODE_BRANCH,
1192      .cond = INST_CONDITION_TRUE,
1193      .src[0] = src[0],
1194      /* imm is filled in later */
1195   });
1196
1197   label_place(c, &c->labels[f->lbl_loop_end_idx]);
1198}
1199
1200static void
1201trans_brk(const struct instr_translater *t, struct etna_compile *c,
1202          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1203{
1204   assert(c->frame_sp > 0);
1205   struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP);
1206
1207   /* mark position in instruction stream of label reference so that it can be
1208    * filled in in next pass */
1209   label_mark_use(c, f->lbl_loop_end_idx);
1210
1211   /* create branch to loop_end label */
1212   emit_inst(c, &(struct etna_inst) {
1213      .opcode = INST_OPCODE_BRANCH,
1214      .cond = INST_CONDITION_TRUE,
1215      .src[0] = src[0],
1216      /* imm is filled in later */
1217   });
1218}
1219
/* Translate TGSI CONT: jump back to the begin label of the innermost
 * enclosing loop (i.e. start the next iteration). */
static void
trans_cont(const struct instr_translater *t, struct etna_compile *c,
           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   assert(c->frame_sp > 0);
   struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP);

   /* mark position in instruction stream of label reference so that it can be
    * filled in in next pass */
   label_mark_use(c, f->lbl_loop_bgn_idx);

   /* create branch to loop_bgn label (not loop_end: CONT restarts the loop
    * rather than breaking out of it) */
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_BRANCH,
      .cond = INST_CONDITION_TRUE,
      .src[0] = src[0],
      /* imm is filled in later */
   });
}
1239
1240static void
1241trans_deriv(const struct instr_translater *t, struct etna_compile *c,
1242            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1243{
1244   emit_inst(c, &(struct etna_inst) {
1245      .opcode = t->opc,
1246      .sat = inst->Instruction.Saturate,
1247      .dst = convert_dst(c, &inst->Dst[0]),
1248      .src[0] = src[0],
1249      .src[2] = src[0],
1250   });
1251}
1252
/* Translate TGSI ARL: load floor(src) into the address register.
 * The source is floored into an inner temp — natively when the hardware has
 * FLOOR, otherwise emulated — and then moved into a0 with MOVAR. */
static void
trans_arl(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   struct etna_native_reg temp = etna_compile_get_inner_temp(c);
   struct etna_inst arl = { };
   struct etna_inst_dst dst;

   dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z |
                                  INST_COMPS_W);

   if (c->specs->has_sign_floor_ceil) {
      /* native FLOOR available: temp = floor(src)
       * NOTE(review): .sat is left 0 here while the emulated path applies
       * inst->Instruction.Saturate — confirm whether that is intentional. */
      struct etna_inst floor = { };

      floor.opcode = INST_OPCODE_FLOOR;
      floor.src[2] = src[0];
      floor.dst = dst;

      emit_inst(c, &floor);
   } else {
      /* emulate: floor(x) = x - frc(x) */
      struct etna_inst floor[2] = { };

      floor[0].opcode = INST_OPCODE_FRC;
      floor[0].sat = inst->Instruction.Saturate;
      floor[0].dst = dst;
      floor[0].src[2] = src[0];

      floor[1].opcode = INST_OPCODE_ADD;
      floor[1].sat = inst->Instruction.Saturate;
      floor[1].dst = dst;
      floor[1].src[0] = src[0];
      floor[1].src[2].use = 1;
      floor[1].src[2].swiz = INST_SWIZ_IDENTITY;
      floor[1].src[2].neg = 1; /* subtract the FRC result held in temp */
      floor[1].src[2].rgroup = temp.rgroup;
      floor[1].src[2].reg = temp.id;

      emit_inst(c, &floor[0]);
      emit_inst(c, &floor[1]);
   }

   /* move the floored value into the address register */
   arl.opcode = INST_OPCODE_MOVAR;
   arl.sat = inst->Instruction.Saturate;
   arl.dst = convert_dst(c, &inst->Dst[0]);
   arl.src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);

   emit_inst(c, &arl);
}
1301
1302static void
1303trans_lrp(const struct instr_translater *t, struct etna_compile *c,
1304          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1305{
1306   /* dst = src0 * src1 + (1 - src0) * src2
1307    *     => src0 * src1 - (src0 - 1) * src2
1308    *     => src0 * src1 - (src0 * src2 - src2)
1309    * MAD tTEMP.xyzw, tSRC0.xyzw, tSRC2.xyzw, -tSRC2.xyzw
1310    * MAD tDST.xyzw, tSRC0.xyzw, tSRC1.xyzw, -tTEMP.xyzw
1311    */
1312   struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1313   if (etna_src_uniforms_conflict(src[0], src[1]) ||
1314       etna_src_uniforms_conflict(src[0], src[2])) {
1315      src[0] = etna_mov_src(c, src[0]);
1316   }
1317
1318   struct etna_inst mad[2] = { };
1319   mad[0].opcode = INST_OPCODE_MAD;
1320   mad[0].sat = 0;
1321   mad[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
1322                                         INST_COMPS_Z | INST_COMPS_W);
1323   mad[0].src[0] = src[0];
1324   mad[0].src[1] = src[2];
1325   mad[0].src[2] = negate(src[2]);
1326   mad[1].opcode = INST_OPCODE_MAD;
1327   mad[1].sat = inst->Instruction.Saturate;
1328   mad[1].dst = convert_dst(c, &inst->Dst[0]), mad[1].src[0] = src[0];
1329   mad[1].src[1] = src[1];
1330   mad[1].src[2] = negate(etna_native_to_src(temp, INST_SWIZ_IDENTITY));
1331
1332   emit_inst(c, &mad[0]);
1333   emit_inst(c, &mad[1]);
1334}
1335
/* Translate TGSI LIT (lighting coefficients) via clamp + LOG + MUL + LITP. */
static void
trans_lit(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   /* SELECT.LT tmp._y__, 0, src.yyyy, 0
    *  - can be eliminated if src.y is a uniform and >= 0
    * SELECT.GT tmp.___w, 128, src.wwww, 128
    * SELECT.LT tmp.___w, -128, tmp.wwww, -128
    *  - can be eliminated if src.w is a uniform and fits clamp
    * LOG tmp.x, void, void, tmp.yyyy
    * MUL tmp.x, tmp.xxxx, tmp.wwww, void
    * LITP dst, undef, src.xxxx, tmp.xxxx
    */
   struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c);
   struct etna_inst_src src_y = { };

   /* clamp src.y to >= 0; constant-fold when src is a uniform */
   if (!etna_rgroup_is_uniform(src[0].rgroup)) {
      src_y = etna_native_to_src(inner_temp, SWIZZLE(Y, Y, Y, Y));

      struct etna_inst ins = { };
      ins.opcode = INST_OPCODE_SELECT;
      ins.cond = INST_CONDITION_LT;
      ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_Y);
      ins.src[0] = ins.src[2] = alloc_imm_f32(c, 0.0);
      ins.src[1] = swizzle(src[0], SWIZZLE(Y, Y, Y, Y));
      emit_inst(c, &ins);
   } else if (uif(get_imm_u32(c, &src[0], 1)) < 0) /* component 1 == .y */
      src_y = alloc_imm_f32(c, 0.0);
   else
      src_y = swizzle(src[0], SWIZZLE(Y, Y, Y, Y));

   struct etna_inst_src src_w = { };

   /* clamp src.w (the specular exponent) to [-128, 128]; constant-fold when
    * src is a uniform */
   if (!etna_rgroup_is_uniform(src[0].rgroup)) {
      src_w = etna_native_to_src(inner_temp, SWIZZLE(W, W, W, W));

      struct etna_inst ins = { };
      ins.opcode = INST_OPCODE_SELECT;
      ins.cond = INST_CONDITION_GT;
      ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_W);
      ins.src[0] = ins.src[2] = alloc_imm_f32(c, 128.);
      ins.src[1] = swizzle(src[0], SWIZZLE(W, W, W, W));
      emit_inst(c, &ins);
      /* second SELECT reuses 'ins' with the condition and sign flipped */
      ins.cond = INST_CONDITION_LT;
      ins.src[0].neg = !ins.src[0].neg;
      ins.src[2].neg = !ins.src[2].neg;
      ins.src[1] = src_w;
      emit_inst(c, &ins);
   } else if (uif(get_imm_u32(c, &src[0], 3)) < -128.) /* component 3 == .w */
      src_w = alloc_imm_f32(c, -128.);
   else if (uif(get_imm_u32(c, &src[0], 3)) > 128.)
      src_w = alloc_imm_f32(c, 128.);
   else
      src_w = swizzle(src[0], SWIZZLE(W, W, W, W));

   if (c->specs->has_new_transcendentals) { /* Alternative LOG sequence */
      /* new-style LOG writes x and y; their product is log2(src_y) */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_LOG,
         .dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y),
         .src[2] = src_y,
         .tex = { .amode=1 }, /* Unknown bit needs to be set */
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .dst = etna_native_to_dst(inner_temp, INST_COMPS_X),
         .src[0] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
         .src[1] = etna_native_to_src(inner_temp, SWIZZLE(Y, Y, Y, Y)),
      });
   } else {
      /* NOTE(review): only ins[0] of this array is used */
      struct etna_inst ins[3] = { };
      ins[0].opcode = INST_OPCODE_LOG;
      ins[0].dst = etna_native_to_dst(inner_temp, INST_COMPS_X);
      ins[0].src[2] = src_y;

      emit_inst(c, &ins[0]);
   }
   /* tmp.x = log2(src_y) * src_w */
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_MUL,
      .sat = 0,
      .dst = etna_native_to_dst(inner_temp, INST_COMPS_X),
      .src[0] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
      .src[1] = src_w,
   });
   /* final LITP combines src.x and the computed exponent term */
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_LITP,
      .sat = 0,
      .dst = convert_dst(c, &inst->Dst[0]),
      .src[0] = swizzle(src[0], SWIZZLE(X, X, X, X)),
      .src[1] = swizzle(src[0], SWIZZLE(X, X, X, X)),
      .src[2] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
   });
}
1428
1429static void
1430trans_ssg(const struct instr_translater *t, struct etna_compile *c,
1431          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1432{
1433   if (c->specs->has_sign_floor_ceil) {
1434      emit_inst(c, &(struct etna_inst){
1435         .opcode = INST_OPCODE_SIGN,
1436         .sat = inst->Instruction.Saturate,
1437         .dst = convert_dst(c, &inst->Dst[0]),
1438         .src[2] = src[0],
1439      });
1440   } else {
1441      struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1442      struct etna_inst ins[2] = { };
1443
1444      ins[0].opcode = INST_OPCODE_SET;
1445      ins[0].cond = INST_CONDITION_NZ;
1446      ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
1447                                            INST_COMPS_Z | INST_COMPS_W);
1448      ins[0].src[0] = src[0];
1449
1450      ins[1].opcode = INST_OPCODE_SELECT;
1451      ins[1].cond = INST_CONDITION_LZ;
1452      ins[1].sat = inst->Instruction.Saturate;
1453      ins[1].dst = convert_dst(c, &inst->Dst[0]);
1454      ins[1].src[0] = src[0];
1455      ins[1].src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);
1456      ins[1].src[1] = negate(ins[1].src[2]);
1457
1458      emit_inst(c, &ins[0]);
1459      emit_inst(c, &ins[1]);
1460   }
1461}
1462
/* Translate TGSI SIN/COS; three strategies depending on hardware support:
 * new-style SIN/COS, classic SIN/COS with 2/pi pre-scale, or a polynomial
 * approximation on hardware without trig instructions. */
static void
trans_trig(const struct instr_translater *t, struct etna_compile *c,
           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   if (c->specs->has_new_transcendentals) { /* Alternative SIN/COS */
      /* On newer chips alternative SIN/COS instructions are implemented,
       * which:
       * - Need their input scaled by 1/pi instead of 2/pi
       * - Output an x and y component, which need to be multiplied to
       *   get the result
       */
      struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xyz */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_Z),
         .src[0] = src[0], /* any swizzling happens here */
         .src[1] = alloc_imm_f32(c, 1.0f / M_PI),
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
                    ? INST_OPCODE_COS
                    : INST_OPCODE_SIN,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y),
         .src[2] = etna_native_to_src(temp, SWIZZLE(Z, Z, Z, Z)),
         .tex = { .amode=1 }, /* Unknown bit needs to be set */
      });
      /* result = temp.x * temp.y */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),
         .src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),
      });

   } else if (c->specs->has_sin_cos_sqrt) {
      struct etna_native_reg temp = etna_compile_get_inner_temp(c);
      /* add divide by PI/2, using a temp register. GC2000
       * fails with src==dst for the trig instruction. */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
                                         INST_COMPS_Z | INST_COMPS_W),
         .src[0] = src[0], /* any swizzling happens here */
         .src[1] = alloc_imm_f32(c, 2.0f / M_PI),
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
                    ? INST_OPCODE_COS
                    : INST_OPCODE_SIN,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY),
      });
   } else {
      /* Implement Nick's fast sine/cosine. Taken from:
       * http://forum.devmaster.net/t/fast-and-accurate-sine-cosine/9648
       * A=(1/2*PI 0 1/2*PI 0) B=(0.75 0 0.5 0) C=(-4 4 X X)
       *  MAD t.x_zw, src.xxxx, A, B
       *  FRC t.x_z_, void, void, t.xwzw
       *  MAD t.x_z_, t.xwzw, 2, -1
       *  MUL t._y__, t.wzww, |t.wzww|, void  (for sin/scs)
       *  DP3 t.x_z_, t.zyww, C, void         (for sin)
       *  DP3 t.__z_, t.zyww, C, void         (for scs)
       *  MUL t._y__, t.wxww, |t.wxww|, void  (for cos/scs)
       *  DP3 t.x_z_, t.xyww, C, void         (for cos)
       *  DP3 t.x___, t.xyww, C, void         (for scs)
       *  MAD t._y_w, t,xxzz, |t.xxzz|, -t.xxzz
       *  MAD dst, t.ywyw, .2225, t.xzxz
       */
      struct etna_inst *p, ins[9] = { };
      struct etna_native_reg t0 = etna_compile_get_inner_temp(c);
      struct etna_inst_src t0s = etna_native_to_src(t0, INST_SWIZ_IDENTITY);
      struct etna_inst_src sincos[3], in = src[0];
      sincos[0] = etna_imm_vec4f(c, sincos_const[0]);
      sincos[1] = etna_imm_vec4f(c, sincos_const[1]);

      /* A uniform source will cause the inner temp limit to
       * be exceeded.  Explicitly deal with that scenario.
       */
      if (etna_rgroup_is_uniform(src[0].rgroup)) {
         struct etna_inst ins = { };
         ins.opcode = INST_OPCODE_MOV;
         ins.dst = etna_native_to_dst(t0, INST_COMPS_X);
         ins.src[2] = in;
         emit_inst(c, &ins);
         in = t0s;
      }

      ins[0].opcode = INST_OPCODE_MAD;
      ins[0].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z | INST_COMPS_W);
      ins[0].src[0] = swizzle(in, SWIZZLE(X, X, X, X));
      ins[0].src[1] = swizzle(sincos[1], SWIZZLE(X, W, X, W)); /* 1/2*PI */
      ins[0].src[2] = swizzle(sincos[1], SWIZZLE(Y, W, Z, W)); /* 0.75, 0, 0.5, 0 */

      ins[1].opcode = INST_OPCODE_FRC;
      ins[1].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
      ins[1].src[2] = swizzle(t0s, SWIZZLE(X, W, Z, W));

      ins[2].opcode = INST_OPCODE_MAD;
      ins[2].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
      ins[2].src[0] = swizzle(t0s, SWIZZLE(X, W, Z, W));
      ins[2].src[1] = swizzle(sincos[0], SWIZZLE(X, X, X, X)); /* 2 */
      ins[2].src[2] = swizzle(sincos[0], SWIZZLE(Y, Y, Y, Y)); /* -1 */

      /* the MUL/DP3 pair differs only in its swizzles between SIN and COS */
      unsigned mul_swiz, dp3_swiz;
      if (inst->Instruction.Opcode == TGSI_OPCODE_SIN) {
         mul_swiz = SWIZZLE(W, Z, W, W);
         dp3_swiz = SWIZZLE(Z, Y, W, W);
      } else {
         mul_swiz = SWIZZLE(W, X, W, W);
         dp3_swiz = SWIZZLE(X, Y, W, W);
      }

      ins[3].opcode = INST_OPCODE_MUL;
      ins[3].dst = etna_native_to_dst(t0, INST_COMPS_Y);
      ins[3].src[0] = swizzle(t0s, mul_swiz);
      ins[3].src[1] = absolute(ins[3].src[0]);

      ins[4].opcode = INST_OPCODE_DP3;
      ins[4].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
      ins[4].src[0] = swizzle(t0s, dp3_swiz);
      ins[4].src[1] = swizzle(sincos[0], SWIZZLE(Z, W, W, W));

      p = &ins[5];
      p->opcode = INST_OPCODE_MAD;
      p->dst = etna_native_to_dst(t0, INST_COMPS_Y | INST_COMPS_W);
      p->src[0] = swizzle(t0s, SWIZZLE(X, X, Z, Z));
      p->src[1] = absolute(p->src[0]);
      p->src[2] = negate(p->src[0]);

      p++;
      p->opcode = INST_OPCODE_MAD;
      p->sat = inst->Instruction.Saturate;
      p->dst = convert_dst(c, &inst->Dst[0]),
      p->src[0] = swizzle(t0s, SWIZZLE(Y, W, Y, W));
      p->src[1] = alloc_imm_f32(c, 0.2225);
      p->src[2] = swizzle(t0s, SWIZZLE(X, Z, X, Z));

      /* emit all instructions built above, up to and including *p */
      for (int i = 0; &ins[i] <= p; i++)
         emit_inst(c, &ins[i]);
   }
}
1608
1609static void
1610trans_lg2(const struct instr_translater *t, struct etna_compile *c,
1611            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1612{
1613   if (c->specs->has_new_transcendentals) {
1614      /* On newer chips alternative LOG instruction is implemented,
1615       * which outputs an x and y component, which need to be multiplied to
1616       * get the result.
1617       */
1618      struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xy */
1619      emit_inst(c, &(struct etna_inst) {
1620         .opcode = INST_OPCODE_LOG,
1621         .sat = 0,
1622         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y),
1623         .src[2] = src[0],
1624         .tex = { .amode=1 }, /* Unknown bit needs to be set */
1625      });
1626      emit_inst(c, &(struct etna_inst) {
1627         .opcode = INST_OPCODE_MUL,
1628         .sat = inst->Instruction.Saturate,
1629         .dst = convert_dst(c, &inst->Dst[0]),
1630         .src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),
1631         .src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),
1632      });
1633   } else {
1634      emit_inst(c, &(struct etna_inst) {
1635         .opcode = INST_OPCODE_LOG,
1636         .sat = inst->Instruction.Saturate,
1637         .dst = convert_dst(c, &inst->Dst[0]),
1638         .src[2] = src[0],
1639      });
1640   }
1641}
1642
/* Translate the TGSI texture-sampling opcodes (TEX/TXB/TXL/TXP) onto the
 * native TEXLD family; rectangle textures get their coordinates rescaled
 * first. */
static void
trans_sampler(const struct instr_translater *t, struct etna_compile *c,
              const struct tgsi_full_instruction *inst,
              struct etna_inst_src *src)
{
   /* There is no native support for GL texture rectangle coordinates, so
    * we have to rescale from ([0, width], [0, height]) to ([0, 1], [0, 1]). */
   if (inst->Texture.Texture == TGSI_TEXTURE_RECT) {
      uint32_t unit = inst->Src[1].Register.Index;
      struct etna_inst ins[2] = { };
      struct etna_native_reg temp = etna_compile_get_inner_temp(c);

      /* temp.x = src.x * 1/width, temp.y = src.y * 1/height */
      ins[0].opcode = INST_OPCODE_MUL;
      ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X);
      ins[0].src[0] = src[0];
      ins[0].src[1] = alloc_imm(c, ETNA_IMMEDIATE_TEXRECT_SCALE_X, unit);

      ins[1].opcode = INST_OPCODE_MUL;
      ins[1].dst = etna_native_to_dst(temp, INST_COMPS_Y);
      ins[1].src[0] = src[0];
      ins[1].src[1] = alloc_imm(c, ETNA_IMMEDIATE_TEXRECT_SCALE_Y, unit);

      emit_inst(c, &ins[0]);
      emit_inst(c, &ins[1]);

      /* NOTE(review): only temp.xy are written above, yet the full temp is
       * passed on — presumably RECT lookups only consume .xy; verify. */
      src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY); /* temp.xyzw */
   }

   switch (inst->Instruction.Opcode) {
   case TGSI_OPCODE_TEX:
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLD,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = src[0],
      });
      break;

   case TGSI_OPCODE_TXB:
      /* texture lookup with LOD bias */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLDB,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = src[0],
      });
      break;

   case TGSI_OPCODE_TXL:
      /* texture lookup with explicit LOD */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLDL,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = src[0],
      });
      break;

   case TGSI_OPCODE_TXP: { /* divide src.xyz by src.w */
      struct etna_native_reg temp = etna_compile_get_inner_temp(c);

      /* tmp.w = 1 / src.w */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_RCP,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_W), /* tmp.w */
         .src[2] = swizzle(src[0], SWIZZLE(W, W, W, W)),
      });
      /* tmp.xyz = src.xyz * tmp.w */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
                                         INST_COMPS_Z), /* tmp.xyz */
         .src[0] = etna_native_to_src(temp, SWIZZLE(W, W, W, W)),
         .src[1] = src[0], /* src.xyzw */
      });
      /* plain lookup with the projected coordinates */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLD,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY), /* tmp.xyzw */
      });
   } break;

   default:
      BUG("Unhandled instruction %s",
          tgsi_get_opcode_name(inst->Instruction.Opcode));
      assert(0);
      break;
   }
}
1735
/* No-op translator for TGSI opcodes that need no generated code. */
static void
trans_dummy(const struct instr_translater *t, struct etna_compile *c,
            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   /* intentionally empty */
}
1742
/* Dispatch table: one translation handler per TGSI opcode.
 *
 * INSTR(n, f, ...) registers handler f for TGSI_OPCODE_n; the variadic part
 * fills in handler-specific fields: .opc is the native opcode to emit,
 * .src[i] apparently names the native source slot for TGSI operand i
 * (-1 = operand absent; note e.g. single-operand ALU ops read slot 2 and
 * ADD reads slots 0 and 2), and .cond is the native condition code.
 * Opcodes without an entry have a NULL .fxn and trip a BUG() in
 * etna_compile_pass_generate_code.
 */
static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
#define INSTR(n, f, ...) \
   [TGSI_OPCODE_##n] = {.fxn = (f), .tgsi_opc = TGSI_OPCODE_##n, ##__VA_ARGS__}

   /* straightforward ALU ops with a 1:1 native instruction */
   INSTR(MOV, trans_instr, .opc = INST_OPCODE_MOV, .src = {2, -1, -1}),
   INSTR(RCP, trans_instr, .opc = INST_OPCODE_RCP, .src = {2, -1, -1}),
   INSTR(RSQ, trans_instr, .opc = INST_OPCODE_RSQ, .src = {2, -1, -1}),
   INSTR(MUL, trans_instr, .opc = INST_OPCODE_MUL, .src = {0, 1, -1}),
   INSTR(ADD, trans_instr, .opc = INST_OPCODE_ADD, .src = {0, 2, -1}),
   INSTR(DP2, trans_instr, .opc = INST_OPCODE_DP2, .src = {0, 1, -1}),
   INSTR(DP3, trans_instr, .opc = INST_OPCODE_DP3, .src = {0, 1, -1}),
   INSTR(DP4, trans_instr, .opc = INST_OPCODE_DP4, .src = {0, 1, -1}),
   INSTR(DST, trans_instr, .opc = INST_OPCODE_DST, .src = {0, 1, -1}),
   INSTR(MAD, trans_instr, .opc = INST_OPCODE_MAD, .src = {0, 1, 2}),
   INSTR(EX2, trans_instr, .opc = INST_OPCODE_EXP, .src = {2, -1, -1}),
   INSTR(LG2, trans_lg2),
   INSTR(SQRT, trans_instr, .opc = INST_OPCODE_SQRT, .src = {2, -1, -1}),
   INSTR(FRC, trans_instr, .opc = INST_OPCODE_FRC, .src = {2, -1, -1}),
   INSTR(CEIL, trans_instr, .opc = INST_OPCODE_CEIL, .src = {2, -1, -1}),
   INSTR(FLR, trans_instr, .opc = INST_OPCODE_FLOOR, .src = {2, -1, -1}),
   INSTR(CMP, trans_instr, .opc = INST_OPCODE_SELECT, .src = {0, 1, 2}, .cond = INST_CONDITION_LZ),

   /* fragment kill (unconditional / conditional) */
   INSTR(KILL, trans_instr, .opc = INST_OPCODE_TEXKILL),
   INSTR(KILL_IF, trans_instr, .opc = INST_OPCODE_TEXKILL, .src = {0, -1, -1}, .cond = INST_CONDITION_LZ),

   /* screen-space derivatives */
   INSTR(DDX, trans_deriv, .opc = INST_OPCODE_DSX),
   INSTR(DDY, trans_deriv, .opc = INST_OPCODE_DSY),

   /* structured control flow */
   INSTR(IF, trans_if),
   INSTR(ELSE, trans_else),
   INSTR(ENDIF, trans_endif),

   INSTR(BGNLOOP, trans_loop_bgn),
   INSTR(ENDLOOP, trans_loop_end),
   INSTR(BRK, trans_brk),
   INSTR(CONT, trans_cont),

   /* MIN/MAX are built from conditional SELECT */
   INSTR(MIN, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_GT),
   INSTR(MAX, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_LT),

   /* opcodes needing multi-instruction expansions */
   INSTR(ARL, trans_arl),
   INSTR(LRP, trans_lrp),
   INSTR(LIT, trans_lit),
   INSTR(SSG, trans_ssg),

   INSTR(SIN, trans_trig),
   INSTR(COS, trans_trig),

   /* comparisons map to SET with a condition code */
   INSTR(SLT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LT),
   INSTR(SGE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GE),
   INSTR(SEQ, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_EQ),
   INSTR(SGT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GT),
   INSTR(SLE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LE),
   INSTR(SNE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_NE),

   /* texture sampling variants */
   INSTR(TEX, trans_sampler),
   INSTR(TXB, trans_sampler),
   INSTR(TXL, trans_sampler),
   INSTR(TXP, trans_sampler),

   INSTR(NOP, trans_dummy),
   INSTR(END, trans_dummy),
};
1806
/* Pass -- compile instructions.
 *
 * Walks the TGSI token stream and translates each live instruction through
 * the translaters[] dispatch table. Source operands are pre-converted to
 * native etna_inst_src form when the backing register has been allocated;
 * sampler (is_tex) and unallocated registers are left zeroed for the
 * handler to convert itself.
 */
static void
etna_compile_pass_generate_code(struct etna_compile *c)
{
   struct tgsi_parse_context ctx = { };
   MAYBE_UNUSED unsigned status = tgsi_parse_init(&ctx, c->tokens);
   assert(status == TGSI_PARSE_OK);

   /* index of the current TGSI instruction, used to consult dead_inst[] */
   int inst_idx = 0;
   while (!tgsi_parse_end_of_tokens(&ctx)) {
      const struct tgsi_full_instruction *inst = 0;

      /* No inner temps used yet for this instruction, clear counter */
      c->inner_temps = 0;

      tgsi_parse_token(&ctx);

      /* non-instruction tokens (declarations, immediates) were consumed by
       * earlier passes and are ignored here */
      switch (ctx.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_INSTRUCTION:
         /* iterate over operands */
         inst = &ctx.FullToken.FullInstruction;
         if (c->dead_inst[inst_idx]) { /* skip dead instructions */
            inst_idx++;
            continue;
         }

         /* Lookup the TGSI information and generate the source arguments */
         struct etna_inst_src src[ETNA_NUM_SRC];
         memset(src, 0, sizeof(src));

         const struct tgsi_opcode_info *tgsi = tgsi_get_opcode_info(inst->Instruction.Opcode);

         for (int i = 0; i < tgsi->num_src && i < ETNA_NUM_SRC; i++) {
            const struct tgsi_full_src_register *reg = &inst->Src[i];
            const struct etna_native_reg *n = &etna_get_src_reg(c, reg->Register)->native;

            /* samplers and not-yet-allocated registers have no rgroup/id;
             * leave src[i] zeroed for the handler */
            if (!n->valid || n->is_tex)
               continue;

            src[i] = etna_create_src(reg, n);
         }

         const unsigned opc = inst->Instruction.Opcode;
         const struct instr_translater *t = &translaters[opc];

         if (t->fxn) {
            t->fxn(t, c, inst, src);

            inst_idx += 1;
         } else {
            BUG("Unhandled instruction %s", tgsi_get_opcode_name(opc));
            assert(0);
         }
         break;
      }
   }
   tgsi_parse_free(&ctx);
}
1865
1866/* Look up register by semantic */
1867static struct etna_reg_desc *
1868find_decl_by_semantic(struct etna_compile *c, uint file, uint name, uint index)
1869{
1870   for (int idx = 0; idx < c->file[file].reg_size; ++idx) {
1871      struct etna_reg_desc *reg = &c->file[file].reg[idx];
1872
1873      if (reg->semantic.Name == name && reg->semantic.Index == index)
1874         return reg;
1875   }
1876
1877   return NULL; /* not found */
1878}
1879
/** Add ADD and MUL instructions computing z = (z + w) / 2, to remap the
 * position Z from the [-1..1] clip range to [0..1], when:
 * - this is a vertex shader
 * - and this is an older GPU (specs->vs_need_z_div)
 */
static void
etna_compile_add_z_div_if_needed(struct etna_compile *c)
{
   if (c->info.processor == PIPE_SHADER_VERTEX && c->specs->vs_need_z_div) {
      /* find position out */
      struct etna_reg_desc *pos_reg =
         find_decl_by_semantic(c, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_POSITION, 0);

      /* a VS without a position output emits nothing here */
      if (pos_reg != NULL) {
         /*
          * ADD tX.__z_, tX.zzzz, void, tX.wwww
          * MUL tX.__z_, tX.zzzz, 0.5, void
         */
         emit_inst(c, &(struct etna_inst) {
            .opcode = INST_OPCODE_ADD,
            .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z),
            .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)),
            .src[2] = etna_native_to_src(pos_reg->native, SWIZZLE(W, W, W, W)),
         });
         emit_inst(c, &(struct etna_inst) {
            .opcode = INST_OPCODE_MUL,
            .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z),
            .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)),
            .src[1] = alloc_imm_f32(c, 0.5f),
         });
      }
   }
}
1912
1913static void
1914etna_compile_frag_rb_swap(struct etna_compile *c)
1915{
1916   if (c->info.processor == PIPE_SHADER_FRAGMENT && c->key->frag_rb_swap) {
1917      /* find color out */
1918      struct etna_reg_desc *color_reg =
1919         find_decl_by_semantic(c, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_COLOR, 0);
1920
1921      emit_inst(c, &(struct etna_inst) {
1922         .opcode = INST_OPCODE_MOV,
1923         .dst = etna_native_to_dst(color_reg->native, INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z | INST_COMPS_W),
1924         .src[2] = etna_native_to_src(color_reg->native, SWIZZLE(Z, Y, X, W)),
1925      });
1926   }
1927}
1928
1929/** add a NOP to the shader if
1930 * a) the shader is empty
1931 * or
1932 * b) there is a label at the end of the shader
1933 */
1934static void
1935etna_compile_add_nop_if_needed(struct etna_compile *c)
1936{
1937   bool label_at_last_inst = false;
1938
1939   for (int idx = 0; idx < c->labels_count; ++idx) {
1940      if (c->labels[idx].inst_idx == c->inst_ptr)
1941         label_at_last_inst = true;
1942
1943   }
1944
1945   if (c->inst_ptr == 0 || label_at_last_inst)
1946      emit_inst(c, &(struct etna_inst){.opcode = INST_OPCODE_NOP});
1947}
1948
1949static void
1950assign_uniforms(struct etna_compile_file *file, unsigned base)
1951{
1952   for (int idx = 0; idx < file->reg_size; ++idx) {
1953      file->reg[idx].native.valid = 1;
1954      file->reg[idx].native.rgroup = INST_RGROUP_UNIFORM_0;
1955      file->reg[idx].native.id = base + idx;
1956   }
1957}
1958
1959/* Allocate CONST and IMM to native ETNA_RGROUP_UNIFORM(x).
1960 * CONST must be consecutive as const buffers are supposed to be consecutive,
1961 * and before IMM, as this is
1962 * more convenient because is possible for the compilation process itself to
1963 * generate extra
1964 * immediates for constants such as pi, one, zero.
1965 */
1966static void
1967assign_constants_and_immediates(struct etna_compile *c)
1968{
1969   assign_uniforms(&c->file[TGSI_FILE_CONSTANT], 0);
1970   /* immediates start after the constants */
1971   c->imm_base = c->file[TGSI_FILE_CONSTANT].reg_size * 4;
1972   assign_uniforms(&c->file[TGSI_FILE_IMMEDIATE], c->imm_base / 4);
1973   DBG_F(ETNA_DBG_COMPILER_MSGS, "imm base: %i size: %i", c->imm_base,
1974         c->imm_size);
1975}
1976
1977/* Assign declared samplers to native texture units */
1978static void
1979assign_texture_units(struct etna_compile *c)
1980{
1981   uint tex_base = 0;
1982
1983   if (c->info.processor == PIPE_SHADER_VERTEX)
1984      tex_base = c->specs->vertex_sampler_offset;
1985
1986   for (int idx = 0; idx < c->file[TGSI_FILE_SAMPLER].reg_size; ++idx) {
1987      c->file[TGSI_FILE_SAMPLER].reg[idx].native.valid = 1;
1988      c->file[TGSI_FILE_SAMPLER].reg[idx].native.is_tex = 1; // overrides rgroup
1989      c->file[TGSI_FILE_SAMPLER].reg[idx].native.id = tex_base + idx;
1990   }
1991}
1992
/* Additional pass to fill in branch targets. This pass should be last
 * as no instruction reordering or removing/addition can be done anymore
 * once the branch targets are computed.
 */
static void
etna_compile_fill_in_labels(struct etna_compile *c)
{
   for (int idx = 0; idx < c->inst_ptr; ++idx) {
      /* lbl_usage[idx] holds the label referenced by instruction idx,
       * or -1 if none. Each native instruction is 4 words, hence the
       * idx * 4 offset into the code buffer; patch the instruction's
       * immediate field with the label's resolved target. */
      if (c->lbl_usage[idx] != -1)
         etna_assemble_set_imm(&c->code[idx * 4],
                               c->labels[c->lbl_usage[idx]].inst_idx);
   }
}
2006
2007/* compare two etna_native_reg structures, return true if equal */
2008static bool
2009cmp_etna_native_reg(const struct etna_native_reg to,
2010                    const struct etna_native_reg from)
2011{
2012   return to.valid == from.valid && to.is_tex == from.is_tex &&
2013          to.rgroup == from.rgroup && to.id == from.id;
2014}
2015
2016/* go through all declarations and swap native registers *to* and *from* */
2017static void
2018swap_native_registers(struct etna_compile *c, const struct etna_native_reg to,
2019                      const struct etna_native_reg from)
2020{
2021   if (cmp_etna_native_reg(from, to))
2022      return; /* Nothing to do */
2023
2024   for (int idx = 0; idx < c->total_decls; ++idx) {
2025      if (cmp_etna_native_reg(c->decl[idx].native, from)) {
2026         c->decl[idx].native = to;
2027      } else if (cmp_etna_native_reg(c->decl[idx].native, to)) {
2028         c->decl[idx].native = from;
2029      }
2030   }
2031}
2032
/* For PS we need to permute so that inputs are always in temporary 0..N-1.
 * Semantic POS is always t0. If that semantic is not used, avoid t0.
 */
static void
permute_ps_inputs(struct etna_compile *c)
{
   /* Special inputs:
    * gl_FragCoord  VARYING_SLOT_POS   TGSI_SEMANTIC_POSITION
    * gl_PointCoord VARYING_SLOT_PNTC  TGSI_SEMANTIC_PCOORD
    */
   uint native_idx = 1; /* t0 is reserved for POSITION */

   for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
      uint input_id;
      assert(reg->has_semantic);

      /* unused inputs get no slot; POSITION keeps t0 */
      if (!reg->active || reg->semantic.Name == TGSI_SEMANTIC_POSITION)
         continue;

      /* move this input to the next consecutive temp, swapping with
       * whatever declaration currently occupies that register */
      input_id = native_idx++;
      swap_native_registers(c, etna_native_temp(input_id),
                            c->file[TGSI_FILE_INPUT].reg[idx].native);
   }

   /* t0 itself is not a varying, hence the -1 */
   c->num_varyings = native_idx - 1;

   if (native_idx > c->next_free_native)
      c->next_free_native = native_idx;
}
2063
/* fill in ps inputs into shader object */
static void
fill_in_ps_inputs(struct etna_shader_variant *sobj, struct etna_compile *c)
{
   struct etna_shader_io_file *sf = &sobj->infile;

   sf->num_reg = 0;

   for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];

      /* native id 0 is t0, reserved for POSITION by permute_ps_inputs;
       * only ids > 0 are real varyings */
      if (reg->native.id > 0) {
         assert(sf->num_reg < ETNA_NUM_INPUTS);
         sf->reg[sf->num_reg].reg = reg->native.id;
         sf->reg[sf->num_reg].semantic = reg->semantic;
         /* convert usage mask to number of components (*=wildcard)
          *   .r    (0..1)  -> 1 component
          *   .*g   (2..3)  -> 2 component
          *   .**b  (4..7)  -> 3 components
          *   .***a (8..15) -> 4 components
          */
         sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask);
         sf->num_reg++;
      }
   }

   /* every varying permuted into t1..tN must have been collected above */
   assert(sf->num_reg == c->num_varyings);
   sobj->input_count_unk8 = 31; /* XXX what is this */
}
2093
2094/* fill in output mapping for ps into shader object */
2095static void
2096fill_in_ps_outputs(struct etna_shader_variant *sobj, struct etna_compile *c)
2097{
2098   sobj->outfile.num_reg = 0;
2099
2100   for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) {
2101      struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx];
2102
2103      switch (reg->semantic.Name) {
2104      case TGSI_SEMANTIC_COLOR: /* FRAG_RESULT_COLOR */
2105         sobj->ps_color_out_reg = reg->native.id;
2106         break;
2107      case TGSI_SEMANTIC_POSITION: /* FRAG_RESULT_DEPTH */
2108         sobj->ps_depth_out_reg = reg->native.id; /* =always native reg 0, only z component should be assigned */
2109         break;
2110      default:
2111         assert(0); /* only outputs supported are COLOR and POSITION at the moment */
2112      }
2113   }
2114}
2115
/* fill in inputs for vs into shader object */
static void
fill_in_vs_inputs(struct etna_shader_variant *sobj, struct etna_compile *c)
{
   struct etna_shader_io_file *sf = &sobj->infile;

   sf->num_reg = 0;
   for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
      assert(sf->num_reg < ETNA_NUM_INPUTS);

      /* inputs that never got a native register are unused -- skip */
      if (!reg->native.valid)
         continue;

      /* XXX exclude inputs with special semantics such as gl_frontFacing */
      sf->reg[sf->num_reg].reg = reg->native.id;
      sf->reg[sf->num_reg].semantic = reg->semantic;
      /* number of components = highest component used (see the usage-mask
       * table in fill_in_ps_inputs) */
      sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask);
      sf->num_reg++;
   }

   /* NOTE(review): (num_reg + 19) / 16 looks odd next to the PS's fixed 31
    * -- the field itself is marked unknown, so leaving as-is */
   sobj->input_count_unk8 = (sf->num_reg + 19) / 16; /* XXX what is this */
}
2139
2140/* build two-level output index [Semantic][Index] for fast linking */
2141static void
2142build_output_index(struct etna_shader_variant *sobj)
2143{
2144   int total = 0;
2145   int offset = 0;
2146
2147   for (int name = 0; name < TGSI_SEMANTIC_COUNT; ++name)
2148      total += sobj->output_count_per_semantic[name];
2149
2150   sobj->output_per_semantic_list = CALLOC(total, sizeof(struct etna_shader_inout *));
2151
2152   for (int name = 0; name < TGSI_SEMANTIC_COUNT; ++name) {
2153      sobj->output_per_semantic[name] = &sobj->output_per_semantic_list[offset];
2154      offset += sobj->output_count_per_semantic[name];
2155   }
2156
2157   for (int idx = 0; idx < sobj->outfile.num_reg; ++idx) {
2158      sobj->output_per_semantic[sobj->outfile.reg[idx].semantic.Name]
2159                               [sobj->outfile.reg[idx].semantic.Index] =
2160         &sobj->outfile.reg[idx];
2161   }
2162}
2163
/* fill in outputs for vs into shader object */
static void
fill_in_vs_outputs(struct etna_shader_variant *sobj, struct etna_compile *c)
{
   struct etna_shader_io_file *sf = &sobj->outfile;

   sf->num_reg = 0;
   for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx];
      assert(sf->num_reg < ETNA_NUM_INPUTS);

      switch (reg->semantic.Name) {
      /* POSITION and PSIZE go to dedicated registers, not the varying list */
      case TGSI_SEMANTIC_POSITION:
         sobj->vs_pos_out_reg = reg->native.id;
         break;
      case TGSI_SEMANTIC_PSIZE:
         sobj->vs_pointsize_out_reg = reg->native.id;
         break;
      default:
         sf->reg[sf->num_reg].reg = reg->native.id;
         sf->reg[sf->num_reg].semantic = reg->semantic;
         sf->reg[sf->num_reg].num_components = 4; // XXX reg->num_components;
         sf->num_reg++;
         /* track per-semantic counts for build_output_index below */
         sobj->output_count_per_semantic[reg->semantic.Name] =
            MAX2(reg->semantic.Index + 1,
                 sobj->output_count_per_semantic[reg->semantic.Name]);
      }
   }

   /* build two-level index for linking */
   build_output_index(sobj);

   /* fill in "mystery meat" load balancing value. This value determines how
    * work is scheduled between VS and PS
    * in the unified shader architecture. More precisely, it is determined from
    * the number of VS outputs, as well as chip-specific
    * vertex output buffer size, vertex cache size, and the number of shader
    * cores.
    *
    * XXX this is a conservative estimate, the "optimal" value is only known for
    * sure at link time because some
    * outputs may be unused and thus unmapped. Then again, in the general use
    * case with GLSL the vertex and fragment
    * shaders are linked already before submitting to Gallium, thus all outputs
    * are used.
    */
   int half_out = (c->file[TGSI_FILE_OUTPUT].reg_size + 1) / 2;
   assert(half_out);

   /* a and b are clamped to 8 bits; c and d appear fixed */
   uint32_t b = ((20480 / (c->specs->vertex_output_buffer_size -
                           2 * half_out * c->specs->vertex_cache_size)) +
                 9) /
                10;
   uint32_t a = (b + 256 / (c->specs->shader_core_count * half_out)) / 2;
   sobj->vs_load_balancing = VIVS_VS_LOAD_BALANCING_A(MIN2(a, 255)) |
                             VIVS_VS_LOAD_BALANCING_B(MIN2(b, 255)) |
                             VIVS_VS_LOAD_BALANCING_C(0x3f) |
                             VIVS_VS_LOAD_BALANCING_D(0x0f);
}
2223
2224static bool
2225etna_compile_check_limits(struct etna_compile *c)
2226{
2227   int max_uniforms = (c->info.processor == PIPE_SHADER_VERTEX)
2228                         ? c->specs->max_vs_uniforms
2229                         : c->specs->max_ps_uniforms;
2230   /* round up number of uniforms, including immediates, in units of four */
2231   int num_uniforms = c->imm_base / 4 + (c->imm_size + 3) / 4;
2232
2233   if (!c->specs->has_icache && c->inst_ptr > c->specs->max_instructions) {
2234      DBG("Number of instructions (%d) exceeds maximum %d", c->inst_ptr,
2235          c->specs->max_instructions);
2236      return false;
2237   }
2238
2239   if (c->next_free_native > c->specs->max_registers) {
2240      DBG("Number of registers (%d) exceeds maximum %d", c->next_free_native,
2241          c->specs->max_registers);
2242      return false;
2243   }
2244
2245   if (num_uniforms > max_uniforms) {
2246      DBG("Number of uniforms (%d) exceeds maximum %d", num_uniforms,
2247          max_uniforms);
2248      return false;
2249   }
2250
2251   if (c->num_varyings > c->specs->max_varyings) {
2252      DBG("Number of varyings (%d) exceeds maximum %d", c->num_varyings,
2253          c->specs->max_varyings);
2254      return false;
2255   }
2256
2257   if (c->imm_base > c->specs->num_constants) {
2258      DBG("Number of constants (%d) exceeds maximum %d", c->imm_base,
2259          c->specs->num_constants);
2260   }
2261
2262   return true;
2263}
2264
2265static void
2266copy_uniform_state_to_shader(struct etna_compile *c, struct etna_shader_variant *sobj)
2267{
2268   uint32_t count = c->imm_size;
2269   struct etna_shader_uniform_info *uinfo = &sobj->uniforms;
2270
2271   uinfo->const_count = c->imm_base;
2272   uinfo->imm_count = count;
2273   uinfo->imm_data = mem_dup(c->imm_data, count * sizeof(*c->imm_data));
2274   uinfo->imm_contents = mem_dup(c->imm_contents, count * sizeof(*c->imm_contents));
2275
2276   etna_set_shader_uniforms_dirty_flags(sobj);
2277}
2278
/* Top-level compile entry point: run all passes over the variant's TGSI
 * tokens and fill in the etna_shader_variant. Returns false on allocation
 * failure or when hardware limits are exceeded. */
bool
etna_compile_shader(struct etna_shader_variant *v)
{
   /* Create scratch space that may be too large to fit on stack
    */
   bool ret;
   struct etna_compile *c;

   if (unlikely(!v))
      return false;

   const struct etna_specs *specs = v->shader->specs;

   /* lower TGSI opcodes the hardware lacks; FLR/CEIL/DP2 lowering depends
    * on chip feature bits */
   struct tgsi_lowering_config lconfig = {
      .lower_FLR = !specs->has_sign_floor_ceil,
      .lower_CEIL = !specs->has_sign_floor_ceil,
      .lower_POW = true,
      .lower_EXP = true,
      .lower_LOG = true,
      .lower_DP2 = !specs->has_halti2_instructions,
      .lower_TRUNC = true,
   };

   c = CALLOC_STRUCT(etna_compile);
   if (!c)
      return false;

   /* -1 == "no label referenced by this instruction" (see
    * etna_compile_fill_in_labels) */
   memset(&c->lbl_usage, -1, sizeof(c->lbl_usage));

   const struct tgsi_token *tokens = v->shader->tokens;

   c->specs = specs;
   c->key = &v->key;
   c->tokens = tgsi_transform_lowering(&lconfig, tokens, &c->info);
   /* lowering returns NULL when nothing needed lowering; only free the
    * token list if it was newly allocated */
   c->free_tokens = !!c->tokens;
   if (!c->tokens) {
      /* no lowering */
      c->tokens = tokens;
   }

   /* Build a map from gallium register to native registers for files
    * CONST, SAMP, IMM, OUT, IN, TEMP.
    * SAMP will map as-is for fragment shaders, there will be a +8 offset for
    * vertex shaders.
    */
   /* Pass one -- check register file declarations and immediates */
   etna_compile_parse_declarations(c);

   etna_allocate_decls(c);

   /* Pass two -- check usage of temporaries, inputs, outputs */
   etna_compile_pass_check_usage(c);

   assign_special_inputs(c);

   /* Assign native temp register to TEMPs */
   assign_temporaries_to_native(c, &c->file[TGSI_FILE_TEMPORARY]);

   /* optimize outputs */
   etna_compile_pass_optimize_outputs(c);

   /* XXX assign special inputs: gl_FrontFacing (VARYING_SLOT_FACE)
    *     this is part of RGROUP_INTERNAL
    */

   /* assign inputs: last usage of input should be <= first usage of temp */
   /*   potential optimization case:
    *     if single MOV TEMP[y], IN[x] before which temp y is not used, and
    * after which IN[x]
    *     is not read, temp[y] can be used as input register as-is
    */
   /*   sort temporaries by first use
    *   sort inputs by last usage
    *   iterate over inputs, temporaries
    *     if last usage of input <= first usage of temp:
    *       assign input to temp
    *       advance input, temporary pointer
    *     else
    *       advance temporary pointer
    *
    *   potential problem: instruction with multiple inputs of which one is the
    * temp and the other is the input;
    *      however, as the temp is not used before this, how would this make
    * sense? uninitialized temporaries have an undefined
    *      value, so this would be ok
    */
   assign_inouts_to_temporaries(c, TGSI_FILE_INPUT);

   /* assign outputs: first usage of output should be >= last usage of temp */
   /*   potential optimization case:
    *      if single MOV OUT[x], TEMP[y] (with full write mask, or at least
    * writing all components that are used in
    *        the shader) after which temp y is no longer used temp[y] can be
    * used as output register as-is
    *
    *   potential problem: instruction with multiple outputs of which one is the
    * temp and the other is the output;
    *      however, as the temp is not used after this, how would this make
    * sense? could just discard the output value
    */
   /*   sort temporaries by last use
    *   sort outputs by first usage
    *   iterate over outputs, temporaries
    *     if first usage of output >= last usage of temp:
    *       assign output to temp
    *       advance output, temporary pointer
    *     else
    *       advance temporary pointer
    */
   assign_inouts_to_temporaries(c, TGSI_FILE_OUTPUT);

   assign_constants_and_immediates(c);
   assign_texture_units(c);

   /* list declarations (debug dump before PS input permutation) */
   for (int x = 0; x < c->total_decls; ++x) {
      DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i "
                                    "last_use=%i native=%i usage_mask=%x "
                                    "has_semantic=%i",
            x, tgsi_file_name(c->decl[x].file), c->decl[x].idx,
            c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use,
            c->decl[x].native.valid ? c->decl[x].native.id : -1,
            c->decl[x].usage_mask, c->decl[x].has_semantic);
      if (c->decl[x].has_semantic)
         DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i",
               tgsi_semantic_names[c->decl[x].semantic.Name],
               c->decl[x].semantic.Index);
   }
   /* XXX for PS we need to permute so that inputs are always in temporary
    * 0..N-1.
    * There is no "switchboard" for varyings (AFAIK!). The output color,
    * however, can be routed
    * from an arbitrary temporary.
    */
   if (c->info.processor == PIPE_SHADER_FRAGMENT)
      permute_ps_inputs(c);


   /* list declarations (again, after the permutation) */
   for (int x = 0; x < c->total_decls; ++x) {
      DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i "
                                    "last_use=%i native=%i usage_mask=%x "
                                    "has_semantic=%i",
            x, tgsi_file_name(c->decl[x].file), c->decl[x].idx,
            c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use,
            c->decl[x].native.valid ? c->decl[x].native.id : -1,
            c->decl[x].usage_mask, c->decl[x].has_semantic);
      if (c->decl[x].has_semantic)
         DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i",
               tgsi_semantic_names[c->decl[x].semantic.Name],
               c->decl[x].semantic.Index);
   }

   /* pass 3: generate instructions */
   etna_compile_pass_generate_code(c);
   etna_compile_add_z_div_if_needed(c);
   etna_compile_frag_rb_swap(c);
   etna_compile_add_nop_if_needed(c);

   ret = etna_compile_check_limits(c);
   if (!ret)
      goto out;

   /* must be last: branch targets are final only now */
   etna_compile_fill_in_labels(c);

   /* fill in output structure */
   v->processor = c->info.processor;
   v->code_size = c->inst_ptr * 4;
   v->code = mem_dup(c->code, c->inst_ptr * 16);
   v->num_loops = c->num_loops;
   v->num_temps = c->next_free_native;
   /* -1 = "not present"; filled in per shader stage below */
   v->vs_pos_out_reg = -1;
   v->vs_pointsize_out_reg = -1;
   v->ps_color_out_reg = -1;
   v->ps_depth_out_reg = -1;
   v->needs_icache = c->inst_ptr > c->specs->max_instructions;
   copy_uniform_state_to_shader(c, v);

   if (c->info.processor == PIPE_SHADER_VERTEX) {
      fill_in_vs_inputs(v, c);
      fill_in_vs_outputs(v, c);
   } else if (c->info.processor == PIPE_SHADER_FRAGMENT) {
      fill_in_ps_inputs(v, c);
      fill_in_ps_outputs(v, c);
   }

out:
   if (c->free_tokens)
      FREE((void *)c->tokens);

   FREE(c->labels);
   FREE(c);

   return ret;
}
2474
extern const char *tgsi_swizzle_names[];
/* Print a human-readable dump of a compiled shader variant: disassembly,
 * resource counts, immediates, input/output mappings and per-stage special
 * registers. Debug aid only. */
void
etna_dump_shader(const struct etna_shader_variant *shader)
{
   if (shader->processor == PIPE_SHADER_VERTEX)
      printf("VERT\n");
   else
      printf("FRAG\n");


   etna_disasm(shader->code, shader->code_size, PRINT_RAW);

   printf("num loops: %i\n", shader->num_loops);
   printf("num temps: %i\n", shader->num_temps);
   printf("num const: %i\n", shader->uniforms.const_count);
   printf("immediates:\n");
   for (int idx = 0; idx < shader->uniforms.imm_count; ++idx) {
      /* immediates are scalar; uniform slots are vec4, hence the /4 slot
       * and %4 component below */
      printf(" [%i].%s = %f (0x%08x)\n",
             (idx + shader->uniforms.const_count) / 4,
             tgsi_swizzle_names[idx % 4],
             *((float *)&shader->uniforms.imm_data[idx]),
             shader->uniforms.imm_data[idx]);
   }
   printf("inputs:\n");
   for (int idx = 0; idx < shader->infile.num_reg; ++idx) {
      printf(" [%i] name=%s index=%i comps=%i\n", shader->infile.reg[idx].reg,
             tgsi_semantic_names[shader->infile.reg[idx].semantic.Name],
             shader->infile.reg[idx].semantic.Index,
             shader->infile.reg[idx].num_components);
   }
   printf("outputs:\n");
   for (int idx = 0; idx < shader->outfile.num_reg; ++idx) {
      printf(" [%i] name=%s index=%i comps=%i\n", shader->outfile.reg[idx].reg,
             tgsi_semantic_names[shader->outfile.reg[idx].semantic.Name],
             shader->outfile.reg[idx].semantic.Index,
             shader->outfile.reg[idx].num_components);
   }
   printf("special:\n");
   if (shader->processor == PIPE_SHADER_VERTEX) {
      printf("  vs_pos_out_reg=%i\n", shader->vs_pos_out_reg);
      printf("  vs_pointsize_out_reg=%i\n", shader->vs_pointsize_out_reg);
      printf("  vs_load_balancing=0x%08x\n", shader->vs_load_balancing);
   } else {
      printf("  ps_color_out_reg=%i\n", shader->ps_color_out_reg);
      printf("  ps_depth_out_reg=%i\n", shader->ps_depth_out_reg);
   }
   printf("  input_count_unk8=0x%08x\n", shader->input_count_unk8);
}
2523
/* Free a shader variant and all heap allocations it owns (code buffer,
 * immediate data/contents from copy_uniform_state_to_shader, and the
 * output index list from build_output_index). */
void
etna_destroy_shader(struct etna_shader_variant *shader)
{
   assert(shader);

   FREE(shader->code);
   FREE(shader->uniforms.imm_data);
   FREE(shader->uniforms.imm_contents);
   FREE(shader->output_per_semantic_list);
   FREE(shader);
}
2535
2536static const struct etna_shader_inout *
2537etna_shader_vs_lookup(const struct etna_shader_variant *sobj,
2538                      const struct etna_shader_inout *in)
2539{
2540   if (in->semantic.Index < sobj->output_count_per_semantic[in->semantic.Name])
2541      return sobj->output_per_semantic[in->semantic.Name][in->semantic.Index];
2542
2543   return NULL;
2544}
2545
/* Link a VS/FS pair: map every fragment input onto the vertex output with
 * the same semantic, filling in the varying table of 'info'.
 * Returns true on link error, false on success. */
bool
etna_link_shader(struct etna_shader_link_info *info,
                 const struct etna_shader_variant *vs, const struct etna_shader_variant *fs)
{
   int comp_ofs = 0;
   /* For each fragment input we need to find the associated vertex shader
    * output, which can be found by matching on semantic name and index. A
    * binary search could be used because the vs outputs are sorted by their
    * semantic index and grouped by semantic type by fill_in_vs_outputs.
    */
   assert(fs->infile.num_reg < ETNA_NUM_INPUTS);
   info->pcoord_varying_comp_ofs = -1;

   for (int idx = 0; idx < fs->infile.num_reg; ++idx) {
      const struct etna_shader_inout *fsio = &fs->infile.reg[idx];
      const struct etna_shader_inout *vsio = etna_shader_vs_lookup(vs, fsio);
      struct etna_varying *varying;
      bool interpolate_always = fsio->semantic.Name != TGSI_SEMANTIC_COLOR;

      /* PS inputs live in t1..tN (t0 is position), so reg-1 indexes the
       * varying table */
      assert(fsio->reg > 0 && fsio->reg <= ARRAY_SIZE(info->varyings));

      if (fsio->reg > info->num_varyings)
         info->num_varyings = fsio->reg;

      varying = &info->varyings[fsio->reg - 1];
      varying->num_components = fsio->num_components;

      if (!interpolate_always) /* colors affected by flat shading */
         varying->pa_attributes = 0x200;
      else /* texture coord or other bypasses flat shading */
         varying->pa_attributes = 0x2f1;

      varying->use[0] = interpolate_always ? VARYING_COMPONENT_USE_POINTCOORD_X : VARYING_COMPONENT_USE_USED;
      varying->use[1] = interpolate_always ? VARYING_COMPONENT_USE_POINTCOORD_Y : VARYING_COMPONENT_USE_USED;
      varying->use[2] = VARYING_COMPONENT_USE_USED;
      varying->use[3] = VARYING_COMPONENT_USE_USED;


      /* point coord is an input to the PS without matching VS output,
       * so it gets a varying slot without being assigned a VS register.
       */
      if (fsio->semantic.Name == TGSI_SEMANTIC_PCOORD) {
         info->pcoord_varying_comp_ofs = comp_ofs;
      } else {
         if (vsio == NULL) { /* not found -- link error */
            BUG("Semantic %d value %d not found in vertex shader outputs\n", fsio->semantic.Name, fsio->semantic.Index);
            return true;
         }

         varying->reg = vsio->reg;
      }

      comp_ofs += varying->num_components;
   }

   assert(info->num_varyings == fs->infile.num_reg);

   return false;
}
2605