1b8e80941Smrg/*
2b8e80941Smrg * Copyright (c) 2012-2015 Etnaviv Project
3b8e80941Smrg *
4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg * to deal in the Software without restriction, including without limitation
7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sub license,
8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg *
11b8e80941Smrg * The above copyright notice and this permission notice (including the
12b8e80941Smrg * next paragraph) shall be included in all copies or substantial portions
13b8e80941Smrg * of the Software.
14b8e80941Smrg *
15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21b8e80941Smrg * DEALINGS IN THE SOFTWARE.
22b8e80941Smrg *
23b8e80941Smrg * Authors:
24b8e80941Smrg *    Wladimir J. van der Laan <laanwj@gmail.com>
25b8e80941Smrg */
26b8e80941Smrg
27b8e80941Smrg/* TGSI->Vivante shader ISA conversion */
28b8e80941Smrg
29b8e80941Smrg/* What does the compiler return (see etna_shader_object)?
30b8e80941Smrg *  1) instruction data
31b8e80941Smrg *  2) input-to-temporary mapping (fixed for ps)
32b8e80941Smrg *      *) in case of ps, semantic -> varying id mapping
33b8e80941Smrg *      *) for each varying: number of components used (r, rg, rgb, rgba)
34b8e80941Smrg *  3) temporary-to-output mapping (in case of vs, fixed for ps)
35b8e80941Smrg *  4) for each input/output: possible semantic (position, color, glpointcoord, ...)
36b8e80941Smrg *  5) immediates base offset, immediates data
37b8e80941Smrg *  6) used texture units (and possibly the TGSI_TEXTURE_* type); not needed to
38b8e80941Smrg *     configure the hw, but useful for error checking
39b8e80941Smrg *  7) enough information to add the z=(z+w)/2.0 necessary for older chips
40b8e80941Smrg *     (output reg id is enough)
41b8e80941Smrg *
42b8e80941Smrg *  Empty shaders are not allowed, should always at least generate a NOP. Also
43b8e80941Smrg *  if there is a label at the end of the shader, an extra NOP should be
44b8e80941Smrg *  generated as jump target.
45b8e80941Smrg *
46b8e80941Smrg * TODO
47b8e80941Smrg * * Use an instruction scheduler
48b8e80941Smrg * * Indirect access to uniforms / temporaries using amode
49b8e80941Smrg */
50b8e80941Smrg
51b8e80941Smrg#include "etnaviv_compiler.h"
52b8e80941Smrg
53b8e80941Smrg#include "etnaviv_asm.h"
54b8e80941Smrg#include "etnaviv_context.h"
55b8e80941Smrg#include "etnaviv_debug.h"
56b8e80941Smrg#include "etnaviv_disasm.h"
57b8e80941Smrg#include "etnaviv_uniforms.h"
58b8e80941Smrg#include "etnaviv_util.h"
59b8e80941Smrg
60b8e80941Smrg#include "pipe/p_shader_tokens.h"
61b8e80941Smrg#include "tgsi/tgsi_info.h"
62b8e80941Smrg#include "tgsi/tgsi_iterate.h"
63b8e80941Smrg#include "tgsi/tgsi_lowering.h"
64b8e80941Smrg#include "tgsi/tgsi_strings.h"
65b8e80941Smrg#include "tgsi/tgsi_util.h"
66b8e80941Smrg#include "util/u_math.h"
67b8e80941Smrg#include "util/u_memory.h"
68b8e80941Smrg
69b8e80941Smrg#include <fcntl.h>
70b8e80941Smrg#include <stdio.h>
71b8e80941Smrg#include <sys/stat.h>
72b8e80941Smrg#include <sys/types.h>
73b8e80941Smrg
/* Capacity of etna_compile::inner_temp[], the scratch registers that may be
 * handed out while a single TGSI instruction is being translated. */
#define ETNA_MAX_INNER_TEMPS 2
75b8e80941Smrg
/* Two vec4 rows of floating point constants.
 * NOTE(review): judging by the name these feed the SIN/COS lowering, which is
 * not part of this view -- confirm against the users of this table. */
static const float sincos_const[2][4] = {
   [0] = { 2., -1., 4., -4. },
   [1] = { 1. / (2. * M_PI), 0.75, 0.5, 0.0 },
};
84b8e80941Smrg
/* Native register description structure.
 * Identifies one hardware-side register; all members are bitfields. */
struct etna_native_reg {
   unsigned valid : 1; /* set once a native register has been assigned */
   unsigned is_tex : 1; /* is texture unit, overrides rgroup */
   unsigned rgroup : 3; /* register group, e.g. INST_RGROUP_TEMP */
   unsigned id : 9; /* register number within the group */
};
92b8e80941Smrg
/* Register description.
 * One entry exists per TGSI register (file + index); it carries the liveness
 * range gathered by the usage pass and the native register it maps to. */
struct etna_reg_desc {
   enum tgsi_file_type file; /* IN, OUT, TEMP, ... */
   int idx; /* index into file */
   bool active; /* used in program */
   int first_use; /* instruction id of first use (scope begin), -1 if unused */
   int last_use; /* instruction id of last use (scope end, inclusive), -1 if unused */

   struct etna_native_reg native; /* native register to map to */
   unsigned usage_mask : 4; /* usage, per channel; accumulated from source operands */
   bool has_semantic; /* register has associated TGSI semantic */
   struct tgsi_declaration_semantic semantic; /* TGSI semantic, copied from the declaration */
   struct tgsi_declaration_interp interp; /* Interpolation type, copied from the declaration */
};
107b8e80941Smrg
/* Label information structure.
 * A label is a jump target identified by the instruction it resolves to. */
struct etna_compile_label {
   int inst_idx; /* Instruction id that label points to */
};
112b8e80941Smrg
/* Kind of control-flow construct an etna_compile_frame describes. */
enum etna_compile_frame_type {
   ETNA_COMPILE_FRAME_IF, /* IF/ELSE/ENDIF */
   ETNA_COMPILE_FRAME_LOOP, /* loop construct */
};
117b8e80941Smrg
/* Nesting scope frame (LOOP, IF, ...) during compilation.
 * Frames are kept in etna_compile::frame_stack.
 * NOTE(review): the lbl_* members look like indices into
 * etna_compile::labels, with the else/endif pair used by IF frames and the
 * loop pair by LOOP frames -- confirm against the code that pushes frames.
 */
struct etna_compile_frame {
   enum etna_compile_frame_type type; /* which construct this frame belongs to */
   int lbl_else_idx;
   int lbl_endif_idx;
   int lbl_loop_bgn_idx;
   int lbl_loop_end_idx;
};
127b8e80941Smrg
/* Per-TGSI-file view onto the shared register description storage. */
struct etna_compile_file {
   /* Number of registers in each TGSI file (max register+1) */
   size_t reg_size;
   /* Register descriptions, per register index; points into
    * etna_compile::decl, it is not a separate allocation */
   struct etna_reg_desc *reg;
};
134b8e80941Smrg
/* Append `val` to the growable array `arr`.
 *
 * Relies on two companion variables being in scope next to `arr`:
 * `arr##_count` (number of elements in use) and `arr##_sz` (allocated
 * capacity, in elements). When the array is full the capacity is doubled,
 * with a minimum of 16, before the element is stored.
 *
 * NOTE(review): the realloc() result is assigned straight back to `arr` and
 * never checked, so on allocation failure the old buffer is lost and the
 * store below writes through a NULL pointer.
 */
#define array_insert(arr, val)                          \
   do {                                                 \
      if (arr##_count == arr##_sz) {                    \
         arr##_sz = MAX2(2 * arr##_sz, 16);             \
         arr = realloc(arr, arr##_sz * sizeof(arr[0])); \
      }                                                 \
      arr[arr##_count++] = val;                         \
   } while (0)
143b8e80941Smrg
144b8e80941Smrg
/* Scratch area for compiling a shader, freed after compilation finishes.
 * Holds the TGSI input, the register bookkeeping shared by the passes, the
 * immediate pool and the generated instruction words. */
struct etna_compile {
   const struct tgsi_token *tokens; /* TGSI token stream being compiled */
   bool free_tokens; /* NOTE(review): presumably "tokens is owned here" -- confirm */

   struct tgsi_shader_info info;

   /* Register descriptions, per TGSI file, per register index */
   struct etna_compile_file file[TGSI_FILE_COUNT];

   /* Keep track of TGSI register declarations; backing storage that the
    * file[].reg pointers above are carved out of */
   struct etna_reg_desc decl[ETNA_MAX_DECL];
   uint total_decls; /* number of entries of decl[] in use */

   /* Per-instruction flags for dead instructions, which are removed in a
    * separate pass */
   bool dead_inst[ETNA_MAX_TOKENS];

   /* Immediate data; imm_contents[i] describes what imm_data[i] holds */
   enum etna_immediate_contents imm_contents[ETNA_MAX_IMM];
   uint32_t imm_data[ETNA_MAX_IMM];
   uint32_t imm_base; /* base of immediates (in 32 bit units) */
   uint32_t imm_size; /* size of immediates (in 32 bit units) */

   /* Next free native register, for register allocation */
   uint32_t next_free_native;

   /* Temporary register for use within translated TGSI instruction,
    * only allocated when needed.
    */
   int inner_temps; /* number of inner temps used; at most
                       ETNA_MAX_INNER_TEMPS */
   struct etna_native_reg inner_temp[ETNA_MAX_INNER_TEMPS];

   /* Fields for handling nested conditionals */
   struct etna_compile_frame frame_stack[ETNA_MAX_DEPTH];
   int frame_sp; /* NOTE(review): presumably the current depth of frame_stack -- confirm */
   int lbl_usage[ETNA_MAX_INSTRUCTIONS];

   /* Growable label array; labels_count / labels_sz follow the naming
    * convention expected by array_insert() */
   unsigned labels_count, labels_sz;
   struct etna_compile_label *labels;

   unsigned num_loops;

   /* Code generation */
   int inst_ptr; /* current instruction pointer */
   uint32_t code[ETNA_MAX_INSTRUCTIONS * ETNA_INST_SIZE];

   /* I/O */

   /* Number of varyings (PS only) */
   int num_varyings;

   /* GPU hardware specs */
   const struct etna_specs *specs;

   const struct etna_shader_key *key;
};
202b8e80941Smrg
203b8e80941Smrgstatic struct etna_reg_desc *
204b8e80941Smrgetna_get_dst_reg(struct etna_compile *c, struct tgsi_dst_register dst)
205b8e80941Smrg{
206b8e80941Smrg   return &c->file[dst.File].reg[dst.Index];
207b8e80941Smrg}
208b8e80941Smrg
209b8e80941Smrgstatic struct etna_reg_desc *
210b8e80941Smrgetna_get_src_reg(struct etna_compile *c, struct tgsi_src_register src)
211b8e80941Smrg{
212b8e80941Smrg   return &c->file[src.File].reg[src.Index];
213b8e80941Smrg}
214b8e80941Smrg
215b8e80941Smrgstatic struct etna_native_reg
216b8e80941Smrgetna_native_temp(unsigned reg)
217b8e80941Smrg{
218b8e80941Smrg   return (struct etna_native_reg) {
219b8e80941Smrg      .valid = 1,
220b8e80941Smrg      .rgroup = INST_RGROUP_TEMP,
221b8e80941Smrg      .id = reg
222b8e80941Smrg   };
223b8e80941Smrg}
224b8e80941Smrg
/** Register allocation **/

/* Sort criteria understood by sort_registers(): which end of a register's
 * live range is used as the key, and in which direction to order. */
enum reg_sort_order {
   FIRST_USE_ASC, /* by first_use, ascending */
   FIRST_USE_DESC, /* by first_use, descending */
   LAST_USE_ASC, /* by last_use, ascending */
   LAST_USE_DESC /* by last_use, descending */
};
232b8e80941Smrg
/* Augmented register description for sorting: pairs a register with the
 * precomputed key it is ordered by. */
struct sort_rec {
   struct etna_reg_desc *ptr; /* register being ordered */
   int key; /* sort key, filled in by sort_registers() */
};
238b8e80941Smrg
239b8e80941Smrgstatic int
240b8e80941Smrgsort_rec_compar(const struct sort_rec *a, const struct sort_rec *b)
241b8e80941Smrg{
242b8e80941Smrg   if (a->key < b->key)
243b8e80941Smrg      return -1;
244b8e80941Smrg
245b8e80941Smrg   if (a->key > b->key)
246b8e80941Smrg      return 1;
247b8e80941Smrg
248b8e80941Smrg   return 0;
249b8e80941Smrg}
250b8e80941Smrg
251b8e80941Smrg/* create an index on a register set based on certain criteria. */
252b8e80941Smrgstatic int
253b8e80941Smrgsort_registers(struct sort_rec *sorted, struct etna_compile_file *file,
254b8e80941Smrg               enum reg_sort_order so)
255b8e80941Smrg{
256b8e80941Smrg   struct etna_reg_desc *regs = file->reg;
257b8e80941Smrg   int ptr = 0;
258b8e80941Smrg
259b8e80941Smrg   /* pre-populate keys from active registers */
260b8e80941Smrg   for (int idx = 0; idx < file->reg_size; ++idx) {
261b8e80941Smrg      /* only interested in active registers now; will only assign inactive ones
262b8e80941Smrg       * if no space in active ones */
263b8e80941Smrg      if (regs[idx].active) {
264b8e80941Smrg         sorted[ptr].ptr = &regs[idx];
265b8e80941Smrg
266b8e80941Smrg         switch (so) {
267b8e80941Smrg         case FIRST_USE_ASC:
268b8e80941Smrg            sorted[ptr].key = regs[idx].first_use;
269b8e80941Smrg            break;
270b8e80941Smrg         case LAST_USE_ASC:
271b8e80941Smrg            sorted[ptr].key = regs[idx].last_use;
272b8e80941Smrg            break;
273b8e80941Smrg         case FIRST_USE_DESC:
274b8e80941Smrg            sorted[ptr].key = -regs[idx].first_use;
275b8e80941Smrg            break;
276b8e80941Smrg         case LAST_USE_DESC:
277b8e80941Smrg            sorted[ptr].key = -regs[idx].last_use;
278b8e80941Smrg            break;
279b8e80941Smrg         }
280b8e80941Smrg         ptr++;
281b8e80941Smrg      }
282b8e80941Smrg   }
283b8e80941Smrg
284b8e80941Smrg   /* sort index by key */
285b8e80941Smrg   qsort(sorted, ptr, sizeof(struct sort_rec),
286b8e80941Smrg         (int (*)(const void *, const void *))sort_rec_compar);
287b8e80941Smrg
288b8e80941Smrg   return ptr;
289b8e80941Smrg}
290b8e80941Smrg
291b8e80941Smrg/* Allocate a new, unused, native temp register */
292b8e80941Smrgstatic struct etna_native_reg
293b8e80941Smrgalloc_new_native_reg(struct etna_compile *c)
294b8e80941Smrg{
295b8e80941Smrg   assert(c->next_free_native < ETNA_MAX_TEMPS);
296b8e80941Smrg   return etna_native_temp(c->next_free_native++);
297b8e80941Smrg}
298b8e80941Smrg
299b8e80941Smrg/* assign TEMPs to native registers */
300b8e80941Smrgstatic void
301b8e80941Smrgassign_temporaries_to_native(struct etna_compile *c,
302b8e80941Smrg                             struct etna_compile_file *file)
303b8e80941Smrg{
304b8e80941Smrg   struct etna_reg_desc *temps = file->reg;
305b8e80941Smrg
306b8e80941Smrg   for (int idx = 0; idx < file->reg_size; ++idx)
307b8e80941Smrg      temps[idx].native = alloc_new_native_reg(c);
308b8e80941Smrg}
309b8e80941Smrg
310b8e80941Smrg/* assign inputs and outputs to temporaries
311b8e80941Smrg * Gallium assumes that the hardware has separate registers for taking input and
312b8e80941Smrg * output, however Vivante GPUs use temporaries both for passing in inputs and
313b8e80941Smrg * passing back outputs.
314b8e80941Smrg * Try to re-use temporary registers where possible. */
315b8e80941Smrgstatic void
316b8e80941Smrgassign_inouts_to_temporaries(struct etna_compile *c, uint file)
317b8e80941Smrg{
318b8e80941Smrg   bool mode_inputs = (file == TGSI_FILE_INPUT);
319b8e80941Smrg   int inout_ptr = 0, num_inouts;
320b8e80941Smrg   int temp_ptr = 0, num_temps;
321b8e80941Smrg   struct sort_rec inout_order[ETNA_MAX_TEMPS];
322b8e80941Smrg   struct sort_rec temps_order[ETNA_MAX_TEMPS];
323b8e80941Smrg   num_inouts = sort_registers(inout_order, &c->file[file],
324b8e80941Smrg                               mode_inputs ? LAST_USE_ASC : FIRST_USE_ASC);
325b8e80941Smrg   num_temps = sort_registers(temps_order, &c->file[TGSI_FILE_TEMPORARY],
326b8e80941Smrg                              mode_inputs ? FIRST_USE_ASC : LAST_USE_ASC);
327b8e80941Smrg
328b8e80941Smrg   while (inout_ptr < num_inouts && temp_ptr < num_temps) {
329b8e80941Smrg      struct etna_reg_desc *inout = inout_order[inout_ptr].ptr;
330b8e80941Smrg      struct etna_reg_desc *temp = temps_order[temp_ptr].ptr;
331b8e80941Smrg
332b8e80941Smrg      if (!inout->active || inout->native.valid) { /* Skip if already a native register assigned */
333b8e80941Smrg         inout_ptr++;
334b8e80941Smrg         continue;
335b8e80941Smrg      }
336b8e80941Smrg
337b8e80941Smrg      /* last usage of this input is before or in same instruction of first use
338b8e80941Smrg       * of temporary? */
339b8e80941Smrg      if (mode_inputs ? (inout->last_use <= temp->first_use)
340b8e80941Smrg                      : (inout->first_use >= temp->last_use)) {
341b8e80941Smrg         /* assign it and advance to next input */
342b8e80941Smrg         inout->native = temp->native;
343b8e80941Smrg         inout_ptr++;
344b8e80941Smrg      }
345b8e80941Smrg
346b8e80941Smrg      temp_ptr++;
347b8e80941Smrg   }
348b8e80941Smrg
349b8e80941Smrg   /* if we couldn't reuse current ones, allocate new temporaries */
350b8e80941Smrg   for (inout_ptr = 0; inout_ptr < num_inouts; ++inout_ptr) {
351b8e80941Smrg      struct etna_reg_desc *inout = inout_order[inout_ptr].ptr;
352b8e80941Smrg
353b8e80941Smrg      if (inout->active && !inout->native.valid)
354b8e80941Smrg         inout->native = alloc_new_native_reg(c);
355b8e80941Smrg   }
356b8e80941Smrg}
357b8e80941Smrg
358b8e80941Smrg/* Allocate an immediate with a certain value and return the index. If
359b8e80941Smrg * there is already an immediate with that value, return that.
360b8e80941Smrg */
361b8e80941Smrgstatic struct etna_inst_src
362b8e80941Smrgalloc_imm(struct etna_compile *c, enum etna_immediate_contents contents,
363b8e80941Smrg          uint32_t value)
364b8e80941Smrg{
365b8e80941Smrg   int idx;
366b8e80941Smrg
367b8e80941Smrg   /* Could use a hash table to speed this up */
368b8e80941Smrg   for (idx = 0; idx < c->imm_size; ++idx) {
369b8e80941Smrg      if (c->imm_contents[idx] == contents && c->imm_data[idx] == value)
370b8e80941Smrg         break;
371b8e80941Smrg   }
372b8e80941Smrg
373b8e80941Smrg   /* look if there is an unused slot */
374b8e80941Smrg   if (idx == c->imm_size) {
375b8e80941Smrg      for (idx = 0; idx < c->imm_size; ++idx) {
376b8e80941Smrg         if (c->imm_contents[idx] == ETNA_IMMEDIATE_UNUSED)
377b8e80941Smrg            break;
378b8e80941Smrg      }
379b8e80941Smrg   }
380b8e80941Smrg
381b8e80941Smrg   /* allocate new immediate */
382b8e80941Smrg   if (idx == c->imm_size) {
383b8e80941Smrg      assert(c->imm_size < ETNA_MAX_IMM);
384b8e80941Smrg      idx = c->imm_size++;
385b8e80941Smrg      c->imm_data[idx] = value;
386b8e80941Smrg      c->imm_contents[idx] = contents;
387b8e80941Smrg   }
388b8e80941Smrg
389b8e80941Smrg   /* swizzle so that component with value is returned in all components */
390b8e80941Smrg   idx += c->imm_base;
391b8e80941Smrg   struct etna_inst_src imm_src = {
392b8e80941Smrg      .use = 1,
393b8e80941Smrg      .rgroup = INST_RGROUP_UNIFORM_0,
394b8e80941Smrg      .reg = idx / 4,
395b8e80941Smrg      .swiz = INST_SWIZ_BROADCAST(idx & 3)
396b8e80941Smrg   };
397b8e80941Smrg
398b8e80941Smrg   return imm_src;
399b8e80941Smrg}
400b8e80941Smrg
401b8e80941Smrgstatic struct etna_inst_src
402b8e80941Smrgalloc_imm_u32(struct etna_compile *c, uint32_t value)
403b8e80941Smrg{
404b8e80941Smrg   return alloc_imm(c, ETNA_IMMEDIATE_CONSTANT, value);
405b8e80941Smrg}
406b8e80941Smrg
407b8e80941Smrgstatic struct etna_inst_src
408b8e80941Smrgalloc_imm_vec4u(struct etna_compile *c, enum etna_immediate_contents contents,
409b8e80941Smrg                const uint32_t *values)
410b8e80941Smrg{
411b8e80941Smrg   struct etna_inst_src imm_src = { };
412b8e80941Smrg   int idx, i;
413b8e80941Smrg
414b8e80941Smrg   for (idx = 0; idx + 3 < c->imm_size; idx += 4) {
415b8e80941Smrg      /* What if we can use a uniform with a different swizzle? */
416b8e80941Smrg      for (i = 0; i < 4; i++)
417b8e80941Smrg         if (c->imm_contents[idx + i] != contents || c->imm_data[idx + i] != values[i])
418b8e80941Smrg            break;
419b8e80941Smrg      if (i == 4)
420b8e80941Smrg         break;
421b8e80941Smrg   }
422b8e80941Smrg
423b8e80941Smrg   if (idx + 3 >= c->imm_size) {
424b8e80941Smrg      idx = align(c->imm_size, 4);
425b8e80941Smrg      assert(idx + 4 <= ETNA_MAX_IMM);
426b8e80941Smrg
427b8e80941Smrg      for (i = 0; i < 4; i++) {
428b8e80941Smrg         c->imm_data[idx + i] = values[i];
429b8e80941Smrg         c->imm_contents[idx + i] = contents;
430b8e80941Smrg      }
431b8e80941Smrg
432b8e80941Smrg      c->imm_size = idx + 4;
433b8e80941Smrg   }
434b8e80941Smrg
435b8e80941Smrg   assert((c->imm_base & 3) == 0);
436b8e80941Smrg   idx += c->imm_base;
437b8e80941Smrg   imm_src.use = 1;
438b8e80941Smrg   imm_src.rgroup = INST_RGROUP_UNIFORM_0;
439b8e80941Smrg   imm_src.reg = idx / 4;
440b8e80941Smrg   imm_src.swiz = INST_SWIZ_IDENTITY;
441b8e80941Smrg
442b8e80941Smrg   return imm_src;
443b8e80941Smrg}
444b8e80941Smrg
445b8e80941Smrgstatic uint32_t
446b8e80941Smrgget_imm_u32(struct etna_compile *c, const struct etna_inst_src *imm,
447b8e80941Smrg            unsigned swiz_idx)
448b8e80941Smrg{
449b8e80941Smrg   assert(imm->use == 1 && imm->rgroup == INST_RGROUP_UNIFORM_0);
450b8e80941Smrg   unsigned int idx = imm->reg * 4 + ((imm->swiz >> (swiz_idx * 2)) & 3);
451b8e80941Smrg
452b8e80941Smrg   return c->imm_data[idx];
453b8e80941Smrg}
454b8e80941Smrg
455b8e80941Smrg/* Allocate immediate with a certain float value. If there is already an
456b8e80941Smrg * immediate with that value, return that.
457b8e80941Smrg */
458b8e80941Smrgstatic struct etna_inst_src
459b8e80941Smrgalloc_imm_f32(struct etna_compile *c, float value)
460b8e80941Smrg{
461b8e80941Smrg   return alloc_imm_u32(c, fui(value));
462b8e80941Smrg}
463b8e80941Smrg
464b8e80941Smrgstatic struct etna_inst_src
465b8e80941Smrgetna_imm_vec4f(struct etna_compile *c, const float *vec4)
466b8e80941Smrg{
467b8e80941Smrg   uint32_t val[4];
468b8e80941Smrg
469b8e80941Smrg   for (int i = 0; i < 4; i++)
470b8e80941Smrg      val[i] = fui(vec4[i]);
471b8e80941Smrg
472b8e80941Smrg   return alloc_imm_vec4u(c, ETNA_IMMEDIATE_CONSTANT, val);
473b8e80941Smrg}
474b8e80941Smrg
475b8e80941Smrg/* Pass -- check register file declarations and immediates */
476b8e80941Smrgstatic void
477b8e80941Smrgetna_compile_parse_declarations(struct etna_compile *c)
478b8e80941Smrg{
479b8e80941Smrg   struct tgsi_parse_context ctx = { };
480b8e80941Smrg   MAYBE_UNUSED unsigned status = tgsi_parse_init(&ctx, c->tokens);
481b8e80941Smrg   assert(status == TGSI_PARSE_OK);
482b8e80941Smrg
483b8e80941Smrg   while (!tgsi_parse_end_of_tokens(&ctx)) {
484b8e80941Smrg      tgsi_parse_token(&ctx);
485b8e80941Smrg
486b8e80941Smrg      switch (ctx.FullToken.Token.Type) {
487b8e80941Smrg      case TGSI_TOKEN_TYPE_IMMEDIATE: {
488b8e80941Smrg         /* immediates are handled differently from other files; they are
489b8e80941Smrg          * not declared explicitly, and always add four components */
490b8e80941Smrg         const struct tgsi_full_immediate *imm = &ctx.FullToken.FullImmediate;
491b8e80941Smrg         assert(c->imm_size <= (ETNA_MAX_IMM - 4));
492b8e80941Smrg
493b8e80941Smrg         for (int i = 0; i < 4; ++i) {
494b8e80941Smrg            unsigned idx = c->imm_size++;
495b8e80941Smrg
496b8e80941Smrg            c->imm_data[idx] = imm->u[i].Uint;
497b8e80941Smrg            c->imm_contents[idx] = ETNA_IMMEDIATE_CONSTANT;
498b8e80941Smrg         }
499b8e80941Smrg      }
500b8e80941Smrg      break;
501b8e80941Smrg      }
502b8e80941Smrg   }
503b8e80941Smrg
504b8e80941Smrg   tgsi_parse_free(&ctx);
505b8e80941Smrg}
506b8e80941Smrg
507b8e80941Smrg/* Allocate register declarations for the registers in all register files */
508b8e80941Smrgstatic void
509b8e80941Smrgetna_allocate_decls(struct etna_compile *c)
510b8e80941Smrg{
511b8e80941Smrg   uint idx = 0;
512b8e80941Smrg
513b8e80941Smrg   for (int x = 0; x < TGSI_FILE_COUNT; ++x) {
514b8e80941Smrg      c->file[x].reg = &c->decl[idx];
515b8e80941Smrg      c->file[x].reg_size = c->info.file_max[x] + 1;
516b8e80941Smrg
517b8e80941Smrg      for (int sub = 0; sub < c->file[x].reg_size; ++sub) {
518b8e80941Smrg         c->decl[idx].file = x;
519b8e80941Smrg         c->decl[idx].idx = sub;
520b8e80941Smrg         idx++;
521b8e80941Smrg      }
522b8e80941Smrg   }
523b8e80941Smrg
524b8e80941Smrg   c->total_decls = idx;
525b8e80941Smrg}
526b8e80941Smrg
527b8e80941Smrg/* Pass -- check and record usage of temporaries, inputs, outputs */
528b8e80941Smrgstatic void
529b8e80941Smrgetna_compile_pass_check_usage(struct etna_compile *c)
530b8e80941Smrg{
531b8e80941Smrg   struct tgsi_parse_context ctx = { };
532b8e80941Smrg   MAYBE_UNUSED unsigned status = tgsi_parse_init(&ctx, c->tokens);
533b8e80941Smrg   assert(status == TGSI_PARSE_OK);
534b8e80941Smrg
535b8e80941Smrg   for (int idx = 0; idx < c->total_decls; ++idx) {
536b8e80941Smrg      c->decl[idx].active = false;
537b8e80941Smrg      c->decl[idx].first_use = c->decl[idx].last_use = -1;
538b8e80941Smrg   }
539b8e80941Smrg
540b8e80941Smrg   int inst_idx = 0;
541b8e80941Smrg   while (!tgsi_parse_end_of_tokens(&ctx)) {
542b8e80941Smrg      tgsi_parse_token(&ctx);
543b8e80941Smrg      /* find out max register #s used
544b8e80941Smrg       * For every register mark first and last instruction index where it's
545b8e80941Smrg       * used this allows finding ranges where the temporary can be borrowed
546b8e80941Smrg       * as input and/or output register
547b8e80941Smrg       *
548b8e80941Smrg       * XXX in the case of loops this needs special care, or even be completely
549b8e80941Smrg       * disabled, as
550b8e80941Smrg       * the last usage of a register inside a loop means it can still be used
551b8e80941Smrg       * on next loop
552b8e80941Smrg       * iteration (execution is no longer * chronological). The register can
553b8e80941Smrg       * only be
554b8e80941Smrg       * declared "free" after the loop finishes.
555b8e80941Smrg       *
556b8e80941Smrg       * Same for inputs: the first usage of a register inside a loop doesn't
557b8e80941Smrg       * mean that the register
558b8e80941Smrg       * won't have been overwritten in previous iteration. The register can
559b8e80941Smrg       * only be declared free before the loop
560b8e80941Smrg       * starts.
561b8e80941Smrg       * The proper way would be to do full dominator / post-dominator analysis
562b8e80941Smrg       * (especially with more complicated
563b8e80941Smrg       * control flow such as direct branch instructions) but not for now...
564b8e80941Smrg       */
565b8e80941Smrg      switch (ctx.FullToken.Token.Type) {
566b8e80941Smrg      case TGSI_TOKEN_TYPE_DECLARATION: {
567b8e80941Smrg         /* Declaration: fill in file details */
568b8e80941Smrg         const struct tgsi_full_declaration *decl = &ctx.FullToken.FullDeclaration;
569b8e80941Smrg         struct etna_compile_file *file = &c->file[decl->Declaration.File];
570b8e80941Smrg
571b8e80941Smrg         for (int idx = decl->Range.First; idx <= decl->Range.Last; ++idx) {
572b8e80941Smrg            file->reg[idx].usage_mask = 0; // we'll compute this ourselves
573b8e80941Smrg            file->reg[idx].has_semantic = decl->Declaration.Semantic;
574b8e80941Smrg            file->reg[idx].semantic = decl->Semantic;
575b8e80941Smrg            file->reg[idx].interp = decl->Interp;
576b8e80941Smrg         }
577b8e80941Smrg      } break;
578b8e80941Smrg      case TGSI_TOKEN_TYPE_INSTRUCTION: {
579b8e80941Smrg         /* Instruction: iterate over operands of instruction */
580b8e80941Smrg         const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction;
581b8e80941Smrg
582b8e80941Smrg         /* iterate over destination registers */
583b8e80941Smrg         for (int idx = 0; idx < inst->Instruction.NumDstRegs; ++idx) {
584b8e80941Smrg            struct etna_reg_desc *reg_desc = &c->file[inst->Dst[idx].Register.File].reg[inst->Dst[idx].Register.Index];
585b8e80941Smrg
586b8e80941Smrg            if (reg_desc->first_use == -1)
587b8e80941Smrg               reg_desc->first_use = inst_idx;
588b8e80941Smrg
589b8e80941Smrg            reg_desc->last_use = inst_idx;
590b8e80941Smrg            reg_desc->active = true;
591b8e80941Smrg         }
592b8e80941Smrg
593b8e80941Smrg         /* iterate over source registers */
594b8e80941Smrg         for (int idx = 0; idx < inst->Instruction.NumSrcRegs; ++idx) {
595b8e80941Smrg            struct etna_reg_desc *reg_desc = &c->file[inst->Src[idx].Register.File].reg[inst->Src[idx].Register.Index];
596b8e80941Smrg
597b8e80941Smrg            if (reg_desc->first_use == -1)
598b8e80941Smrg               reg_desc->first_use = inst_idx;
599b8e80941Smrg
600b8e80941Smrg            reg_desc->last_use = inst_idx;
601b8e80941Smrg            reg_desc->active = true;
602b8e80941Smrg            /* accumulate usage mask for register, this is used to determine how
603b8e80941Smrg             * many slots for varyings
604b8e80941Smrg             * should be allocated */
605b8e80941Smrg            reg_desc->usage_mask |= tgsi_util_get_inst_usage_mask(inst, idx);
606b8e80941Smrg         }
607b8e80941Smrg         inst_idx += 1;
608b8e80941Smrg      } break;
609b8e80941Smrg      default:
610b8e80941Smrg         break;
611b8e80941Smrg      }
612b8e80941Smrg   }
613b8e80941Smrg
614b8e80941Smrg   tgsi_parse_free(&ctx);
615b8e80941Smrg}
616b8e80941Smrg
617b8e80941Smrg/* assign inputs that need to be assigned to specific registers */
618b8e80941Smrgstatic void
619b8e80941Smrgassign_special_inputs(struct etna_compile *c)
620b8e80941Smrg{
621b8e80941Smrg   if (c->info.processor == PIPE_SHADER_FRAGMENT) {
622b8e80941Smrg      /* never assign t0 as it is the position output, start assigning at t1 */
623b8e80941Smrg      c->next_free_native = 1;
624b8e80941Smrg
625b8e80941Smrg      /* hardwire TGSI_SEMANTIC_POSITION (input and output) to t0 */
626b8e80941Smrg      for (int idx = 0; idx < c->total_decls; ++idx) {
627b8e80941Smrg         struct etna_reg_desc *reg = &c->decl[idx];
628b8e80941Smrg
629b8e80941Smrg         if (reg->active && reg->semantic.Name == TGSI_SEMANTIC_POSITION)
630b8e80941Smrg            reg->native = etna_native_temp(0);
631b8e80941Smrg      }
632b8e80941Smrg   }
633b8e80941Smrg}
634b8e80941Smrg
635b8e80941Smrg/* Check that a move instruction does not swizzle any of the components
636b8e80941Smrg * that it writes.
637b8e80941Smrg */
638b8e80941Smrgstatic bool
639b8e80941Smrgetna_mov_check_no_swizzle(const struct tgsi_dst_register dst,
640b8e80941Smrg                          const struct tgsi_src_register src)
641b8e80941Smrg{
642b8e80941Smrg   return (!(dst.WriteMask & TGSI_WRITEMASK_X) || src.SwizzleX == TGSI_SWIZZLE_X) &&
643b8e80941Smrg          (!(dst.WriteMask & TGSI_WRITEMASK_Y) || src.SwizzleY == TGSI_SWIZZLE_Y) &&
644b8e80941Smrg          (!(dst.WriteMask & TGSI_WRITEMASK_Z) || src.SwizzleZ == TGSI_SWIZZLE_Z) &&
645b8e80941Smrg          (!(dst.WriteMask & TGSI_WRITEMASK_W) || src.SwizzleW == TGSI_SWIZZLE_W);
646b8e80941Smrg}
647b8e80941Smrg
648b8e80941Smrg/* Pass -- optimize outputs
 * Mesa tends to generate code like this at the end of their shaders
650b8e80941Smrg *   MOV OUT[1], TEMP[2]
651b8e80941Smrg *   MOV OUT[0], TEMP[0]
652b8e80941Smrg *   MOV OUT[2], TEMP[1]
653b8e80941Smrg * Recognize if
654b8e80941Smrg * a) there is only a single assignment to an output register and
655b8e80941Smrg * b) the temporary is not used after that
656b8e80941Smrg * Also recognize direct assignment of IN to OUT (passthrough)
657b8e80941Smrg **/
658b8e80941Smrgstatic void
659b8e80941Smrgetna_compile_pass_optimize_outputs(struct etna_compile *c)
660b8e80941Smrg{
661b8e80941Smrg   struct tgsi_parse_context ctx = { };
662b8e80941Smrg   int inst_idx = 0;
663b8e80941Smrg   MAYBE_UNUSED unsigned status = tgsi_parse_init(&ctx, c->tokens);
664b8e80941Smrg   assert(status == TGSI_PARSE_OK);
665b8e80941Smrg
666b8e80941Smrg   while (!tgsi_parse_end_of_tokens(&ctx)) {
667b8e80941Smrg      tgsi_parse_token(&ctx);
668b8e80941Smrg
669b8e80941Smrg      switch (ctx.FullToken.Token.Type) {
670b8e80941Smrg      case TGSI_TOKEN_TYPE_INSTRUCTION: {
671b8e80941Smrg         const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction;
672b8e80941Smrg
673b8e80941Smrg         /* iterate over operands */
674b8e80941Smrg         switch (inst->Instruction.Opcode) {
675b8e80941Smrg         case TGSI_OPCODE_MOV: {
676b8e80941Smrg            /* We are only interested in eliminating MOVs which write to
677b8e80941Smrg             * the shader outputs. Test for this early. */
678b8e80941Smrg            if (inst->Dst[0].Register.File != TGSI_FILE_OUTPUT)
679b8e80941Smrg               break;
680b8e80941Smrg            /* Elimination of a MOV must have no visible effect on the
681b8e80941Smrg             * resulting shader: this means the MOV must not swizzle or
682b8e80941Smrg             * saturate, and its source must not have the negate or
683b8e80941Smrg             * absolute modifiers. */
684b8e80941Smrg            if (!etna_mov_check_no_swizzle(inst->Dst[0].Register, inst->Src[0].Register) ||
685b8e80941Smrg                inst->Instruction.Saturate || inst->Src[0].Register.Negate ||
686b8e80941Smrg                inst->Src[0].Register.Absolute)
687b8e80941Smrg               break;
688b8e80941Smrg
689b8e80941Smrg            uint out_idx = inst->Dst[0].Register.Index;
690b8e80941Smrg            uint in_idx = inst->Src[0].Register.Index;
691b8e80941Smrg            /* assignment of temporary to output --
692b8e80941Smrg             * and the output doesn't yet have a native register assigned
693b8e80941Smrg             * and the last use of the temporary is this instruction
694b8e80941Smrg             * and the MOV does not do a swizzle
695b8e80941Smrg             */
696b8e80941Smrg            if (inst->Src[0].Register.File == TGSI_FILE_TEMPORARY &&
697b8e80941Smrg                !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid &&
698b8e80941Smrg                c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use == inst_idx) {
699b8e80941Smrg               c->file[TGSI_FILE_OUTPUT].reg[out_idx].native =
700b8e80941Smrg                  c->file[TGSI_FILE_TEMPORARY].reg[in_idx].native;
701b8e80941Smrg               /* prevent temp from being re-used for the rest of the shader */
702b8e80941Smrg               c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use = ETNA_MAX_TOKENS;
703b8e80941Smrg               /* mark this MOV instruction as a no-op */
704b8e80941Smrg               c->dead_inst[inst_idx] = true;
705b8e80941Smrg            }
706b8e80941Smrg            /* direct assignment of input to output --
707b8e80941Smrg             * and the input or output doesn't yet have a native register
708b8e80941Smrg             * assigned
709b8e80941Smrg             * and the output is only used in this instruction,
710b8e80941Smrg             * allocate a new register, and associate both input and output to
711b8e80941Smrg             * it
712b8e80941Smrg             * and the MOV does not do a swizzle
713b8e80941Smrg             */
714b8e80941Smrg            if (inst->Src[0].Register.File == TGSI_FILE_INPUT &&
715b8e80941Smrg                !c->file[TGSI_FILE_INPUT].reg[in_idx].native.valid &&
716b8e80941Smrg                !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid &&
717b8e80941Smrg                c->file[TGSI_FILE_OUTPUT].reg[out_idx].last_use == inst_idx &&
718b8e80941Smrg                c->file[TGSI_FILE_OUTPUT].reg[out_idx].first_use == inst_idx) {
719b8e80941Smrg               c->file[TGSI_FILE_OUTPUT].reg[out_idx].native =
720b8e80941Smrg                  c->file[TGSI_FILE_INPUT].reg[in_idx].native =
721b8e80941Smrg                     alloc_new_native_reg(c);
722b8e80941Smrg               /* mark this MOV instruction as a no-op */
723b8e80941Smrg               c->dead_inst[inst_idx] = true;
724b8e80941Smrg            }
725b8e80941Smrg         } break;
726b8e80941Smrg         default:;
727b8e80941Smrg         }
728b8e80941Smrg         inst_idx += 1;
729b8e80941Smrg      } break;
730b8e80941Smrg      }
731b8e80941Smrg   }
732b8e80941Smrg
733b8e80941Smrg   tgsi_parse_free(&ctx);
734b8e80941Smrg}
735b8e80941Smrg
736b8e80941Smrg/* Get a temporary to be used within one TGSI instruction.
737b8e80941Smrg * The first time that this function is called the temporary will be allocated.
738b8e80941Smrg * Each call to this function will return the same temporary.
739b8e80941Smrg */
740b8e80941Smrgstatic struct etna_native_reg
741b8e80941Smrgetna_compile_get_inner_temp(struct etna_compile *c)
742b8e80941Smrg{
743b8e80941Smrg   int inner_temp = c->inner_temps;
744b8e80941Smrg
745b8e80941Smrg   if (inner_temp < ETNA_MAX_INNER_TEMPS) {
746b8e80941Smrg      if (!c->inner_temp[inner_temp].valid)
747b8e80941Smrg         c->inner_temp[inner_temp] = alloc_new_native_reg(c);
748b8e80941Smrg
749b8e80941Smrg      /* alloc_new_native_reg() handles lack of registers */
750b8e80941Smrg      c->inner_temps += 1;
751b8e80941Smrg   } else {
752b8e80941Smrg      BUG("Too many inner temporaries (%i) requested in one instruction",
753b8e80941Smrg          inner_temp + 1);
754b8e80941Smrg   }
755b8e80941Smrg
756b8e80941Smrg   return c->inner_temp[inner_temp];
757b8e80941Smrg}
758b8e80941Smrg
759b8e80941Smrgstatic struct etna_inst_dst
760b8e80941Smrgetna_native_to_dst(struct etna_native_reg native, unsigned comps)
761b8e80941Smrg{
762b8e80941Smrg   /* Can only assign to temporaries */
763b8e80941Smrg   assert(native.valid && !native.is_tex && native.rgroup == INST_RGROUP_TEMP);
764b8e80941Smrg
765b8e80941Smrg   struct etna_inst_dst rv = {
766b8e80941Smrg      .comps = comps,
767b8e80941Smrg      .use = 1,
768b8e80941Smrg      .reg = native.id,
769b8e80941Smrg   };
770b8e80941Smrg
771b8e80941Smrg   return rv;
772b8e80941Smrg}
773b8e80941Smrg
774b8e80941Smrgstatic struct etna_inst_src
775b8e80941Smrgetna_native_to_src(struct etna_native_reg native, uint32_t swizzle)
776b8e80941Smrg{
777b8e80941Smrg   assert(native.valid && !native.is_tex);
778b8e80941Smrg
779b8e80941Smrg   struct etna_inst_src rv = {
780b8e80941Smrg      .use = 1,
781b8e80941Smrg      .swiz = swizzle,
782b8e80941Smrg      .rgroup = native.rgroup,
783b8e80941Smrg      .reg = native.id,
784b8e80941Smrg      .amode = INST_AMODE_DIRECT,
785b8e80941Smrg   };
786b8e80941Smrg
787b8e80941Smrg   return rv;
788b8e80941Smrg}
789b8e80941Smrg
790b8e80941Smrgstatic inline struct etna_inst_src
791b8e80941Smrgnegate(struct etna_inst_src src)
792b8e80941Smrg{
793b8e80941Smrg   src.neg = !src.neg;
794b8e80941Smrg
795b8e80941Smrg   return src;
796b8e80941Smrg}
797b8e80941Smrg
798b8e80941Smrgstatic inline struct etna_inst_src
799b8e80941Smrgabsolute(struct etna_inst_src src)
800b8e80941Smrg{
801b8e80941Smrg   src.abs = 1;
802b8e80941Smrg
803b8e80941Smrg   return src;
804b8e80941Smrg}
805b8e80941Smrg
806b8e80941Smrgstatic inline struct etna_inst_src
807b8e80941Smrgswizzle(struct etna_inst_src src, unsigned swizzle)
808b8e80941Smrg{
809b8e80941Smrg   src.swiz = inst_swiz_compose(src.swiz, swizzle);
810b8e80941Smrg
811b8e80941Smrg   return src;
812b8e80941Smrg}
813b8e80941Smrg
814b8e80941Smrg/* Emit instruction and append it to program */
815b8e80941Smrgstatic void
816b8e80941Smrgemit_inst(struct etna_compile *c, struct etna_inst *inst)
817b8e80941Smrg{
818b8e80941Smrg   assert(c->inst_ptr <= ETNA_MAX_INSTRUCTIONS);
819b8e80941Smrg
820b8e80941Smrg   /* Check for uniform conflicts (each instruction can only access one
821b8e80941Smrg    * uniform),
822b8e80941Smrg    * if detected, use an intermediate temporary */
823b8e80941Smrg   unsigned uni_rgroup = -1;
824b8e80941Smrg   unsigned uni_reg = -1;
825b8e80941Smrg
826b8e80941Smrg   for (int src = 0; src < ETNA_NUM_SRC; ++src) {
827b8e80941Smrg      if (etna_rgroup_is_uniform(inst->src[src].rgroup)) {
828b8e80941Smrg         if (uni_reg == -1) { /* first unique uniform used */
829b8e80941Smrg            uni_rgroup = inst->src[src].rgroup;
830b8e80941Smrg            uni_reg = inst->src[src].reg;
831b8e80941Smrg         } else { /* second or later; check that it is a re-use */
832b8e80941Smrg            if (uni_rgroup != inst->src[src].rgroup ||
833b8e80941Smrg                uni_reg != inst->src[src].reg) {
834b8e80941Smrg               DBG_F(ETNA_DBG_COMPILER_MSGS, "perf warning: instruction that "
835b8e80941Smrg                                             "accesses different uniforms, "
836b8e80941Smrg                                             "need to generate extra MOV");
837b8e80941Smrg               struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c);
838b8e80941Smrg
839b8e80941Smrg               /* Generate move instruction to temporary */
840b8e80941Smrg               etna_assemble(&c->code[c->inst_ptr * 4], &(struct etna_inst) {
841b8e80941Smrg                  .opcode = INST_OPCODE_MOV,
842b8e80941Smrg                  .dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y |
843b8e80941Smrg                                                        INST_COMPS_Z | INST_COMPS_W),
844b8e80941Smrg                  .src[2] = inst->src[src]
845b8e80941Smrg               });
846b8e80941Smrg
847b8e80941Smrg               c->inst_ptr++;
848b8e80941Smrg
849b8e80941Smrg               /* Modify instruction to use temp register instead of uniform */
850b8e80941Smrg               inst->src[src].use = 1;
851b8e80941Smrg               inst->src[src].rgroup = INST_RGROUP_TEMP;
852b8e80941Smrg               inst->src[src].reg = inner_temp.id;
853b8e80941Smrg               inst->src[src].swiz = INST_SWIZ_IDENTITY; /* swizzling happens on MOV */
854b8e80941Smrg               inst->src[src].neg = 0; /* negation happens on MOV */
855b8e80941Smrg               inst->src[src].abs = 0; /* abs happens on MOV */
856b8e80941Smrg               inst->src[src].amode = 0; /* amode effects happen on MOV */
857b8e80941Smrg            }
858b8e80941Smrg         }
859b8e80941Smrg      }
860b8e80941Smrg   }
861b8e80941Smrg
862b8e80941Smrg   /* Finally assemble the actual instruction */
863b8e80941Smrg   etna_assemble(&c->code[c->inst_ptr * 4], inst);
864b8e80941Smrg   c->inst_ptr++;
865b8e80941Smrg}
866b8e80941Smrg
867b8e80941Smrgstatic unsigned int
868b8e80941Smrgetna_amode(struct tgsi_ind_register indirect)
869b8e80941Smrg{
870b8e80941Smrg   assert(indirect.File == TGSI_FILE_ADDRESS);
871b8e80941Smrg   assert(indirect.Index == 0);
872b8e80941Smrg
873b8e80941Smrg   switch (indirect.Swizzle) {
874b8e80941Smrg   case TGSI_SWIZZLE_X:
875b8e80941Smrg      return INST_AMODE_ADD_A_X;
876b8e80941Smrg   case TGSI_SWIZZLE_Y:
877b8e80941Smrg      return INST_AMODE_ADD_A_Y;
878b8e80941Smrg   case TGSI_SWIZZLE_Z:
879b8e80941Smrg      return INST_AMODE_ADD_A_Z;
880b8e80941Smrg   case TGSI_SWIZZLE_W:
881b8e80941Smrg      return INST_AMODE_ADD_A_W;
882b8e80941Smrg   default:
883b8e80941Smrg      assert(!"Invalid swizzle");
884b8e80941Smrg   }
885b8e80941Smrg
886b8e80941Smrg   unreachable("bad swizzle");
887b8e80941Smrg}
888b8e80941Smrg
889b8e80941Smrg/* convert destination operand */
890b8e80941Smrgstatic struct etna_inst_dst
891b8e80941Smrgconvert_dst(struct etna_compile *c, const struct tgsi_full_dst_register *in)
892b8e80941Smrg{
893b8e80941Smrg   struct etna_inst_dst rv = {
894b8e80941Smrg      /// XXX .amode
895b8e80941Smrg      .comps = in->Register.WriteMask,
896b8e80941Smrg   };
897b8e80941Smrg
898b8e80941Smrg   if (in->Register.File == TGSI_FILE_ADDRESS) {
899b8e80941Smrg      assert(in->Register.Index == 0);
900b8e80941Smrg      rv.reg = in->Register.Index;
901b8e80941Smrg      rv.use = 0;
902b8e80941Smrg   } else {
903b8e80941Smrg      rv = etna_native_to_dst(etna_get_dst_reg(c, in->Register)->native,
904b8e80941Smrg                              in->Register.WriteMask);
905b8e80941Smrg   }
906b8e80941Smrg
907b8e80941Smrg   if (in->Register.Indirect)
908b8e80941Smrg      rv.amode = etna_amode(in->Indirect);
909b8e80941Smrg
910b8e80941Smrg   return rv;
911b8e80941Smrg}
912b8e80941Smrg
913b8e80941Smrg/* convert texture operand */
914b8e80941Smrgstatic struct etna_inst_tex
915b8e80941Smrgconvert_tex(struct etna_compile *c, const struct tgsi_full_src_register *in,
916b8e80941Smrg            const struct tgsi_instruction_texture *tex)
917b8e80941Smrg{
918b8e80941Smrg   struct etna_native_reg native_reg = etna_get_src_reg(c, in->Register)->native;
919b8e80941Smrg   struct etna_inst_tex rv = {
920b8e80941Smrg      // XXX .amode (to allow for an array of samplers?)
921b8e80941Smrg      .swiz = INST_SWIZ_IDENTITY
922b8e80941Smrg   };
923b8e80941Smrg
924b8e80941Smrg   assert(native_reg.is_tex && native_reg.valid);
925b8e80941Smrg   rv.id = native_reg.id;
926b8e80941Smrg
927b8e80941Smrg   return rv;
928b8e80941Smrg}
929b8e80941Smrg
930b8e80941Smrg/* convert source operand */
931b8e80941Smrgstatic struct etna_inst_src
932b8e80941Smrgetna_create_src(const struct tgsi_full_src_register *tgsi,
933b8e80941Smrg                const struct etna_native_reg *native)
934b8e80941Smrg{
935b8e80941Smrg   const struct tgsi_src_register *reg = &tgsi->Register;
936b8e80941Smrg   struct etna_inst_src rv = {
937b8e80941Smrg      .use = 1,
938b8e80941Smrg      .swiz = INST_SWIZ(reg->SwizzleX, reg->SwizzleY, reg->SwizzleZ, reg->SwizzleW),
939b8e80941Smrg      .neg = reg->Negate,
940b8e80941Smrg      .abs = reg->Absolute,
941b8e80941Smrg      .rgroup = native->rgroup,
942b8e80941Smrg      .reg = native->id,
943b8e80941Smrg      .amode = INST_AMODE_DIRECT,
944b8e80941Smrg   };
945b8e80941Smrg
946b8e80941Smrg   assert(native->valid && !native->is_tex);
947b8e80941Smrg
948b8e80941Smrg   if (reg->Indirect)
949b8e80941Smrg      rv.amode = etna_amode(tgsi->Indirect);
950b8e80941Smrg
951b8e80941Smrg   return rv;
952b8e80941Smrg}
953b8e80941Smrg
954b8e80941Smrgstatic struct etna_inst_src
955b8e80941Smrgetna_mov_src_to_temp(struct etna_compile *c, struct etna_inst_src src,
956b8e80941Smrg                     struct etna_native_reg temp)
957b8e80941Smrg{
958b8e80941Smrg   struct etna_inst mov = { };
959b8e80941Smrg
960b8e80941Smrg   mov.opcode = INST_OPCODE_MOV;
961b8e80941Smrg   mov.sat = 0;
962b8e80941Smrg   mov.dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
963b8e80941Smrg                                      INST_COMPS_Z | INST_COMPS_W);
964b8e80941Smrg   mov.src[2] = src;
965b8e80941Smrg   emit_inst(c, &mov);
966b8e80941Smrg
967b8e80941Smrg   src.swiz = INST_SWIZ_IDENTITY;
968b8e80941Smrg   src.neg = src.abs = 0;
969b8e80941Smrg   src.rgroup = temp.rgroup;
970b8e80941Smrg   src.reg = temp.id;
971b8e80941Smrg
972b8e80941Smrg   return src;
973b8e80941Smrg}
974b8e80941Smrg
975b8e80941Smrgstatic struct etna_inst_src
976b8e80941Smrgetna_mov_src(struct etna_compile *c, struct etna_inst_src src)
977b8e80941Smrg{
978b8e80941Smrg   struct etna_native_reg temp = etna_compile_get_inner_temp(c);
979b8e80941Smrg
980b8e80941Smrg   return etna_mov_src_to_temp(c, src, temp);
981b8e80941Smrg}
982b8e80941Smrg
983b8e80941Smrgstatic bool
984b8e80941Smrgetna_src_uniforms_conflict(struct etna_inst_src a, struct etna_inst_src b)
985b8e80941Smrg{
986b8e80941Smrg   return etna_rgroup_is_uniform(a.rgroup) &&
987b8e80941Smrg          etna_rgroup_is_uniform(b.rgroup) &&
988b8e80941Smrg          (a.rgroup != b.rgroup || a.reg != b.reg);
989b8e80941Smrg}
990b8e80941Smrg
991b8e80941Smrg/* create a new label */
992b8e80941Smrgstatic unsigned int
993b8e80941Smrgalloc_new_label(struct etna_compile *c)
994b8e80941Smrg{
995b8e80941Smrg   struct etna_compile_label label = {
996b8e80941Smrg      .inst_idx = -1, /* start by point to no specific instruction */
997b8e80941Smrg   };
998b8e80941Smrg
999b8e80941Smrg   array_insert(c->labels, label);
1000b8e80941Smrg
1001b8e80941Smrg   return c->labels_count - 1;
1002b8e80941Smrg}
1003b8e80941Smrg
/* Place `label` at the current instruction pointer: its inst_idx becomes
 * c->inst_ptr, i.e. the index the next emitted instruction will get. */
static void
label_place(struct etna_compile *c, struct etna_compile_label *label)
{
   label->inst_idx = c->inst_ptr;
}
1010b8e80941Smrg
/* Mark a use of label `lbl_idx` at the current instruction.
 * The target of the label will be filled in in the marked instruction's
 * src2.imm slot as soon as the value becomes known.
 *
 * This records the label for the instruction at c->inst_ptr, so it has to be
 * called before the instruction that refers to the label is emitted.
 */
static void
label_mark_use(struct etna_compile *c, int lbl_idx)
{
   assert(c->inst_ptr < ETNA_MAX_INSTRUCTIONS);
   c->lbl_usage[c->inst_ptr] = lbl_idx;
}
1022b8e80941Smrg
1023b8e80941Smrg/* walk the frame stack and return first frame with matching type */
1024b8e80941Smrgstatic struct etna_compile_frame *
1025b8e80941Smrgfind_frame(struct etna_compile *c, enum etna_compile_frame_type type)
1026b8e80941Smrg{
1027b8e80941Smrg   for (int sp = c->frame_sp; sp >= 0; sp--)
1028b8e80941Smrg      if (c->frame_stack[sp].type == type)
1029b8e80941Smrg         return &c->frame_stack[sp];
1030b8e80941Smrg
1031b8e80941Smrg   assert(0);
1032b8e80941Smrg   return NULL;
1033b8e80941Smrg}
1034b8e80941Smrg
/* Describes how one TGSI instruction is translated. */
struct instr_translater {
   /* Translation callback. Receives this entry, the compile state, the TGSI
    * instruction and an array of etna source operands for it. */
   void (*fxn)(const struct instr_translater *t, struct etna_compile *c,
               const struct tgsi_full_instruction *inst,
               struct etna_inst_src *src);
   /* presumably the TGSI opcode this entry belongs to -- not referenced by
    * the callbacks visible here; confirm against the translation table */
   unsigned tgsi_opc;
   /* etna opcode placed in the emitted instruction (see trans_instr and
    * trans_deriv) */
   uint8_t opc;

   /* tgsi src -> etna src swizzle: src[i] is the etna source slot that TGSI
    * source i is stored in; trans_instr asserts it is not -1 for every
    * source the opcode has */
   int src[3];

   /* condition code placed in the emitted instruction (see trans_instr and
    * trans_min_max) */
   unsigned cond;
};
1047b8e80941Smrg
1048b8e80941Smrgstatic void
1049b8e80941Smrgtrans_instr(const struct instr_translater *t, struct etna_compile *c,
1050b8e80941Smrg            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1051b8e80941Smrg{
1052b8e80941Smrg   const struct tgsi_opcode_info *info = tgsi_get_opcode_info(inst->Instruction.Opcode);
1053b8e80941Smrg   struct etna_inst instr = { };
1054b8e80941Smrg
1055b8e80941Smrg   instr.opcode = t->opc;
1056b8e80941Smrg   instr.cond = t->cond;
1057b8e80941Smrg   instr.sat = inst->Instruction.Saturate;
1058b8e80941Smrg
1059b8e80941Smrg   assert(info->num_dst <= 1);
1060b8e80941Smrg   if (info->num_dst)
1061b8e80941Smrg      instr.dst = convert_dst(c, &inst->Dst[0]);
1062b8e80941Smrg
1063b8e80941Smrg   assert(info->num_src <= ETNA_NUM_SRC);
1064b8e80941Smrg
1065b8e80941Smrg   for (unsigned i = 0; i < info->num_src; i++) {
1066b8e80941Smrg      int swizzle = t->src[i];
1067b8e80941Smrg
1068b8e80941Smrg      assert(swizzle != -1);
1069b8e80941Smrg      instr.src[swizzle] = src[i];
1070b8e80941Smrg   }
1071b8e80941Smrg
1072b8e80941Smrg   emit_inst(c, &instr);
1073b8e80941Smrg}
1074b8e80941Smrg
1075b8e80941Smrgstatic void
1076b8e80941Smrgtrans_min_max(const struct instr_translater *t, struct etna_compile *c,
1077b8e80941Smrg              const struct tgsi_full_instruction *inst,
1078b8e80941Smrg              struct etna_inst_src *src)
1079b8e80941Smrg{
1080b8e80941Smrg   emit_inst(c, &(struct etna_inst) {
1081b8e80941Smrg      .opcode = INST_OPCODE_SELECT,
1082b8e80941Smrg       .cond = t->cond,
1083b8e80941Smrg       .sat = inst->Instruction.Saturate,
1084b8e80941Smrg       .dst = convert_dst(c, &inst->Dst[0]),
1085b8e80941Smrg       .src[0] = src[0],
1086b8e80941Smrg       .src[1] = src[1],
1087b8e80941Smrg       .src[2] = src[0],
1088b8e80941Smrg    });
1089b8e80941Smrg}
1090b8e80941Smrg
1091b8e80941Smrgstatic void
1092b8e80941Smrgtrans_if(const struct instr_translater *t, struct etna_compile *c,
1093b8e80941Smrg         const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1094b8e80941Smrg{
1095b8e80941Smrg   struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++];
1096b8e80941Smrg   struct etna_inst_src imm_0 = alloc_imm_f32(c, 0.0f);
1097b8e80941Smrg
1098b8e80941Smrg   /* push IF to stack */
1099b8e80941Smrg   f->type = ETNA_COMPILE_FRAME_IF;
1100b8e80941Smrg   /* create "else" label */
1101b8e80941Smrg   f->lbl_else_idx = alloc_new_label(c);
1102b8e80941Smrg   f->lbl_endif_idx = -1;
1103b8e80941Smrg
1104b8e80941Smrg   /* We need to avoid the emit_inst() below becoming two instructions */
1105b8e80941Smrg   if (etna_src_uniforms_conflict(src[0], imm_0))
1106b8e80941Smrg      src[0] = etna_mov_src(c, src[0]);
1107b8e80941Smrg
1108b8e80941Smrg   /* mark position in instruction stream of label reference so that it can be
1109b8e80941Smrg    * filled in in next pass */
1110b8e80941Smrg   label_mark_use(c, f->lbl_else_idx);
1111b8e80941Smrg
1112b8e80941Smrg   /* create conditional branch to label if src0 EQ 0 */
1113b8e80941Smrg   emit_inst(c, &(struct etna_inst){
1114b8e80941Smrg      .opcode = INST_OPCODE_BRANCH,
1115b8e80941Smrg      .cond = INST_CONDITION_EQ,
1116b8e80941Smrg      .src[0] = src[0],
1117b8e80941Smrg      .src[1] = imm_0,
1118b8e80941Smrg    /* imm is filled in later */
1119b8e80941Smrg   });
1120b8e80941Smrg}
1121b8e80941Smrg
1122b8e80941Smrgstatic void
1123b8e80941Smrgtrans_else(const struct instr_translater *t, struct etna_compile *c,
1124b8e80941Smrg           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1125b8e80941Smrg{
1126b8e80941Smrg   assert(c->frame_sp > 0);
1127b8e80941Smrg   struct etna_compile_frame *f = &c->frame_stack[c->frame_sp - 1];
1128b8e80941Smrg   assert(f->type == ETNA_COMPILE_FRAME_IF);
1129b8e80941Smrg
1130b8e80941Smrg   /* create "endif" label, and branch to endif label */
1131b8e80941Smrg   f->lbl_endif_idx = alloc_new_label(c);
1132b8e80941Smrg   label_mark_use(c, f->lbl_endif_idx);
1133b8e80941Smrg   emit_inst(c, &(struct etna_inst) {
1134b8e80941Smrg      .opcode = INST_OPCODE_BRANCH,
1135b8e80941Smrg      .cond = INST_CONDITION_TRUE,
1136b8e80941Smrg      /* imm is filled in later */
1137b8e80941Smrg   });
1138b8e80941Smrg
1139b8e80941Smrg   /* mark "else" label at this position in instruction stream */
1140b8e80941Smrg   label_place(c, &c->labels[f->lbl_else_idx]);
1141b8e80941Smrg}
1142b8e80941Smrg
1143b8e80941Smrgstatic void
1144b8e80941Smrgtrans_endif(const struct instr_translater *t, struct etna_compile *c,
1145b8e80941Smrg            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1146b8e80941Smrg{
1147b8e80941Smrg   assert(c->frame_sp > 0);
1148b8e80941Smrg   struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp];
1149b8e80941Smrg   assert(f->type == ETNA_COMPILE_FRAME_IF);
1150b8e80941Smrg
1151b8e80941Smrg   /* assign "endif" or "else" (if no ELSE) label to current position in
1152b8e80941Smrg    * instruction stream, pop IF */
1153b8e80941Smrg   if (f->lbl_endif_idx != -1)
1154b8e80941Smrg      label_place(c, &c->labels[f->lbl_endif_idx]);
1155b8e80941Smrg   else
1156b8e80941Smrg      label_place(c, &c->labels[f->lbl_else_idx]);
1157b8e80941Smrg}
1158b8e80941Smrg
1159b8e80941Smrgstatic void
1160b8e80941Smrgtrans_loop_bgn(const struct instr_translater *t, struct etna_compile *c,
1161b8e80941Smrg               const struct tgsi_full_instruction *inst,
1162b8e80941Smrg               struct etna_inst_src *src)
1163b8e80941Smrg{
1164b8e80941Smrg   struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++];
1165b8e80941Smrg
1166b8e80941Smrg   /* push LOOP to stack */
1167b8e80941Smrg   f->type = ETNA_COMPILE_FRAME_LOOP;
1168b8e80941Smrg   f->lbl_loop_bgn_idx = alloc_new_label(c);
1169b8e80941Smrg   f->lbl_loop_end_idx = alloc_new_label(c);
1170b8e80941Smrg
1171b8e80941Smrg   label_place(c, &c->labels[f->lbl_loop_bgn_idx]);
1172b8e80941Smrg
1173b8e80941Smrg   c->num_loops++;
1174b8e80941Smrg}
1175b8e80941Smrg
1176b8e80941Smrgstatic void
1177b8e80941Smrgtrans_loop_end(const struct instr_translater *t, struct etna_compile *c,
1178b8e80941Smrg               const struct tgsi_full_instruction *inst,
1179b8e80941Smrg               struct etna_inst_src *src)
1180b8e80941Smrg{
1181b8e80941Smrg   assert(c->frame_sp > 0);
1182b8e80941Smrg   struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp];
1183b8e80941Smrg   assert(f->type == ETNA_COMPILE_FRAME_LOOP);
1184b8e80941Smrg
1185b8e80941Smrg   /* mark position in instruction stream of label reference so that it can be
1186b8e80941Smrg    * filled in in next pass */
1187b8e80941Smrg   label_mark_use(c, f->lbl_loop_bgn_idx);
1188b8e80941Smrg
1189b8e80941Smrg   /* create branch to loop_bgn label */
1190b8e80941Smrg   emit_inst(c, &(struct etna_inst) {
1191b8e80941Smrg      .opcode = INST_OPCODE_BRANCH,
1192b8e80941Smrg      .cond = INST_CONDITION_TRUE,
1193b8e80941Smrg      .src[0] = src[0],
1194b8e80941Smrg      /* imm is filled in later */
1195b8e80941Smrg   });
1196b8e80941Smrg
1197b8e80941Smrg   label_place(c, &c->labels[f->lbl_loop_end_idx]);
1198b8e80941Smrg}
1199b8e80941Smrg
1200b8e80941Smrgstatic void
1201b8e80941Smrgtrans_brk(const struct instr_translater *t, struct etna_compile *c,
1202b8e80941Smrg          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1203b8e80941Smrg{
1204b8e80941Smrg   assert(c->frame_sp > 0);
1205b8e80941Smrg   struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP);
1206b8e80941Smrg
1207b8e80941Smrg   /* mark position in instruction stream of label reference so that it can be
1208b8e80941Smrg    * filled in in next pass */
1209b8e80941Smrg   label_mark_use(c, f->lbl_loop_end_idx);
1210b8e80941Smrg
1211b8e80941Smrg   /* create branch to loop_end label */
1212b8e80941Smrg   emit_inst(c, &(struct etna_inst) {
1213b8e80941Smrg      .opcode = INST_OPCODE_BRANCH,
1214b8e80941Smrg      .cond = INST_CONDITION_TRUE,
1215b8e80941Smrg      .src[0] = src[0],
1216b8e80941Smrg      /* imm is filled in later */
1217b8e80941Smrg   });
1218b8e80941Smrg}
1219b8e80941Smrg
1220b8e80941Smrgstatic void
1221b8e80941Smrgtrans_cont(const struct instr_translater *t, struct etna_compile *c,
1222b8e80941Smrg           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1223b8e80941Smrg{
1224b8e80941Smrg   assert(c->frame_sp > 0);
1225b8e80941Smrg   struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP);
1226b8e80941Smrg
1227b8e80941Smrg   /* mark position in instruction stream of label reference so that it can be
1228b8e80941Smrg    * filled in in next pass */
1229b8e80941Smrg   label_mark_use(c, f->lbl_loop_bgn_idx);
1230b8e80941Smrg
1231b8e80941Smrg   /* create branch to loop_end label */
1232b8e80941Smrg   emit_inst(c, &(struct etna_inst) {
1233b8e80941Smrg      .opcode = INST_OPCODE_BRANCH,
1234b8e80941Smrg      .cond = INST_CONDITION_TRUE,
1235b8e80941Smrg      .src[0] = src[0],
1236b8e80941Smrg      /* imm is filled in later */
1237b8e80941Smrg   });
1238b8e80941Smrg}
1239b8e80941Smrg
1240b8e80941Smrgstatic void
1241b8e80941Smrgtrans_deriv(const struct instr_translater *t, struct etna_compile *c,
1242b8e80941Smrg            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1243b8e80941Smrg{
1244b8e80941Smrg   emit_inst(c, &(struct etna_inst) {
1245b8e80941Smrg      .opcode = t->opc,
1246b8e80941Smrg      .sat = inst->Instruction.Saturate,
1247b8e80941Smrg      .dst = convert_dst(c, &inst->Dst[0]),
1248b8e80941Smrg      .src[0] = src[0],
1249b8e80941Smrg      .src[2] = src[0],
1250b8e80941Smrg   });
1251b8e80941Smrg}
1252b8e80941Smrg
1253b8e80941Smrgstatic void
1254b8e80941Smrgtrans_arl(const struct instr_translater *t, struct etna_compile *c,
1255b8e80941Smrg          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1256b8e80941Smrg{
1257b8e80941Smrg   struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1258b8e80941Smrg   struct etna_inst arl = { };
1259b8e80941Smrg   struct etna_inst_dst dst;
1260b8e80941Smrg
1261b8e80941Smrg   dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z |
1262b8e80941Smrg                                  INST_COMPS_W);
1263b8e80941Smrg
1264b8e80941Smrg   if (c->specs->has_sign_floor_ceil) {
1265b8e80941Smrg      struct etna_inst floor = { };
1266b8e80941Smrg
1267b8e80941Smrg      floor.opcode = INST_OPCODE_FLOOR;
1268b8e80941Smrg      floor.src[2] = src[0];
1269b8e80941Smrg      floor.dst = dst;
1270b8e80941Smrg
1271b8e80941Smrg      emit_inst(c, &floor);
1272b8e80941Smrg   } else {
1273b8e80941Smrg      struct etna_inst floor[2] = { };
1274b8e80941Smrg
1275b8e80941Smrg      floor[0].opcode = INST_OPCODE_FRC;
1276b8e80941Smrg      floor[0].sat = inst->Instruction.Saturate;
1277b8e80941Smrg      floor[0].dst = dst;
1278b8e80941Smrg      floor[0].src[2] = src[0];
1279b8e80941Smrg
1280b8e80941Smrg      floor[1].opcode = INST_OPCODE_ADD;
1281b8e80941Smrg      floor[1].sat = inst->Instruction.Saturate;
1282b8e80941Smrg      floor[1].dst = dst;
1283b8e80941Smrg      floor[1].src[0] = src[0];
1284b8e80941Smrg      floor[1].src[2].use = 1;
1285b8e80941Smrg      floor[1].src[2].swiz = INST_SWIZ_IDENTITY;
1286b8e80941Smrg      floor[1].src[2].neg = 1;
1287b8e80941Smrg      floor[1].src[2].rgroup = temp.rgroup;
1288b8e80941Smrg      floor[1].src[2].reg = temp.id;
1289b8e80941Smrg
1290b8e80941Smrg      emit_inst(c, &floor[0]);
1291b8e80941Smrg      emit_inst(c, &floor[1]);
1292b8e80941Smrg   }
1293b8e80941Smrg
1294b8e80941Smrg   arl.opcode = INST_OPCODE_MOVAR;
1295b8e80941Smrg   arl.sat = inst->Instruction.Saturate;
1296b8e80941Smrg   arl.dst = convert_dst(c, &inst->Dst[0]);
1297b8e80941Smrg   arl.src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);
1298b8e80941Smrg
1299b8e80941Smrg   emit_inst(c, &arl);
1300b8e80941Smrg}
1301b8e80941Smrg
1302b8e80941Smrgstatic void
1303b8e80941Smrgtrans_lrp(const struct instr_translater *t, struct etna_compile *c,
1304b8e80941Smrg          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1305b8e80941Smrg{
1306b8e80941Smrg   /* dst = src0 * src1 + (1 - src0) * src2
1307b8e80941Smrg    *     => src0 * src1 - (src0 - 1) * src2
1308b8e80941Smrg    *     => src0 * src1 - (src0 * src2 - src2)
1309b8e80941Smrg    * MAD tTEMP.xyzw, tSRC0.xyzw, tSRC2.xyzw, -tSRC2.xyzw
1310b8e80941Smrg    * MAD tDST.xyzw, tSRC0.xyzw, tSRC1.xyzw, -tTEMP.xyzw
1311b8e80941Smrg    */
1312b8e80941Smrg   struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1313b8e80941Smrg   if (etna_src_uniforms_conflict(src[0], src[1]) ||
1314b8e80941Smrg       etna_src_uniforms_conflict(src[0], src[2])) {
1315b8e80941Smrg      src[0] = etna_mov_src(c, src[0]);
1316b8e80941Smrg   }
1317b8e80941Smrg
1318b8e80941Smrg   struct etna_inst mad[2] = { };
1319b8e80941Smrg   mad[0].opcode = INST_OPCODE_MAD;
1320b8e80941Smrg   mad[0].sat = 0;
1321b8e80941Smrg   mad[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
1322b8e80941Smrg                                         INST_COMPS_Z | INST_COMPS_W);
1323b8e80941Smrg   mad[0].src[0] = src[0];
1324b8e80941Smrg   mad[0].src[1] = src[2];
1325b8e80941Smrg   mad[0].src[2] = negate(src[2]);
1326b8e80941Smrg   mad[1].opcode = INST_OPCODE_MAD;
1327b8e80941Smrg   mad[1].sat = inst->Instruction.Saturate;
1328b8e80941Smrg   mad[1].dst = convert_dst(c, &inst->Dst[0]), mad[1].src[0] = src[0];
1329b8e80941Smrg   mad[1].src[1] = src[1];
1330b8e80941Smrg   mad[1].src[2] = negate(etna_native_to_src(temp, INST_SWIZ_IDENTITY));
1331b8e80941Smrg
1332b8e80941Smrg   emit_inst(c, &mad[0]);
1333b8e80941Smrg   emit_inst(c, &mad[1]);
1334b8e80941Smrg}
1335b8e80941Smrg
1336b8e80941Smrgstatic void
1337b8e80941Smrgtrans_lit(const struct instr_translater *t, struct etna_compile *c,
1338b8e80941Smrg          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1339b8e80941Smrg{
1340b8e80941Smrg   /* SELECT.LT tmp._y__, 0, src.yyyy, 0
1341b8e80941Smrg    *  - can be eliminated if src.y is a uniform and >= 0
1342b8e80941Smrg    * SELECT.GT tmp.___w, 128, src.wwww, 128
1343b8e80941Smrg    * SELECT.LT tmp.___w, -128, tmp.wwww, -128
1344b8e80941Smrg    *  - can be eliminated if src.w is a uniform and fits clamp
1345b8e80941Smrg    * LOG tmp.x, void, void, tmp.yyyy
1346b8e80941Smrg    * MUL tmp.x, tmp.xxxx, tmp.wwww, void
1347b8e80941Smrg    * LITP dst, undef, src.xxxx, tmp.xxxx
1348b8e80941Smrg    */
1349b8e80941Smrg   struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c);
1350b8e80941Smrg   struct etna_inst_src src_y = { };
1351b8e80941Smrg
1352b8e80941Smrg   if (!etna_rgroup_is_uniform(src[0].rgroup)) {
1353b8e80941Smrg      src_y = etna_native_to_src(inner_temp, SWIZZLE(Y, Y, Y, Y));
1354b8e80941Smrg
1355b8e80941Smrg      struct etna_inst ins = { };
1356b8e80941Smrg      ins.opcode = INST_OPCODE_SELECT;
1357b8e80941Smrg      ins.cond = INST_CONDITION_LT;
1358b8e80941Smrg      ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_Y);
1359b8e80941Smrg      ins.src[0] = ins.src[2] = alloc_imm_f32(c, 0.0);
1360b8e80941Smrg      ins.src[1] = swizzle(src[0], SWIZZLE(Y, Y, Y, Y));
1361b8e80941Smrg      emit_inst(c, &ins);
1362b8e80941Smrg   } else if (uif(get_imm_u32(c, &src[0], 1)) < 0)
1363b8e80941Smrg      src_y = alloc_imm_f32(c, 0.0);
1364b8e80941Smrg   else
1365b8e80941Smrg      src_y = swizzle(src[0], SWIZZLE(Y, Y, Y, Y));
1366b8e80941Smrg
1367b8e80941Smrg   struct etna_inst_src src_w = { };
1368b8e80941Smrg
1369b8e80941Smrg   if (!etna_rgroup_is_uniform(src[0].rgroup)) {
1370b8e80941Smrg      src_w = etna_native_to_src(inner_temp, SWIZZLE(W, W, W, W));
1371b8e80941Smrg
1372b8e80941Smrg      struct etna_inst ins = { };
1373b8e80941Smrg      ins.opcode = INST_OPCODE_SELECT;
1374b8e80941Smrg      ins.cond = INST_CONDITION_GT;
1375b8e80941Smrg      ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_W);
1376b8e80941Smrg      ins.src[0] = ins.src[2] = alloc_imm_f32(c, 128.);
1377b8e80941Smrg      ins.src[1] = swizzle(src[0], SWIZZLE(W, W, W, W));
1378b8e80941Smrg      emit_inst(c, &ins);
1379b8e80941Smrg      ins.cond = INST_CONDITION_LT;
1380b8e80941Smrg      ins.src[0].neg = !ins.src[0].neg;
1381b8e80941Smrg      ins.src[2].neg = !ins.src[2].neg;
1382b8e80941Smrg      ins.src[1] = src_w;
1383b8e80941Smrg      emit_inst(c, &ins);
1384b8e80941Smrg   } else if (uif(get_imm_u32(c, &src[0], 3)) < -128.)
1385b8e80941Smrg      src_w = alloc_imm_f32(c, -128.);
1386b8e80941Smrg   else if (uif(get_imm_u32(c, &src[0], 3)) > 128.)
1387b8e80941Smrg      src_w = alloc_imm_f32(c, 128.);
1388b8e80941Smrg   else
1389b8e80941Smrg      src_w = swizzle(src[0], SWIZZLE(W, W, W, W));
1390b8e80941Smrg
1391b8e80941Smrg   if (c->specs->has_new_transcendentals) { /* Alternative LOG sequence */
1392b8e80941Smrg      emit_inst(c, &(struct etna_inst) {
1393b8e80941Smrg         .opcode = INST_OPCODE_LOG,
1394b8e80941Smrg         .dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y),
1395b8e80941Smrg         .src[2] = src_y,
1396b8e80941Smrg         .tex = { .amode=1 }, /* Unknown bit needs to be set */
1397b8e80941Smrg      });
1398b8e80941Smrg      emit_inst(c, &(struct etna_inst) {
1399b8e80941Smrg         .opcode = INST_OPCODE_MUL,
1400b8e80941Smrg         .dst = etna_native_to_dst(inner_temp, INST_COMPS_X),
1401b8e80941Smrg         .src[0] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
1402b8e80941Smrg         .src[1] = etna_native_to_src(inner_temp, SWIZZLE(Y, Y, Y, Y)),
1403b8e80941Smrg      });
1404b8e80941Smrg   } else {
1405b8e80941Smrg      struct etna_inst ins[3] = { };
1406b8e80941Smrg      ins[0].opcode = INST_OPCODE_LOG;
1407b8e80941Smrg      ins[0].dst = etna_native_to_dst(inner_temp, INST_COMPS_X);
1408b8e80941Smrg      ins[0].src[2] = src_y;
1409b8e80941Smrg
1410b8e80941Smrg      emit_inst(c, &ins[0]);
1411b8e80941Smrg   }
1412b8e80941Smrg   emit_inst(c, &(struct etna_inst) {
1413b8e80941Smrg      .opcode = INST_OPCODE_MUL,
1414b8e80941Smrg      .sat = 0,
1415b8e80941Smrg      .dst = etna_native_to_dst(inner_temp, INST_COMPS_X),
1416b8e80941Smrg      .src[0] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
1417b8e80941Smrg      .src[1] = src_w,
1418b8e80941Smrg   });
1419b8e80941Smrg   emit_inst(c, &(struct etna_inst) {
1420b8e80941Smrg      .opcode = INST_OPCODE_LITP,
1421b8e80941Smrg      .sat = 0,
1422b8e80941Smrg      .dst = convert_dst(c, &inst->Dst[0]),
1423b8e80941Smrg      .src[0] = swizzle(src[0], SWIZZLE(X, X, X, X)),
1424b8e80941Smrg      .src[1] = swizzle(src[0], SWIZZLE(X, X, X, X)),
1425b8e80941Smrg      .src[2] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
1426b8e80941Smrg   });
1427b8e80941Smrg}
1428b8e80941Smrg
1429b8e80941Smrgstatic void
1430b8e80941Smrgtrans_ssg(const struct instr_translater *t, struct etna_compile *c,
1431b8e80941Smrg          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1432b8e80941Smrg{
1433b8e80941Smrg   if (c->specs->has_sign_floor_ceil) {
1434b8e80941Smrg      emit_inst(c, &(struct etna_inst){
1435b8e80941Smrg         .opcode = INST_OPCODE_SIGN,
1436b8e80941Smrg         .sat = inst->Instruction.Saturate,
1437b8e80941Smrg         .dst = convert_dst(c, &inst->Dst[0]),
1438b8e80941Smrg         .src[2] = src[0],
1439b8e80941Smrg      });
1440b8e80941Smrg   } else {
1441b8e80941Smrg      struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1442b8e80941Smrg      struct etna_inst ins[2] = { };
1443b8e80941Smrg
1444b8e80941Smrg      ins[0].opcode = INST_OPCODE_SET;
1445b8e80941Smrg      ins[0].cond = INST_CONDITION_NZ;
1446b8e80941Smrg      ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
1447b8e80941Smrg                                            INST_COMPS_Z | INST_COMPS_W);
1448b8e80941Smrg      ins[0].src[0] = src[0];
1449b8e80941Smrg
1450b8e80941Smrg      ins[1].opcode = INST_OPCODE_SELECT;
1451b8e80941Smrg      ins[1].cond = INST_CONDITION_LZ;
1452b8e80941Smrg      ins[1].sat = inst->Instruction.Saturate;
1453b8e80941Smrg      ins[1].dst = convert_dst(c, &inst->Dst[0]);
1454b8e80941Smrg      ins[1].src[0] = src[0];
1455b8e80941Smrg      ins[1].src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);
1456b8e80941Smrg      ins[1].src[1] = negate(ins[1].src[2]);
1457b8e80941Smrg
1458b8e80941Smrg      emit_inst(c, &ins[0]);
1459b8e80941Smrg      emit_inst(c, &ins[1]);
1460b8e80941Smrg   }
1461b8e80941Smrg}
1462b8e80941Smrg
1463b8e80941Smrgstatic void
1464b8e80941Smrgtrans_trig(const struct instr_translater *t, struct etna_compile *c,
1465b8e80941Smrg           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1466b8e80941Smrg{
1467b8e80941Smrg   if (c->specs->has_new_transcendentals) { /* Alternative SIN/COS */
1468b8e80941Smrg      /* On newer chips alternative SIN/COS instructions are implemented,
1469b8e80941Smrg       * which:
1470b8e80941Smrg       * - Need their input scaled by 1/pi instead of 2/pi
1471b8e80941Smrg       * - Output an x and y component, which need to be multiplied to
1472b8e80941Smrg       *   get the result
1473b8e80941Smrg       */
1474b8e80941Smrg      struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xyz */
1475b8e80941Smrg      emit_inst(c, &(struct etna_inst) {
1476b8e80941Smrg         .opcode = INST_OPCODE_MUL,
1477b8e80941Smrg         .sat = 0,
1478b8e80941Smrg         .dst = etna_native_to_dst(temp, INST_COMPS_Z),
1479b8e80941Smrg         .src[0] = src[0], /* any swizzling happens here */
1480b8e80941Smrg         .src[1] = alloc_imm_f32(c, 1.0f / M_PI),
1481b8e80941Smrg      });
1482b8e80941Smrg      emit_inst(c, &(struct etna_inst) {
1483b8e80941Smrg         .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
1484b8e80941Smrg                    ? INST_OPCODE_COS
1485b8e80941Smrg                    : INST_OPCODE_SIN,
1486b8e80941Smrg         .sat = 0,
1487b8e80941Smrg         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y),
1488b8e80941Smrg         .src[2] = etna_native_to_src(temp, SWIZZLE(Z, Z, Z, Z)),
1489b8e80941Smrg         .tex = { .amode=1 }, /* Unknown bit needs to be set */
1490b8e80941Smrg      });
1491b8e80941Smrg      emit_inst(c, &(struct etna_inst) {
1492b8e80941Smrg         .opcode = INST_OPCODE_MUL,
1493b8e80941Smrg         .sat = inst->Instruction.Saturate,
1494b8e80941Smrg         .dst = convert_dst(c, &inst->Dst[0]),
1495b8e80941Smrg         .src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),
1496b8e80941Smrg         .src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),
1497b8e80941Smrg      });
1498b8e80941Smrg
1499b8e80941Smrg   } else if (c->specs->has_sin_cos_sqrt) {
1500b8e80941Smrg      struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1501b8e80941Smrg      /* add divide by PI/2, using a temp register. GC2000
1502b8e80941Smrg       * fails with src==dst for the trig instruction. */
1503b8e80941Smrg      emit_inst(c, &(struct etna_inst) {
1504b8e80941Smrg         .opcode = INST_OPCODE_MUL,
1505b8e80941Smrg         .sat = 0,
1506b8e80941Smrg         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
1507b8e80941Smrg                                         INST_COMPS_Z | INST_COMPS_W),
1508b8e80941Smrg         .src[0] = src[0], /* any swizzling happens here */
1509b8e80941Smrg         .src[1] = alloc_imm_f32(c, 2.0f / M_PI),
1510b8e80941Smrg      });
1511b8e80941Smrg      emit_inst(c, &(struct etna_inst) {
1512b8e80941Smrg         .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
1513b8e80941Smrg                    ? INST_OPCODE_COS
1514b8e80941Smrg                    : INST_OPCODE_SIN,
1515b8e80941Smrg         .sat = inst->Instruction.Saturate,
1516b8e80941Smrg         .dst = convert_dst(c, &inst->Dst[0]),
1517b8e80941Smrg         .src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY),
1518b8e80941Smrg      });
1519b8e80941Smrg   } else {
1520b8e80941Smrg      /* Implement Nick's fast sine/cosine. Taken from:
1521b8e80941Smrg       * http://forum.devmaster.net/t/fast-and-accurate-sine-cosine/9648
1522b8e80941Smrg       * A=(1/2*PI 0 1/2*PI 0) B=(0.75 0 0.5 0) C=(-4 4 X X)
1523b8e80941Smrg       *  MAD t.x_zw, src.xxxx, A, B
1524b8e80941Smrg       *  FRC t.x_z_, void, void, t.xwzw
1525b8e80941Smrg       *  MAD t.x_z_, t.xwzw, 2, -1
1526b8e80941Smrg       *  MUL t._y__, t.wzww, |t.wzww|, void  (for sin/scs)
1527b8e80941Smrg       *  DP3 t.x_z_, t.zyww, C, void         (for sin)
1528b8e80941Smrg       *  DP3 t.__z_, t.zyww, C, void         (for scs)
1529b8e80941Smrg       *  MUL t._y__, t.wxww, |t.wxww|, void  (for cos/scs)
1530b8e80941Smrg       *  DP3 t.x_z_, t.xyww, C, void         (for cos)
1531b8e80941Smrg       *  DP3 t.x___, t.xyww, C, void         (for scs)
1532b8e80941Smrg       *  MAD t._y_w, t,xxzz, |t.xxzz|, -t.xxzz
1533b8e80941Smrg       *  MAD dst, t.ywyw, .2225, t.xzxz
1534b8e80941Smrg       */
1535b8e80941Smrg      struct etna_inst *p, ins[9] = { };
1536b8e80941Smrg      struct etna_native_reg t0 = etna_compile_get_inner_temp(c);
1537b8e80941Smrg      struct etna_inst_src t0s = etna_native_to_src(t0, INST_SWIZ_IDENTITY);
1538b8e80941Smrg      struct etna_inst_src sincos[3], in = src[0];
1539b8e80941Smrg      sincos[0] = etna_imm_vec4f(c, sincos_const[0]);
1540b8e80941Smrg      sincos[1] = etna_imm_vec4f(c, sincos_const[1]);
1541b8e80941Smrg
1542b8e80941Smrg      /* A uniform source will cause the inner temp limit to
1543b8e80941Smrg       * be exceeded.  Explicitly deal with that scenario.
1544b8e80941Smrg       */
1545b8e80941Smrg      if (etna_rgroup_is_uniform(src[0].rgroup)) {
1546b8e80941Smrg         struct etna_inst ins = { };
1547b8e80941Smrg         ins.opcode = INST_OPCODE_MOV;
1548b8e80941Smrg         ins.dst = etna_native_to_dst(t0, INST_COMPS_X);
1549b8e80941Smrg         ins.src[2] = in;
1550b8e80941Smrg         emit_inst(c, &ins);
1551b8e80941Smrg         in = t0s;
1552b8e80941Smrg      }
1553b8e80941Smrg
1554b8e80941Smrg      ins[0].opcode = INST_OPCODE_MAD;
1555b8e80941Smrg      ins[0].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z | INST_COMPS_W);
1556b8e80941Smrg      ins[0].src[0] = swizzle(in, SWIZZLE(X, X, X, X));
1557b8e80941Smrg      ins[0].src[1] = swizzle(sincos[1], SWIZZLE(X, W, X, W)); /* 1/2*PI */
1558b8e80941Smrg      ins[0].src[2] = swizzle(sincos[1], SWIZZLE(Y, W, Z, W)); /* 0.75, 0, 0.5, 0 */
1559b8e80941Smrg
1560b8e80941Smrg      ins[1].opcode = INST_OPCODE_FRC;
1561b8e80941Smrg      ins[1].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
1562b8e80941Smrg      ins[1].src[2] = swizzle(t0s, SWIZZLE(X, W, Z, W));
1563b8e80941Smrg
1564b8e80941Smrg      ins[2].opcode = INST_OPCODE_MAD;
1565b8e80941Smrg      ins[2].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
1566b8e80941Smrg      ins[2].src[0] = swizzle(t0s, SWIZZLE(X, W, Z, W));
1567b8e80941Smrg      ins[2].src[1] = swizzle(sincos[0], SWIZZLE(X, X, X, X)); /* 2 */
1568b8e80941Smrg      ins[2].src[2] = swizzle(sincos[0], SWIZZLE(Y, Y, Y, Y)); /* -1 */
1569b8e80941Smrg
1570b8e80941Smrg      unsigned mul_swiz, dp3_swiz;
1571b8e80941Smrg      if (inst->Instruction.Opcode == TGSI_OPCODE_SIN) {
1572b8e80941Smrg         mul_swiz = SWIZZLE(W, Z, W, W);
1573b8e80941Smrg         dp3_swiz = SWIZZLE(Z, Y, W, W);
1574b8e80941Smrg      } else {
1575b8e80941Smrg         mul_swiz = SWIZZLE(W, X, W, W);
1576b8e80941Smrg         dp3_swiz = SWIZZLE(X, Y, W, W);
1577b8e80941Smrg      }
1578b8e80941Smrg
1579b8e80941Smrg      ins[3].opcode = INST_OPCODE_MUL;
1580b8e80941Smrg      ins[3].dst = etna_native_to_dst(t0, INST_COMPS_Y);
1581b8e80941Smrg      ins[3].src[0] = swizzle(t0s, mul_swiz);
1582b8e80941Smrg      ins[3].src[1] = absolute(ins[3].src[0]);
1583b8e80941Smrg
1584b8e80941Smrg      ins[4].opcode = INST_OPCODE_DP3;
1585b8e80941Smrg      ins[4].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
1586b8e80941Smrg      ins[4].src[0] = swizzle(t0s, dp3_swiz);
1587b8e80941Smrg      ins[4].src[1] = swizzle(sincos[0], SWIZZLE(Z, W, W, W));
1588b8e80941Smrg
1589b8e80941Smrg      p = &ins[5];
1590b8e80941Smrg      p->opcode = INST_OPCODE_MAD;
1591b8e80941Smrg      p->dst = etna_native_to_dst(t0, INST_COMPS_Y | INST_COMPS_W);
1592b8e80941Smrg      p->src[0] = swizzle(t0s, SWIZZLE(X, X, Z, Z));
1593b8e80941Smrg      p->src[1] = absolute(p->src[0]);
1594b8e80941Smrg      p->src[2] = negate(p->src[0]);
1595b8e80941Smrg
1596b8e80941Smrg      p++;
1597b8e80941Smrg      p->opcode = INST_OPCODE_MAD;
1598b8e80941Smrg      p->sat = inst->Instruction.Saturate;
1599b8e80941Smrg      p->dst = convert_dst(c, &inst->Dst[0]),
1600b8e80941Smrg      p->src[0] = swizzle(t0s, SWIZZLE(Y, W, Y, W));
1601b8e80941Smrg      p->src[1] = alloc_imm_f32(c, 0.2225);
1602b8e80941Smrg      p->src[2] = swizzle(t0s, SWIZZLE(X, Z, X, Z));
1603b8e80941Smrg
1604b8e80941Smrg      for (int i = 0; &ins[i] <= p; i++)
1605b8e80941Smrg         emit_inst(c, &ins[i]);
1606b8e80941Smrg   }
1607b8e80941Smrg}
1608b8e80941Smrg
1609b8e80941Smrgstatic void
1610b8e80941Smrgtrans_lg2(const struct instr_translater *t, struct etna_compile *c,
1611b8e80941Smrg            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1612b8e80941Smrg{
1613b8e80941Smrg   if (c->specs->has_new_transcendentals) {
1614b8e80941Smrg      /* On newer chips alternative LOG instruction is implemented,
1615b8e80941Smrg       * which outputs an x and y component, which need to be multiplied to
1616b8e80941Smrg       * get the result.
1617b8e80941Smrg       */
1618b8e80941Smrg      struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xy */
1619b8e80941Smrg      emit_inst(c, &(struct etna_inst) {
1620b8e80941Smrg         .opcode = INST_OPCODE_LOG,
1621b8e80941Smrg         .sat = 0,
1622b8e80941Smrg         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y),
1623b8e80941Smrg         .src[2] = src[0],
1624b8e80941Smrg         .tex = { .amode=1 }, /* Unknown bit needs to be set */
1625b8e80941Smrg      });
1626b8e80941Smrg      emit_inst(c, &(struct etna_inst) {
1627b8e80941Smrg         .opcode = INST_OPCODE_MUL,
1628b8e80941Smrg         .sat = inst->Instruction.Saturate,
1629b8e80941Smrg         .dst = convert_dst(c, &inst->Dst[0]),
1630b8e80941Smrg         .src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),
1631b8e80941Smrg         .src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),
1632b8e80941Smrg      });
1633b8e80941Smrg   } else {
1634b8e80941Smrg      emit_inst(c, &(struct etna_inst) {
1635b8e80941Smrg         .opcode = INST_OPCODE_LOG,
1636b8e80941Smrg         .sat = inst->Instruction.Saturate,
1637b8e80941Smrg         .dst = convert_dst(c, &inst->Dst[0]),
1638b8e80941Smrg         .src[2] = src[0],
1639b8e80941Smrg      });
1640b8e80941Smrg   }
1641b8e80941Smrg}
1642b8e80941Smrg
1643b8e80941Smrgstatic void
1644b8e80941Smrgtrans_sampler(const struct instr_translater *t, struct etna_compile *c,
1645b8e80941Smrg              const struct tgsi_full_instruction *inst,
1646b8e80941Smrg              struct etna_inst_src *src)
1647b8e80941Smrg{
1648b8e80941Smrg   /* There is no native support for GL texture rectangle coordinates, so
1649b8e80941Smrg    * we have to rescale from ([0, width], [0, height]) to ([0, 1], [0, 1]). */
1650b8e80941Smrg   if (inst->Texture.Texture == TGSI_TEXTURE_RECT) {
1651b8e80941Smrg      uint32_t unit = inst->Src[1].Register.Index;
1652b8e80941Smrg      struct etna_inst ins[2] = { };
1653b8e80941Smrg      struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1654b8e80941Smrg
1655b8e80941Smrg      ins[0].opcode = INST_OPCODE_MUL;
1656b8e80941Smrg      ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X);
1657b8e80941Smrg      ins[0].src[0] = src[0];
1658b8e80941Smrg      ins[0].src[1] = alloc_imm(c, ETNA_IMMEDIATE_TEXRECT_SCALE_X, unit);
1659b8e80941Smrg
1660b8e80941Smrg      ins[1].opcode = INST_OPCODE_MUL;
1661b8e80941Smrg      ins[1].dst = etna_native_to_dst(temp, INST_COMPS_Y);
1662b8e80941Smrg      ins[1].src[0] = src[0];
1663b8e80941Smrg      ins[1].src[1] = alloc_imm(c, ETNA_IMMEDIATE_TEXRECT_SCALE_Y, unit);
1664b8e80941Smrg
1665b8e80941Smrg      emit_inst(c, &ins[0]);
1666b8e80941Smrg      emit_inst(c, &ins[1]);
1667b8e80941Smrg
1668b8e80941Smrg      src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY); /* temp.xyzw */
1669b8e80941Smrg   }
1670b8e80941Smrg
1671b8e80941Smrg   switch (inst->Instruction.Opcode) {
1672b8e80941Smrg   case TGSI_OPCODE_TEX:
1673b8e80941Smrg      emit_inst(c, &(struct etna_inst) {
1674b8e80941Smrg         .opcode = INST_OPCODE_TEXLD,
1675b8e80941Smrg         .sat = 0,
1676b8e80941Smrg         .dst = convert_dst(c, &inst->Dst[0]),
1677b8e80941Smrg         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
1678b8e80941Smrg         .src[0] = src[0],
1679b8e80941Smrg      });
1680b8e80941Smrg      break;
1681b8e80941Smrg
1682b8e80941Smrg   case TGSI_OPCODE_TXB:
1683b8e80941Smrg      emit_inst(c, &(struct etna_inst) {
1684b8e80941Smrg         .opcode = INST_OPCODE_TEXLDB,
1685b8e80941Smrg         .sat = 0,
1686b8e80941Smrg         .dst = convert_dst(c, &inst->Dst[0]),
1687b8e80941Smrg         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
1688b8e80941Smrg         .src[0] = src[0],
1689b8e80941Smrg      });
1690b8e80941Smrg      break;
1691b8e80941Smrg
1692b8e80941Smrg   case TGSI_OPCODE_TXL:
1693b8e80941Smrg      emit_inst(c, &(struct etna_inst) {
1694b8e80941Smrg         .opcode = INST_OPCODE_TEXLDL,
1695b8e80941Smrg         .sat = 0,
1696b8e80941Smrg         .dst = convert_dst(c, &inst->Dst[0]),
1697b8e80941Smrg         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
1698b8e80941Smrg         .src[0] = src[0],
1699b8e80941Smrg      });
1700b8e80941Smrg      break;
1701b8e80941Smrg
1702b8e80941Smrg   case TGSI_OPCODE_TXP: { /* divide src.xyz by src.w */
1703b8e80941Smrg      struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1704b8e80941Smrg
1705b8e80941Smrg      emit_inst(c, &(struct etna_inst) {
1706b8e80941Smrg         .opcode = INST_OPCODE_RCP,
1707b8e80941Smrg         .sat = 0,
1708b8e80941Smrg         .dst = etna_native_to_dst(temp, INST_COMPS_W), /* tmp.w */
1709b8e80941Smrg         .src[2] = swizzle(src[0], SWIZZLE(W, W, W, W)),
1710b8e80941Smrg      });
1711b8e80941Smrg      emit_inst(c, &(struct etna_inst) {
1712b8e80941Smrg         .opcode = INST_OPCODE_MUL,
1713b8e80941Smrg         .sat = 0,
1714b8e80941Smrg         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
1715b8e80941Smrg                                         INST_COMPS_Z), /* tmp.xyz */
1716b8e80941Smrg         .src[0] = etna_native_to_src(temp, SWIZZLE(W, W, W, W)),
1717b8e80941Smrg         .src[1] = src[0], /* src.xyzw */
1718b8e80941Smrg      });
1719b8e80941Smrg      emit_inst(c, &(struct etna_inst) {
1720b8e80941Smrg         .opcode = INST_OPCODE_TEXLD,
1721b8e80941Smrg         .sat = 0,
1722b8e80941Smrg         .dst = convert_dst(c, &inst->Dst[0]),
1723b8e80941Smrg         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
1724b8e80941Smrg         .src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY), /* tmp.xyzw */
1725b8e80941Smrg      });
1726b8e80941Smrg   } break;
1727b8e80941Smrg
1728b8e80941Smrg   default:
1729b8e80941Smrg      BUG("Unhandled instruction %s",
1730b8e80941Smrg          tgsi_get_opcode_name(inst->Instruction.Opcode));
1731b8e80941Smrg      assert(0);
1732b8e80941Smrg      break;
1733b8e80941Smrg   }
1734b8e80941Smrg}
1735b8e80941Smrg
/* Translator that emits no native code; all arguments are ignored. */
static void
trans_dummy(const struct instr_translater *t, struct etna_compile *c,
            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   (void)t;
   (void)c;
   (void)inst;
   (void)src;
}
1742b8e80941Smrg
1743b8e80941Smrgstatic const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
1744b8e80941Smrg#define INSTR(n, f, ...) \
1745b8e80941Smrg   [TGSI_OPCODE_##n] = {.fxn = (f), .tgsi_opc = TGSI_OPCODE_##n, ##__VA_ARGS__}
1746b8e80941Smrg
1747b8e80941Smrg   INSTR(MOV, trans_instr, .opc = INST_OPCODE_MOV, .src = {2, -1, -1}),
1748b8e80941Smrg   INSTR(RCP, trans_instr, .opc = INST_OPCODE_RCP, .src = {2, -1, -1}),
1749b8e80941Smrg   INSTR(RSQ, trans_instr, .opc = INST_OPCODE_RSQ, .src = {2, -1, -1}),
1750b8e80941Smrg   INSTR(MUL, trans_instr, .opc = INST_OPCODE_MUL, .src = {0, 1, -1}),
1751b8e80941Smrg   INSTR(ADD, trans_instr, .opc = INST_OPCODE_ADD, .src = {0, 2, -1}),
1752b8e80941Smrg   INSTR(DP2, trans_instr, .opc = INST_OPCODE_DP2, .src = {0, 1, -1}),
1753b8e80941Smrg   INSTR(DP3, trans_instr, .opc = INST_OPCODE_DP3, .src = {0, 1, -1}),
1754b8e80941Smrg   INSTR(DP4, trans_instr, .opc = INST_OPCODE_DP4, .src = {0, 1, -1}),
1755b8e80941Smrg   INSTR(DST, trans_instr, .opc = INST_OPCODE_DST, .src = {0, 1, -1}),
1756b8e80941Smrg   INSTR(MAD, trans_instr, .opc = INST_OPCODE_MAD, .src = {0, 1, 2}),
1757b8e80941Smrg   INSTR(EX2, trans_instr, .opc = INST_OPCODE_EXP, .src = {2, -1, -1}),
1758b8e80941Smrg   INSTR(LG2, trans_lg2),
1759b8e80941Smrg   INSTR(SQRT, trans_instr, .opc = INST_OPCODE_SQRT, .src = {2, -1, -1}),
1760b8e80941Smrg   INSTR(FRC, trans_instr, .opc = INST_OPCODE_FRC, .src = {2, -1, -1}),
1761b8e80941Smrg   INSTR(CEIL, trans_instr, .opc = INST_OPCODE_CEIL, .src = {2, -1, -1}),
1762b8e80941Smrg   INSTR(FLR, trans_instr, .opc = INST_OPCODE_FLOOR, .src = {2, -1, -1}),
1763b8e80941Smrg   INSTR(CMP, trans_instr, .opc = INST_OPCODE_SELECT, .src = {0, 1, 2}, .cond = INST_CONDITION_LZ),
1764b8e80941Smrg
1765b8e80941Smrg   INSTR(KILL, trans_instr, .opc = INST_OPCODE_TEXKILL),
1766b8e80941Smrg   INSTR(KILL_IF, trans_instr, .opc = INST_OPCODE_TEXKILL, .src = {0, -1, -1}, .cond = INST_CONDITION_LZ),
1767b8e80941Smrg
1768b8e80941Smrg   INSTR(DDX, trans_deriv, .opc = INST_OPCODE_DSX),
1769b8e80941Smrg   INSTR(DDY, trans_deriv, .opc = INST_OPCODE_DSY),
1770b8e80941Smrg
1771b8e80941Smrg   INSTR(IF, trans_if),
1772b8e80941Smrg   INSTR(ELSE, trans_else),
1773b8e80941Smrg   INSTR(ENDIF, trans_endif),
1774b8e80941Smrg
1775b8e80941Smrg   INSTR(BGNLOOP, trans_loop_bgn),
1776b8e80941Smrg   INSTR(ENDLOOP, trans_loop_end),
1777b8e80941Smrg   INSTR(BRK, trans_brk),
1778b8e80941Smrg   INSTR(CONT, trans_cont),
1779b8e80941Smrg
1780b8e80941Smrg   INSTR(MIN, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_GT),
1781b8e80941Smrg   INSTR(MAX, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_LT),
1782b8e80941Smrg
1783b8e80941Smrg   INSTR(ARL, trans_arl),
1784b8e80941Smrg   INSTR(LRP, trans_lrp),
1785b8e80941Smrg   INSTR(LIT, trans_lit),
1786b8e80941Smrg   INSTR(SSG, trans_ssg),
1787b8e80941Smrg
1788b8e80941Smrg   INSTR(SIN, trans_trig),
1789b8e80941Smrg   INSTR(COS, trans_trig),
1790b8e80941Smrg
1791b8e80941Smrg   INSTR(SLT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LT),
1792b8e80941Smrg   INSTR(SGE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GE),
1793b8e80941Smrg   INSTR(SEQ, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_EQ),
1794b8e80941Smrg   INSTR(SGT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GT),
1795b8e80941Smrg   INSTR(SLE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LE),
1796b8e80941Smrg   INSTR(SNE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_NE),
1797b8e80941Smrg
1798b8e80941Smrg   INSTR(TEX, trans_sampler),
1799b8e80941Smrg   INSTR(TXB, trans_sampler),
1800b8e80941Smrg   INSTR(TXL, trans_sampler),
1801b8e80941Smrg   INSTR(TXP, trans_sampler),
1802b8e80941Smrg
1803b8e80941Smrg   INSTR(NOP, trans_dummy),
1804b8e80941Smrg   INSTR(END, trans_dummy),
1805b8e80941Smrg};
1806b8e80941Smrg
1807b8e80941Smrg/* Pass -- compile instructions */
1808b8e80941Smrgstatic void
1809b8e80941Smrgetna_compile_pass_generate_code(struct etna_compile *c)
1810b8e80941Smrg{
1811b8e80941Smrg   struct tgsi_parse_context ctx = { };
1812b8e80941Smrg   MAYBE_UNUSED unsigned status = tgsi_parse_init(&ctx, c->tokens);
1813b8e80941Smrg   assert(status == TGSI_PARSE_OK);
1814b8e80941Smrg
1815b8e80941Smrg   int inst_idx = 0;
1816b8e80941Smrg   while (!tgsi_parse_end_of_tokens(&ctx)) {
1817b8e80941Smrg      const struct tgsi_full_instruction *inst = 0;
1818b8e80941Smrg
1819b8e80941Smrg      /* No inner temps used yet for this instruction, clear counter */
1820b8e80941Smrg      c->inner_temps = 0;
1821b8e80941Smrg
1822b8e80941Smrg      tgsi_parse_token(&ctx);
1823b8e80941Smrg
1824b8e80941Smrg      switch (ctx.FullToken.Token.Type) {
1825b8e80941Smrg      case TGSI_TOKEN_TYPE_INSTRUCTION:
1826b8e80941Smrg         /* iterate over operands */
1827b8e80941Smrg         inst = &ctx.FullToken.FullInstruction;
1828b8e80941Smrg         if (c->dead_inst[inst_idx]) { /* skip dead instructions */
1829b8e80941Smrg            inst_idx++;
1830b8e80941Smrg            continue;
1831b8e80941Smrg         }
1832b8e80941Smrg
1833b8e80941Smrg         /* Lookup the TGSI information and generate the source arguments */
1834b8e80941Smrg         struct etna_inst_src src[ETNA_NUM_SRC];
1835b8e80941Smrg         memset(src, 0, sizeof(src));
1836b8e80941Smrg
1837b8e80941Smrg         const struct tgsi_opcode_info *tgsi = tgsi_get_opcode_info(inst->Instruction.Opcode);
1838b8e80941Smrg
1839b8e80941Smrg         for (int i = 0; i < tgsi->num_src && i < ETNA_NUM_SRC; i++) {
1840b8e80941Smrg            const struct tgsi_full_src_register *reg = &inst->Src[i];
1841b8e80941Smrg            const struct etna_native_reg *n = &etna_get_src_reg(c, reg->Register)->native;
1842b8e80941Smrg
1843b8e80941Smrg            if (!n->valid || n->is_tex)
1844b8e80941Smrg               continue;
1845b8e80941Smrg
1846b8e80941Smrg            src[i] = etna_create_src(reg, n);
1847b8e80941Smrg         }
1848b8e80941Smrg
1849b8e80941Smrg         const unsigned opc = inst->Instruction.Opcode;
1850b8e80941Smrg         const struct instr_translater *t = &translaters[opc];
1851b8e80941Smrg
1852b8e80941Smrg         if (t->fxn) {
1853b8e80941Smrg            t->fxn(t, c, inst, src);
1854b8e80941Smrg
1855b8e80941Smrg            inst_idx += 1;
1856b8e80941Smrg         } else {
1857b8e80941Smrg            BUG("Unhandled instruction %s", tgsi_get_opcode_name(opc));
1858b8e80941Smrg            assert(0);
1859b8e80941Smrg         }
1860b8e80941Smrg         break;
1861b8e80941Smrg      }
1862b8e80941Smrg   }
1863b8e80941Smrg   tgsi_parse_free(&ctx);
1864b8e80941Smrg}
1865b8e80941Smrg
1866b8e80941Smrg/* Look up register by semantic */
1867b8e80941Smrgstatic struct etna_reg_desc *
1868b8e80941Smrgfind_decl_by_semantic(struct etna_compile *c, uint file, uint name, uint index)
1869b8e80941Smrg{
1870b8e80941Smrg   for (int idx = 0; idx < c->file[file].reg_size; ++idx) {
1871b8e80941Smrg      struct etna_reg_desc *reg = &c->file[file].reg[idx];
1872b8e80941Smrg
1873b8e80941Smrg      if (reg->semantic.Name == name && reg->semantic.Index == index)
1874b8e80941Smrg         return reg;
1875b8e80941Smrg   }
1876b8e80941Smrg
1877b8e80941Smrg   return NULL; /* not found */
1878b8e80941Smrg}
1879b8e80941Smrg
1880b8e80941Smrg/** Add ADD and MUL instruction to bring Z/W to 0..1 if -1..1 if needed:
1881b8e80941Smrg * - this is a vertex shader
1882b8e80941Smrg * - and this is an older GPU
1883b8e80941Smrg */
1884b8e80941Smrgstatic void
1885b8e80941Smrgetna_compile_add_z_div_if_needed(struct etna_compile *c)
1886b8e80941Smrg{
1887b8e80941Smrg   if (c->info.processor == PIPE_SHADER_VERTEX && c->specs->vs_need_z_div) {
1888b8e80941Smrg      /* find position out */
1889b8e80941Smrg      struct etna_reg_desc *pos_reg =
1890b8e80941Smrg         find_decl_by_semantic(c, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_POSITION, 0);
1891b8e80941Smrg
1892b8e80941Smrg      if (pos_reg != NULL) {
1893b8e80941Smrg         /*
1894b8e80941Smrg          * ADD tX.__z_, tX.zzzz, void, tX.wwww
1895b8e80941Smrg          * MUL tX.__z_, tX.zzzz, 0.5, void
1896b8e80941Smrg         */
1897b8e80941Smrg         emit_inst(c, &(struct etna_inst) {
1898b8e80941Smrg            .opcode = INST_OPCODE_ADD,
1899b8e80941Smrg            .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z),
1900b8e80941Smrg            .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)),
1901b8e80941Smrg            .src[2] = etna_native_to_src(pos_reg->native, SWIZZLE(W, W, W, W)),
1902b8e80941Smrg         });
1903b8e80941Smrg         emit_inst(c, &(struct etna_inst) {
1904b8e80941Smrg            .opcode = INST_OPCODE_MUL,
1905b8e80941Smrg            .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z),
1906b8e80941Smrg            .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)),
1907b8e80941Smrg            .src[1] = alloc_imm_f32(c, 0.5f),
1908b8e80941Smrg         });
1909b8e80941Smrg      }
1910b8e80941Smrg   }
1911b8e80941Smrg}
1912b8e80941Smrg
1913b8e80941Smrgstatic void
1914b8e80941Smrgetna_compile_frag_rb_swap(struct etna_compile *c)
1915b8e80941Smrg{
1916b8e80941Smrg   if (c->info.processor == PIPE_SHADER_FRAGMENT && c->key->frag_rb_swap) {
1917b8e80941Smrg      /* find color out */
1918b8e80941Smrg      struct etna_reg_desc *color_reg =
1919b8e80941Smrg         find_decl_by_semantic(c, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_COLOR, 0);
1920b8e80941Smrg
1921b8e80941Smrg      emit_inst(c, &(struct etna_inst) {
1922b8e80941Smrg         .opcode = INST_OPCODE_MOV,
1923b8e80941Smrg         .dst = etna_native_to_dst(color_reg->native, INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z | INST_COMPS_W),
1924b8e80941Smrg         .src[2] = etna_native_to_src(color_reg->native, SWIZZLE(Z, Y, X, W)),
1925b8e80941Smrg      });
1926b8e80941Smrg   }
1927b8e80941Smrg}
1928b8e80941Smrg
1929b8e80941Smrg/** add a NOP to the shader if
1930b8e80941Smrg * a) the shader is empty
1931b8e80941Smrg * or
1932b8e80941Smrg * b) there is a label at the end of the shader
1933b8e80941Smrg */
1934b8e80941Smrgstatic void
1935b8e80941Smrgetna_compile_add_nop_if_needed(struct etna_compile *c)
1936b8e80941Smrg{
1937b8e80941Smrg   bool label_at_last_inst = false;
1938b8e80941Smrg
1939b8e80941Smrg   for (int idx = 0; idx < c->labels_count; ++idx) {
1940b8e80941Smrg      if (c->labels[idx].inst_idx == c->inst_ptr)
1941b8e80941Smrg         label_at_last_inst = true;
1942b8e80941Smrg
1943b8e80941Smrg   }
1944b8e80941Smrg
1945b8e80941Smrg   if (c->inst_ptr == 0 || label_at_last_inst)
1946b8e80941Smrg      emit_inst(c, &(struct etna_inst){.opcode = INST_OPCODE_NOP});
1947b8e80941Smrg}
1948b8e80941Smrg
1949b8e80941Smrgstatic void
1950b8e80941Smrgassign_uniforms(struct etna_compile_file *file, unsigned base)
1951b8e80941Smrg{
1952b8e80941Smrg   for (int idx = 0; idx < file->reg_size; ++idx) {
1953b8e80941Smrg      file->reg[idx].native.valid = 1;
1954b8e80941Smrg      file->reg[idx].native.rgroup = INST_RGROUP_UNIFORM_0;
1955b8e80941Smrg      file->reg[idx].native.id = base + idx;
1956b8e80941Smrg   }
1957b8e80941Smrg}
1958b8e80941Smrg
1959b8e80941Smrg/* Allocate CONST and IMM to native ETNA_RGROUP_UNIFORM(x).
1960b8e80941Smrg * CONST must be consecutive as const buffers are supposed to be consecutive,
1961b8e80941Smrg * and before IMM, as this is
1962b8e80941Smrg * more convenient because is possible for the compilation process itself to
1963b8e80941Smrg * generate extra
1964b8e80941Smrg * immediates for constants such as pi, one, zero.
1965b8e80941Smrg */
1966b8e80941Smrgstatic void
1967b8e80941Smrgassign_constants_and_immediates(struct etna_compile *c)
1968b8e80941Smrg{
1969b8e80941Smrg   assign_uniforms(&c->file[TGSI_FILE_CONSTANT], 0);
1970b8e80941Smrg   /* immediates start after the constants */
1971b8e80941Smrg   c->imm_base = c->file[TGSI_FILE_CONSTANT].reg_size * 4;
1972b8e80941Smrg   assign_uniforms(&c->file[TGSI_FILE_IMMEDIATE], c->imm_base / 4);
1973b8e80941Smrg   DBG_F(ETNA_DBG_COMPILER_MSGS, "imm base: %i size: %i", c->imm_base,
1974b8e80941Smrg         c->imm_size);
1975b8e80941Smrg}
1976b8e80941Smrg
1977b8e80941Smrg/* Assign declared samplers to native texture units */
1978b8e80941Smrgstatic void
1979b8e80941Smrgassign_texture_units(struct etna_compile *c)
1980b8e80941Smrg{
1981b8e80941Smrg   uint tex_base = 0;
1982b8e80941Smrg
1983b8e80941Smrg   if (c->info.processor == PIPE_SHADER_VERTEX)
1984b8e80941Smrg      tex_base = c->specs->vertex_sampler_offset;
1985b8e80941Smrg
1986b8e80941Smrg   for (int idx = 0; idx < c->file[TGSI_FILE_SAMPLER].reg_size; ++idx) {
1987b8e80941Smrg      c->file[TGSI_FILE_SAMPLER].reg[idx].native.valid = 1;
1988b8e80941Smrg      c->file[TGSI_FILE_SAMPLER].reg[idx].native.is_tex = 1; // overrides rgroup
1989b8e80941Smrg      c->file[TGSI_FILE_SAMPLER].reg[idx].native.id = tex_base + idx;
1990b8e80941Smrg   }
1991b8e80941Smrg}
1992b8e80941Smrg
1993b8e80941Smrg/* Additional pass to fill in branch targets. This pass should be last
1994b8e80941Smrg * as no instruction reordering or removing/addition can be done anymore
1995b8e80941Smrg * once the branch targets are computed.
1996b8e80941Smrg */
1997b8e80941Smrgstatic void
1998b8e80941Smrgetna_compile_fill_in_labels(struct etna_compile *c)
1999b8e80941Smrg{
2000b8e80941Smrg   for (int idx = 0; idx < c->inst_ptr; ++idx) {
2001b8e80941Smrg      if (c->lbl_usage[idx] != -1)
2002b8e80941Smrg         etna_assemble_set_imm(&c->code[idx * 4],
2003b8e80941Smrg                               c->labels[c->lbl_usage[idx]].inst_idx);
2004b8e80941Smrg   }
2005b8e80941Smrg}
2006b8e80941Smrg
2007b8e80941Smrg/* compare two etna_native_reg structures, return true if equal */
2008b8e80941Smrgstatic bool
2009b8e80941Smrgcmp_etna_native_reg(const struct etna_native_reg to,
2010b8e80941Smrg                    const struct etna_native_reg from)
2011b8e80941Smrg{
2012b8e80941Smrg   return to.valid == from.valid && to.is_tex == from.is_tex &&
2013b8e80941Smrg          to.rgroup == from.rgroup && to.id == from.id;
2014b8e80941Smrg}
2015b8e80941Smrg
2016b8e80941Smrg/* go through all declarations and swap native registers *to* and *from* */
2017b8e80941Smrgstatic void
2018b8e80941Smrgswap_native_registers(struct etna_compile *c, const struct etna_native_reg to,
2019b8e80941Smrg                      const struct etna_native_reg from)
2020b8e80941Smrg{
2021b8e80941Smrg   if (cmp_etna_native_reg(from, to))
2022b8e80941Smrg      return; /* Nothing to do */
2023b8e80941Smrg
2024b8e80941Smrg   for (int idx = 0; idx < c->total_decls; ++idx) {
2025b8e80941Smrg      if (cmp_etna_native_reg(c->decl[idx].native, from)) {
2026b8e80941Smrg         c->decl[idx].native = to;
2027b8e80941Smrg      } else if (cmp_etna_native_reg(c->decl[idx].native, to)) {
2028b8e80941Smrg         c->decl[idx].native = from;
2029b8e80941Smrg      }
2030b8e80941Smrg   }
2031b8e80941Smrg}
2032b8e80941Smrg
2033b8e80941Smrg/* For PS we need to permute so that inputs are always in temporary 0..N-1.
2034b8e80941Smrg * Semantic POS is always t0. If that semantic is not used, avoid t0.
2035b8e80941Smrg */
2036b8e80941Smrgstatic void
2037b8e80941Smrgpermute_ps_inputs(struct etna_compile *c)
2038b8e80941Smrg{
2039b8e80941Smrg   /* Special inputs:
2040b8e80941Smrg    * gl_FragCoord  VARYING_SLOT_POS   TGSI_SEMANTIC_POSITION
2041b8e80941Smrg    * gl_PointCoord VARYING_SLOT_PNTC  TGSI_SEMANTIC_PCOORD
2042b8e80941Smrg    */
2043b8e80941Smrg   uint native_idx = 1;
2044b8e80941Smrg
2045b8e80941Smrg   for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
2046b8e80941Smrg      struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
2047b8e80941Smrg      uint input_id;
2048b8e80941Smrg      assert(reg->has_semantic);
2049b8e80941Smrg
2050b8e80941Smrg      if (!reg->active || reg->semantic.Name == TGSI_SEMANTIC_POSITION)
2051b8e80941Smrg         continue;
2052b8e80941Smrg
2053b8e80941Smrg      input_id = native_idx++;
2054b8e80941Smrg      swap_native_registers(c, etna_native_temp(input_id),
2055b8e80941Smrg                            c->file[TGSI_FILE_INPUT].reg[idx].native);
2056b8e80941Smrg   }
2057b8e80941Smrg
2058b8e80941Smrg   c->num_varyings = native_idx - 1;
2059b8e80941Smrg
2060b8e80941Smrg   if (native_idx > c->next_free_native)
2061b8e80941Smrg      c->next_free_native = native_idx;
2062b8e80941Smrg}
2063b8e80941Smrg
2064b8e80941Smrg/* fill in ps inputs into shader object */
2065b8e80941Smrgstatic void
2066b8e80941Smrgfill_in_ps_inputs(struct etna_shader_variant *sobj, struct etna_compile *c)
2067b8e80941Smrg{
2068b8e80941Smrg   struct etna_shader_io_file *sf = &sobj->infile;
2069b8e80941Smrg
2070b8e80941Smrg   sf->num_reg = 0;
2071b8e80941Smrg
2072b8e80941Smrg   for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
2073b8e80941Smrg      struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
2074b8e80941Smrg
2075b8e80941Smrg      if (reg->native.id > 0) {
2076b8e80941Smrg         assert(sf->num_reg < ETNA_NUM_INPUTS);
2077b8e80941Smrg         sf->reg[sf->num_reg].reg = reg->native.id;
2078b8e80941Smrg         sf->reg[sf->num_reg].semantic = reg->semantic;
2079b8e80941Smrg         /* convert usage mask to number of components (*=wildcard)
2080b8e80941Smrg          *   .r    (0..1)  -> 1 component
2081b8e80941Smrg          *   .*g   (2..3)  -> 2 component
2082b8e80941Smrg          *   .**b  (4..7)  -> 3 components
2083b8e80941Smrg          *   .***a (8..15) -> 4 components
2084b8e80941Smrg          */
2085b8e80941Smrg         sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask);
2086b8e80941Smrg         sf->num_reg++;
2087b8e80941Smrg      }
2088b8e80941Smrg   }
2089b8e80941Smrg
2090b8e80941Smrg   assert(sf->num_reg == c->num_varyings);
2091b8e80941Smrg   sobj->input_count_unk8 = 31; /* XXX what is this */
2092b8e80941Smrg}
2093b8e80941Smrg
2094b8e80941Smrg/* fill in output mapping for ps into shader object */
2095b8e80941Smrgstatic void
2096b8e80941Smrgfill_in_ps_outputs(struct etna_shader_variant *sobj, struct etna_compile *c)
2097b8e80941Smrg{
2098b8e80941Smrg   sobj->outfile.num_reg = 0;
2099b8e80941Smrg
2100b8e80941Smrg   for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) {
2101b8e80941Smrg      struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx];
2102b8e80941Smrg
2103b8e80941Smrg      switch (reg->semantic.Name) {
2104b8e80941Smrg      case TGSI_SEMANTIC_COLOR: /* FRAG_RESULT_COLOR */
2105b8e80941Smrg         sobj->ps_color_out_reg = reg->native.id;
2106b8e80941Smrg         break;
2107b8e80941Smrg      case TGSI_SEMANTIC_POSITION: /* FRAG_RESULT_DEPTH */
2108b8e80941Smrg         sobj->ps_depth_out_reg = reg->native.id; /* =always native reg 0, only z component should be assigned */
2109b8e80941Smrg         break;
2110b8e80941Smrg      default:
2111b8e80941Smrg         assert(0); /* only outputs supported are COLOR and POSITION at the moment */
2112b8e80941Smrg      }
2113b8e80941Smrg   }
2114b8e80941Smrg}
2115b8e80941Smrg
2116b8e80941Smrg/* fill in inputs for vs into shader object */
2117b8e80941Smrgstatic void
2118b8e80941Smrgfill_in_vs_inputs(struct etna_shader_variant *sobj, struct etna_compile *c)
2119b8e80941Smrg{
2120b8e80941Smrg   struct etna_shader_io_file *sf = &sobj->infile;
2121b8e80941Smrg
2122b8e80941Smrg   sf->num_reg = 0;
2123b8e80941Smrg   for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
2124b8e80941Smrg      struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
2125b8e80941Smrg      assert(sf->num_reg < ETNA_NUM_INPUTS);
2126b8e80941Smrg
2127b8e80941Smrg      if (!reg->native.valid)
2128b8e80941Smrg         continue;
2129b8e80941Smrg
2130b8e80941Smrg      /* XXX exclude inputs with special semantics such as gl_frontFacing */
2131b8e80941Smrg      sf->reg[sf->num_reg].reg = reg->native.id;
2132b8e80941Smrg      sf->reg[sf->num_reg].semantic = reg->semantic;
2133b8e80941Smrg      sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask);
2134b8e80941Smrg      sf->num_reg++;
2135b8e80941Smrg   }
2136b8e80941Smrg
2137b8e80941Smrg   sobj->input_count_unk8 = (sf->num_reg + 19) / 16; /* XXX what is this */
2138b8e80941Smrg}
2139b8e80941Smrg
2140b8e80941Smrg/* build two-level output index [Semantic][Index] for fast linking */
2141b8e80941Smrgstatic void
2142b8e80941Smrgbuild_output_index(struct etna_shader_variant *sobj)
2143b8e80941Smrg{
2144b8e80941Smrg   int total = 0;
2145b8e80941Smrg   int offset = 0;
2146b8e80941Smrg
2147b8e80941Smrg   for (int name = 0; name < TGSI_SEMANTIC_COUNT; ++name)
2148b8e80941Smrg      total += sobj->output_count_per_semantic[name];
2149b8e80941Smrg
2150b8e80941Smrg   sobj->output_per_semantic_list = CALLOC(total, sizeof(struct etna_shader_inout *));
2151b8e80941Smrg
2152b8e80941Smrg   for (int name = 0; name < TGSI_SEMANTIC_COUNT; ++name) {
2153b8e80941Smrg      sobj->output_per_semantic[name] = &sobj->output_per_semantic_list[offset];
2154b8e80941Smrg      offset += sobj->output_count_per_semantic[name];
2155b8e80941Smrg   }
2156b8e80941Smrg
2157b8e80941Smrg   for (int idx = 0; idx < sobj->outfile.num_reg; ++idx) {
2158b8e80941Smrg      sobj->output_per_semantic[sobj->outfile.reg[idx].semantic.Name]
2159b8e80941Smrg                               [sobj->outfile.reg[idx].semantic.Index] =
2160b8e80941Smrg         &sobj->outfile.reg[idx];
2161b8e80941Smrg   }
2162b8e80941Smrg}
2163b8e80941Smrg
2164b8e80941Smrg/* fill in outputs for vs into shader object */
2165b8e80941Smrgstatic void
2166b8e80941Smrgfill_in_vs_outputs(struct etna_shader_variant *sobj, struct etna_compile *c)
2167b8e80941Smrg{
2168b8e80941Smrg   struct etna_shader_io_file *sf = &sobj->outfile;
2169b8e80941Smrg
2170b8e80941Smrg   sf->num_reg = 0;
2171b8e80941Smrg   for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) {
2172b8e80941Smrg      struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx];
2173b8e80941Smrg      assert(sf->num_reg < ETNA_NUM_INPUTS);
2174b8e80941Smrg
2175b8e80941Smrg      switch (reg->semantic.Name) {
2176b8e80941Smrg      case TGSI_SEMANTIC_POSITION:
2177b8e80941Smrg         sobj->vs_pos_out_reg = reg->native.id;
2178b8e80941Smrg         break;
2179b8e80941Smrg      case TGSI_SEMANTIC_PSIZE:
2180b8e80941Smrg         sobj->vs_pointsize_out_reg = reg->native.id;
2181b8e80941Smrg         break;
2182b8e80941Smrg      default:
2183b8e80941Smrg         sf->reg[sf->num_reg].reg = reg->native.id;
2184b8e80941Smrg         sf->reg[sf->num_reg].semantic = reg->semantic;
2185b8e80941Smrg         sf->reg[sf->num_reg].num_components = 4; // XXX reg->num_components;
2186b8e80941Smrg         sf->num_reg++;
2187b8e80941Smrg         sobj->output_count_per_semantic[reg->semantic.Name] =
2188b8e80941Smrg            MAX2(reg->semantic.Index + 1,
2189b8e80941Smrg                 sobj->output_count_per_semantic[reg->semantic.Name]);
2190b8e80941Smrg      }
2191b8e80941Smrg   }
2192b8e80941Smrg
2193b8e80941Smrg   /* build two-level index for linking */
2194b8e80941Smrg   build_output_index(sobj);
2195b8e80941Smrg
2196b8e80941Smrg   /* fill in "mystery meat" load balancing value. This value determines how
2197b8e80941Smrg    * work is scheduled between VS and PS
2198b8e80941Smrg    * in the unified shader architecture. More precisely, it is determined from
2199b8e80941Smrg    * the number of VS outputs, as well as chip-specific
2200b8e80941Smrg    * vertex output buffer size, vertex cache size, and the number of shader
2201b8e80941Smrg    * cores.
2202b8e80941Smrg    *
2203b8e80941Smrg    * XXX this is a conservative estimate, the "optimal" value is only known for
2204b8e80941Smrg    * sure at link time because some
2205b8e80941Smrg    * outputs may be unused and thus unmapped. Then again, in the general use
2206b8e80941Smrg    * case with GLSL the vertex and fragment
2207b8e80941Smrg    * shaders are linked already before submitting to Gallium, thus all outputs
2208b8e80941Smrg    * are used.
2209b8e80941Smrg    */
2210b8e80941Smrg   int half_out = (c->file[TGSI_FILE_OUTPUT].reg_size + 1) / 2;
2211b8e80941Smrg   assert(half_out);
2212b8e80941Smrg
2213b8e80941Smrg   uint32_t b = ((20480 / (c->specs->vertex_output_buffer_size -
2214b8e80941Smrg                           2 * half_out * c->specs->vertex_cache_size)) +
2215b8e80941Smrg                 9) /
2216b8e80941Smrg                10;
2217b8e80941Smrg   uint32_t a = (b + 256 / (c->specs->shader_core_count * half_out)) / 2;
2218b8e80941Smrg   sobj->vs_load_balancing = VIVS_VS_LOAD_BALANCING_A(MIN2(a, 255)) |
2219b8e80941Smrg                             VIVS_VS_LOAD_BALANCING_B(MIN2(b, 255)) |
2220b8e80941Smrg                             VIVS_VS_LOAD_BALANCING_C(0x3f) |
2221b8e80941Smrg                             VIVS_VS_LOAD_BALANCING_D(0x0f);
2222b8e80941Smrg}
2223b8e80941Smrg
2224b8e80941Smrgstatic bool
2225b8e80941Smrgetna_compile_check_limits(struct etna_compile *c)
2226b8e80941Smrg{
2227b8e80941Smrg   int max_uniforms = (c->info.processor == PIPE_SHADER_VERTEX)
2228b8e80941Smrg                         ? c->specs->max_vs_uniforms
2229b8e80941Smrg                         : c->specs->max_ps_uniforms;
2230b8e80941Smrg   /* round up number of uniforms, including immediates, in units of four */
2231b8e80941Smrg   int num_uniforms = c->imm_base / 4 + (c->imm_size + 3) / 4;
2232b8e80941Smrg
2233b8e80941Smrg   if (!c->specs->has_icache && c->inst_ptr > c->specs->max_instructions) {
2234b8e80941Smrg      DBG("Number of instructions (%d) exceeds maximum %d", c->inst_ptr,
2235b8e80941Smrg          c->specs->max_instructions);
2236b8e80941Smrg      return false;
2237b8e80941Smrg   }
2238b8e80941Smrg
2239b8e80941Smrg   if (c->next_free_native > c->specs->max_registers) {
2240b8e80941Smrg      DBG("Number of registers (%d) exceeds maximum %d", c->next_free_native,
2241b8e80941Smrg          c->specs->max_registers);
2242b8e80941Smrg      return false;
2243b8e80941Smrg   }
2244b8e80941Smrg
2245b8e80941Smrg   if (num_uniforms > max_uniforms) {
2246b8e80941Smrg      DBG("Number of uniforms (%d) exceeds maximum %d", num_uniforms,
2247b8e80941Smrg          max_uniforms);
2248b8e80941Smrg      return false;
2249b8e80941Smrg   }
2250b8e80941Smrg
2251b8e80941Smrg   if (c->num_varyings > c->specs->max_varyings) {
2252b8e80941Smrg      DBG("Number of varyings (%d) exceeds maximum %d", c->num_varyings,
2253b8e80941Smrg          c->specs->max_varyings);
2254b8e80941Smrg      return false;
2255b8e80941Smrg   }
2256b8e80941Smrg
2257b8e80941Smrg   if (c->imm_base > c->specs->num_constants) {
2258b8e80941Smrg      DBG("Number of constants (%d) exceeds maximum %d", c->imm_base,
2259b8e80941Smrg          c->specs->num_constants);
2260b8e80941Smrg   }
2261b8e80941Smrg
2262b8e80941Smrg   return true;
2263b8e80941Smrg}
2264b8e80941Smrg
2265b8e80941Smrgstatic void
2266b8e80941Smrgcopy_uniform_state_to_shader(struct etna_compile *c, struct etna_shader_variant *sobj)
2267b8e80941Smrg{
2268b8e80941Smrg   uint32_t count = c->imm_size;
2269b8e80941Smrg   struct etna_shader_uniform_info *uinfo = &sobj->uniforms;
2270b8e80941Smrg
2271b8e80941Smrg   uinfo->const_count = c->imm_base;
2272b8e80941Smrg   uinfo->imm_count = count;
2273b8e80941Smrg   uinfo->imm_data = mem_dup(c->imm_data, count * sizeof(*c->imm_data));
2274b8e80941Smrg   uinfo->imm_contents = mem_dup(c->imm_contents, count * sizeof(*c->imm_contents));
2275b8e80941Smrg
2276b8e80941Smrg   etna_set_shader_uniforms_dirty_flags(sobj);
2277b8e80941Smrg}
2278b8e80941Smrg
/* Compile the TGSI tokens of v->shader into the shader variant *v*.
 *
 * The tokens are first run through TGSI lowering, then registers are
 * declared and allocated, code is generated, the result is checked against
 * the hardware limits and finally the output fields of *v* are filled in.
 *
 * Returns false if *v* is NULL, if the scratch compile state cannot be
 * allocated, or if etna_compile_check_limits() rejects the shader; returns
 * true otherwise. When false is returned because of the limit check, none of
 * the output fields of *v* have been written.
 */
bool
etna_compile_shader(struct etna_shader_variant *v)
{
   /* Create scratch space that may be too large to fit on stack
    */
   bool ret;
   struct etna_compile *c;

   if (unlikely(!v))
      return false;

   const struct etna_specs *specs = v->shader->specs;

   /* Operations to lower in TGSI before compilation; some depend on the
    * capabilities reported in specs.
    */
   struct tgsi_lowering_config lconfig = {
      .lower_FLR = !specs->has_sign_floor_ceil,
      .lower_CEIL = !specs->has_sign_floor_ceil,
      .lower_POW = true,
      .lower_EXP = true,
      .lower_LOG = true,
      .lower_DP2 = !specs->has_halti2_instructions,
      .lower_TRUNC = true,
   };

   c = CALLOC_STRUCT(etna_compile);
   if (!c)
      return false;

   /* -1 marks "no label referenced", see etna_compile_fill_in_labels */
   memset(&c->lbl_usage, -1, sizeof(c->lbl_usage));

   const struct tgsi_token *tokens = v->shader->tokens;

   c->specs = specs;
   c->key = &v->key;
   c->tokens = tgsi_transform_lowering(&lconfig, tokens, &c->info);
   /* Only tokens returned by the lowering pass are freed at the end; the
    * original tokens remain owned by v->shader.
    */
   c->free_tokens = !!c->tokens;
   if (!c->tokens) {
      /* no lowering */
      c->tokens = tokens;
   }

   /* Build a map from gallium register to native registers for files
    * CONST, SAMP, IMM, OUT, IN, TEMP.
    * SAMP will map as-is for fragment shaders, there will be a +8 offset for
    * vertex shaders.
    */
   /* Pass one -- check register file declarations and immediates */
   etna_compile_parse_declarations(c);

   etna_allocate_decls(c);

   /* Pass two -- check usage of temporaries, inputs, outputs */
   etna_compile_pass_check_usage(c);

   assign_special_inputs(c);

   /* Assign native temp register to TEMPs */
   assign_temporaries_to_native(c, &c->file[TGSI_FILE_TEMPORARY]);

   /* optimize outputs */
   etna_compile_pass_optimize_outputs(c);

   /* XXX assign special inputs: gl_FrontFacing (VARYING_SLOT_FACE)
    *     this is part of RGROUP_INTERNAL
    */

   /* assign inputs: last usage of input should be <= first usage of temp */
   /*   potential optimization case:
    *     if single MOV TEMP[y], IN[x] before which temp y is not used, and
    *     after which IN[x] is not read, temp[y] can be used as input register
    *     as-is
    */
   /*   sort temporaries by first use
    *   sort inputs by last usage
    *   iterate over inputs, temporaries
    *     if last usage of input <= first usage of temp:
    *       assign input to temp
    *       advance input, temporary pointer
    *     else
    *       advance temporary pointer
    *
    *   potential problem: instruction with multiple inputs of which one is the
    *      temp and the other is the input;
    *      however, as the temp is not used before this, how would this make
    *      sense? uninitialized temporaries have an undefined
    *      value, so this would be ok
    */
   assign_inouts_to_temporaries(c, TGSI_FILE_INPUT);

   /* assign outputs: first usage of output should be >= last usage of temp */
   /*   potential optimization case:
    *      if single MOV OUT[x], TEMP[y] (with full write mask, or at least
    *      writing all components that are used in the shader) after which
    *      temp y is no longer used temp[y] can be used as output register
    *      as-is
    *
    *   potential problem: instruction with multiple outputs of which one is the
    *      temp and the other is the output;
    *      however, as the temp is not used after this, how would this make
    *      sense? could just discard the output value
    */
   /*   sort temporaries by last use
    *   sort outputs by first usage
    *   iterate over outputs, temporaries
    *     if first usage of output >= last usage of temp:
    *       assign output to temp
    *       advance output, temporary pointer
    *     else
    *       advance temporary pointer
    */
   assign_inouts_to_temporaries(c, TGSI_FILE_OUTPUT);

   assign_constants_and_immediates(c);
   assign_texture_units(c);

   /* list declarations (before the PS input permutation) */
   for (int x = 0; x < c->total_decls; ++x) {
      DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i "
                                    "last_use=%i native=%i usage_mask=%x "
                                    "has_semantic=%i",
            x, tgsi_file_name(c->decl[x].file), c->decl[x].idx,
            c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use,
            c->decl[x].native.valid ? c->decl[x].native.id : -1,
            c->decl[x].usage_mask, c->decl[x].has_semantic);
      if (c->decl[x].has_semantic)
         DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i",
               tgsi_semantic_names[c->decl[x].semantic.Name],
               c->decl[x].semantic.Index);
   }
   /* XXX for PS we need to permute so that inputs are always in temporary
    * 0..N-1.
    * There is no "switchboard" for varyings (AFAIK!). The output color,
    * however, can be routed from an arbitrary temporary.
    */
   if (c->info.processor == PIPE_SHADER_FRAGMENT)
      permute_ps_inputs(c);


   /* list declarations again (after the PS input permutation) */
   for (int x = 0; x < c->total_decls; ++x) {
      DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i "
                                    "last_use=%i native=%i usage_mask=%x "
                                    "has_semantic=%i",
            x, tgsi_file_name(c->decl[x].file), c->decl[x].idx,
            c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use,
            c->decl[x].native.valid ? c->decl[x].native.id : -1,
            c->decl[x].usage_mask, c->decl[x].has_semantic);
      if (c->decl[x].has_semantic)
         DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i",
               tgsi_semantic_names[c->decl[x].semantic.Name],
               c->decl[x].semantic.Index);
   }

   /* pass 3: generate instructions */
   etna_compile_pass_generate_code(c);
   etna_compile_add_z_div_if_needed(c);
   etna_compile_frag_rb_swap(c);
   etna_compile_add_nop_if_needed(c);

   /* Reject the shader before anything is written to *v* */
   ret = etna_compile_check_limits(c);
   if (!ret)
      goto out;

   /* Must come after all instructions have been emitted, see
    * etna_compile_fill_in_labels.
    */
   etna_compile_fill_in_labels(c);

   /* fill in output structure */
   v->processor = c->info.processor;
   /* code_size is inst_ptr * 4 while inst_ptr * 16 bytes are duplicated */
   v->code_size = c->inst_ptr * 4;
   v->code = mem_dup(c->code, c->inst_ptr * 16);
   v->num_loops = c->num_loops;
   v->num_temps = c->next_free_native;
   /* -1 until overwritten by the fill_in_*_outputs helpers below */
   v->vs_pos_out_reg = -1;
   v->vs_pointsize_out_reg = -1;
   v->ps_color_out_reg = -1;
   v->ps_depth_out_reg = -1;
   v->needs_icache = c->inst_ptr > c->specs->max_instructions;
   copy_uniform_state_to_shader(c, v);

   if (c->info.processor == PIPE_SHADER_VERTEX) {
      fill_in_vs_inputs(v, c);
      fill_in_vs_outputs(v, c);
   } else if (c->info.processor == PIPE_SHADER_FRAGMENT) {
      fill_in_ps_inputs(v, c);
      fill_in_ps_outputs(v, c);
   }

out:
   /* Common cleanup for both the success and the limit-failure path */
   if (c->free_tokens)
      FREE((void *)c->tokens);

   FREE(c->labels);
   FREE(c);

   return ret;
}
2474b8e80941Smrg
2475b8e80941Smrgextern const char *tgsi_swizzle_names[];
2476b8e80941Smrgvoid
2477b8e80941Smrgetna_dump_shader(const struct etna_shader_variant *shader)
2478b8e80941Smrg{
2479b8e80941Smrg   if (shader->processor == PIPE_SHADER_VERTEX)
2480b8e80941Smrg      printf("VERT\n");
2481b8e80941Smrg   else
2482b8e80941Smrg      printf("FRAG\n");
2483b8e80941Smrg
2484b8e80941Smrg
2485b8e80941Smrg   etna_disasm(shader->code, shader->code_size, PRINT_RAW);
2486b8e80941Smrg
2487b8e80941Smrg   printf("num loops: %i\n", shader->num_loops);
2488b8e80941Smrg   printf("num temps: %i\n", shader->num_temps);
2489b8e80941Smrg   printf("num const: %i\n", shader->uniforms.const_count);
2490b8e80941Smrg   printf("immediates:\n");
2491b8e80941Smrg   for (int idx = 0; idx < shader->uniforms.imm_count; ++idx) {
2492b8e80941Smrg      printf(" [%i].%s = %f (0x%08x)\n",
2493b8e80941Smrg             (idx + shader->uniforms.const_count) / 4,
2494b8e80941Smrg             tgsi_swizzle_names[idx % 4],
2495b8e80941Smrg             *((float *)&shader->uniforms.imm_data[idx]),
2496b8e80941Smrg             shader->uniforms.imm_data[idx]);
2497b8e80941Smrg   }
2498b8e80941Smrg   printf("inputs:\n");
2499b8e80941Smrg   for (int idx = 0; idx < shader->infile.num_reg; ++idx) {
2500b8e80941Smrg      printf(" [%i] name=%s index=%i comps=%i\n", shader->infile.reg[idx].reg,
2501b8e80941Smrg             tgsi_semantic_names[shader->infile.reg[idx].semantic.Name],
2502b8e80941Smrg             shader->infile.reg[idx].semantic.Index,
2503b8e80941Smrg             shader->infile.reg[idx].num_components);
2504b8e80941Smrg   }
2505b8e80941Smrg   printf("outputs:\n");
2506b8e80941Smrg   for (int idx = 0; idx < shader->outfile.num_reg; ++idx) {
2507b8e80941Smrg      printf(" [%i] name=%s index=%i comps=%i\n", shader->outfile.reg[idx].reg,
2508b8e80941Smrg             tgsi_semantic_names[shader->outfile.reg[idx].semantic.Name],
2509b8e80941Smrg             shader->outfile.reg[idx].semantic.Index,
2510b8e80941Smrg             shader->outfile.reg[idx].num_components);
2511b8e80941Smrg   }
2512b8e80941Smrg   printf("special:\n");
2513b8e80941Smrg   if (shader->processor == PIPE_SHADER_VERTEX) {
2514b8e80941Smrg      printf("  vs_pos_out_reg=%i\n", shader->vs_pos_out_reg);
2515b8e80941Smrg      printf("  vs_pointsize_out_reg=%i\n", shader->vs_pointsize_out_reg);
2516b8e80941Smrg      printf("  vs_load_balancing=0x%08x\n", shader->vs_load_balancing);
2517b8e80941Smrg   } else {
2518b8e80941Smrg      printf("  ps_color_out_reg=%i\n", shader->ps_color_out_reg);
2519b8e80941Smrg      printf("  ps_depth_out_reg=%i\n", shader->ps_depth_out_reg);
2520b8e80941Smrg   }
2521b8e80941Smrg   printf("  input_count_unk8=0x%08x\n", shader->input_count_unk8);
2522b8e80941Smrg}
2523b8e80941Smrg
2524b8e80941Smrgvoid
2525b8e80941Smrgetna_destroy_shader(struct etna_shader_variant *shader)
2526b8e80941Smrg{
2527b8e80941Smrg   assert(shader);
2528b8e80941Smrg
2529b8e80941Smrg   FREE(shader->code);
2530b8e80941Smrg   FREE(shader->uniforms.imm_data);
2531b8e80941Smrg   FREE(shader->uniforms.imm_contents);
2532b8e80941Smrg   FREE(shader->output_per_semantic_list);
2533b8e80941Smrg   FREE(shader);
2534b8e80941Smrg}
2535b8e80941Smrg
2536b8e80941Smrgstatic const struct etna_shader_inout *
2537b8e80941Smrgetna_shader_vs_lookup(const struct etna_shader_variant *sobj,
2538b8e80941Smrg                      const struct etna_shader_inout *in)
2539b8e80941Smrg{
2540b8e80941Smrg   if (in->semantic.Index < sobj->output_count_per_semantic[in->semantic.Name])
2541b8e80941Smrg      return sobj->output_per_semantic[in->semantic.Name][in->semantic.Index];
2542b8e80941Smrg
2543b8e80941Smrg   return NULL;
2544b8e80941Smrg}
2545b8e80941Smrg
/* Link a fragment shader's inputs to the outputs of a vertex shader and
 * record the resulting varyings in *info*.
 *
 * Note the return convention: this function returns true when a link ERROR
 * occurred (a fragment input other than PCOORD has no matching vertex shader
 * output) and false on success.
 *
 * NOTE(review): info->num_varyings is only ever raised here and is not reset
 * at the start -- presumably the caller zero-initializes *info*; confirm.
 */
bool
etna_link_shader(struct etna_shader_link_info *info,
                 const struct etna_shader_variant *vs, const struct etna_shader_variant *fs)
{
   /* running component offset of the current varying */
   int comp_ofs = 0;
   /* For each fragment input we need to find the associated vertex shader
    * output, which can be found by matching on semantic name and index. A
    * binary search could be used because the vs outputs are sorted by their
    * semantic index and grouped by semantic type by fill_in_vs_outputs.
    */
   assert(fs->infile.num_reg < ETNA_NUM_INPUTS);
   /* -1 unless a PCOORD input is encountered below */
   info->pcoord_varying_comp_ofs = -1;

   for (int idx = 0; idx < fs->infile.num_reg; ++idx) {
      const struct etna_shader_inout *fsio = &fs->infile.reg[idx];
      const struct etna_shader_inout *vsio = etna_shader_vs_lookup(vs, fsio);
      struct etna_varying *varying;
      /* true for every semantic except COLOR */
      bool interpolate_always = fsio->semantic.Name != TGSI_SEMANTIC_COLOR;

      /* fragment input registers are 1-based; register N uses varying N-1 */
      assert(fsio->reg > 0 && fsio->reg <= ARRAY_SIZE(info->varyings));

      if (fsio->reg > info->num_varyings)
         info->num_varyings = fsio->reg;

      varying = &info->varyings[fsio->reg - 1];
      varying->num_components = fsio->num_components;

      if (!interpolate_always) /* colors affected by flat shading */
         varying->pa_attributes = 0x200;
      else /* texture coord or other bypasses flat shading */
         varying->pa_attributes = 0x2f1;

      /* The first two components of non-COLOR varyings are tagged as point
       * coordinate X/Y; every other component is tagged as plainly used.
       */
      varying->use[0] = interpolate_always ? VARYING_COMPONENT_USE_POINTCOORD_X : VARYING_COMPONENT_USE_USED;
      varying->use[1] = interpolate_always ? VARYING_COMPONENT_USE_POINTCOORD_Y : VARYING_COMPONENT_USE_USED;
      varying->use[2] = VARYING_COMPONENT_USE_USED;
      varying->use[3] = VARYING_COMPONENT_USE_USED;


      /* point coord is an input to the PS without matching VS output,
       * so it gets a varying slot without being assigned a VS register.
       */
      if (fsio->semantic.Name == TGSI_SEMANTIC_PCOORD) {
         info->pcoord_varying_comp_ofs = comp_ofs;
      } else {
         if (vsio == NULL) { /* not found -- link error */
            BUG("Semantic %d value %d not found in vertex shader outputs\n", fsio->semantic.Name, fsio->semantic.Index);
            /* true signals failure; fields of *info* written so far are
             * left as they are
             */
            return true;
         }

         varying->reg = vsio->reg;
      }

      comp_ofs += varying->num_components;
   }

   assert(info->num_varyings == fs->infile.num_reg);

   /* false signals success */
   return false;
}
2605