1b8e80941Smrg/*
2b8e80941Smrg * Copyright © 2015 Intel Corporation
3b8e80941Smrg * Copyright © 2014-2015 Broadcom
4b8e80941Smrg * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5b8e80941Smrg *
6b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
7b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
8b8e80941Smrg * to deal in the Software without restriction, including without limitation
9b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
11b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
12b8e80941Smrg *
13b8e80941Smrg * The above copyright notice and this permission notice (including the next
14b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
15b8e80941Smrg * Software.
16b8e80941Smrg *
17b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23b8e80941Smrg * IN THE SOFTWARE.
24b8e80941Smrg */
25b8e80941Smrg
26b8e80941Smrg#include "compiler/nir/nir.h"
27b8e80941Smrg#include "compiler/nir/nir_builder.h"
28b8e80941Smrg#include "compiler/glsl/list.h"
29b8e80941Smrg#include "main/imports.h"
30b8e80941Smrg#include "main/mtypes.h"
31b8e80941Smrg#include "util/ralloc.h"
32b8e80941Smrg
33b8e80941Smrg#include "prog_to_nir.h"
34b8e80941Smrg#include "prog_instruction.h"
35b8e80941Smrg#include "prog_parameter.h"
36b8e80941Smrg#include "prog_print.h"
37b8e80941Smrg#include "program.h"
38b8e80941Smrg
39b8e80941Smrg/**
40b8e80941Smrg * \file prog_to_nir.c
41b8e80941Smrg *
42b8e80941Smrg * A translator from Mesa IR (prog_instruction.h) to NIR.  This is primarily
43b8e80941Smrg * intended to support ARB_vertex_program, ARB_fragment_program, and fixed-function
44b8e80941Smrg * vertex processing.  Full GLSL support should use glsl_to_nir instead.
45b8e80941Smrg */
46b8e80941Smrg
47b8e80941Smrgstruct ptn_compile {
48b8e80941Smrg   const struct gl_program *prog;
49b8e80941Smrg   nir_builder build;
50b8e80941Smrg   bool error;
51b8e80941Smrg
52b8e80941Smrg   nir_variable *parameters;
53b8e80941Smrg   nir_variable *input_vars[VARYING_SLOT_MAX];
54b8e80941Smrg   nir_variable *output_vars[VARYING_SLOT_MAX];
55b8e80941Smrg   nir_variable *sysval_vars[SYSTEM_VALUE_MAX];
56b8e80941Smrg   nir_variable *sampler_vars[32]; /* matches number of bits in TexSrcUnit */
57b8e80941Smrg   nir_register **output_regs;
58b8e80941Smrg   nir_register **temp_regs;
59b8e80941Smrg
60b8e80941Smrg   nir_register *addr_reg;
61b8e80941Smrg};
62b8e80941Smrg
/* Expands to an anonymous unsigned[4] holding the SWIZZLE_* values named by
 * the four component letters.
 */
#define SWIZ(c0, c1, c2, c3) \
   (unsigned[4]){ SWIZZLE_##c0, SWIZZLE_##c1, SWIZZLE_##c2, SWIZZLE_##c3 }

/* Extracts component `ch` of `src` as a one-component value. */
#define ptn_channel(b, src, ch) \
   nir_swizzle((b), (src), SWIZ(ch, ch, ch, ch), 1, true)
66b8e80941Smrg
67b8e80941Smrgstatic nir_ssa_def *
68b8e80941Smrgptn_src_for_dest(struct ptn_compile *c, nir_alu_dest *dest)
69b8e80941Smrg{
70b8e80941Smrg   nir_builder *b = &c->build;
71b8e80941Smrg
72b8e80941Smrg   nir_alu_src src;
73b8e80941Smrg   memset(&src, 0, sizeof(src));
74b8e80941Smrg
75b8e80941Smrg   if (dest->dest.is_ssa)
76b8e80941Smrg      src.src = nir_src_for_ssa(&dest->dest.ssa);
77b8e80941Smrg   else {
78b8e80941Smrg      assert(!dest->dest.reg.indirect);
79b8e80941Smrg      src.src = nir_src_for_reg(dest->dest.reg.reg);
80b8e80941Smrg      src.src.reg.base_offset = dest->dest.reg.base_offset;
81b8e80941Smrg   }
82b8e80941Smrg
83b8e80941Smrg   for (int i = 0; i < 4; i++)
84b8e80941Smrg      src.swizzle[i] = i;
85b8e80941Smrg
86b8e80941Smrg   return nir_fmov_alu(b, src, 4);
87b8e80941Smrg}
88b8e80941Smrg
89b8e80941Smrgstatic nir_alu_dest
90b8e80941Smrgptn_get_dest(struct ptn_compile *c, const struct prog_dst_register *prog_dst)
91b8e80941Smrg{
92b8e80941Smrg   nir_alu_dest dest;
93b8e80941Smrg
94b8e80941Smrg   memset(&dest, 0, sizeof(dest));
95b8e80941Smrg
96b8e80941Smrg   switch (prog_dst->File) {
97b8e80941Smrg   case PROGRAM_TEMPORARY:
98b8e80941Smrg      dest.dest.reg.reg = c->temp_regs[prog_dst->Index];
99b8e80941Smrg      break;
100b8e80941Smrg   case PROGRAM_OUTPUT:
101b8e80941Smrg      dest.dest.reg.reg = c->output_regs[prog_dst->Index];
102b8e80941Smrg      break;
103b8e80941Smrg   case PROGRAM_ADDRESS:
104b8e80941Smrg      assert(prog_dst->Index == 0);
105b8e80941Smrg      dest.dest.reg.reg = c->addr_reg;
106b8e80941Smrg      break;
107b8e80941Smrg   case PROGRAM_UNDEFINED:
108b8e80941Smrg      break;
109b8e80941Smrg   }
110b8e80941Smrg
111b8e80941Smrg   dest.write_mask = prog_dst->WriteMask;
112b8e80941Smrg   dest.saturate = false;
113b8e80941Smrg
114b8e80941Smrg   assert(!prog_dst->RelAddr);
115b8e80941Smrg
116b8e80941Smrg   return dest;
117b8e80941Smrg}
118b8e80941Smrg
119b8e80941Smrgstatic nir_ssa_def *
120b8e80941Smrgptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
121b8e80941Smrg{
122b8e80941Smrg   nir_builder *b = &c->build;
123b8e80941Smrg   nir_alu_src src;
124b8e80941Smrg
125b8e80941Smrg   memset(&src, 0, sizeof(src));
126b8e80941Smrg
127b8e80941Smrg   switch (prog_src->File) {
128b8e80941Smrg   case PROGRAM_UNDEFINED:
129b8e80941Smrg      return nir_imm_float(b, 0.0);
130b8e80941Smrg   case PROGRAM_TEMPORARY:
131b8e80941Smrg      assert(!prog_src->RelAddr && prog_src->Index >= 0);
132b8e80941Smrg      src.src.reg.reg = c->temp_regs[prog_src->Index];
133b8e80941Smrg      break;
134b8e80941Smrg   case PROGRAM_INPUT: {
135b8e80941Smrg      /* ARB_vertex_program doesn't allow relative addressing on vertex
136b8e80941Smrg       * attributes; ARB_fragment_program has no relative addressing at all.
137b8e80941Smrg       */
138b8e80941Smrg      assert(!prog_src->RelAddr);
139b8e80941Smrg
140b8e80941Smrg      assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX);
141b8e80941Smrg
142b8e80941Smrg      nir_variable *var = c->input_vars[prog_src->Index];
143b8e80941Smrg      src.src = nir_src_for_ssa(nir_load_var(b, var));
144b8e80941Smrg      break;
145b8e80941Smrg   }
146b8e80941Smrg   case PROGRAM_SYSTEM_VALUE: {
147b8e80941Smrg      assert(!prog_src->RelAddr);
148b8e80941Smrg
149b8e80941Smrg      assert(prog_src->Index >= 0 && prog_src->Index < SYSTEM_VALUE_MAX);
150b8e80941Smrg
151b8e80941Smrg      nir_variable *var = c->sysval_vars[prog_src->Index];
152b8e80941Smrg      src.src = nir_src_for_ssa(nir_load_var(b, var));
153b8e80941Smrg      break;
154b8e80941Smrg   }
155b8e80941Smrg   case PROGRAM_STATE_VAR:
156b8e80941Smrg   case PROGRAM_CONSTANT: {
157b8e80941Smrg      /* We actually want to look at the type in the Parameters list for this,
158b8e80941Smrg       * because it lets us upload constant builtin uniforms as actual
159b8e80941Smrg       * constants.
160b8e80941Smrg       */
161b8e80941Smrg      struct gl_program_parameter_list *plist = c->prog->Parameters;
162b8e80941Smrg      gl_register_file file = prog_src->RelAddr ? prog_src->File :
163b8e80941Smrg         plist->Parameters[prog_src->Index].Type;
164b8e80941Smrg
165b8e80941Smrg      switch (file) {
166b8e80941Smrg      case PROGRAM_CONSTANT:
167b8e80941Smrg         if ((c->prog->arb.IndirectRegisterFiles &
168b8e80941Smrg              (1 << PROGRAM_CONSTANT)) == 0) {
169b8e80941Smrg            unsigned pvo = plist->ParameterValueOffset[prog_src->Index];
170b8e80941Smrg            float *v = (float *) plist->ParameterValues + pvo;
171b8e80941Smrg            src.src = nir_src_for_ssa(nir_imm_vec4(b, v[0], v[1], v[2], v[3]));
172b8e80941Smrg            break;
173b8e80941Smrg         }
174b8e80941Smrg         /* FALLTHROUGH */
175b8e80941Smrg      case PROGRAM_STATE_VAR: {
176b8e80941Smrg         assert(c->parameters != NULL);
177b8e80941Smrg
178b8e80941Smrg         nir_deref_instr *deref = nir_build_deref_var(b, c->parameters);
179b8e80941Smrg
180b8e80941Smrg         nir_ssa_def *index = nir_imm_int(b, prog_src->Index);
181b8e80941Smrg         if (prog_src->RelAddr)
182b8e80941Smrg            index = nir_iadd(b, index, nir_load_reg(b, c->addr_reg));
183b8e80941Smrg         deref = nir_build_deref_array(b, deref, nir_channel(b, index, 0));
184b8e80941Smrg
185b8e80941Smrg         src.src = nir_src_for_ssa(nir_load_deref(b, deref));
186b8e80941Smrg         break;
187b8e80941Smrg      }
188b8e80941Smrg      default:
189b8e80941Smrg         fprintf(stderr, "bad uniform src register file: %s (%d)\n",
190b8e80941Smrg                 _mesa_register_file_name(file), file);
191b8e80941Smrg         abort();
192b8e80941Smrg      }
193b8e80941Smrg      break;
194b8e80941Smrg   }
195b8e80941Smrg   default:
196b8e80941Smrg      fprintf(stderr, "unknown src register file: %s (%d)\n",
197b8e80941Smrg              _mesa_register_file_name(prog_src->File), prog_src->File);
198b8e80941Smrg      abort();
199b8e80941Smrg   }
200b8e80941Smrg
201b8e80941Smrg   nir_ssa_def *def;
202b8e80941Smrg   if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle) &&
203b8e80941Smrg       (prog_src->Negate == NEGATE_NONE || prog_src->Negate == NEGATE_XYZW)) {
204b8e80941Smrg      /* The simple non-SWZ case. */
205b8e80941Smrg      for (int i = 0; i < 4; i++)
206b8e80941Smrg         src.swizzle[i] = GET_SWZ(prog_src->Swizzle, i);
207b8e80941Smrg
208b8e80941Smrg      def = nir_fmov_alu(b, src, 4);
209b8e80941Smrg
210b8e80941Smrg      if (prog_src->Negate)
211b8e80941Smrg         def = nir_fneg(b, def);
212b8e80941Smrg   } else {
213b8e80941Smrg      /* The SWZ instruction allows per-component zero/one swizzles, and also
214b8e80941Smrg       * per-component negation.
215b8e80941Smrg       */
216b8e80941Smrg      nir_ssa_def *chans[4];
217b8e80941Smrg      for (int i = 0; i < 4; i++) {
218b8e80941Smrg         int swizzle = GET_SWZ(prog_src->Swizzle, i);
219b8e80941Smrg         if (swizzle == SWIZZLE_ZERO) {
220b8e80941Smrg            chans[i] = nir_imm_float(b, 0.0);
221b8e80941Smrg         } else if (swizzle == SWIZZLE_ONE) {
222b8e80941Smrg            chans[i] = nir_imm_float(b, 1.0);
223b8e80941Smrg         } else {
224b8e80941Smrg            assert(swizzle != SWIZZLE_NIL);
225b8e80941Smrg            nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
226b8e80941Smrg            nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, 32, NULL);
227b8e80941Smrg            mov->dest.write_mask = 0x1;
228b8e80941Smrg            mov->src[0] = src;
229b8e80941Smrg            mov->src[0].swizzle[0] = swizzle;
230b8e80941Smrg            nir_builder_instr_insert(b, &mov->instr);
231b8e80941Smrg
232b8e80941Smrg            chans[i] = &mov->dest.dest.ssa;
233b8e80941Smrg         }
234b8e80941Smrg
235b8e80941Smrg         if (prog_src->Negate & (1 << i))
236b8e80941Smrg            chans[i] = nir_fneg(b, chans[i]);
237b8e80941Smrg      }
238b8e80941Smrg      def = nir_vec4(b, chans[0], chans[1], chans[2], chans[3]);
239b8e80941Smrg   }
240b8e80941Smrg
241b8e80941Smrg   return def;
242b8e80941Smrg}
243b8e80941Smrg
244b8e80941Smrgstatic void
245b8e80941Smrgptn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
246b8e80941Smrg{
247b8e80941Smrg   unsigned num_srcs = nir_op_infos[op].num_inputs;
248b8e80941Smrg   nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
249b8e80941Smrg   unsigned i;
250b8e80941Smrg
251b8e80941Smrg   for (i = 0; i < num_srcs; i++)
252b8e80941Smrg      instr->src[i].src = nir_src_for_ssa(src[i]);
253b8e80941Smrg
254b8e80941Smrg   instr->dest = dest;
255b8e80941Smrg   nir_builder_instr_insert(b, &instr->instr);
256b8e80941Smrg}
257b8e80941Smrg
258b8e80941Smrgstatic void
259b8e80941Smrgptn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
260b8e80941Smrg                     nir_ssa_def *def, unsigned write_mask)
261b8e80941Smrg{
262b8e80941Smrg   if (!(dest.write_mask & write_mask))
263b8e80941Smrg      return;
264b8e80941Smrg
265b8e80941Smrg   nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
266b8e80941Smrg   if (!mov)
267b8e80941Smrg      return;
268b8e80941Smrg
269b8e80941Smrg   mov->dest = dest;
270b8e80941Smrg   mov->dest.write_mask &= write_mask;
271b8e80941Smrg   mov->src[0].src = nir_src_for_ssa(def);
272b8e80941Smrg   for (unsigned i = def->num_components; i < 4; i++)
273b8e80941Smrg      mov->src[0].swizzle[i] = def->num_components - 1;
274b8e80941Smrg   nir_builder_instr_insert(b, &mov->instr);
275b8e80941Smrg}
276b8e80941Smrg
277b8e80941Smrgstatic void
278b8e80941Smrgptn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
279b8e80941Smrg{
280b8e80941Smrg   ptn_move_dest_masked(b, dest, def, WRITEMASK_XYZW);
281b8e80941Smrg}
282b8e80941Smrg
283b8e80941Smrgstatic void
284b8e80941Smrgptn_arl(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
285b8e80941Smrg{
286b8e80941Smrg   ptn_move_dest(b, dest, nir_f2i32(b, nir_ffloor(b, src[0])));
287b8e80941Smrg}
288b8e80941Smrg
289b8e80941Smrg/* EXP - Approximate Exponential Base 2
290b8e80941Smrg *  dst.x = 2^{\lfloor src.x\rfloor}
291b8e80941Smrg *  dst.y = src.x - \lfloor src.x\rfloor
292b8e80941Smrg *  dst.z = 2^{src.x}
293b8e80941Smrg *  dst.w = 1.0
294b8e80941Smrg */
295b8e80941Smrgstatic void
296b8e80941Smrgptn_exp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
297b8e80941Smrg{
298b8e80941Smrg   nir_ssa_def *srcx = ptn_channel(b, src[0], X);
299b8e80941Smrg
300b8e80941Smrg   ptn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)), WRITEMASK_X);
301b8e80941Smrg   ptn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)), WRITEMASK_Y);
302b8e80941Smrg   ptn_move_dest_masked(b, dest, nir_fexp2(b, srcx), WRITEMASK_Z);
303b8e80941Smrg   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
304b8e80941Smrg}
305b8e80941Smrg
306b8e80941Smrg/* LOG - Approximate Logarithm Base 2
307b8e80941Smrg *  dst.x = \lfloor\log_2{|src.x|}\rfloor
308b8e80941Smrg *  dst.y = |src.x| * 2^{-\lfloor\log_2{|src.x|}\rfloor}}
309b8e80941Smrg *  dst.z = \log_2{|src.x|}
310b8e80941Smrg *  dst.w = 1.0
311b8e80941Smrg */
312b8e80941Smrgstatic void
313b8e80941Smrgptn_log(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
314b8e80941Smrg{
315b8e80941Smrg   nir_ssa_def *abs_srcx = nir_fabs(b, ptn_channel(b, src[0], X));
316b8e80941Smrg   nir_ssa_def *log2 = nir_flog2(b, abs_srcx);
317b8e80941Smrg   nir_ssa_def *floor_log2 = nir_ffloor(b, log2);
318b8e80941Smrg
319b8e80941Smrg   ptn_move_dest_masked(b, dest, floor_log2, WRITEMASK_X);
320b8e80941Smrg   ptn_move_dest_masked(b, dest,
321b8e80941Smrg                        nir_fmul(b, abs_srcx,
322b8e80941Smrg                                 nir_fexp2(b, nir_fneg(b, floor_log2))),
323b8e80941Smrg                        WRITEMASK_Y);
324b8e80941Smrg   ptn_move_dest_masked(b, dest, log2, WRITEMASK_Z);
325b8e80941Smrg   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
326b8e80941Smrg}
327b8e80941Smrg
328b8e80941Smrg/* DST - Distance Vector
329b8e80941Smrg *   dst.x = 1.0
330b8e80941Smrg *   dst.y = src0.y \times src1.y
331b8e80941Smrg *   dst.z = src0.z
332b8e80941Smrg *   dst.w = src1.w
333b8e80941Smrg */
334b8e80941Smrgstatic void
335b8e80941Smrgptn_dst(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
336b8e80941Smrg{
337b8e80941Smrg   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_X);
338b8e80941Smrg   ptn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), WRITEMASK_Y);
339b8e80941Smrg   ptn_move_dest_masked(b, dest, nir_fmov(b, src[0]), WRITEMASK_Z);
340b8e80941Smrg   ptn_move_dest_masked(b, dest, nir_fmov(b, src[1]), WRITEMASK_W);
341b8e80941Smrg}
342b8e80941Smrg
343b8e80941Smrg/* LIT - Light Coefficients
344b8e80941Smrg *  dst.x = 1.0
345b8e80941Smrg *  dst.y = max(src.x, 0.0)
346b8e80941Smrg *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
347b8e80941Smrg *  dst.w = 1.0
348b8e80941Smrg */
349b8e80941Smrgstatic void
350b8e80941Smrgptn_lit(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
351b8e80941Smrg{
352b8e80941Smrg   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_XW);
353b8e80941Smrg
354b8e80941Smrg   ptn_move_dest_masked(b, dest, nir_fmax(b, ptn_channel(b, src[0], X),
355b8e80941Smrg                                          nir_imm_float(b, 0.0)), WRITEMASK_Y);
356b8e80941Smrg
357b8e80941Smrg   if (dest.write_mask & WRITEMASK_Z) {
358b8e80941Smrg      nir_ssa_def *src0_y = ptn_channel(b, src[0], Y);
359b8e80941Smrg      nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ptn_channel(b, src[0], W),
360b8e80941Smrg                                                 nir_imm_float(b, 128.0)),
361b8e80941Smrg                                     nir_imm_float(b, -128.0));
362b8e80941Smrg      nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
363b8e80941Smrg                                  wclamp);
364b8e80941Smrg
365b8e80941Smrg      nir_ssa_def *z;
366b8e80941Smrg      if (b->shader->options->native_integers) {
367b8e80941Smrg         z = nir_bcsel(b,
368b8e80941Smrg                       nir_fge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
369b8e80941Smrg                       nir_imm_float(b, 0.0),
370b8e80941Smrg                       pow);
371b8e80941Smrg      } else {
372b8e80941Smrg         z = nir_fcsel(b,
373b8e80941Smrg                       nir_sge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
374b8e80941Smrg                       nir_imm_float(b, 0.0),
375b8e80941Smrg                       pow);
376b8e80941Smrg      }
377b8e80941Smrg
378b8e80941Smrg      ptn_move_dest_masked(b, dest, z, WRITEMASK_Z);
379b8e80941Smrg   }
380b8e80941Smrg}
381b8e80941Smrg
382b8e80941Smrg/* SCS - Sine Cosine
383b8e80941Smrg *   dst.x = \cos{src.x}
384b8e80941Smrg *   dst.y = \sin{src.x}
385b8e80941Smrg *   dst.z = 0.0
386b8e80941Smrg *   dst.w = 1.0
387b8e80941Smrg */
388b8e80941Smrgstatic void
389b8e80941Smrgptn_scs(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
390b8e80941Smrg{
391b8e80941Smrg   ptn_move_dest_masked(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)),
392b8e80941Smrg                        WRITEMASK_X);
393b8e80941Smrg   ptn_move_dest_masked(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)),
394b8e80941Smrg                        WRITEMASK_Y);
395b8e80941Smrg   ptn_move_dest_masked(b, dest, nir_imm_float(b, 0.0), WRITEMASK_Z);
396b8e80941Smrg   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
397b8e80941Smrg}
398b8e80941Smrg
399b8e80941Smrg/**
400b8e80941Smrg * Emit SLT.  For platforms with integers, prefer b2f(flt(...)).
401b8e80941Smrg */
402b8e80941Smrgstatic void
403b8e80941Smrgptn_slt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
404b8e80941Smrg{
405b8e80941Smrg   if (b->shader->options->native_integers) {
406b8e80941Smrg      ptn_move_dest(b, dest, nir_b2f32(b, nir_flt(b, src[0], src[1])));
407b8e80941Smrg   } else {
408b8e80941Smrg      ptn_move_dest(b, dest, nir_slt(b, src[0], src[1]));
409b8e80941Smrg   }
410b8e80941Smrg}
411b8e80941Smrg
412b8e80941Smrg/**
413b8e80941Smrg * Emit SGE.  For platforms with integers, prefer b2f(fge(...)).
414b8e80941Smrg */
415b8e80941Smrgstatic void
416b8e80941Smrgptn_sge(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
417b8e80941Smrg{
418b8e80941Smrg   if (b->shader->options->native_integers) {
419b8e80941Smrg      ptn_move_dest(b, dest, nir_b2f32(b, nir_fge(b, src[0], src[1])));
420b8e80941Smrg   } else {
421b8e80941Smrg      ptn_move_dest(b, dest, nir_sge(b, src[0], src[1]));
422b8e80941Smrg   }
423b8e80941Smrg}
424b8e80941Smrg
425b8e80941Smrgstatic void
426b8e80941Smrgptn_xpd(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
427b8e80941Smrg{
428b8e80941Smrg   ptn_move_dest_masked(b, dest,
429b8e80941Smrg                        nir_fsub(b,
430b8e80941Smrg                                 nir_fmul(b,
431b8e80941Smrg                                          nir_swizzle(b, src[0], SWIZ(Y, Z, X, W), 3, true),
432b8e80941Smrg                                          nir_swizzle(b, src[1], SWIZ(Z, X, Y, W), 3, true)),
433b8e80941Smrg                                 nir_fmul(b,
434b8e80941Smrg                                          nir_swizzle(b, src[1], SWIZ(Y, Z, X, W), 3, true),
435b8e80941Smrg                                          nir_swizzle(b, src[0], SWIZ(Z, X, Y, W), 3, true))),
436b8e80941Smrg                        WRITEMASK_XYZ);
437b8e80941Smrg   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
438b8e80941Smrg}
439b8e80941Smrg
440b8e80941Smrgstatic void
441b8e80941Smrgptn_dp2(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
442b8e80941Smrg{
443b8e80941Smrg   ptn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
444b8e80941Smrg}
445b8e80941Smrg
446b8e80941Smrgstatic void
447b8e80941Smrgptn_dp3(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
448b8e80941Smrg{
449b8e80941Smrg   ptn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
450b8e80941Smrg}
451b8e80941Smrg
452b8e80941Smrgstatic void
453b8e80941Smrgptn_dp4(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
454b8e80941Smrg{
455b8e80941Smrg   ptn_move_dest(b, dest, nir_fdot4(b, src[0], src[1]));
456b8e80941Smrg}
457b8e80941Smrg
458b8e80941Smrgstatic void
459b8e80941Smrgptn_dph(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
460b8e80941Smrg{
461b8e80941Smrg   ptn_move_dest(b, dest, nir_fdph(b, src[0], src[1]));
462b8e80941Smrg}
463b8e80941Smrg
464b8e80941Smrgstatic void
465b8e80941Smrgptn_cmp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
466b8e80941Smrg{
467b8e80941Smrg   if (b->shader->options->native_integers) {
468b8e80941Smrg      ptn_move_dest(b, dest, nir_bcsel(b,
469b8e80941Smrg                                       nir_flt(b, src[0], nir_imm_float(b, 0.0)),
470b8e80941Smrg                                       src[1], src[2]));
471b8e80941Smrg   } else {
472b8e80941Smrg      ptn_move_dest(b, dest, nir_fcsel(b,
473b8e80941Smrg                                       nir_slt(b, src[0], nir_imm_float(b, 0.0)),
474b8e80941Smrg                                       src[1], src[2]));
475b8e80941Smrg   }
476b8e80941Smrg}
477b8e80941Smrg
478b8e80941Smrgstatic void
479b8e80941Smrgptn_lrp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
480b8e80941Smrg{
481b8e80941Smrg   ptn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
482b8e80941Smrg}
483b8e80941Smrg
484b8e80941Smrgstatic void
485b8e80941Smrgptn_kil(nir_builder *b, nir_ssa_def **src)
486b8e80941Smrg{
487b8e80941Smrg   nir_ssa_def *cmp = b->shader->options->native_integers ?
488b8e80941Smrg      nir_bany(b, nir_flt(b, src[0], nir_imm_float(b, 0.0))) :
489b8e80941Smrg      nir_fany_nequal4(b, nir_slt(b, src[0], nir_imm_float(b, 0.0)), nir_imm_float(b, 0.0));
490b8e80941Smrg
491b8e80941Smrg   nir_intrinsic_instr *discard =
492b8e80941Smrg      nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
493b8e80941Smrg   discard->src[0] = nir_src_for_ssa(cmp);
494b8e80941Smrg   nir_builder_instr_insert(b, &discard->instr);
495b8e80941Smrg}
496b8e80941Smrg
497b8e80941Smrgstatic void
498b8e80941Smrgptn_tex(struct ptn_compile *c, nir_alu_dest dest, nir_ssa_def **src,
499b8e80941Smrg        struct prog_instruction *prog_inst)
500b8e80941Smrg{
501b8e80941Smrg   nir_builder *b = &c->build;
502b8e80941Smrg   nir_tex_instr *instr;
503b8e80941Smrg   nir_texop op;
504b8e80941Smrg   unsigned num_srcs;
505b8e80941Smrg
506b8e80941Smrg   switch (prog_inst->Opcode) {
507b8e80941Smrg   case OPCODE_TEX:
508b8e80941Smrg      op = nir_texop_tex;
509b8e80941Smrg      num_srcs = 1;
510b8e80941Smrg      break;
511b8e80941Smrg   case OPCODE_TXB:
512b8e80941Smrg      op = nir_texop_txb;
513b8e80941Smrg      num_srcs = 2;
514b8e80941Smrg      break;
515b8e80941Smrg   case OPCODE_TXD:
516b8e80941Smrg      op = nir_texop_txd;
517b8e80941Smrg      num_srcs = 3;
518b8e80941Smrg      break;
519b8e80941Smrg   case OPCODE_TXL:
520b8e80941Smrg      op = nir_texop_txl;
521b8e80941Smrg      num_srcs = 2;
522b8e80941Smrg      break;
523b8e80941Smrg   case OPCODE_TXP:
524b8e80941Smrg      op = nir_texop_tex;
525b8e80941Smrg      num_srcs = 2;
526b8e80941Smrg      break;
527b8e80941Smrg   default:
528b8e80941Smrg      fprintf(stderr, "unknown tex op %d\n", prog_inst->Opcode);
529b8e80941Smrg      abort();
530b8e80941Smrg   }
531b8e80941Smrg
532b8e80941Smrg   /* Deref sources */
533b8e80941Smrg   num_srcs += 2;
534b8e80941Smrg
535b8e80941Smrg   if (prog_inst->TexShadow)
536b8e80941Smrg      num_srcs++;
537b8e80941Smrg
538b8e80941Smrg   instr = nir_tex_instr_create(b->shader, num_srcs);
539b8e80941Smrg   instr->op = op;
540b8e80941Smrg   instr->dest_type = nir_type_float;
541b8e80941Smrg   instr->is_shadow = prog_inst->TexShadow;
542b8e80941Smrg
543b8e80941Smrg   switch (prog_inst->TexSrcTarget) {
544b8e80941Smrg   case TEXTURE_1D_INDEX:
545b8e80941Smrg      instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
546b8e80941Smrg      break;
547b8e80941Smrg   case TEXTURE_2D_INDEX:
548b8e80941Smrg      instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
549b8e80941Smrg      break;
550b8e80941Smrg   case TEXTURE_3D_INDEX:
551b8e80941Smrg      instr->sampler_dim = GLSL_SAMPLER_DIM_3D;
552b8e80941Smrg      break;
553b8e80941Smrg   case TEXTURE_CUBE_INDEX:
554b8e80941Smrg      instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
555b8e80941Smrg      break;
556b8e80941Smrg   case TEXTURE_RECT_INDEX:
557b8e80941Smrg      instr->sampler_dim = GLSL_SAMPLER_DIM_RECT;
558b8e80941Smrg      break;
559b8e80941Smrg   default:
560b8e80941Smrg      fprintf(stderr, "Unknown texture target %d\n", prog_inst->TexSrcTarget);
561b8e80941Smrg      abort();
562b8e80941Smrg   }
563b8e80941Smrg
564b8e80941Smrg   switch (instr->sampler_dim) {
565b8e80941Smrg   case GLSL_SAMPLER_DIM_1D:
566b8e80941Smrg   case GLSL_SAMPLER_DIM_BUF:
567b8e80941Smrg      instr->coord_components = 1;
568b8e80941Smrg      break;
569b8e80941Smrg   case GLSL_SAMPLER_DIM_2D:
570b8e80941Smrg   case GLSL_SAMPLER_DIM_RECT:
571b8e80941Smrg   case GLSL_SAMPLER_DIM_EXTERNAL:
572b8e80941Smrg   case GLSL_SAMPLER_DIM_MS:
573b8e80941Smrg      instr->coord_components = 2;
574b8e80941Smrg      break;
575b8e80941Smrg   case GLSL_SAMPLER_DIM_3D:
576b8e80941Smrg   case GLSL_SAMPLER_DIM_CUBE:
577b8e80941Smrg      instr->coord_components = 3;
578b8e80941Smrg      break;
579b8e80941Smrg   case GLSL_SAMPLER_DIM_SUBPASS:
580b8e80941Smrg   case GLSL_SAMPLER_DIM_SUBPASS_MS:
581b8e80941Smrg      unreachable("can't reach");
582b8e80941Smrg   }
583b8e80941Smrg
584b8e80941Smrg   nir_variable *var = c->sampler_vars[prog_inst->TexSrcUnit];
585b8e80941Smrg   if (!var) {
586b8e80941Smrg      const struct glsl_type *type =
587b8e80941Smrg         glsl_sampler_type(instr->sampler_dim, false, false, GLSL_TYPE_FLOAT);
588b8e80941Smrg      var = nir_variable_create(b->shader, nir_var_uniform, type, "sampler");
589b8e80941Smrg      var->data.binding = prog_inst->TexSrcUnit;
590b8e80941Smrg      var->data.explicit_binding = true;
591b8e80941Smrg      c->sampler_vars[prog_inst->TexSrcUnit] = var;
592b8e80941Smrg   }
593b8e80941Smrg
594b8e80941Smrg   nir_deref_instr *deref = nir_build_deref_var(b, var);
595b8e80941Smrg
596b8e80941Smrg   unsigned src_number = 0;
597b8e80941Smrg
598b8e80941Smrg   instr->src[src_number].src = nir_src_for_ssa(&deref->dest.ssa);
599b8e80941Smrg   instr->src[src_number].src_type = nir_tex_src_texture_deref;
600b8e80941Smrg   src_number++;
601b8e80941Smrg   instr->src[src_number].src = nir_src_for_ssa(&deref->dest.ssa);
602b8e80941Smrg   instr->src[src_number].src_type = nir_tex_src_sampler_deref;
603b8e80941Smrg   src_number++;
604b8e80941Smrg
605b8e80941Smrg   instr->src[src_number].src =
606b8e80941Smrg      nir_src_for_ssa(nir_swizzle(b, src[0], SWIZ(X, Y, Z, W),
607b8e80941Smrg                                  instr->coord_components, true));
608b8e80941Smrg   instr->src[src_number].src_type = nir_tex_src_coord;
609b8e80941Smrg   src_number++;
610b8e80941Smrg
611b8e80941Smrg   if (prog_inst->Opcode == OPCODE_TXP) {
612b8e80941Smrg      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
613b8e80941Smrg      instr->src[src_number].src_type = nir_tex_src_projector;
614b8e80941Smrg      src_number++;
615b8e80941Smrg   }
616b8e80941Smrg
617b8e80941Smrg   if (prog_inst->Opcode == OPCODE_TXB) {
618b8e80941Smrg      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
619b8e80941Smrg      instr->src[src_number].src_type = nir_tex_src_bias;
620b8e80941Smrg      src_number++;
621b8e80941Smrg   }
622b8e80941Smrg
623b8e80941Smrg   if (prog_inst->Opcode == OPCODE_TXL) {
624b8e80941Smrg      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
625b8e80941Smrg      instr->src[src_number].src_type = nir_tex_src_lod;
626b8e80941Smrg      src_number++;
627b8e80941Smrg   }
628b8e80941Smrg
629b8e80941Smrg   if (instr->is_shadow) {
630b8e80941Smrg      if (instr->coord_components < 3)
631b8e80941Smrg         instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], Z));
632b8e80941Smrg      else
633b8e80941Smrg         instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
634b8e80941Smrg
635b8e80941Smrg      instr->src[src_number].src_type = nir_tex_src_comparator;
636b8e80941Smrg      src_number++;
637b8e80941Smrg   }
638b8e80941Smrg
639b8e80941Smrg   assert(src_number == num_srcs);
640b8e80941Smrg
641b8e80941Smrg   nir_ssa_dest_init(&instr->instr, &instr->dest, 4, 32, NULL);
642b8e80941Smrg   nir_builder_instr_insert(b, &instr->instr);
643b8e80941Smrg
644b8e80941Smrg   /* Resolve the writemask on the texture op. */
645b8e80941Smrg   ptn_move_dest(b, dest, &instr->dest.ssa);
646b8e80941Smrg}
647b8e80941Smrg
648b8e80941Smrgstatic const nir_op op_trans[MAX_OPCODE] = {
649b8e80941Smrg   [OPCODE_NOP] = 0,
650b8e80941Smrg   [OPCODE_ABS] = nir_op_fabs,
651b8e80941Smrg   [OPCODE_ADD] = nir_op_fadd,
652b8e80941Smrg   [OPCODE_ARL] = 0,
653b8e80941Smrg   [OPCODE_CMP] = 0,
654b8e80941Smrg   [OPCODE_COS] = 0,
655b8e80941Smrg   [OPCODE_DDX] = nir_op_fddx,
656b8e80941Smrg   [OPCODE_DDY] = nir_op_fddy,
657b8e80941Smrg   [OPCODE_DP2] = 0,
658b8e80941Smrg   [OPCODE_DP3] = 0,
659b8e80941Smrg   [OPCODE_DP4] = 0,
660b8e80941Smrg   [OPCODE_DPH] = 0,
661b8e80941Smrg   [OPCODE_DST] = 0,
662b8e80941Smrg   [OPCODE_END] = 0,
663b8e80941Smrg   [OPCODE_EX2] = 0,
664b8e80941Smrg   [OPCODE_EXP] = 0,
665b8e80941Smrg   [OPCODE_FLR] = nir_op_ffloor,
666b8e80941Smrg   [OPCODE_FRC] = nir_op_ffract,
667b8e80941Smrg   [OPCODE_LG2] = 0,
668b8e80941Smrg   [OPCODE_LIT] = 0,
669b8e80941Smrg   [OPCODE_LOG] = 0,
670b8e80941Smrg   [OPCODE_LRP] = 0,
671b8e80941Smrg   [OPCODE_MAD] = 0,
672b8e80941Smrg   [OPCODE_MAX] = nir_op_fmax,
673b8e80941Smrg   [OPCODE_MIN] = nir_op_fmin,
674b8e80941Smrg   [OPCODE_MOV] = nir_op_fmov,
675b8e80941Smrg   [OPCODE_MUL] = nir_op_fmul,
676b8e80941Smrg   [OPCODE_POW] = 0,
677b8e80941Smrg   [OPCODE_RCP] = 0,
678b8e80941Smrg
679b8e80941Smrg   [OPCODE_RSQ] = 0,
680b8e80941Smrg   [OPCODE_SCS] = 0,
681b8e80941Smrg   [OPCODE_SGE] = 0,
682b8e80941Smrg   [OPCODE_SIN] = 0,
683b8e80941Smrg   [OPCODE_SLT] = 0,
684b8e80941Smrg   [OPCODE_SSG] = nir_op_fsign,
685b8e80941Smrg   [OPCODE_SUB] = nir_op_fsub,
686b8e80941Smrg   [OPCODE_SWZ] = 0,
687b8e80941Smrg   [OPCODE_TEX] = 0,
688b8e80941Smrg   [OPCODE_TRUNC] = nir_op_ftrunc,
689b8e80941Smrg   [OPCODE_TXB] = 0,
690b8e80941Smrg   [OPCODE_TXD] = 0,
691b8e80941Smrg   [OPCODE_TXL] = 0,
692b8e80941Smrg   [OPCODE_TXP] = 0,
693b8e80941Smrg   [OPCODE_XPD] = 0,
694b8e80941Smrg};
695b8e80941Smrg
696b8e80941Smrgstatic void
697b8e80941Smrgptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst)
698b8e80941Smrg{
699b8e80941Smrg   nir_builder *b = &c->build;
700b8e80941Smrg   unsigned i;
701b8e80941Smrg   const unsigned op = prog_inst->Opcode;
702b8e80941Smrg
703b8e80941Smrg   if (op == OPCODE_END)
704b8e80941Smrg      return;
705b8e80941Smrg
706b8e80941Smrg   nir_ssa_def *src[3];
707b8e80941Smrg   for (i = 0; i < 3; i++) {
708b8e80941Smrg      src[i] = ptn_get_src(c, &prog_inst->SrcReg[i]);
709b8e80941Smrg   }
710b8e80941Smrg   nir_alu_dest dest = ptn_get_dest(c, &prog_inst->DstReg);
711b8e80941Smrg   if (c->error)
712b8e80941Smrg      return;
713b8e80941Smrg
714b8e80941Smrg   switch (op) {
715b8e80941Smrg   case OPCODE_RSQ:
716b8e80941Smrg      ptn_move_dest(b, dest,
717b8e80941Smrg                    nir_frsq(b, nir_fabs(b, ptn_channel(b, src[0], X))));
718b8e80941Smrg      break;
719b8e80941Smrg
720b8e80941Smrg   case OPCODE_RCP:
721b8e80941Smrg      ptn_move_dest(b, dest, nir_frcp(b, ptn_channel(b, src[0], X)));
722b8e80941Smrg      break;
723b8e80941Smrg
724b8e80941Smrg   case OPCODE_EX2:
725b8e80941Smrg      ptn_move_dest(b, dest, nir_fexp2(b, ptn_channel(b, src[0], X)));
726b8e80941Smrg      break;
727b8e80941Smrg
728b8e80941Smrg   case OPCODE_LG2:
729b8e80941Smrg      ptn_move_dest(b, dest, nir_flog2(b, ptn_channel(b, src[0], X)));
730b8e80941Smrg      break;
731b8e80941Smrg
732b8e80941Smrg   case OPCODE_POW:
733b8e80941Smrg      ptn_move_dest(b, dest, nir_fpow(b,
734b8e80941Smrg                                      ptn_channel(b, src[0], X),
735b8e80941Smrg                                      ptn_channel(b, src[1], X)));
736b8e80941Smrg      break;
737b8e80941Smrg
738b8e80941Smrg   case OPCODE_COS:
739b8e80941Smrg      ptn_move_dest(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)));
740b8e80941Smrg      break;
741b8e80941Smrg
742b8e80941Smrg   case OPCODE_SIN:
743b8e80941Smrg      ptn_move_dest(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)));
744b8e80941Smrg      break;
745b8e80941Smrg
746b8e80941Smrg   case OPCODE_ARL:
747b8e80941Smrg      ptn_arl(b, dest, src);
748b8e80941Smrg      break;
749b8e80941Smrg
750b8e80941Smrg   case OPCODE_EXP:
751b8e80941Smrg      ptn_exp(b, dest, src);
752b8e80941Smrg      break;
753b8e80941Smrg
754b8e80941Smrg   case OPCODE_LOG:
755b8e80941Smrg      ptn_log(b, dest, src);
756b8e80941Smrg      break;
757b8e80941Smrg
758b8e80941Smrg   case OPCODE_LRP:
759b8e80941Smrg      ptn_lrp(b, dest, src);
760b8e80941Smrg      break;
761b8e80941Smrg
762b8e80941Smrg   case OPCODE_MAD:
763b8e80941Smrg      ptn_move_dest(b, dest, nir_fadd(b, nir_fmul(b, src[0], src[1]), src[2]));
764b8e80941Smrg      break;
765b8e80941Smrg
766b8e80941Smrg   case OPCODE_DST:
767b8e80941Smrg      ptn_dst(b, dest, src);
768b8e80941Smrg      break;
769b8e80941Smrg
770b8e80941Smrg   case OPCODE_LIT:
771b8e80941Smrg      ptn_lit(b, dest, src);
772b8e80941Smrg      break;
773b8e80941Smrg
774b8e80941Smrg   case OPCODE_XPD:
775b8e80941Smrg      ptn_xpd(b, dest, src);
776b8e80941Smrg      break;
777b8e80941Smrg
778b8e80941Smrg   case OPCODE_DP2:
779b8e80941Smrg      ptn_dp2(b, dest, src);
780b8e80941Smrg      break;
781b8e80941Smrg
782b8e80941Smrg   case OPCODE_DP3:
783b8e80941Smrg      ptn_dp3(b, dest, src);
784b8e80941Smrg      break;
785b8e80941Smrg
786b8e80941Smrg   case OPCODE_DP4:
787b8e80941Smrg      ptn_dp4(b, dest, src);
788b8e80941Smrg      break;
789b8e80941Smrg
790b8e80941Smrg   case OPCODE_DPH:
791b8e80941Smrg      ptn_dph(b, dest, src);
792b8e80941Smrg      break;
793b8e80941Smrg
794b8e80941Smrg   case OPCODE_KIL:
795b8e80941Smrg      ptn_kil(b, src);
796b8e80941Smrg      break;
797b8e80941Smrg
798b8e80941Smrg   case OPCODE_CMP:
799b8e80941Smrg      ptn_cmp(b, dest, src);
800b8e80941Smrg      break;
801b8e80941Smrg
802b8e80941Smrg   case OPCODE_SCS:
803b8e80941Smrg      ptn_scs(b, dest, src);
804b8e80941Smrg      break;
805b8e80941Smrg
806b8e80941Smrg   case OPCODE_SLT:
807b8e80941Smrg      ptn_slt(b, dest, src);
808b8e80941Smrg      break;
809b8e80941Smrg
810b8e80941Smrg   case OPCODE_SGE:
811b8e80941Smrg      ptn_sge(b, dest, src);
812b8e80941Smrg      break;
813b8e80941Smrg
814b8e80941Smrg   case OPCODE_TEX:
815b8e80941Smrg   case OPCODE_TXB:
816b8e80941Smrg   case OPCODE_TXD:
817b8e80941Smrg   case OPCODE_TXL:
818b8e80941Smrg   case OPCODE_TXP:
819b8e80941Smrg      ptn_tex(c, dest, src, prog_inst);
820b8e80941Smrg      break;
821b8e80941Smrg
822b8e80941Smrg   case OPCODE_SWZ:
823b8e80941Smrg      /* Extended swizzles were already handled in ptn_get_src(). */
824b8e80941Smrg      ptn_alu(b, nir_op_fmov, dest, src);
825b8e80941Smrg      break;
826b8e80941Smrg
827b8e80941Smrg   case OPCODE_NOP:
828b8e80941Smrg      break;
829b8e80941Smrg
830b8e80941Smrg   default:
831b8e80941Smrg      if (op_trans[op] != 0) {
832b8e80941Smrg         ptn_alu(b, op_trans[op], dest, src);
833b8e80941Smrg      } else {
834b8e80941Smrg         fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op));
835b8e80941Smrg         abort();
836b8e80941Smrg      }
837b8e80941Smrg      break;
838b8e80941Smrg   }
839b8e80941Smrg
840b8e80941Smrg   if (prog_inst->Saturate) {
841b8e80941Smrg      assert(prog_inst->Saturate);
842b8e80941Smrg      assert(!dest.dest.is_ssa);
843b8e80941Smrg      ptn_move_dest(b, dest, nir_fsat(b, ptn_src_for_dest(c, &dest)));
844b8e80941Smrg   }
845b8e80941Smrg}
846b8e80941Smrg
847b8e80941Smrg/**
848b8e80941Smrg * Puts a NIR intrinsic to store of each PROGRAM_OUTPUT value to the output
849b8e80941Smrg * variables at the end of the shader.
850b8e80941Smrg *
851b8e80941Smrg * We don't generate these incrementally as the PROGRAM_OUTPUT values are
852b8e80941Smrg * written, because there's no output load intrinsic, which means we couldn't
853b8e80941Smrg * handle writemasks.
854b8e80941Smrg */
855b8e80941Smrgstatic void
856b8e80941Smrgptn_add_output_stores(struct ptn_compile *c)
857b8e80941Smrg{
858b8e80941Smrg   nir_builder *b = &c->build;
859b8e80941Smrg
860b8e80941Smrg   nir_foreach_variable(var, &b->shader->outputs) {
861b8e80941Smrg      nir_ssa_def *src = nir_load_reg(b, c->output_regs[var->data.location]);
862b8e80941Smrg      if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
863b8e80941Smrg          var->data.location == FRAG_RESULT_DEPTH) {
864b8e80941Smrg         /* result.depth has this strange convention of being the .z component of
865b8e80941Smrg          * a vec4 with undefined .xyw components.  We resolve it to a scalar, to
866b8e80941Smrg          * match GLSL's gl_FragDepth and the expectations of most backends.
867b8e80941Smrg          */
868b8e80941Smrg         src = nir_channel(b, src, 2);
869b8e80941Smrg      }
870b8e80941Smrg      if (c->prog->Target == GL_VERTEX_PROGRAM_ARB &&
871b8e80941Smrg          var->data.location == VARYING_SLOT_FOGC) {
872b8e80941Smrg         /* result.fogcoord is a single component value */
873b8e80941Smrg         src = nir_channel(b, src, 0);
874b8e80941Smrg      }
875b8e80941Smrg      unsigned num_components = glsl_get_vector_elements(var->type);
876b8e80941Smrg      nir_store_var(b, var, src, (1 << num_components) - 1);
877b8e80941Smrg   }
878b8e80941Smrg}
879b8e80941Smrg
880b8e80941Smrgstatic void
881b8e80941Smrgsetup_registers_and_variables(struct ptn_compile *c)
882b8e80941Smrg{
883b8e80941Smrg   nir_builder *b = &c->build;
884b8e80941Smrg   struct nir_shader *shader = b->shader;
885b8e80941Smrg
886b8e80941Smrg   /* Create input variables. */
887b8e80941Smrg   uint64_t inputs_read = c->prog->info.inputs_read;
888b8e80941Smrg   while (inputs_read) {
889b8e80941Smrg      const int i = u_bit_scan64(&inputs_read);
890b8e80941Smrg
891b8e80941Smrg      nir_variable *var =
892b8e80941Smrg         nir_variable_create(shader, nir_var_shader_in, glsl_vec4_type(),
893b8e80941Smrg                             ralloc_asprintf(shader, "in_%d", i));
894b8e80941Smrg      var->data.location = i;
895b8e80941Smrg      var->data.index = 0;
896b8e80941Smrg
897b8e80941Smrg      if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
898b8e80941Smrg         if (i == VARYING_SLOT_FOGC) {
899b8e80941Smrg            /* fogcoord is defined as <f, 0.0, 0.0, 1.0>.  Make the actual
900b8e80941Smrg             * input variable a float, and create a local containing the
901b8e80941Smrg             * full vec4 value.
902b8e80941Smrg             */
903b8e80941Smrg            var->type = glsl_float_type();
904b8e80941Smrg
905b8e80941Smrg            nir_variable *fullvar =
906b8e80941Smrg               nir_local_variable_create(b->impl, glsl_vec4_type(),
907b8e80941Smrg                                         "fogcoord_tmp");
908b8e80941Smrg
909b8e80941Smrg            nir_store_var(b, fullvar,
910b8e80941Smrg                          nir_vec4(b, nir_load_var(b, var),
911b8e80941Smrg                                   nir_imm_float(b, 0.0),
912b8e80941Smrg                                   nir_imm_float(b, 0.0),
913b8e80941Smrg                                   nir_imm_float(b, 1.0)),
914b8e80941Smrg                          WRITEMASK_XYZW);
915b8e80941Smrg
916b8e80941Smrg            /* We inserted the real input into the list so the driver has real
917b8e80941Smrg             * inputs, but we set c->input_vars[i] to the temporary so we use
918b8e80941Smrg             * the splatted value.
919b8e80941Smrg             */
920b8e80941Smrg            c->input_vars[i] = fullvar;
921b8e80941Smrg            continue;
922b8e80941Smrg         }
923b8e80941Smrg      }
924b8e80941Smrg
925b8e80941Smrg      c->input_vars[i] = var;
926b8e80941Smrg   }
927b8e80941Smrg
928b8e80941Smrg   /* Create system value variables */
929b8e80941Smrg   uint64_t system_values_read = c->prog->info.system_values_read;
930b8e80941Smrg   while (system_values_read) {
931b8e80941Smrg      const int i = u_bit_scan64(&system_values_read);
932b8e80941Smrg
933b8e80941Smrg      nir_variable *var =
934b8e80941Smrg         nir_variable_create(shader, nir_var_system_value, glsl_vec4_type(),
935b8e80941Smrg                             ralloc_asprintf(shader, "sv_%d", i));
936b8e80941Smrg      var->data.location = i;
937b8e80941Smrg      var->data.index = 0;
938b8e80941Smrg
939b8e80941Smrg      c->sysval_vars[i] = var;
940b8e80941Smrg   }
941b8e80941Smrg
942b8e80941Smrg   /* Create output registers and variables. */
943b8e80941Smrg   int max_outputs = util_last_bit(c->prog->info.outputs_written);
944b8e80941Smrg   c->output_regs = rzalloc_array(c, nir_register *, max_outputs);
945b8e80941Smrg
946b8e80941Smrg   uint64_t outputs_written = c->prog->info.outputs_written;
947b8e80941Smrg   while (outputs_written) {
948b8e80941Smrg      const int i = u_bit_scan64(&outputs_written);
949b8e80941Smrg
950b8e80941Smrg      /* Since we can't load from outputs in the IR, we make temporaries
951b8e80941Smrg       * for the outputs and emit stores to the real outputs at the end of
952b8e80941Smrg       * the shader.
953b8e80941Smrg       */
954b8e80941Smrg      nir_register *reg = nir_local_reg_create(b->impl);
955b8e80941Smrg      reg->num_components = 4;
956b8e80941Smrg
957b8e80941Smrg      nir_variable *var = rzalloc(shader, nir_variable);
958b8e80941Smrg      if ((c->prog->Target == GL_FRAGMENT_PROGRAM_ARB && i == FRAG_RESULT_DEPTH) ||
959b8e80941Smrg          (c->prog->Target == GL_VERTEX_PROGRAM_ARB && i == VARYING_SLOT_FOGC))
960b8e80941Smrg         var->type = glsl_float_type();
961b8e80941Smrg      else
962b8e80941Smrg         var->type = glsl_vec4_type();
963b8e80941Smrg      var->data.mode = nir_var_shader_out;
964b8e80941Smrg      var->name = ralloc_asprintf(var, "out_%d", i);
965b8e80941Smrg
966b8e80941Smrg      var->data.location = i;
967b8e80941Smrg      var->data.index = 0;
968b8e80941Smrg
969b8e80941Smrg      c->output_regs[i] = reg;
970b8e80941Smrg
971b8e80941Smrg      exec_list_push_tail(&shader->outputs, &var->node);
972b8e80941Smrg      c->output_vars[i] = var;
973b8e80941Smrg   }
974b8e80941Smrg
975b8e80941Smrg   /* Create temporary registers. */
976b8e80941Smrg   c->temp_regs = rzalloc_array(c, nir_register *,
977b8e80941Smrg                                c->prog->arb.NumTemporaries);
978b8e80941Smrg
979b8e80941Smrg   nir_register *reg;
980b8e80941Smrg   for (unsigned i = 0; i < c->prog->arb.NumTemporaries; i++) {
981b8e80941Smrg      reg = nir_local_reg_create(b->impl);
982b8e80941Smrg      if (!reg) {
983b8e80941Smrg         c->error = true;
984b8e80941Smrg         return;
985b8e80941Smrg      }
986b8e80941Smrg      reg->num_components = 4;
987b8e80941Smrg      c->temp_regs[i] = reg;
988b8e80941Smrg   }
989b8e80941Smrg
990b8e80941Smrg   /* Create the address register (for ARB_vertex_program). */
991b8e80941Smrg   reg = nir_local_reg_create(b->impl);
992b8e80941Smrg   if (!reg) {
993b8e80941Smrg      c->error = true;
994b8e80941Smrg      return;
995b8e80941Smrg   }
996b8e80941Smrg   reg->num_components = 1;
997b8e80941Smrg   c->addr_reg = reg;
998b8e80941Smrg}
999b8e80941Smrg
1000b8e80941Smrgstruct nir_shader *
1001b8e80941Smrgprog_to_nir(const struct gl_program *prog,
1002b8e80941Smrg            const nir_shader_compiler_options *options)
1003b8e80941Smrg{
1004b8e80941Smrg   struct ptn_compile *c;
1005b8e80941Smrg   struct nir_shader *s;
1006b8e80941Smrg   gl_shader_stage stage = _mesa_program_enum_to_shader_stage(prog->Target);
1007b8e80941Smrg
1008b8e80941Smrg   c = rzalloc(NULL, struct ptn_compile);
1009b8e80941Smrg   if (!c)
1010b8e80941Smrg      return NULL;
1011b8e80941Smrg   c->prog = prog;
1012b8e80941Smrg
1013b8e80941Smrg   nir_builder_init_simple_shader(&c->build, NULL, stage, options);
1014b8e80941Smrg
1015b8e80941Smrg   /* Copy the shader_info from the gl_program */
1016b8e80941Smrg   c->build.shader->info = prog->info;
1017b8e80941Smrg
1018b8e80941Smrg   s = c->build.shader;
1019b8e80941Smrg
1020b8e80941Smrg   if (prog->Parameters->NumParameters > 0) {
1021b8e80941Smrg      const struct glsl_type *type =
1022b8e80941Smrg         glsl_array_type(glsl_vec4_type(), prog->Parameters->NumParameters, 0);
1023b8e80941Smrg      c->parameters =
1024b8e80941Smrg         nir_variable_create(s, nir_var_uniform, type,
1025b8e80941Smrg                             prog->Parameters->Parameters[0].Name);
1026b8e80941Smrg   }
1027b8e80941Smrg
1028b8e80941Smrg   setup_registers_and_variables(c);
1029b8e80941Smrg   if (unlikely(c->error))
1030b8e80941Smrg      goto fail;
1031b8e80941Smrg
1032b8e80941Smrg   for (unsigned int i = 0; i < prog->arb.NumInstructions; i++) {
1033b8e80941Smrg      ptn_emit_instruction(c, &prog->arb.Instructions[i]);
1034b8e80941Smrg
1035b8e80941Smrg      if (unlikely(c->error))
1036b8e80941Smrg         break;
1037b8e80941Smrg   }
1038b8e80941Smrg
1039b8e80941Smrg   ptn_add_output_stores(c);
1040b8e80941Smrg
1041b8e80941Smrg   s->info.name = ralloc_asprintf(s, "ARB%d", prog->Id);
1042b8e80941Smrg   s->info.num_textures = util_last_bit(prog->SamplersUsed);
1043b8e80941Smrg   s->info.num_ubos = 0;
1044b8e80941Smrg   s->info.num_abos = 0;
1045b8e80941Smrg   s->info.num_ssbos = 0;
1046b8e80941Smrg   s->info.num_images = 0;
1047b8e80941Smrg   s->info.uses_texture_gather = false;
1048b8e80941Smrg   s->info.clip_distance_array_size = 0;
1049b8e80941Smrg   s->info.cull_distance_array_size = 0;
1050b8e80941Smrg   s->info.separate_shader = false;
1051b8e80941Smrg
1052b8e80941Smrgfail:
1053b8e80941Smrg   if (c->error) {
1054b8e80941Smrg      ralloc_free(s);
1055b8e80941Smrg      s = NULL;
1056b8e80941Smrg   }
1057b8e80941Smrg   ralloc_free(c);
1058b8e80941Smrg   return s;
1059b8e80941Smrg}
1060