prog_to_nir.c revision b8e80941
1/*
2 * Copyright © 2015 Intel Corporation
3 * Copyright © 2014-2015 Broadcom
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23 * IN THE SOFTWARE.
24 */
25
26#include "compiler/nir/nir.h"
27#include "compiler/nir/nir_builder.h"
28#include "compiler/glsl/list.h"
29#include "main/imports.h"
30#include "main/mtypes.h"
31#include "util/ralloc.h"
32
33#include "prog_to_nir.h"
34#include "prog_instruction.h"
35#include "prog_parameter.h"
36#include "prog_print.h"
37#include "program.h"
38
39/**
40 * \file prog_to_nir.c
41 *
42 * A translator from Mesa IR (prog_instruction.h) to NIR.  This is primarily
43 * intended to support ARB_vertex_program, ARB_fragment_program, and fixed-function
44 * vertex processing.  Full GLSL support should use glsl_to_nir instead.
45 */
46
/**
 * Per-translation state shared by the helpers in this file.
 *
 * One instance is allocated by prog_to_nir() for the duration of a single
 * translation and freed before it returns.
 */
struct ptn_compile {
   /* The Mesa IR program being translated. */
   const struct gl_program *prog;
   /* Builder through which all NIR instructions are emitted. */
   nir_builder build;
   /* Set when setup fails; prog_to_nir() then frees the shader and
    * returns NULL.
    */
   bool error;

   /* Uniform array of vec4s sized by prog->Parameters->NumParameters.
    * Left NULL when the program has no parameters.
    */
   nir_variable *parameters;
   /* Variables read for PROGRAM_INPUT sources, indexed by input slot. */
   nir_variable *input_vars[VARYING_SLOT_MAX];
   /* Shader output variables, indexed by output slot. */
   nir_variable *output_vars[VARYING_SLOT_MAX];
   /* Variables read for PROGRAM_SYSTEM_VALUE sources. */
   nir_variable *sysval_vars[SYSTEM_VALUE_MAX];
   /* Sampler uniforms, created on first use by ptn_tex(). */
   nir_variable *sampler_vars[32]; /* matches number of bits in TexSrcUnit */
   /* 4-component registers standing in for outputs until
    * ptn_add_output_stores() copies them to the output variables.
    */
   nir_register **output_regs;
   /* One 4-component register per PROGRAM_TEMPORARY. */
   nir_register **temp_regs;

   /* Single-component register backing PROGRAM_ADDRESS index 0. */
   nir_register *addr_reg;
};
62
/* Builds an anonymous 4-entry swizzle array from component names, e.g.
 * SWIZ(X, Y, Z, W) expands to { SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W }.
 */
#define SWIZ(X, Y, Z, W) \
   (unsigned[4]){ SWIZZLE_##X, SWIZZLE_##Y, SWIZZLE_##Z, SWIZZLE_##W }
/* Selects the single component `ch` (X, Y, Z or W) of `src` through
 * nir_swizzle(), requesting one component.
 */
#define ptn_channel(b, src, ch) nir_swizzle(b, src, SWIZ(ch, ch, ch, ch), 1, true)
66
67static nir_ssa_def *
68ptn_src_for_dest(struct ptn_compile *c, nir_alu_dest *dest)
69{
70   nir_builder *b = &c->build;
71
72   nir_alu_src src;
73   memset(&src, 0, sizeof(src));
74
75   if (dest->dest.is_ssa)
76      src.src = nir_src_for_ssa(&dest->dest.ssa);
77   else {
78      assert(!dest->dest.reg.indirect);
79      src.src = nir_src_for_reg(dest->dest.reg.reg);
80      src.src.reg.base_offset = dest->dest.reg.base_offset;
81   }
82
83   for (int i = 0; i < 4; i++)
84      src.swizzle[i] = i;
85
86   return nir_fmov_alu(b, src, 4);
87}
88
89static nir_alu_dest
90ptn_get_dest(struct ptn_compile *c, const struct prog_dst_register *prog_dst)
91{
92   nir_alu_dest dest;
93
94   memset(&dest, 0, sizeof(dest));
95
96   switch (prog_dst->File) {
97   case PROGRAM_TEMPORARY:
98      dest.dest.reg.reg = c->temp_regs[prog_dst->Index];
99      break;
100   case PROGRAM_OUTPUT:
101      dest.dest.reg.reg = c->output_regs[prog_dst->Index];
102      break;
103   case PROGRAM_ADDRESS:
104      assert(prog_dst->Index == 0);
105      dest.dest.reg.reg = c->addr_reg;
106      break;
107   case PROGRAM_UNDEFINED:
108      break;
109   }
110
111   dest.write_mask = prog_dst->WriteMask;
112   dest.saturate = false;
113
114   assert(!prog_dst->RelAddr);
115
116   return dest;
117}
118
/**
 * Emits the NIR needed to read a Mesa IR source register and returns it as a
 * 4-component SSA value with the register's swizzle and negation applied.
 *
 * PROGRAM_UNDEFINED sources yield an immediate 0.0 (returned directly, with
 * no swizzle or negate applied).  Unknown register files print a message to
 * stderr and abort().
 */
static nir_ssa_def *
ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
{
   nir_builder *b = &c->build;
   nir_alu_src src;

   memset(&src, 0, sizeof(src));

   /* Step 1: resolve the register file to a NIR source. */
   switch (prog_src->File) {
   case PROGRAM_UNDEFINED:
      return nir_imm_float(b, 0.0);
   case PROGRAM_TEMPORARY:
      assert(!prog_src->RelAddr && prog_src->Index >= 0);
      src.src.reg.reg = c->temp_regs[prog_src->Index];
      break;
   case PROGRAM_INPUT: {
      /* ARB_vertex_program doesn't allow relative addressing on vertex
       * attributes; ARB_fragment_program has no relative addressing at all.
       */
      assert(!prog_src->RelAddr);

      assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX);

      nir_variable *var = c->input_vars[prog_src->Index];
      src.src = nir_src_for_ssa(nir_load_var(b, var));
      break;
   }
   case PROGRAM_SYSTEM_VALUE: {
      assert(!prog_src->RelAddr);

      assert(prog_src->Index >= 0 && prog_src->Index < SYSTEM_VALUE_MAX);

      nir_variable *var = c->sysval_vars[prog_src->Index];
      src.src = nir_src_for_ssa(nir_load_var(b, var));
      break;
   }
   case PROGRAM_STATE_VAR:
   case PROGRAM_CONSTANT: {
      /* We actually want to look at the type in the Parameters list for this,
       * because it lets us upload constant builtin uniforms as actual
       * constants.
       */
      struct gl_program_parameter_list *plist = c->prog->Parameters;
      /* With relative addressing the parameter actually read is not known
       * here, so the file named by the instruction is used instead.
       */
      gl_register_file file = prog_src->RelAddr ? prog_src->File :
         plist->Parameters[prog_src->Index].Type;

      switch (file) {
      case PROGRAM_CONSTANT:
         /* Constants are inlined as immediates only when the program never
          * indirectly addresses the constant file; otherwise they are read
          * from the parameters array like state variables.
          */
         if ((c->prog->arb.IndirectRegisterFiles &
              (1 << PROGRAM_CONSTANT)) == 0) {
            unsigned pvo = plist->ParameterValueOffset[prog_src->Index];
            float *v = (float *) plist->ParameterValues + pvo;
            src.src = nir_src_for_ssa(nir_imm_vec4(b, v[0], v[1], v[2], v[3]));
            break;
         }
         /* FALLTHROUGH */
      case PROGRAM_STATE_VAR: {
         assert(c->parameters != NULL);

         nir_deref_instr *deref = nir_build_deref_var(b, c->parameters);

         /* Array index = static index, plus the address register when the
          * source is relatively addressed.
          */
         nir_ssa_def *index = nir_imm_int(b, prog_src->Index);
         if (prog_src->RelAddr)
            index = nir_iadd(b, index, nir_load_reg(b, c->addr_reg));
         deref = nir_build_deref_array(b, deref, nir_channel(b, index, 0));

         src.src = nir_src_for_ssa(nir_load_deref(b, deref));
         break;
      }
      default:
         fprintf(stderr, "bad uniform src register file: %s (%d)\n",
                 _mesa_register_file_name(file), file);
         abort();
      }
      break;
   }
   default:
      fprintf(stderr, "unknown src register file: %s (%d)\n",
              _mesa_register_file_name(prog_src->File), prog_src->File);
      abort();
   }

   /* Step 2: apply swizzle and negation. */
   nir_ssa_def *def;
   if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle) &&
       (prog_src->Negate == NEGATE_NONE || prog_src->Negate == NEGATE_XYZW)) {
      /* The simple non-SWZ case. */
      for (int i = 0; i < 4; i++)
         src.swizzle[i] = GET_SWZ(prog_src->Swizzle, i);

      def = nir_fmov_alu(b, src, 4);

      /* Here Negate is either none or all four components, so a single
       * vector negate suffices.
       */
      if (prog_src->Negate)
         def = nir_fneg(b, def);
   } else {
      /* The SWZ instruction allows per-component zero/one swizzles, and also
       * per-component negation.
       */
      nir_ssa_def *chans[4];
      for (int i = 0; i < 4; i++) {
         int swizzle = GET_SWZ(prog_src->Swizzle, i);
         if (swizzle == SWIZZLE_ZERO) {
            chans[i] = nir_imm_float(b, 0.0);
         } else if (swizzle == SWIZZLE_ONE) {
            chans[i] = nir_imm_float(b, 1.0);
         } else {
            assert(swizzle != SWIZZLE_NIL);
            /* Scalar fmov that picks the selected source component. */
            nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
            nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, 32, NULL);
            mov->dest.write_mask = 0x1;
            mov->src[0] = src;
            mov->src[0].swizzle[0] = swizzle;
            nir_builder_instr_insert(b, &mov->instr);

            chans[i] = &mov->dest.dest.ssa;
         }

         /* Negate is a per-component bitmask in this path. */
         if (prog_src->Negate & (1 << i))
            chans[i] = nir_fneg(b, chans[i]);
      }
      def = nir_vec4(b, chans[0], chans[1], chans[2], chans[3]);
   }

   return def;
}
243
244static void
245ptn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
246{
247   unsigned num_srcs = nir_op_infos[op].num_inputs;
248   nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
249   unsigned i;
250
251   for (i = 0; i < num_srcs; i++)
252      instr->src[i].src = nir_src_for_ssa(src[i]);
253
254   instr->dest = dest;
255   nir_builder_instr_insert(b, &instr->instr);
256}
257
258static void
259ptn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
260                     nir_ssa_def *def, unsigned write_mask)
261{
262   if (!(dest.write_mask & write_mask))
263      return;
264
265   nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
266   if (!mov)
267      return;
268
269   mov->dest = dest;
270   mov->dest.write_mask &= write_mask;
271   mov->src[0].src = nir_src_for_ssa(def);
272   for (unsigned i = def->num_components; i < 4; i++)
273      mov->src[0].swizzle[i] = def->num_components - 1;
274   nir_builder_instr_insert(b, &mov->instr);
275}
276
/**
 * Moves \p def into \p dest, honouring only the destination's own write
 * mask (all four components are offered to ptn_move_dest_masked()).
 */
static void
ptn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
{
   ptn_move_dest_masked(b, dest, def, WRITEMASK_XYZW);
}
282
283static void
284ptn_arl(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
285{
286   ptn_move_dest(b, dest, nir_f2i32(b, nir_ffloor(b, src[0])));
287}
288
289/* EXP - Approximate Exponential Base 2
290 *  dst.x = 2^{\lfloor src.x\rfloor}
291 *  dst.y = src.x - \lfloor src.x\rfloor
292 *  dst.z = 2^{src.x}
293 *  dst.w = 1.0
294 */
295static void
296ptn_exp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
297{
298   nir_ssa_def *srcx = ptn_channel(b, src[0], X);
299
300   ptn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)), WRITEMASK_X);
301   ptn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)), WRITEMASK_Y);
302   ptn_move_dest_masked(b, dest, nir_fexp2(b, srcx), WRITEMASK_Z);
303   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
304}
305
306/* LOG - Approximate Logarithm Base 2
307 *  dst.x = \lfloor\log_2{|src.x|}\rfloor
308 *  dst.y = |src.x| * 2^{-\lfloor\log_2{|src.x|}\rfloor}}
309 *  dst.z = \log_2{|src.x|}
310 *  dst.w = 1.0
311 */
312static void
313ptn_log(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
314{
315   nir_ssa_def *abs_srcx = nir_fabs(b, ptn_channel(b, src[0], X));
316   nir_ssa_def *log2 = nir_flog2(b, abs_srcx);
317   nir_ssa_def *floor_log2 = nir_ffloor(b, log2);
318
319   ptn_move_dest_masked(b, dest, floor_log2, WRITEMASK_X);
320   ptn_move_dest_masked(b, dest,
321                        nir_fmul(b, abs_srcx,
322                                 nir_fexp2(b, nir_fneg(b, floor_log2))),
323                        WRITEMASK_Y);
324   ptn_move_dest_masked(b, dest, log2, WRITEMASK_Z);
325   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
326}
327
328/* DST - Distance Vector
329 *   dst.x = 1.0
330 *   dst.y = src0.y \times src1.y
331 *   dst.z = src0.z
332 *   dst.w = src1.w
333 */
334static void
335ptn_dst(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
336{
337   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_X);
338   ptn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), WRITEMASK_Y);
339   ptn_move_dest_masked(b, dest, nir_fmov(b, src[0]), WRITEMASK_Z);
340   ptn_move_dest_masked(b, dest, nir_fmov(b, src[1]), WRITEMASK_W);
341}
342
/* LIT - Light Coefficients
 *  dst.x = 1.0
 *  dst.y = max(src.x, 0.0)
 *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
 *  dst.w = 1.0
 *
 * The z term is only built when the destination actually writes z.
 */
static void
ptn_lit(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   /* x and w are both the constant 1.0, so one masked move covers them. */
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_XW);

   ptn_move_dest_masked(b, dest, nir_fmax(b, ptn_channel(b, src[0], X),
                                          nir_imm_float(b, 0.0)), WRITEMASK_Y);

   if (dest.write_mask & WRITEMASK_Z) {
      nir_ssa_def *src0_y = ptn_channel(b, src[0], Y);
      /* clamp(src.w, -128, 128) expressed as max(min(w, 128), -128). */
      nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ptn_channel(b, src[0], W),
                                                 nir_imm_float(b, 128.0)),
                                     nir_imm_float(b, -128.0));
      nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
                                  wclamp);

      /* Select 0.0 when 0.0 >= src.x, otherwise the power term.  The
       * comparison/select pair used depends on whether the backend has
       * native integers.
       */
      nir_ssa_def *z;
      if (b->shader->options->native_integers) {
         z = nir_bcsel(b,
                       nir_fge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
                       nir_imm_float(b, 0.0),
                       pow);
      } else {
         z = nir_fcsel(b,
                       nir_sge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
                       nir_imm_float(b, 0.0),
                       pow);
      }

      ptn_move_dest_masked(b, dest, z, WRITEMASK_Z);
   }
}
381
382/* SCS - Sine Cosine
383 *   dst.x = \cos{src.x}
384 *   dst.y = \sin{src.x}
385 *   dst.z = 0.0
386 *   dst.w = 1.0
387 */
388static void
389ptn_scs(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
390{
391   ptn_move_dest_masked(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)),
392                        WRITEMASK_X);
393   ptn_move_dest_masked(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)),
394                        WRITEMASK_Y);
395   ptn_move_dest_masked(b, dest, nir_imm_float(b, 0.0), WRITEMASK_Z);
396   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
397}
398
399/**
400 * Emit SLT.  For platforms with integers, prefer b2f(flt(...)).
401 */
402static void
403ptn_slt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
404{
405   if (b->shader->options->native_integers) {
406      ptn_move_dest(b, dest, nir_b2f32(b, nir_flt(b, src[0], src[1])));
407   } else {
408      ptn_move_dest(b, dest, nir_slt(b, src[0], src[1]));
409   }
410}
411
412/**
413 * Emit SGE.  For platforms with integers, prefer b2f(fge(...)).
414 */
415static void
416ptn_sge(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
417{
418   if (b->shader->options->native_integers) {
419      ptn_move_dest(b, dest, nir_b2f32(b, nir_fge(b, src[0], src[1])));
420   } else {
421      ptn_move_dest(b, dest, nir_sge(b, src[0], src[1]));
422   }
423}
424
/* XPD - Cross Product
 *   dst.xyz = src0.yzx * src1.zxy - src1.yzx * src0.zxy
 *   dst.w   = 1.0
 *
 * Only three components are requested from each swizzle, so the W entry of
 * the swizzle arrays is not consumed.
 */
static void
ptn_xpd(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest_masked(b, dest,
                        nir_fsub(b,
                                 nir_fmul(b,
                                          nir_swizzle(b, src[0], SWIZ(Y, Z, X, W), 3, true),
                                          nir_swizzle(b, src[1], SWIZ(Z, X, Y, W), 3, true)),
                                 nir_fmul(b,
                                          nir_swizzle(b, src[1], SWIZ(Y, Z, X, W), 3, true),
                                          nir_swizzle(b, src[0], SWIZ(Z, X, Y, W), 3, true))),
                        WRITEMASK_XYZ);
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
}
439
440static void
441ptn_dp2(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
442{
443   ptn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
444}
445
446static void
447ptn_dp3(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
448{
449   ptn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
450}
451
452static void
453ptn_dp4(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
454{
455   ptn_move_dest(b, dest, nir_fdot4(b, src[0], src[1]));
456}
457
458static void
459ptn_dph(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
460{
461   ptn_move_dest(b, dest, nir_fdph(b, src[0], src[1]));
462}
463
464static void
465ptn_cmp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
466{
467   if (b->shader->options->native_integers) {
468      ptn_move_dest(b, dest, nir_bcsel(b,
469                                       nir_flt(b, src[0], nir_imm_float(b, 0.0)),
470                                       src[1], src[2]));
471   } else {
472      ptn_move_dest(b, dest, nir_fcsel(b,
473                                       nir_slt(b, src[0], nir_imm_float(b, 0.0)),
474                                       src[1], src[2]));
475   }
476}
477
478static void
479ptn_lrp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
480{
481   ptn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
482}
483
484static void
485ptn_kil(nir_builder *b, nir_ssa_def **src)
486{
487   nir_ssa_def *cmp = b->shader->options->native_integers ?
488      nir_bany(b, nir_flt(b, src[0], nir_imm_float(b, 0.0))) :
489      nir_fany_nequal4(b, nir_slt(b, src[0], nir_imm_float(b, 0.0)), nir_imm_float(b, 0.0));
490
491   nir_intrinsic_instr *discard =
492      nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
493   discard->src[0] = nir_src_for_ssa(cmp);
494   nir_builder_instr_insert(b, &discard->instr);
495}
496
497static void
498ptn_tex(struct ptn_compile *c, nir_alu_dest dest, nir_ssa_def **src,
499        struct prog_instruction *prog_inst)
500{
501   nir_builder *b = &c->build;
502   nir_tex_instr *instr;
503   nir_texop op;
504   unsigned num_srcs;
505
506   switch (prog_inst->Opcode) {
507   case OPCODE_TEX:
508      op = nir_texop_tex;
509      num_srcs = 1;
510      break;
511   case OPCODE_TXB:
512      op = nir_texop_txb;
513      num_srcs = 2;
514      break;
515   case OPCODE_TXD:
516      op = nir_texop_txd;
517      num_srcs = 3;
518      break;
519   case OPCODE_TXL:
520      op = nir_texop_txl;
521      num_srcs = 2;
522      break;
523   case OPCODE_TXP:
524      op = nir_texop_tex;
525      num_srcs = 2;
526      break;
527   default:
528      fprintf(stderr, "unknown tex op %d\n", prog_inst->Opcode);
529      abort();
530   }
531
532   /* Deref sources */
533   num_srcs += 2;
534
535   if (prog_inst->TexShadow)
536      num_srcs++;
537
538   instr = nir_tex_instr_create(b->shader, num_srcs);
539   instr->op = op;
540   instr->dest_type = nir_type_float;
541   instr->is_shadow = prog_inst->TexShadow;
542
543   switch (prog_inst->TexSrcTarget) {
544   case TEXTURE_1D_INDEX:
545      instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
546      break;
547   case TEXTURE_2D_INDEX:
548      instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
549      break;
550   case TEXTURE_3D_INDEX:
551      instr->sampler_dim = GLSL_SAMPLER_DIM_3D;
552      break;
553   case TEXTURE_CUBE_INDEX:
554      instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
555      break;
556   case TEXTURE_RECT_INDEX:
557      instr->sampler_dim = GLSL_SAMPLER_DIM_RECT;
558      break;
559   default:
560      fprintf(stderr, "Unknown texture target %d\n", prog_inst->TexSrcTarget);
561      abort();
562   }
563
564   switch (instr->sampler_dim) {
565   case GLSL_SAMPLER_DIM_1D:
566   case GLSL_SAMPLER_DIM_BUF:
567      instr->coord_components = 1;
568      break;
569   case GLSL_SAMPLER_DIM_2D:
570   case GLSL_SAMPLER_DIM_RECT:
571   case GLSL_SAMPLER_DIM_EXTERNAL:
572   case GLSL_SAMPLER_DIM_MS:
573      instr->coord_components = 2;
574      break;
575   case GLSL_SAMPLER_DIM_3D:
576   case GLSL_SAMPLER_DIM_CUBE:
577      instr->coord_components = 3;
578      break;
579   case GLSL_SAMPLER_DIM_SUBPASS:
580   case GLSL_SAMPLER_DIM_SUBPASS_MS:
581      unreachable("can't reach");
582   }
583
584   nir_variable *var = c->sampler_vars[prog_inst->TexSrcUnit];
585   if (!var) {
586      const struct glsl_type *type =
587         glsl_sampler_type(instr->sampler_dim, false, false, GLSL_TYPE_FLOAT);
588      var = nir_variable_create(b->shader, nir_var_uniform, type, "sampler");
589      var->data.binding = prog_inst->TexSrcUnit;
590      var->data.explicit_binding = true;
591      c->sampler_vars[prog_inst->TexSrcUnit] = var;
592   }
593
594   nir_deref_instr *deref = nir_build_deref_var(b, var);
595
596   unsigned src_number = 0;
597
598   instr->src[src_number].src = nir_src_for_ssa(&deref->dest.ssa);
599   instr->src[src_number].src_type = nir_tex_src_texture_deref;
600   src_number++;
601   instr->src[src_number].src = nir_src_for_ssa(&deref->dest.ssa);
602   instr->src[src_number].src_type = nir_tex_src_sampler_deref;
603   src_number++;
604
605   instr->src[src_number].src =
606      nir_src_for_ssa(nir_swizzle(b, src[0], SWIZ(X, Y, Z, W),
607                                  instr->coord_components, true));
608   instr->src[src_number].src_type = nir_tex_src_coord;
609   src_number++;
610
611   if (prog_inst->Opcode == OPCODE_TXP) {
612      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
613      instr->src[src_number].src_type = nir_tex_src_projector;
614      src_number++;
615   }
616
617   if (prog_inst->Opcode == OPCODE_TXB) {
618      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
619      instr->src[src_number].src_type = nir_tex_src_bias;
620      src_number++;
621   }
622
623   if (prog_inst->Opcode == OPCODE_TXL) {
624      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
625      instr->src[src_number].src_type = nir_tex_src_lod;
626      src_number++;
627   }
628
629   if (instr->is_shadow) {
630      if (instr->coord_components < 3)
631         instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], Z));
632      else
633         instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
634
635      instr->src[src_number].src_type = nir_tex_src_comparator;
636      src_number++;
637   }
638
639   assert(src_number == num_srcs);
640
641   nir_ssa_dest_init(&instr->instr, &instr->dest, 4, 32, NULL);
642   nir_builder_instr_insert(b, &instr->instr);
643
644   /* Resolve the writemask on the texture op. */
645   ptn_move_dest(b, dest, &instr->dest.ssa);
646}
647
/**
 * Direct Mesa IR opcode -> NIR ALU opcode translations.
 *
 * A zero entry means there is no one-to-one mapping; ptn_emit_instruction()
 * either handles such opcodes with dedicated code or reports them as
 * unknown.  Opcodes not listed here (e.g. OPCODE_KIL) are zero-initialized
 * as well.
 */
static const nir_op op_trans[MAX_OPCODE] = {
   [OPCODE_NOP] = 0,
   [OPCODE_ABS] = nir_op_fabs,
   [OPCODE_ADD] = nir_op_fadd,
   [OPCODE_ARL] = 0,
   [OPCODE_CMP] = 0,
   [OPCODE_COS] = 0,
   [OPCODE_DDX] = nir_op_fddx,
   [OPCODE_DDY] = nir_op_fddy,
   [OPCODE_DP2] = 0,
   [OPCODE_DP3] = 0,
   [OPCODE_DP4] = 0,
   [OPCODE_DPH] = 0,
   [OPCODE_DST] = 0,
   [OPCODE_END] = 0,
   [OPCODE_EX2] = 0,
   [OPCODE_EXP] = 0,
   [OPCODE_FLR] = nir_op_ffloor,
   [OPCODE_FRC] = nir_op_ffract,
   [OPCODE_LG2] = 0,
   [OPCODE_LIT] = 0,
   [OPCODE_LOG] = 0,
   [OPCODE_LRP] = 0,
   [OPCODE_MAD] = 0,
   [OPCODE_MAX] = nir_op_fmax,
   [OPCODE_MIN] = nir_op_fmin,
   [OPCODE_MOV] = nir_op_fmov,
   [OPCODE_MUL] = nir_op_fmul,
   [OPCODE_POW] = 0,
   [OPCODE_RCP] = 0,

   [OPCODE_RSQ] = 0,
   [OPCODE_SCS] = 0,
   [OPCODE_SGE] = 0,
   [OPCODE_SIN] = 0,
   [OPCODE_SLT] = 0,
   [OPCODE_SSG] = nir_op_fsign,
   [OPCODE_SUB] = nir_op_fsub,
   [OPCODE_SWZ] = 0,
   [OPCODE_TEX] = 0,
   [OPCODE_TRUNC] = nir_op_ftrunc,
   [OPCODE_TXB] = 0,
   [OPCODE_TXD] = 0,
   [OPCODE_TXL] = 0,
   [OPCODE_TXP] = 0,
   [OPCODE_XPD] = 0,
};
695
696static void
697ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst)
698{
699   nir_builder *b = &c->build;
700   unsigned i;
701   const unsigned op = prog_inst->Opcode;
702
703   if (op == OPCODE_END)
704      return;
705
706   nir_ssa_def *src[3];
707   for (i = 0; i < 3; i++) {
708      src[i] = ptn_get_src(c, &prog_inst->SrcReg[i]);
709   }
710   nir_alu_dest dest = ptn_get_dest(c, &prog_inst->DstReg);
711   if (c->error)
712      return;
713
714   switch (op) {
715   case OPCODE_RSQ:
716      ptn_move_dest(b, dest,
717                    nir_frsq(b, nir_fabs(b, ptn_channel(b, src[0], X))));
718      break;
719
720   case OPCODE_RCP:
721      ptn_move_dest(b, dest, nir_frcp(b, ptn_channel(b, src[0], X)));
722      break;
723
724   case OPCODE_EX2:
725      ptn_move_dest(b, dest, nir_fexp2(b, ptn_channel(b, src[0], X)));
726      break;
727
728   case OPCODE_LG2:
729      ptn_move_dest(b, dest, nir_flog2(b, ptn_channel(b, src[0], X)));
730      break;
731
732   case OPCODE_POW:
733      ptn_move_dest(b, dest, nir_fpow(b,
734                                      ptn_channel(b, src[0], X),
735                                      ptn_channel(b, src[1], X)));
736      break;
737
738   case OPCODE_COS:
739      ptn_move_dest(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)));
740      break;
741
742   case OPCODE_SIN:
743      ptn_move_dest(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)));
744      break;
745
746   case OPCODE_ARL:
747      ptn_arl(b, dest, src);
748      break;
749
750   case OPCODE_EXP:
751      ptn_exp(b, dest, src);
752      break;
753
754   case OPCODE_LOG:
755      ptn_log(b, dest, src);
756      break;
757
758   case OPCODE_LRP:
759      ptn_lrp(b, dest, src);
760      break;
761
762   case OPCODE_MAD:
763      ptn_move_dest(b, dest, nir_fadd(b, nir_fmul(b, src[0], src[1]), src[2]));
764      break;
765
766   case OPCODE_DST:
767      ptn_dst(b, dest, src);
768      break;
769
770   case OPCODE_LIT:
771      ptn_lit(b, dest, src);
772      break;
773
774   case OPCODE_XPD:
775      ptn_xpd(b, dest, src);
776      break;
777
778   case OPCODE_DP2:
779      ptn_dp2(b, dest, src);
780      break;
781
782   case OPCODE_DP3:
783      ptn_dp3(b, dest, src);
784      break;
785
786   case OPCODE_DP4:
787      ptn_dp4(b, dest, src);
788      break;
789
790   case OPCODE_DPH:
791      ptn_dph(b, dest, src);
792      break;
793
794   case OPCODE_KIL:
795      ptn_kil(b, src);
796      break;
797
798   case OPCODE_CMP:
799      ptn_cmp(b, dest, src);
800      break;
801
802   case OPCODE_SCS:
803      ptn_scs(b, dest, src);
804      break;
805
806   case OPCODE_SLT:
807      ptn_slt(b, dest, src);
808      break;
809
810   case OPCODE_SGE:
811      ptn_sge(b, dest, src);
812      break;
813
814   case OPCODE_TEX:
815   case OPCODE_TXB:
816   case OPCODE_TXD:
817   case OPCODE_TXL:
818   case OPCODE_TXP:
819      ptn_tex(c, dest, src, prog_inst);
820      break;
821
822   case OPCODE_SWZ:
823      /* Extended swizzles were already handled in ptn_get_src(). */
824      ptn_alu(b, nir_op_fmov, dest, src);
825      break;
826
827   case OPCODE_NOP:
828      break;
829
830   default:
831      if (op_trans[op] != 0) {
832         ptn_alu(b, op_trans[op], dest, src);
833      } else {
834         fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op));
835         abort();
836      }
837      break;
838   }
839
840   if (prog_inst->Saturate) {
841      assert(prog_inst->Saturate);
842      assert(!dest.dest.is_ssa);
843      ptn_move_dest(b, dest, nir_fsat(b, ptn_src_for_dest(c, &dest)));
844   }
845}
846
847/**
848 * Puts a NIR intrinsic to store of each PROGRAM_OUTPUT value to the output
849 * variables at the end of the shader.
850 *
851 * We don't generate these incrementally as the PROGRAM_OUTPUT values are
852 * written, because there's no output load intrinsic, which means we couldn't
853 * handle writemasks.
854 */
855static void
856ptn_add_output_stores(struct ptn_compile *c)
857{
858   nir_builder *b = &c->build;
859
860   nir_foreach_variable(var, &b->shader->outputs) {
861      nir_ssa_def *src = nir_load_reg(b, c->output_regs[var->data.location]);
862      if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
863          var->data.location == FRAG_RESULT_DEPTH) {
864         /* result.depth has this strange convention of being the .z component of
865          * a vec4 with undefined .xyw components.  We resolve it to a scalar, to
866          * match GLSL's gl_FragDepth and the expectations of most backends.
867          */
868         src = nir_channel(b, src, 2);
869      }
870      if (c->prog->Target == GL_VERTEX_PROGRAM_ARB &&
871          var->data.location == VARYING_SLOT_FOGC) {
872         /* result.fogcoord is a single component value */
873         src = nir_channel(b, src, 0);
874      }
875      unsigned num_components = glsl_get_vector_elements(var->type);
876      nir_store_var(b, var, src, (1 << num_components) - 1);
877   }
878}
879
880static void
881setup_registers_and_variables(struct ptn_compile *c)
882{
883   nir_builder *b = &c->build;
884   struct nir_shader *shader = b->shader;
885
886   /* Create input variables. */
887   uint64_t inputs_read = c->prog->info.inputs_read;
888   while (inputs_read) {
889      const int i = u_bit_scan64(&inputs_read);
890
891      nir_variable *var =
892         nir_variable_create(shader, nir_var_shader_in, glsl_vec4_type(),
893                             ralloc_asprintf(shader, "in_%d", i));
894      var->data.location = i;
895      var->data.index = 0;
896
897      if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
898         if (i == VARYING_SLOT_FOGC) {
899            /* fogcoord is defined as <f, 0.0, 0.0, 1.0>.  Make the actual
900             * input variable a float, and create a local containing the
901             * full vec4 value.
902             */
903            var->type = glsl_float_type();
904
905            nir_variable *fullvar =
906               nir_local_variable_create(b->impl, glsl_vec4_type(),
907                                         "fogcoord_tmp");
908
909            nir_store_var(b, fullvar,
910                          nir_vec4(b, nir_load_var(b, var),
911                                   nir_imm_float(b, 0.0),
912                                   nir_imm_float(b, 0.0),
913                                   nir_imm_float(b, 1.0)),
914                          WRITEMASK_XYZW);
915
916            /* We inserted the real input into the list so the driver has real
917             * inputs, but we set c->input_vars[i] to the temporary so we use
918             * the splatted value.
919             */
920            c->input_vars[i] = fullvar;
921            continue;
922         }
923      }
924
925      c->input_vars[i] = var;
926   }
927
928   /* Create system value variables */
929   uint64_t system_values_read = c->prog->info.system_values_read;
930   while (system_values_read) {
931      const int i = u_bit_scan64(&system_values_read);
932
933      nir_variable *var =
934         nir_variable_create(shader, nir_var_system_value, glsl_vec4_type(),
935                             ralloc_asprintf(shader, "sv_%d", i));
936      var->data.location = i;
937      var->data.index = 0;
938
939      c->sysval_vars[i] = var;
940   }
941
942   /* Create output registers and variables. */
943   int max_outputs = util_last_bit(c->prog->info.outputs_written);
944   c->output_regs = rzalloc_array(c, nir_register *, max_outputs);
945
946   uint64_t outputs_written = c->prog->info.outputs_written;
947   while (outputs_written) {
948      const int i = u_bit_scan64(&outputs_written);
949
950      /* Since we can't load from outputs in the IR, we make temporaries
951       * for the outputs and emit stores to the real outputs at the end of
952       * the shader.
953       */
954      nir_register *reg = nir_local_reg_create(b->impl);
955      reg->num_components = 4;
956
957      nir_variable *var = rzalloc(shader, nir_variable);
958      if ((c->prog->Target == GL_FRAGMENT_PROGRAM_ARB && i == FRAG_RESULT_DEPTH) ||
959          (c->prog->Target == GL_VERTEX_PROGRAM_ARB && i == VARYING_SLOT_FOGC))
960         var->type = glsl_float_type();
961      else
962         var->type = glsl_vec4_type();
963      var->data.mode = nir_var_shader_out;
964      var->name = ralloc_asprintf(var, "out_%d", i);
965
966      var->data.location = i;
967      var->data.index = 0;
968
969      c->output_regs[i] = reg;
970
971      exec_list_push_tail(&shader->outputs, &var->node);
972      c->output_vars[i] = var;
973   }
974
975   /* Create temporary registers. */
976   c->temp_regs = rzalloc_array(c, nir_register *,
977                                c->prog->arb.NumTemporaries);
978
979   nir_register *reg;
980   for (unsigned i = 0; i < c->prog->arb.NumTemporaries; i++) {
981      reg = nir_local_reg_create(b->impl);
982      if (!reg) {
983         c->error = true;
984         return;
985      }
986      reg->num_components = 4;
987      c->temp_regs[i] = reg;
988   }
989
990   /* Create the address register (for ARB_vertex_program). */
991   reg = nir_local_reg_create(b->impl);
992   if (!reg) {
993      c->error = true;
994      return;
995   }
996   reg->num_components = 1;
997   c->addr_reg = reg;
998}
999
1000struct nir_shader *
1001prog_to_nir(const struct gl_program *prog,
1002            const nir_shader_compiler_options *options)
1003{
1004   struct ptn_compile *c;
1005   struct nir_shader *s;
1006   gl_shader_stage stage = _mesa_program_enum_to_shader_stage(prog->Target);
1007
1008   c = rzalloc(NULL, struct ptn_compile);
1009   if (!c)
1010      return NULL;
1011   c->prog = prog;
1012
1013   nir_builder_init_simple_shader(&c->build, NULL, stage, options);
1014
1015   /* Copy the shader_info from the gl_program */
1016   c->build.shader->info = prog->info;
1017
1018   s = c->build.shader;
1019
1020   if (prog->Parameters->NumParameters > 0) {
1021      const struct glsl_type *type =
1022         glsl_array_type(glsl_vec4_type(), prog->Parameters->NumParameters, 0);
1023      c->parameters =
1024         nir_variable_create(s, nir_var_uniform, type,
1025                             prog->Parameters->Parameters[0].Name);
1026   }
1027
1028   setup_registers_and_variables(c);
1029   if (unlikely(c->error))
1030      goto fail;
1031
1032   for (unsigned int i = 0; i < prog->arb.NumInstructions; i++) {
1033      ptn_emit_instruction(c, &prog->arb.Instructions[i]);
1034
1035      if (unlikely(c->error))
1036         break;
1037   }
1038
1039   ptn_add_output_stores(c);
1040
1041   s->info.name = ralloc_asprintf(s, "ARB%d", prog->Id);
1042   s->info.num_textures = util_last_bit(prog->SamplersUsed);
1043   s->info.num_ubos = 0;
1044   s->info.num_abos = 0;
1045   s->info.num_ssbos = 0;
1046   s->info.num_images = 0;
1047   s->info.uses_texture_gather = false;
1048   s->info.clip_distance_array_size = 0;
1049   s->info.cull_distance_array_size = 0;
1050   s->info.separate_shader = false;
1051
1052fail:
1053   if (c->error) {
1054      ralloc_free(s);
1055      s = NULL;
1056   }
1057   ralloc_free(c);
1058   return s;
1059}
1060