1/*
2 * Copyright (C) 2016 Miklós Máté
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23#include "main/mtypes.h"
24#include "main/atifragshader.h"
25#include "main/errors.h"
26#include "program/prog_parameter.h"
27
28#include "tgsi/tgsi_ureg.h"
29#include "tgsi/tgsi_scan.h"
30#include "tgsi/tgsi_transform.h"
31
32#include "st_program.h"
33#include "st_atifs_to_tgsi.h"
34
35/**
36 * Intermediate state used during shader translation.
37 */
38struct st_translate {
39   struct ureg_program *ureg;
40   struct ati_fragment_shader *atifs;
41
42   struct ureg_dst temps[MAX_PROGRAM_TEMPS];
43   struct ureg_src *constants;
44   struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
45   struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
46   struct ureg_src samplers[PIPE_MAX_SAMPLERS];
47
48   const ubyte *inputMapping;
49   const ubyte *outputMapping;
50
51   unsigned current_pass;
52
53   bool regs_written[MAX_NUM_PASSES_ATI][MAX_NUM_FRAGMENT_REGISTERS_ATI];
54
55   boolean error;
56};
57
/**
 * Describes how one ATI arithmetic operation maps to TGSI.
 */
struct instruction_desc {
   /* TGSI opcode to emit; TGSI_OPCODE_NOP means "expand by hand" */
   unsigned TGSI_opcode;
   /* Mnemonic, used in warnings and to select the hand-written expansion */
   const char *name;
   /* Number of source arguments the operation consumes */
   unsigned char arg_count;
};
63
64static const struct instruction_desc inst_desc[] = {
65   {TGSI_OPCODE_MOV, "MOV", 1},
66   {TGSI_OPCODE_NOP, "UND", 0}, /* unused */
67   {TGSI_OPCODE_ADD, "ADD", 2},
68   {TGSI_OPCODE_MUL, "MUL", 2},
69   {TGSI_OPCODE_NOP, "SUB", 2},
70   {TGSI_OPCODE_DP3, "DOT3", 2},
71   {TGSI_OPCODE_DP4, "DOT4", 2},
72   {TGSI_OPCODE_MAD, "MAD", 3},
73   {TGSI_OPCODE_LRP, "LERP", 3},
74   {TGSI_OPCODE_NOP, "CND", 3},
75   {TGSI_OPCODE_NOP, "CND0", 3},
76   {TGSI_OPCODE_NOP, "DOT2_ADD", 3}
77};
78
79static struct ureg_dst
80get_temp(struct st_translate *t, unsigned index)
81{
82   if (ureg_dst_is_undef(t->temps[index]))
83      t->temps[index] = ureg_DECL_temporary(t->ureg);
84   return t->temps[index];
85}
86
87static struct ureg_src
88apply_swizzle(struct st_translate *t,
89              struct ureg_src src, GLuint swizzle)
90{
91   if (swizzle == GL_SWIZZLE_STR_ATI) {
92      return src;
93   } else if (swizzle == GL_SWIZZLE_STQ_ATI) {
94      return ureg_swizzle(src,
95                          TGSI_SWIZZLE_X,
96                          TGSI_SWIZZLE_Y,
97                          TGSI_SWIZZLE_W,
98                          TGSI_SWIZZLE_Z);
99   } else {
100      struct ureg_dst tmp[2];
101      struct ureg_src imm[3];
102
103      tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI);
104      tmp[1] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI + 1);
105      imm[0] = src;
106      imm[1] = ureg_imm4f(t->ureg, 1.0f, 1.0f, 0.0f, 0.0f);
107      imm[2] = ureg_imm4f(t->ureg, 0.0f, 0.0f, 1.0f, 1.0f);
108      ureg_insn(t->ureg, TGSI_OPCODE_MAD, &tmp[0], 1, imm, 3, 0);
109
110      if (swizzle == GL_SWIZZLE_STR_DR_ATI) {
111         imm[0] = ureg_scalar(src, TGSI_SWIZZLE_Z);
112      } else {
113         imm[0] = ureg_scalar(src, TGSI_SWIZZLE_W);
114      }
115      ureg_insn(t->ureg, TGSI_OPCODE_RCP, &tmp[1], 1, &imm[0], 1, 0);
116
117      imm[0] = ureg_src(tmp[0]);
118      imm[1] = ureg_src(tmp[1]);
119      ureg_insn(t->ureg, TGSI_OPCODE_MUL, &tmp[0], 1, imm, 2, 0);
120
121      return ureg_src(tmp[0]);
122   }
123}
124
125static struct ureg_src
126get_source(struct st_translate *t, GLuint src_type)
127{
128   if (src_type >= GL_REG_0_ATI && src_type <= GL_REG_5_ATI) {
129      if (t->regs_written[t->current_pass][src_type - GL_REG_0_ATI]) {
130         return ureg_src(get_temp(t, src_type - GL_REG_0_ATI));
131      } else {
132         return ureg_imm1f(t->ureg, 0.0f);
133      }
134   } else if (src_type >= GL_CON_0_ATI && src_type <= GL_CON_7_ATI) {
135      return t->constants[src_type - GL_CON_0_ATI];
136   } else if (src_type == GL_ZERO) {
137      return ureg_imm1f(t->ureg, 0.0f);
138   } else if (src_type == GL_ONE) {
139      return ureg_imm1f(t->ureg, 1.0f);
140   } else if (src_type == GL_PRIMARY_COLOR_ARB) {
141      return t->inputs[t->inputMapping[VARYING_SLOT_COL0]];
142   } else if (src_type == GL_SECONDARY_INTERPOLATOR_ATI) {
143      return t->inputs[t->inputMapping[VARYING_SLOT_COL1]];
144   } else {
145      /* frontend prevents this */
146      unreachable("unknown source");
147   }
148}
149
150static struct ureg_src
151prepare_argument(struct st_translate *t, const unsigned argId,
152                 const struct atifragshader_src_register *srcReg)
153{
154   struct ureg_src src = get_source(t, srcReg->Index);
155   struct ureg_dst arg = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI + argId);
156
157   switch (srcReg->argRep) {
158   case GL_NONE:
159      break;
160   case GL_RED:
161      src = ureg_scalar(src, TGSI_SWIZZLE_X);
162      break;
163   case GL_GREEN:
164      src = ureg_scalar(src, TGSI_SWIZZLE_Y);
165      break;
166   case GL_BLUE:
167      src = ureg_scalar(src, TGSI_SWIZZLE_Z);
168      break;
169   case GL_ALPHA:
170      src = ureg_scalar(src, TGSI_SWIZZLE_W);
171      break;
172   }
173   ureg_insn(t->ureg, TGSI_OPCODE_MOV, &arg, 1, &src, 1, 0);
174
175   if (srcReg->argMod & GL_COMP_BIT_ATI) {
176      struct ureg_src modsrc[2];
177      modsrc[0] = ureg_imm1f(t->ureg, 1.0f);
178      modsrc[1] = ureg_negate(ureg_src(arg));
179
180      ureg_insn(t->ureg, TGSI_OPCODE_ADD, &arg, 1, modsrc, 2, 0);
181   }
182   if (srcReg->argMod & GL_BIAS_BIT_ATI) {
183      struct ureg_src modsrc[2];
184      modsrc[0] = ureg_src(arg);
185      modsrc[1] = ureg_imm1f(t->ureg, -0.5f);
186
187      ureg_insn(t->ureg, TGSI_OPCODE_ADD, &arg, 1, modsrc, 2, 0);
188   }
189   if (srcReg->argMod & GL_2X_BIT_ATI) {
190      struct ureg_src modsrc[2];
191      modsrc[0] = ureg_src(arg);
192      modsrc[1] = ureg_src(arg);
193
194      ureg_insn(t->ureg, TGSI_OPCODE_ADD, &arg, 1, modsrc, 2, 0);
195   }
196   if (srcReg->argMod & GL_NEGATE_BIT_ATI) {
197      struct ureg_src modsrc[2];
198      modsrc[0] = ureg_src(arg);
199      modsrc[1] = ureg_imm1f(t->ureg, -1.0f);
200
201      ureg_insn(t->ureg, TGSI_OPCODE_MUL, &arg, 1, modsrc, 2, 0);
202   }
203   return  ureg_src(arg);
204}
205
206/* These instructions need special treatment */
207static void
208emit_special_inst(struct st_translate *t, const struct instruction_desc *desc,
209                  struct ureg_dst *dst, struct ureg_src *args, unsigned argcount)
210{
211   struct ureg_dst tmp[1];
212   struct ureg_src src[3];
213
214   if (!strcmp(desc->name, "SUB")) {
215      ureg_ADD(t->ureg, *dst, args[0], ureg_negate(args[1]));
216   } else if (!strcmp(desc->name, "CND")) {
217      tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI + 2); /* re-purpose a3 */
218      src[0] = ureg_imm1f(t->ureg, 0.5f);
219      src[1] = ureg_negate(args[2]);
220      ureg_insn(t->ureg, TGSI_OPCODE_ADD, tmp, 1, src, 2, 0);
221      src[0] = ureg_src(tmp[0]);
222      src[1] = args[0];
223      src[2] = args[1];
224      ureg_insn(t->ureg, TGSI_OPCODE_CMP, dst, 1, src, 3, 0);
225   } else if (!strcmp(desc->name, "CND0")) {
226      src[0] = args[2];
227      src[1] = args[1];
228      src[2] = args[0];
229      ureg_insn(t->ureg, TGSI_OPCODE_CMP, dst, 1, src, 3, 0);
230   } else if (!strcmp(desc->name, "DOT2_ADD")) {
231      tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI); /* re-purpose a1 */
232      src[0] = args[0];
233      src[1] = args[1];
234      ureg_insn(t->ureg, TGSI_OPCODE_DP2, tmp, 1, src, 2, 0);
235      src[0] = ureg_src(tmp[0]);
236      src[1] = ureg_scalar(args[2], TGSI_SWIZZLE_Z);
237      ureg_insn(t->ureg, TGSI_OPCODE_ADD, dst, 1, src, 2, 0);
238   }
239}
240
241static void
242emit_arith_inst(struct st_translate *t,
243                const struct instruction_desc *desc,
244                struct ureg_dst *dst, struct ureg_src *args, unsigned argcount)
245{
246   if (desc->TGSI_opcode == TGSI_OPCODE_NOP) {
247      emit_special_inst(t, desc, dst, args, argcount);
248      return;
249   }
250
251   ureg_insn(t->ureg, desc->TGSI_opcode, dst, 1, args, argcount, 0);
252}
253
254static void
255emit_dstmod(struct st_translate *t,
256            struct ureg_dst dst, GLuint dstMod)
257{
258   float imm;
259   struct ureg_src src[3];
260   GLuint scale = dstMod & ~GL_SATURATE_BIT_ATI;
261
262   if (dstMod == GL_NONE) {
263      return;
264   }
265
266   switch (scale) {
267   case GL_2X_BIT_ATI:
268      imm = 2.0f;
269      break;
270   case GL_4X_BIT_ATI:
271      imm = 4.0f;
272      break;
273   case GL_8X_BIT_ATI:
274      imm = 8.0f;
275      break;
276   case GL_HALF_BIT_ATI:
277      imm = 0.5f;
278      break;
279   case GL_QUARTER_BIT_ATI:
280      imm = 0.25f;
281      break;
282   case GL_EIGHTH_BIT_ATI:
283      imm = 0.125f;
284      break;
285   default:
286      imm = 1.0f;
287   }
288
289   src[0] = ureg_src(dst);
290   src[1] = ureg_imm1f(t->ureg, imm);
291   if (dstMod & GL_SATURATE_BIT_ATI) {
292      dst = ureg_saturate(dst);
293   }
294   ureg_insn(t->ureg, TGSI_OPCODE_MUL, &dst, 1, src, 2, 0);
295}
296
297/**
298 * Compile one setup instruction to TGSI instructions.
299 */
300static void
301compile_setupinst(struct st_translate *t,
302                  const unsigned r,
303                  const struct atifs_setupinst *texinst)
304{
305   struct ureg_dst dst[1];
306   struct ureg_src src[2];
307
308   if (!texinst->Opcode)
309      return;
310
311   dst[0] = get_temp(t, r);
312
313   GLuint pass_tex = texinst->src;
314
315   if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
316      unsigned attr = pass_tex - GL_TEXTURE0_ARB + VARYING_SLOT_TEX0;
317
318      src[0] = t->inputs[t->inputMapping[attr]];
319   } else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) {
320      unsigned reg = pass_tex - GL_REG_0_ATI;
321
322      /* the frontend already validated that REG is only allowed in second pass */
323      if (t->regs_written[0][reg]) {
324         src[0] = ureg_src(t->temps[reg]);
325      } else {
326         src[0] = ureg_imm1f(t->ureg, 0.0f);
327      }
328   }
329   src[0] = apply_swizzle(t, src[0], texinst->swizzle);
330
331   if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP) {
332      /* by default texture and sampler indexes are the same */
333      src[1] = t->samplers[r];
334      /* the texture target is still unknown, it will be fixed in the draw call */
335      ureg_tex_insn(t->ureg, TGSI_OPCODE_TEX, dst, 1, TGSI_TEXTURE_2D,
336                    TGSI_RETURN_TYPE_FLOAT, NULL, 0, src, 2);
337   } else if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
338      ureg_insn(t->ureg, TGSI_OPCODE_MOV, dst, 1, src, 1, 0);
339   }
340
341   t->regs_written[t->current_pass][r] = true;
342}
343
344/**
345 * Compile one arithmetic operation COLOR&ALPHA pair into TGSI instructions.
346 */
347static void
348compile_instruction(struct st_translate *t,
349                    const struct atifs_instruction *inst)
350{
351   unsigned optype;
352
353   for (optype = 0; optype < 2; optype++) { /* color, alpha */
354      const struct instruction_desc *desc;
355      struct ureg_dst dst[1];
356      struct ureg_src args[3]; /* arguments for the main operation */
357      unsigned arg;
358      unsigned dstreg = inst->DstReg[optype].Index - GL_REG_0_ATI;
359
360      if (!inst->Opcode[optype])
361         continue;
362
363      desc = &inst_desc[inst->Opcode[optype] - GL_MOV_ATI];
364
365      /* prepare the arguments */
366      for (arg = 0; arg < desc->arg_count; arg++) {
367         if (arg >= inst->ArgCount[optype]) {
368            _mesa_warning(0, "Using 0 for missing argument %d of %s\n",
369                          arg, desc->name);
370            args[arg] = ureg_imm1f(t->ureg, 0.0f);
371         } else {
372            args[arg] = prepare_argument(t, arg,
373                                         &inst->SrcReg[optype][arg]);
374         }
375      }
376
377      /* prepare dst */
378      dst[0] = get_temp(t, dstreg);
379
380      if (optype) {
381         dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_W);
382      } else {
383         GLuint dstMask = inst->DstReg[optype].dstMask;
384         if (dstMask == GL_NONE) {
385            dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ);
386         } else {
387            dst[0] = ureg_writemask(dst[0], dstMask); /* the enum values match */
388         }
389      }
390
391      /* emit the main instruction */
392      emit_arith_inst(t, desc, dst, args, arg);
393
394      emit_dstmod(t, *dst, inst->DstReg[optype].dstMod);
395
396      t->regs_written[t->current_pass][dstreg] = true;
397   }
398}
399
400static void
401finalize_shader(struct st_translate *t, unsigned numPasses)
402{
403   struct ureg_dst dst[1] = { { 0 } };
404   struct ureg_src src[1] = { { 0 } };
405
406   if (t->regs_written[numPasses-1][0]) {
407      /* copy the result into the OUT slot */
408      dst[0] = t->outputs[t->outputMapping[FRAG_RESULT_COLOR]];
409      src[0] = ureg_src(t->temps[0]);
410      ureg_insn(t->ureg, TGSI_OPCODE_MOV, dst, 1, src, 1, 0);
411   }
412
413   /* signal the end of the program */
414   ureg_insn(t->ureg, TGSI_OPCODE_END, dst, 0, src, 0, 0);
415}
416
417/**
418 * Called when a new variant is needed, we need to translate
419 * the ATI fragment shader to TGSI
420 */
421enum pipe_error
422st_translate_atifs_program(
423   struct ureg_program *ureg,
424   struct ati_fragment_shader *atifs,
425   struct gl_program *program,
426   GLuint numInputs,
427   const ubyte inputMapping[],
428   const ubyte inputSemanticName[],
429   const ubyte inputSemanticIndex[],
430   const ubyte interpMode[],
431   GLuint numOutputs,
432   const ubyte outputMapping[],
433   const ubyte outputSemanticName[],
434   const ubyte outputSemanticIndex[])
435{
436   enum pipe_error ret = PIPE_OK;
437
438   unsigned pass, i, r;
439
440   struct st_translate translate, *t;
441   t = &translate;
442   memset(t, 0, sizeof *t);
443
444   t->inputMapping = inputMapping;
445   t->outputMapping = outputMapping;
446   t->ureg = ureg;
447   t->atifs = atifs;
448
449   /*
450    * Declare input attributes.
451    */
452   for (i = 0; i < numInputs; i++) {
453      t->inputs[i] = ureg_DECL_fs_input(ureg,
454                                        inputSemanticName[i],
455                                        inputSemanticIndex[i],
456                                        interpMode[i]);
457   }
458
459   /*
460    * Declare output attributes:
461    *  we always have numOutputs=1 and it's FRAG_RESULT_COLOR
462    */
463   t->outputs[0] = ureg_DECL_output(ureg,
464                                    TGSI_SEMANTIC_COLOR,
465                                    outputSemanticIndex[0]);
466
467   /* Emit constants and immediates.  Mesa uses a single index space
468    * for these, so we put all the translated regs in t->constants.
469    */
470   if (program->Parameters) {
471      t->constants = calloc(program->Parameters->NumParameters,
472                            sizeof t->constants[0]);
473      if (t->constants == NULL) {
474         ret = PIPE_ERROR_OUT_OF_MEMORY;
475         goto out;
476      }
477
478      for (i = 0; i < program->Parameters->NumParameters; i++) {
479         unsigned pvo = program->Parameters->ParameterValueOffset[i];
480
481         switch (program->Parameters->Parameters[i].Type) {
482         case PROGRAM_STATE_VAR:
483         case PROGRAM_UNIFORM:
484            t->constants[i] = ureg_DECL_constant(ureg, i);
485            break;
486         case PROGRAM_CONSTANT:
487            t->constants[i] =
488               ureg_DECL_immediate(ureg,
489                                   (const float*)program->Parameters->ParameterValues + pvo,
490                                   4);
491            break;
492         default:
493            break;
494         }
495      }
496   }
497
498   /* texture samplers */
499   for (i = 0; i < MAX_NUM_FRAGMENT_REGISTERS_ATI; i++) {
500      if (program->SamplersUsed & (1 << i)) {
501         t->samplers[i] = ureg_DECL_sampler(ureg, i);
502         /* the texture target is still unknown, it will be fixed in the draw call */
503         ureg_DECL_sampler_view(ureg, i, TGSI_TEXTURE_2D,
504                                TGSI_RETURN_TYPE_FLOAT,
505                                TGSI_RETURN_TYPE_FLOAT,
506                                TGSI_RETURN_TYPE_FLOAT,
507                                TGSI_RETURN_TYPE_FLOAT);
508      }
509   }
510
511   /* emit instructions */
512   for (pass = 0; pass < atifs->NumPasses; pass++) {
513      t->current_pass = pass;
514      for (r = 0; r < MAX_NUM_FRAGMENT_REGISTERS_ATI; r++) {
515         struct atifs_setupinst *texinst = &atifs->SetupInst[pass][r];
516         compile_setupinst(t, r, texinst);
517      }
518      for (i = 0; i < atifs->numArithInstr[pass]; i++) {
519         struct atifs_instruction *inst = &atifs->Instructions[pass][i];
520         compile_instruction(t, inst);
521      }
522   }
523
524   finalize_shader(t, atifs->NumPasses);
525
526out:
527   free(t->constants);
528
529   if (t->error) {
530      debug_printf("%s: translate error flag set\n", __func__);
531   }
532
533   return ret;
534}
535
536/**
537 * Called in ProgramStringNotify, we need to fill the metadata of the
538 * gl_program attached to the ati_fragment_shader
539 */
540void
541st_init_atifs_prog(struct gl_context *ctx, struct gl_program *prog)
542{
543   /* we know this is st_fragment_program, because of st_new_ati_fs() */
544   struct st_fragment_program *stfp = (struct st_fragment_program *) prog;
545   struct ati_fragment_shader *atifs = stfp->ati_fs;
546
547   unsigned pass, i, r, optype, arg;
548
549   static const gl_state_index16 fog_params_state[STATE_LENGTH] =
550      {STATE_INTERNAL, STATE_FOG_PARAMS_OPTIMIZED, 0, 0, 0};
551   static const gl_state_index16 fog_color[STATE_LENGTH] =
552      {STATE_FOG_COLOR, 0, 0, 0, 0};
553
554   prog->info.inputs_read = 0;
555   prog->info.outputs_written = BITFIELD64_BIT(FRAG_RESULT_COLOR);
556   prog->SamplersUsed = 0;
557   prog->Parameters = _mesa_new_parameter_list();
558
559   /* fill in inputs_read, SamplersUsed, TexturesUsed */
560   for (pass = 0; pass < atifs->NumPasses; pass++) {
561      for (r = 0; r < MAX_NUM_FRAGMENT_REGISTERS_ATI; r++) {
562         struct atifs_setupinst *texinst = &atifs->SetupInst[pass][r];
563         GLuint pass_tex = texinst->src;
564
565         if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP) {
566            /* mark which texcoords are used */
567            prog->info.inputs_read |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + pass_tex - GL_TEXTURE0_ARB);
568            /* by default there is 1:1 mapping between samplers and textures */
569            prog->SamplersUsed |= (1 << r);
570            /* the target is unknown here, it will be fixed in the draw call */
571            prog->TexturesUsed[r] = TEXTURE_2D_BIT;
572         } else if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
573            if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
574               prog->info.inputs_read |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + pass_tex - GL_TEXTURE0_ARB);
575            }
576         }
577      }
578   }
579   for (pass = 0; pass < atifs->NumPasses; pass++) {
580      for (i = 0; i < atifs->numArithInstr[pass]; i++) {
581         struct atifs_instruction *inst = &atifs->Instructions[pass][i];
582
583         for (optype = 0; optype < 2; optype++) { /* color, alpha */
584            if (inst->Opcode[optype]) {
585               for (arg = 0; arg < inst->ArgCount[optype]; arg++) {
586                  GLint index = inst->SrcReg[optype][arg].Index;
587                  if (index == GL_PRIMARY_COLOR_EXT) {
588                     prog->info.inputs_read |= BITFIELD64_BIT(VARYING_SLOT_COL0);
589                  } else if (index == GL_SECONDARY_INTERPOLATOR_ATI) {
590                     /* note: ATI_fragment_shader.txt never specifies what
591                      * GL_SECONDARY_INTERPOLATOR_ATI is, swrast uses
592                      * VARYING_SLOT_COL1 for this input */
593                     prog->info.inputs_read |= BITFIELD64_BIT(VARYING_SLOT_COL1);
594                  }
595               }
596            }
597         }
598      }
599   }
600   /* we may need fog */
601   prog->info.inputs_read |= BITFIELD64_BIT(VARYING_SLOT_FOGC);
602
603   /* we always have the ATI_fs constants, and the fog params */
604   for (i = 0; i < MAX_NUM_FRAGMENT_CONSTANTS_ATI; i++) {
605      _mesa_add_parameter(prog->Parameters, PROGRAM_UNIFORM,
606                          NULL, 4, GL_FLOAT, NULL, NULL, true);
607   }
608   _mesa_add_state_reference(prog->Parameters, fog_params_state);
609   _mesa_add_state_reference(prog->Parameters, fog_color);
610}
611
612
613struct tgsi_atifs_transform {
614   struct tgsi_transform_context base;
615   struct tgsi_shader_info info;
616   const struct st_fp_variant_key *key;
617   bool first_instruction_emitted;
618   unsigned fog_factor_temp;
619};
620
/* Recover the fixup context from the generic transform context handed to
 * the callbacks.
 */
static inline struct tgsi_atifs_transform *
tgsi_atifs_transform(struct tgsi_transform_context *tctx)
{
   struct tgsi_atifs_transform *self = (struct tgsi_atifs_transform *)tctx;

   return self;
}
626
627/* copied from st_cb_drawpixels_shader.c */
628static void
629set_src(struct tgsi_full_instruction *inst, unsigned i, unsigned file, unsigned index,
630        unsigned x, unsigned y, unsigned z, unsigned w)
631{
632   inst->Src[i].Register.File  = file;
633   inst->Src[i].Register.Index = index;
634   inst->Src[i].Register.SwizzleX = x;
635   inst->Src[i].Register.SwizzleY = y;
636   inst->Src[i].Register.SwizzleZ = z;
637   inst->Src[i].Register.SwizzleW = w;
638   if (file == TGSI_FILE_CONSTANT) {
639      inst->Src[i].Register.Dimension = 1;
640      inst->Src[i].Dimension.Index = 0;
641   }
642}
643
644#define SET_SRC(inst, i, file, index, x, y, z, w) \
645   set_src(inst, i, file, index, TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, \
646           TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w)
647
648static void
649transform_decl(struct tgsi_transform_context *tctx,
650               struct tgsi_full_declaration *decl)
651{
652   struct tgsi_atifs_transform *ctx = tgsi_atifs_transform(tctx);
653
654   if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
655      /* fix texture target */
656      unsigned newtarget = ctx->key->texture_targets[decl->Range.First];
657      if (newtarget)
658         decl->SamplerView.Resource = newtarget;
659   }
660
661   tctx->emit_declaration(tctx, decl);
662}
663
664static void
665transform_instr(struct tgsi_transform_context *tctx,
666                struct tgsi_full_instruction *current_inst)
667{
668   struct tgsi_atifs_transform *ctx = tgsi_atifs_transform(tctx);
669
670   if (ctx->first_instruction_emitted)
671      goto transform_inst;
672
673   ctx->first_instruction_emitted = true;
674
675   if (ctx->key->fog) {
676      /* add a new temp for the fog factor */
677      ctx->fog_factor_temp = ctx->info.file_max[TGSI_FILE_TEMPORARY] + 1;
678      tgsi_transform_temp_decl(tctx, ctx->fog_factor_temp);
679   }
680
681transform_inst:
682   if (current_inst->Instruction.Opcode == TGSI_OPCODE_TEX) {
683      /* fix texture target */
684      unsigned newtarget = ctx->key->texture_targets[current_inst->Src[1].Register.Index];
685      if (newtarget)
686         current_inst->Texture.Texture = newtarget;
687
688   } else if (ctx->key->fog && current_inst->Instruction.Opcode == TGSI_OPCODE_MOV &&
689              current_inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
690      struct tgsi_full_instruction inst;
691      unsigned i;
692      int fogc_index = -1;
693      int reg0_index = current_inst->Src[0].Register.Index;
694
695      /* find FOGC input */
696      for (i = 0; i < ctx->info.num_inputs; i++) {
697         if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FOG) {
698            fogc_index = i;
699            break;
700         }
701      }
702      if (fogc_index < 0) {
703         /* should never be reached, because fog coord input is always declared */
704         tctx->emit_instruction(tctx, current_inst);
705         return;
706      }
707
708      /* compute the 1 component fog factor f */
709      if (ctx->key->fog == FOG_LINEAR) {
710         /* LINEAR formula: f = (end - z) / (end - start)
711          * with optimized parameters:
712          *    f = MAD(fogcoord, oparams.x, oparams.y)
713          */
714         inst = tgsi_default_full_instruction();
715         inst.Instruction.Opcode = TGSI_OPCODE_MAD;
716         inst.Instruction.NumDstRegs = 1;
717         inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
718         inst.Dst[0].Register.Index = ctx->fog_factor_temp;
719         inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
720         inst.Instruction.NumSrcRegs = 3;
721         SET_SRC(&inst, 0, TGSI_FILE_INPUT, fogc_index, X, Y, Z, W);
722         SET_SRC(&inst, 1, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI, X, X, X, X);
723         SET_SRC(&inst, 2, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI, Y, Y, Y, Y);
724         tctx->emit_instruction(tctx, &inst);
725      } else if (ctx->key->fog == FOG_EXP) {
726         /* EXP formula: f = exp(-dens * z)
727          * with optimized parameters:
728          *    f = MUL(fogcoord, oparams.z); f= EX2(-f)
729          */
730         inst = tgsi_default_full_instruction();
731         inst.Instruction.Opcode = TGSI_OPCODE_MUL;
732         inst.Instruction.NumDstRegs = 1;
733         inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
734         inst.Dst[0].Register.Index = ctx->fog_factor_temp;
735         inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
736         inst.Instruction.NumSrcRegs = 2;
737         SET_SRC(&inst, 0, TGSI_FILE_INPUT, fogc_index, X, Y, Z, W);
738         SET_SRC(&inst, 1, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI, Z, Z, Z, Z);
739         tctx->emit_instruction(tctx, &inst);
740
741         inst = tgsi_default_full_instruction();
742         inst.Instruction.Opcode = TGSI_OPCODE_EX2;
743         inst.Instruction.NumDstRegs = 1;
744         inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
745         inst.Dst[0].Register.Index = ctx->fog_factor_temp;
746         inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
747         inst.Instruction.NumSrcRegs = 1;
748         SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W);
749         inst.Src[0].Register.Negate = 1;
750         tctx->emit_instruction(tctx, &inst);
751      } else if (ctx->key->fog == FOG_EXP2) {
752         /* EXP2 formula: f = exp(-(dens * z)^2)
753          * with optimized parameters:
754          *    f = MUL(fogcoord, oparams.w); f=MUL(f, f); f= EX2(-f)
755          */
756         inst = tgsi_default_full_instruction();
757         inst.Instruction.Opcode = TGSI_OPCODE_MUL;
758         inst.Instruction.NumDstRegs = 1;
759         inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
760         inst.Dst[0].Register.Index = ctx->fog_factor_temp;
761         inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
762         inst.Instruction.NumSrcRegs = 2;
763         SET_SRC(&inst, 0, TGSI_FILE_INPUT, fogc_index, X, Y, Z, W);
764         SET_SRC(&inst, 1, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI, W, W, W, W);
765         tctx->emit_instruction(tctx, &inst);
766
767         inst = tgsi_default_full_instruction();
768         inst.Instruction.Opcode = TGSI_OPCODE_MUL;
769         inst.Instruction.NumDstRegs = 1;
770         inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
771         inst.Dst[0].Register.Index = ctx->fog_factor_temp;
772         inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
773         inst.Instruction.NumSrcRegs = 2;
774         SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W);
775         SET_SRC(&inst, 1, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W);
776         tctx->emit_instruction(tctx, &inst);
777
778         inst = tgsi_default_full_instruction();
779         inst.Instruction.Opcode = TGSI_OPCODE_EX2;
780         inst.Instruction.NumDstRegs = 1;
781         inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
782         inst.Dst[0].Register.Index = ctx->fog_factor_temp;
783         inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
784         inst.Instruction.NumSrcRegs = 1;
785         SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W);
786         inst.Src[0].Register.Negate ^= 1;
787         tctx->emit_instruction(tctx, &inst);
788      }
789      /* f = saturate(f) */
790      inst = tgsi_default_full_instruction();
791      inst.Instruction.Opcode = TGSI_OPCODE_MOV;
792      inst.Instruction.NumDstRegs = 1;
793      inst.Instruction.Saturate = 1;
794      inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
795      inst.Dst[0].Register.Index = ctx->fog_factor_temp;
796      inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
797      inst.Instruction.NumSrcRegs = 1;
798      SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W);
799      tctx->emit_instruction(tctx, &inst);
800
801      /* REG0 = LRP(f, REG0, fogcolor) */
802      inst = tgsi_default_full_instruction();
803      inst.Instruction.Opcode = TGSI_OPCODE_LRP;
804      inst.Instruction.NumDstRegs = 1;
805      inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
806      inst.Dst[0].Register.Index = reg0_index;
807      inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
808      inst.Instruction.NumSrcRegs = 3;
809      SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, X, X, Y);
810      SET_SRC(&inst, 1, TGSI_FILE_TEMPORARY, reg0_index, X, Y, Z, W);
811      SET_SRC(&inst, 2, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI + 1, X, Y, Z, W);
812      tctx->emit_instruction(tctx, &inst);
813   }
814
815   tctx->emit_instruction(tctx, current_inst);
816}
817
818/*
819 * A post-process step in the draw call to fix texture targets and
820 * insert code for fog.
821 */
822const struct tgsi_token *
823st_fixup_atifs(const struct tgsi_token *tokens,
824               const struct st_fp_variant_key *key)
825{
826   struct tgsi_atifs_transform ctx;
827   struct tgsi_token *newtoks;
828   int newlen;
829
830   memset(&ctx, 0, sizeof(ctx));
831   ctx.base.transform_declaration = transform_decl;
832   ctx.base.transform_instruction = transform_instr;
833   ctx.key = key;
834   tgsi_scan_shader(tokens, &ctx.info);
835
836   newlen = tgsi_num_tokens(tokens) + 30;
837   newtoks = tgsi_alloc_tokens(newlen);
838   if (!newtoks)
839      return NULL;
840
841   tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
842   return newtoks;
843}
844
845