1/**************************************************************************
2 *
3 * Copyright 2007-2008 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/*
29 * \author
30 * Michal Krol,
31 * Keith Whitwell
32 */
33
34#include "pipe/p_compiler.h"
35#include "pipe/p_context.h"
36#include "pipe/p_screen.h"
37#include "pipe/p_shader_tokens.h"
38#include "pipe/p_state.h"
39#include "tgsi/tgsi_ureg.h"
40#include "st_mesa_to_tgsi.h"
41#include "st_context.h"
42#include "program/prog_instruction.h"
43#include "program/prog_parameter.h"
44#include "util/u_debug.h"
45#include "util/u_math.h"
46#include "util/u_memory.h"
47#include "st_glsl_to_tgsi.h" /* for _mesa_sysval_to_semantic */
48
49
/**
 * Bitmask with one bit set for each register file that holds constant
 * data: uniforms, constants/immediates and state variables.
 */
#define PROGRAM_ANY_CONST                                   \
   ((1 << PROGRAM_UNIFORM) |                                \
    (1 << PROGRAM_CONSTANT) |                               \
    (1 << PROGRAM_STATE_VAR))
53
/**
 * Intermediate state used during shader translation.
 *
 * The register arrays cache the TGSI registers that correspond to the
 * Mesa register files, so that dst_register() and src_register() can
 * resolve a (file, index) pair with a simple array lookup.
 */
struct st_translate {
   /** TGSI program builder that receives all declarations/instructions. */
   struct ureg_program *ureg;

   /** Temporaries, indexed by Mesa temp index; declared on first use. */
   struct ureg_dst temps[MAX_PROGRAM_TEMPS];
   /**
    * Constant registers, indexed directly by the Mesa register index for
    * PROGRAM_UNIFORM, PROGRAM_STATE_VAR and PROGRAM_CONSTANT.
    * NOTE(review): allocation and ownership are not visible in this part
    * of the file -- confirm against the code that fills it in.
    */
   struct ureg_src *constants;
   /** Declared outputs, indexed by outputMapping[Mesa output index]. */
   struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
   /** Declared inputs, indexed by inputMapping[Mesa input index]. */
   struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
   /** The single address register (used for relative addressing). */
   struct ureg_dst address[1];
   /** Sampler registers, indexed by the instruction's TexSrcUnit. */
   struct ureg_src samplers[PIPE_MAX_SAMPLERS];
   /** System value registers, indexed by SYSTEM_VALUE_x. */
   struct ureg_src systemValues[SYSTEM_VALUE_MAX];

   /** Maps a Mesa input register index to a slot in inputs[]. */
   const ubyte *inputMapping;
   /** Maps a Mesa output register index to a slot in outputs[]. */
   const ubyte *outputMapping;

   unsigned procType;  /**< PIPE_SHADER_VERTEX/FRAGMENT/GEOMETRY */
};
73
74
75/**
76 * Map a Mesa dst register to a TGSI ureg_dst register.
77 */
78static struct ureg_dst
79dst_register(struct st_translate *t, gl_register_file file, GLuint index)
80{
81   switch(file) {
82   case PROGRAM_UNDEFINED:
83      return ureg_dst_undef();
84
85   case PROGRAM_TEMPORARY:
86      if (ureg_dst_is_undef(t->temps[index]))
87         t->temps[index] = ureg_DECL_temporary(t->ureg);
88
89      return t->temps[index];
90
91   case PROGRAM_OUTPUT:
92      if (t->procType == PIPE_SHADER_VERTEX)
93         assert(index < VARYING_SLOT_MAX);
94      else if (t->procType == PIPE_SHADER_FRAGMENT)
95         assert(index < FRAG_RESULT_MAX);
96      else
97         assert(index < VARYING_SLOT_MAX);
98
99      assert(t->outputMapping[index] < ARRAY_SIZE(t->outputs));
100
101      return t->outputs[t->outputMapping[index]];
102
103   case PROGRAM_ADDRESS:
104      return t->address[index];
105
106   default:
107      debug_assert(0);
108      return ureg_dst_undef();
109   }
110}
111
112
113/**
114 * Map a Mesa src register to a TGSI ureg_src register.
115 */
116static struct ureg_src
117src_register(struct st_translate *t,
118              gl_register_file file,
119              GLint index)
120{
121   switch(file) {
122   case PROGRAM_UNDEFINED:
123      return ureg_src_undef();
124
125   case PROGRAM_TEMPORARY:
126      assert(index >= 0);
127      assert(index < ARRAY_SIZE(t->temps));
128      if (ureg_dst_is_undef(t->temps[index]))
129         t->temps[index] = ureg_DECL_temporary(t->ureg);
130      return ureg_src(t->temps[index]);
131
132   case PROGRAM_UNIFORM:
133      assert(index >= 0);
134      return t->constants[index];
135   case PROGRAM_STATE_VAR:
136   case PROGRAM_CONSTANT:       /* ie, immediate */
137      if (index < 0)
138         return ureg_DECL_constant(t->ureg, 0);
139      else
140         return t->constants[index];
141
142   case PROGRAM_INPUT:
143      assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs));
144      return t->inputs[t->inputMapping[index]];
145
146   case PROGRAM_OUTPUT:
147      assert(t->outputMapping[index] < ARRAY_SIZE(t->outputs));
148      return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */
149
150   case PROGRAM_ADDRESS:
151      return ureg_src(t->address[index]);
152
153   case PROGRAM_SYSTEM_VALUE:
154      assert(index < ARRAY_SIZE(t->systemValues));
155      return t->systemValues[index];
156
157   default:
158      debug_assert(0);
159      return ureg_src_undef();
160   }
161}
162
163
164/**
165 * Map mesa texture target to TGSI texture target.
166 */
167enum tgsi_texture_type
168st_translate_texture_target(gl_texture_index textarget, GLboolean shadow)
169{
170   if (shadow) {
171      switch (textarget) {
172      case TEXTURE_1D_INDEX:
173         return TGSI_TEXTURE_SHADOW1D;
174      case TEXTURE_2D_INDEX:
175         return TGSI_TEXTURE_SHADOW2D;
176      case TEXTURE_RECT_INDEX:
177         return TGSI_TEXTURE_SHADOWRECT;
178      case TEXTURE_1D_ARRAY_INDEX:
179         return TGSI_TEXTURE_SHADOW1D_ARRAY;
180      case TEXTURE_2D_ARRAY_INDEX:
181         return TGSI_TEXTURE_SHADOW2D_ARRAY;
182      case TEXTURE_CUBE_INDEX:
183         return TGSI_TEXTURE_SHADOWCUBE;
184      case TEXTURE_CUBE_ARRAY_INDEX:
185         return TGSI_TEXTURE_SHADOWCUBE_ARRAY;
186      default:
187         break;
188      }
189   }
190
191   switch (textarget) {
192   case TEXTURE_2D_MULTISAMPLE_INDEX:
193      return TGSI_TEXTURE_2D_MSAA;
194   case TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX:
195      return TGSI_TEXTURE_2D_ARRAY_MSAA;
196   case TEXTURE_BUFFER_INDEX:
197      return TGSI_TEXTURE_BUFFER;
198   case TEXTURE_1D_INDEX:
199      return TGSI_TEXTURE_1D;
200   case TEXTURE_2D_INDEX:
201      return TGSI_TEXTURE_2D;
202   case TEXTURE_3D_INDEX:
203      return TGSI_TEXTURE_3D;
204   case TEXTURE_CUBE_INDEX:
205      return TGSI_TEXTURE_CUBE;
206   case TEXTURE_CUBE_ARRAY_INDEX:
207      return TGSI_TEXTURE_CUBE_ARRAY;
208   case TEXTURE_RECT_INDEX:
209      return TGSI_TEXTURE_RECT;
210   case TEXTURE_1D_ARRAY_INDEX:
211      return TGSI_TEXTURE_1D_ARRAY;
212   case TEXTURE_2D_ARRAY_INDEX:
213      return TGSI_TEXTURE_2D_ARRAY;
214   case TEXTURE_EXTERNAL_INDEX:
215      return TGSI_TEXTURE_2D;
216   default:
217      debug_assert(!"unexpected texture target index");
218      return TGSI_TEXTURE_1D;
219   }
220}
221
222
223/**
224 * Map GLSL base type to TGSI return type.
225 */
226enum tgsi_return_type
227st_translate_texture_type(enum glsl_base_type type)
228{
229   switch (type) {
230   case GLSL_TYPE_INT:
231      return TGSI_RETURN_TYPE_SINT;
232   case GLSL_TYPE_UINT:
233      return TGSI_RETURN_TYPE_UINT;
234   case GLSL_TYPE_FLOAT:
235      return TGSI_RETURN_TYPE_FLOAT;
236   default:
237      assert(!"unexpected texture type");
238      return TGSI_RETURN_TYPE_UNKNOWN;
239   }
240}
241
242
243/**
244 * Translate a (1 << TEXTURE_x_INDEX) bit into a TGSI_TEXTURE_x enum.
245 */
246static unsigned
247translate_texture_index(GLbitfield texBit, bool shadow)
248{
249   int index = ffs(texBit);
250   assert(index > 0);
251   assert(index - 1 < NUM_TEXTURE_TARGETS);
252   return st_translate_texture_target(index - 1, shadow);
253}
254
255
256/**
257 * Create a TGSI ureg_dst register from a Mesa dest register.
258 */
259static struct ureg_dst
260translate_dst(struct st_translate *t,
261              const struct prog_dst_register *DstReg,
262              boolean saturate)
263{
264   struct ureg_dst dst = dst_register(t, DstReg->File, DstReg->Index);
265
266   dst = ureg_writemask(dst, DstReg->WriteMask);
267
268   if (saturate)
269      dst = ureg_saturate(dst);
270
271   if (DstReg->RelAddr)
272      dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
273
274   return dst;
275}
276
277
278/**
279 * Create a TGSI ureg_src register from a Mesa src register.
280 */
281static struct ureg_src
282translate_src(struct st_translate *t,
283              const struct prog_src_register *SrcReg)
284{
285   struct ureg_src src = src_register(t, SrcReg->File, SrcReg->Index);
286
287   src = ureg_swizzle(src,
288                      GET_SWZ(SrcReg->Swizzle, 0) & 0x3,
289                      GET_SWZ(SrcReg->Swizzle, 1) & 0x3,
290                      GET_SWZ(SrcReg->Swizzle, 2) & 0x3,
291                      GET_SWZ(SrcReg->Swizzle, 3) & 0x3);
292
293   if (SrcReg->Negate == NEGATE_XYZW)
294      src = ureg_negate(src);
295
296   if (SrcReg->RelAddr) {
297      src = ureg_src_indirect(src, ureg_src(t->address[0]));
298      if (SrcReg->File != PROGRAM_INPUT &&
299          SrcReg->File != PROGRAM_OUTPUT) {
300         /* If SrcReg->Index was negative, it was set to zero in
301          * src_register().  Reassign it now.  But don't do this
302          * for input/output regs since they get remapped while
303          * const buffers don't.
304          */
305         src.Index = SrcReg->Index;
306      }
307   }
308
309   return src;
310}
311
312
313static struct ureg_src
314swizzle_4v(struct ureg_src src, const unsigned *swz)
315{
316   return ureg_swizzle(src, swz[0], swz[1], swz[2], swz[3]);
317}
318
319
320/**
321 * Translate a SWZ instruction into a MOV, MUL or MAD instruction.  EG:
322 *
323 *   SWZ dst, src.x-y10
324 *
325 * becomes:
326 *
327 *   MAD dst {1,-1,0,0}, src.xyxx, {0,0,1,0}
328 */
329static void
330emit_swz(struct st_translate *t,
331         struct ureg_dst dst,
332         const struct prog_src_register *SrcReg)
333{
334   struct ureg_program *ureg = t->ureg;
335   struct ureg_src src = src_register(t, SrcReg->File, SrcReg->Index);
336
337   unsigned negate_mask =  SrcReg->Negate;
338
339   unsigned one_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ONE) << 0 |
340                        (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ONE) << 1 |
341                        (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ONE) << 2 |
342                        (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ONE) << 3);
343
344   unsigned zero_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ZERO) << 0 |
345                         (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ZERO) << 1 |
346                         (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ZERO) << 2 |
347                         (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ZERO) << 3);
348
349   unsigned negative_one_mask = one_mask & negate_mask;
350   unsigned positive_one_mask = one_mask & ~negate_mask;
351
352   struct ureg_src imm;
353   unsigned i;
354   unsigned mul_swizzle[4] = {0,0,0,0};
355   unsigned add_swizzle[4] = {0,0,0,0};
356   unsigned src_swizzle[4] = {0,0,0,0};
357   boolean need_add = FALSE;
358   boolean need_mul = FALSE;
359
360   if (dst.WriteMask == 0)
361      return;
362
363   /* Is this just a MOV?
364    */
365   if (zero_mask == 0 &&
366       one_mask == 0 &&
367       (negate_mask == 0 || negate_mask == TGSI_WRITEMASK_XYZW)) {
368      ureg_MOV(ureg, dst, translate_src(t, SrcReg));
369      return;
370   }
371
372#define IMM_ZERO    0
373#define IMM_ONE     1
374#define IMM_NEG_ONE 2
375
376   imm = ureg_imm3f(ureg, 0, 1, -1);
377
378   for (i = 0; i < 4; i++) {
379      unsigned bit = 1 << i;
380
381      if (dst.WriteMask & bit) {
382         if (positive_one_mask & bit) {
383            mul_swizzle[i] = IMM_ZERO;
384            add_swizzle[i] = IMM_ONE;
385            need_add = TRUE;
386         }
387         else if (negative_one_mask & bit) {
388            mul_swizzle[i] = IMM_ZERO;
389            add_swizzle[i] = IMM_NEG_ONE;
390            need_add = TRUE;
391         }
392         else if (zero_mask & bit) {
393            mul_swizzle[i] = IMM_ZERO;
394            add_swizzle[i] = IMM_ZERO;
395            need_add = TRUE;
396         }
397         else {
398            add_swizzle[i] = IMM_ZERO;
399            src_swizzle[i] = GET_SWZ(SrcReg->Swizzle, i);
400            need_mul = TRUE;
401            if (negate_mask & bit) {
402               mul_swizzle[i] = IMM_NEG_ONE;
403            }
404            else {
405               mul_swizzle[i] = IMM_ONE;
406            }
407         }
408      }
409   }
410
411   if (need_mul && need_add) {
412      ureg_MAD(ureg,
413               dst,
414               swizzle_4v(src, src_swizzle),
415               swizzle_4v(imm, mul_swizzle),
416               swizzle_4v(imm, add_swizzle));
417   }
418   else if (need_mul) {
419      ureg_MUL(ureg,
420               dst,
421               swizzle_4v(src, src_swizzle),
422               swizzle_4v(imm, mul_swizzle));
423   }
424   else if (need_add) {
425      ureg_MOV(ureg,
426               dst,
427               swizzle_4v(imm, add_swizzle));
428   }
429   else {
430      debug_assert(0);
431   }
432
433#undef IMM_ZERO
434#undef IMM_ONE
435#undef IMM_NEG_ONE
436}
437
438
439static unsigned
440translate_opcode(unsigned op)
441{
442   switch(op) {
443   case OPCODE_ARL:
444      return TGSI_OPCODE_ARL;
445   case OPCODE_ADD:
446      return TGSI_OPCODE_ADD;
447   case OPCODE_CMP:
448      return TGSI_OPCODE_CMP;
449   case OPCODE_COS:
450      return TGSI_OPCODE_COS;
451   case OPCODE_DP3:
452      return TGSI_OPCODE_DP3;
453   case OPCODE_DP4:
454      return TGSI_OPCODE_DP4;
455   case OPCODE_DST:
456      return TGSI_OPCODE_DST;
457   case OPCODE_EX2:
458      return TGSI_OPCODE_EX2;
459   case OPCODE_EXP:
460      return TGSI_OPCODE_EXP;
461   case OPCODE_FLR:
462      return TGSI_OPCODE_FLR;
463   case OPCODE_FRC:
464      return TGSI_OPCODE_FRC;
465   case OPCODE_KIL:
466      return TGSI_OPCODE_KILL_IF;
467   case OPCODE_LG2:
468      return TGSI_OPCODE_LG2;
469   case OPCODE_LOG:
470      return TGSI_OPCODE_LOG;
471   case OPCODE_LIT:
472      return TGSI_OPCODE_LIT;
473   case OPCODE_LRP:
474      return TGSI_OPCODE_LRP;
475   case OPCODE_MAD:
476      return TGSI_OPCODE_MAD;
477   case OPCODE_MAX:
478      return TGSI_OPCODE_MAX;
479   case OPCODE_MIN:
480      return TGSI_OPCODE_MIN;
481   case OPCODE_MOV:
482      return TGSI_OPCODE_MOV;
483   case OPCODE_MUL:
484      return TGSI_OPCODE_MUL;
485   case OPCODE_POW:
486      return TGSI_OPCODE_POW;
487   case OPCODE_RCP:
488      return TGSI_OPCODE_RCP;
489   case OPCODE_SGE:
490      return TGSI_OPCODE_SGE;
491   case OPCODE_SIN:
492      return TGSI_OPCODE_SIN;
493   case OPCODE_SLT:
494      return TGSI_OPCODE_SLT;
495   case OPCODE_TEX:
496      return TGSI_OPCODE_TEX;
497   case OPCODE_TXB:
498      return TGSI_OPCODE_TXB;
499   case OPCODE_TXP:
500      return TGSI_OPCODE_TXP;
501   case OPCODE_END:
502      return TGSI_OPCODE_END;
503   default:
504      debug_assert(0);
505      return TGSI_OPCODE_NOP;
506   }
507}
508
509
510static void
511compile_instruction(struct gl_context *ctx,
512                    struct st_translate *t,
513                    const struct prog_instruction *inst)
514{
515   struct ureg_program *ureg = t->ureg;
516   GLuint i;
517   struct ureg_dst dst[1] = { { 0 } };
518   struct ureg_src src[4];
519   unsigned num_dst;
520   unsigned num_src;
521
522   num_dst = _mesa_num_inst_dst_regs(inst->Opcode);
523   num_src = _mesa_num_inst_src_regs(inst->Opcode);
524
525   if (num_dst)
526      dst[0] = translate_dst(t, &inst->DstReg, inst->Saturate);
527
528   for (i = 0; i < num_src; i++)
529      src[i] = translate_src(t, &inst->SrcReg[i]);
530
531   switch(inst->Opcode) {
532   case OPCODE_SWZ:
533      emit_swz(t, dst[0], &inst->SrcReg[0]);
534      return;
535
536   case OPCODE_TEX:
537   case OPCODE_TXB:
538   case OPCODE_TXP:
539      src[num_src++] = t->samplers[inst->TexSrcUnit];
540      ureg_tex_insn(ureg,
541                    translate_opcode(inst->Opcode),
542                    dst, num_dst,
543                    st_translate_texture_target(inst->TexSrcTarget,
544                                                inst->TexShadow),
545                    TGSI_RETURN_TYPE_FLOAT,
546                    NULL, 0,
547                    src, num_src);
548      return;
549
550   case OPCODE_SCS:
551      ureg_COS(ureg, ureg_writemask(dst[0], TGSI_WRITEMASK_X),
552               ureg_scalar(src[0], TGSI_SWIZZLE_X));
553      ureg_SIN(ureg, ureg_writemask(dst[0], TGSI_WRITEMASK_Y),
554               ureg_scalar(src[0], TGSI_SWIZZLE_X));
555      break;
556
557   case OPCODE_XPD: {
558      struct ureg_dst tmp = ureg_DECL_temporary(ureg);
559
560      ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ),
561               ureg_swizzle(src[0], TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
562                            TGSI_SWIZZLE_X, 0),
563               ureg_swizzle(src[1], TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
564                            TGSI_SWIZZLE_Y, 0));
565      ureg_MAD(ureg, ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ),
566               ureg_swizzle(src[0], TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
567                            TGSI_SWIZZLE_Y, 0),
568               ureg_negate(ureg_swizzle(src[1], TGSI_SWIZZLE_Y,
569                                        TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, 0)),
570               ureg_src(tmp));
571      break;
572   }
573
574   case OPCODE_RSQ:
575      ureg_RSQ(ureg, dst[0], ureg_abs(src[0]));
576      break;
577
578   case OPCODE_ABS:
579      ureg_MOV(ureg, dst[0], ureg_abs(src[0]));
580      break;
581
582   case OPCODE_SUB:
583      ureg_ADD(ureg, dst[0], src[0], ureg_negate(src[1]));
584      break;
585
586   case OPCODE_DPH: {
587      struct ureg_dst temp = ureg_DECL_temporary(ureg);
588
589      /* DPH = DP4(src0, src1) where src0.w = 1. */
590      ureg_MOV(ureg, ureg_writemask(temp, TGSI_WRITEMASK_XYZ), src[0]);
591      ureg_MOV(ureg, ureg_writemask(temp, TGSI_WRITEMASK_W),
592               ureg_imm1f(ureg, 1));
593      ureg_DP4(ureg, dst[0], ureg_src(temp), src[1]);
594      break;
595   }
596
597   default:
598      ureg_insn(ureg,
599                 translate_opcode(inst->Opcode),
600                 dst, num_dst,
601                 src, num_src, 0);
602      break;
603   }
604}
605
606
607/**
608 * Emit the TGSI instructions for inverting and adjusting WPOS.
609 * This code is unavoidable because it also depends on whether
610 * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
611 */
612static void
613emit_wpos_adjustment(struct gl_context *ctx,
614                     struct st_translate *t,
615                     const struct gl_program *program,
616                     boolean invert,
617                     GLfloat adjX, GLfloat adjY[2])
618{
619   struct ureg_program *ureg = t->ureg;
620
621   /* Fragment program uses fragment position input.
622    * Need to replace instances of INPUT[WPOS] with temp T
623    * where T = INPUT[WPOS] by y is inverted.
624    */
625   static const gl_state_index16 wposTransformState[STATE_LENGTH]
626      = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0 };
627
628   /* XXX: note we are modifying the incoming shader here!  Need to
629    * do this before emitting the constant decls below, or this
630    * will be missed:
631    */
632   unsigned wposTransConst = _mesa_add_state_reference(program->Parameters,
633                                                       wposTransformState);
634
635   struct ureg_src wpostrans = ureg_DECL_constant(ureg, wposTransConst);
636   struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg);
637   struct ureg_src *wpos =
638      ctx->Const.GLSLFragCoordIsSysVal ?
639         &t->systemValues[SYSTEM_VALUE_FRAG_COORD] :
640         &t->inputs[t->inputMapping[VARYING_SLOT_POS]];
641   struct ureg_src wpos_input = *wpos;
642
643   /* First, apply the coordinate shift: */
644   if (adjX || adjY[0] || adjY[1]) {
645      if (adjY[0] != adjY[1]) {
646         /* Adjust the y coordinate by adjY[1] or adjY[0] respectively
647          * depending on whether inversion is actually going to be applied
648          * or not, which is determined by testing against the inversion
649          * state variable used below, which will be either +1 or -1.
650          */
651         struct ureg_dst adj_temp = ureg_DECL_temporary(ureg);
652
653         ureg_CMP(ureg, adj_temp,
654                  ureg_scalar(wpostrans, invert ? 2 : 0),
655                  ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f),
656                  ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f));
657         ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp));
658      } else {
659         ureg_ADD(ureg, wpos_temp, wpos_input,
660                  ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f));
661      }
662      wpos_input = ureg_src(wpos_temp);
663   } else {
664      /* MOV wpos_temp, input[wpos]
665       */
666      ureg_MOV(ureg, wpos_temp, wpos_input);
667   }
668
669   /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be
670    * inversion/identity, or the other way around if we're drawing to an FBO.
671    */
672   if (invert) {
673      /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
674       */
675      ureg_MAD(ureg,
676                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
677                wpos_input,
678                ureg_scalar(wpostrans, 0),
679                ureg_scalar(wpostrans, 1));
680   } else {
681      /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
682       */
683      ureg_MAD(ureg,
684                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
685                wpos_input,
686                ureg_scalar(wpostrans, 2),
687                ureg_scalar(wpostrans, 3));
688   }
689
690   /* Use wpos_temp as position input from here on:
691    */
692   *wpos = ureg_src(wpos_temp);
693}
694
695
696/**
697 * Emit fragment position/coordinate code.
698 */
699static void
700emit_wpos(struct st_context *st,
701          struct st_translate *t,
702          const struct gl_program *program,
703          struct ureg_program *ureg)
704{
705   struct pipe_screen *pscreen = st->pipe->screen;
706   GLfloat adjX = 0.0f;
707   GLfloat adjY[2] = { 0.0f, 0.0f };
708   boolean invert = FALSE;
709
710   /* Query the pixel center conventions supported by the pipe driver and set
711    * adjX, adjY to help out if it cannot handle the requested one internally.
712    *
713    * The bias of the y-coordinate depends on whether y-inversion takes place
714    * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are
715    * drawing to an FBO (causes additional inversion), and whether the pipe
716    * driver origin and the requested origin differ (the latter condition is
717    * stored in the 'invert' variable).
718    *
719    * For height = 100 (i = integer, h = half-integer, l = lower, u = upper):
720    *
721    * center shift only:
722    * i -> h: +0.5
723    * h -> i: -0.5
724    *
725    * inversion only:
726    * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99
727    * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5
728    * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0
729    * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5
730    *
731    * inversion and center shift:
732    * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5
733    * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99
734    * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5
735    * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0
736    */
737   if (program->info.fs.origin_upper_left) {
738      /* Fragment shader wants origin in upper-left */
739      if (pscreen->get_param(pscreen,
740                             PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
741         /* the driver supports upper-left origin */
742      }
743      else if (pscreen->get_param(pscreen,
744                                  PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
745         /* the driver supports lower-left origin, need to invert Y */
746         ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN,
747                       TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
748         invert = TRUE;
749      }
750      else
751         assert(0);
752   }
753   else {
754      /* Fragment shader wants origin in lower-left */
755      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
756         /* the driver supports lower-left origin */
757         ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN,
758                       TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
759      else if (pscreen->get_param(pscreen,
760                                  PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
761         /* the driver supports upper-left origin, need to invert Y */
762         invert = TRUE;
763      else
764         assert(0);
765   }
766
767   if (program->info.fs.pixel_center_integer) {
768      /* Fragment shader wants pixel center integer */
769      if (pscreen->get_param(pscreen,
770                             PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
771         /* the driver supports pixel center integer */
772         adjY[1] = 1.0f;
773         ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
774                       TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
775      }
776      else if (pscreen->get_param(pscreen,
777                            PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
778         /* the driver supports pixel center half integer, need to bias X,Y */
779         adjX = -0.5f;
780         adjY[0] = -0.5f;
781         adjY[1] = 0.5f;
782      }
783      else
784         assert(0);
785   }
786   else {
787      /* Fragment shader wants pixel center half integer */
788      if (pscreen->get_param(pscreen,
789                          PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
790         /* the driver supports pixel center half integer */
791      }
792      else if (pscreen->get_param(pscreen,
793                               PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
794         /* the driver supports pixel center integer, need to bias X,Y */
795         adjX = adjY[0] = adjY[1] = 0.5f;
796         ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
797                       TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
798      }
799      else
800         assert(0);
801   }
802
803   /* we invert after adjustment so that we avoid the MOV to temporary,
804    * and reuse the adjustment ADD instead */
805   emit_wpos_adjustment(st->ctx, t, program, invert, adjX, adjY);
806}
807
808
809/**
810 * Translate Mesa program to TGSI format.
811 * \param program  the program to translate
812 * \param numInputs  number of input registers used
813 * \param inputMapping  maps Mesa fragment program inputs to TGSI generic
814 *                      input indexes
815 * \param inputSemanticName  the TGSI_SEMANTIC flag for each input
816 * \param inputSemanticIndex  the semantic index (ex: which texcoord) for
817 *                            each input
818 * \param interpMode  the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
819 * \param numOutputs  number of output registers used
820 * \param outputMapping  maps Mesa fragment program outputs to TGSI
821 *                       generic outputs
822 * \param outputSemanticName  the TGSI_SEMANTIC flag for each output
823 * \param outputSemanticIndex  the semantic index (ex: which texcoord) for
824 *                             each output
825 *
826 * \return  PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
827 */
828enum pipe_error
829st_translate_mesa_program(struct gl_context *ctx,
830                          uint procType,
831                          struct ureg_program *ureg,
832                          const struct gl_program *program,
833                          GLuint numInputs,
834                          const ubyte inputMapping[],
835                          const ubyte inputSemanticName[],
836                          const ubyte inputSemanticIndex[],
837                          const ubyte interpMode[],
838                          GLuint numOutputs,
839                          const ubyte outputMapping[],
840                          const ubyte outputSemanticName[],
841                          const ubyte outputSemanticIndex[])
842{
843   struct st_translate translate, *t;
844   unsigned i;
845   enum pipe_error ret = PIPE_OK;
846
847   assert(numInputs <= ARRAY_SIZE(t->inputs));
848   assert(numOutputs <= ARRAY_SIZE(t->outputs));
849
850   t = &translate;
851   memset(t, 0, sizeof *t);
852
853   t->procType = procType;
854   t->inputMapping = inputMapping;
855   t->outputMapping = outputMapping;
856   t->ureg = ureg;
857
858   /*_mesa_print_program(program);*/
859
860   /*
861    * Declare input attributes.
862    */
863   if (procType == PIPE_SHADER_FRAGMENT) {
864      for (i = 0; i < numInputs; i++) {
865         t->inputs[i] = ureg_DECL_fs_input(ureg,
866                                           inputSemanticName[i],
867                                           inputSemanticIndex[i],
868                                           interpMode[i]);
869      }
870
871      if (program->info.inputs_read & VARYING_BIT_POS) {
872         /* Must do this after setting up t->inputs, and before
873          * emitting constant references, below:
874          */
875         emit_wpos(st_context(ctx), t, program, ureg);
876      }
877
878      /*
879       * Declare output attributes.
880       */
881      for (i = 0; i < numOutputs; i++) {
882         switch (outputSemanticName[i]) {
883         case TGSI_SEMANTIC_POSITION:
884            t->outputs[i] = ureg_DECL_output(ureg,
885                                             TGSI_SEMANTIC_POSITION, /* Z / Depth */
886                                             outputSemanticIndex[i]);
887
888            t->outputs[i] = ureg_writemask(t->outputs[i],
889                                           TGSI_WRITEMASK_Z);
890            break;
891         case TGSI_SEMANTIC_STENCIL:
892            t->outputs[i] = ureg_DECL_output(ureg,
893                                             TGSI_SEMANTIC_STENCIL, /* Stencil */
894                                             outputSemanticIndex[i]);
895            t->outputs[i] = ureg_writemask(t->outputs[i],
896                                           TGSI_WRITEMASK_Y);
897            break;
898         case TGSI_SEMANTIC_COLOR:
899            t->outputs[i] = ureg_DECL_output(ureg,
900                                             TGSI_SEMANTIC_COLOR,
901                                             outputSemanticIndex[i]);
902            break;
903         default:
904            debug_assert(0);
905            return 0;
906         }
907      }
908   }
909   else if (procType == PIPE_SHADER_GEOMETRY) {
910      for (i = 0; i < numInputs; i++) {
911         t->inputs[i] = ureg_DECL_input(ureg,
912                                        inputSemanticName[i],
913                                        inputSemanticIndex[i], 0, 1);
914      }
915
916      for (i = 0; i < numOutputs; i++) {
917         t->outputs[i] = ureg_DECL_output(ureg,
918                                          outputSemanticName[i],
919                                          outputSemanticIndex[i]);
920      }
921   }
922   else {
923      assert(procType == PIPE_SHADER_VERTEX);
924
925      for (i = 0; i < numInputs; i++) {
926         t->inputs[i] = ureg_DECL_vs_input(ureg, i);
927      }
928
929      for (i = 0; i < numOutputs; i++) {
930         t->outputs[i] = ureg_DECL_output(ureg,
931                                          outputSemanticName[i],
932                                          outputSemanticIndex[i]);
933         if (outputSemanticName[i] == TGSI_SEMANTIC_FOG) {
934            /* force register to contain a fog coordinate in the
935             * form (F, 0, 0, 1).
936             */
937            ureg_MOV(ureg,
938                     ureg_writemask(t->outputs[i], TGSI_WRITEMASK_YZW),
939                     ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
940            t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X);
941         }
942      }
943   }
944
945   /* Declare address register.
946    */
947   if (program->arb.NumAddressRegs > 0) {
948      debug_assert(program->arb.NumAddressRegs == 1);
949      t->address[0] = ureg_DECL_address(ureg);
950   }
951
952   /* Declare misc input registers
953    */
954   GLbitfield64 sysInputs = program->info.system_values_read;
955   for (i = 0; sysInputs; i++) {
956      if (sysInputs & (1ull << i)) {
957         unsigned semName = _mesa_sysval_to_semantic(i);
958
959         t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0);
960
961         if (semName == TGSI_SEMANTIC_INSTANCEID ||
962             semName == TGSI_SEMANTIC_VERTEXID) {
963            /* From Gallium perspective, these system values are always
964             * integer, and require native integer support.  However, if
965             * native integer is supported on the vertex stage but not the
966             * pixel stage (e.g, i915g + draw), Mesa will generate IR that
967             * assumes these system values are floats. To resolve the
968             * inconsistency, we insert a U2F.
969             */
970            struct st_context *st = st_context(ctx);
971            struct pipe_screen *pscreen = st->pipe->screen;
972            assert(procType == PIPE_SHADER_VERTEX);
973            assert(pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX,
974                   PIPE_SHADER_CAP_INTEGERS));
975            (void) pscreen;  /* silence non-debug build warnings */
976            if (!ctx->Const.NativeIntegers) {
977               struct ureg_dst temp = ureg_DECL_local_temporary(t->ureg);
978               ureg_U2F(t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X),
979                        t->systemValues[i]);
980               t->systemValues[i] = ureg_scalar(ureg_src(temp), 0);
981            }
982         }
983
984         if (procType == PIPE_SHADER_FRAGMENT &&
985             semName == TGSI_SEMANTIC_POSITION)
986            emit_wpos(st_context(ctx), t, program, ureg);
987
988          sysInputs &= ~(1ull << i);
989      }
990   }
991
992   if (program->arb.IndirectRegisterFiles & (1 << PROGRAM_TEMPORARY)) {
993      /* If temps are accessed with indirect addressing, declare temporaries
994       * in sequential order.  Else, we declare them on demand elsewhere.
995       */
996      for (i = 0; i < program->arb.NumTemporaries; i++) {
997         /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
998         t->temps[i] = ureg_DECL_temporary(t->ureg);
999      }
1000   }
1001
1002   /* Emit constants and immediates.  Mesa uses a single index space
1003    * for these, so we put all the translated regs in t->constants.
1004    */
1005   if (program->Parameters) {
1006      t->constants = calloc(program->Parameters->NumParameters,
1007                             sizeof t->constants[0]);
1008      if (t->constants == NULL) {
1009         ret = PIPE_ERROR_OUT_OF_MEMORY;
1010         goto out;
1011      }
1012
1013      for (i = 0; i < program->Parameters->NumParameters; i++) {
1014         unsigned pvo = program->Parameters->ParameterValueOffset[i];
1015
1016         switch (program->Parameters->Parameters[i].Type) {
1017         case PROGRAM_STATE_VAR:
1018         case PROGRAM_UNIFORM:
1019            t->constants[i] = ureg_DECL_constant(ureg, i);
1020            break;
1021
1022            /* Emit immediates only when there's no indirect addressing of
1023             * the const buffer.
1024             * FIXME: Be smarter and recognize param arrays:
1025             * indirect addressing is only valid within the referenced
1026             * array.
1027             */
1028         case PROGRAM_CONSTANT:
1029            if (program->arb.IndirectRegisterFiles & PROGRAM_ANY_CONST)
1030               t->constants[i] = ureg_DECL_constant( ureg, i );
1031            else
1032               t->constants[i] =
1033                  ureg_DECL_immediate(ureg,
1034                                      (const float *)
1035                                      program->Parameters->ParameterValues + pvo,
1036                                      4);
1037            break;
1038         default:
1039            break;
1040         }
1041      }
1042   }
1043
1044   /* texture samplers */
1045   for (i = 0;
1046        i < ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; i++) {
1047      if (program->SamplersUsed & (1u << i)) {
1048         unsigned target =
1049            translate_texture_index(program->TexturesUsed[i],
1050                                    !!(program->ShadowSamplers & (1 << i)));
1051         t->samplers[i] = ureg_DECL_sampler(ureg, i);
1052         ureg_DECL_sampler_view(ureg, i, target,
1053                                TGSI_RETURN_TYPE_FLOAT,
1054                                TGSI_RETURN_TYPE_FLOAT,
1055                                TGSI_RETURN_TYPE_FLOAT,
1056                                TGSI_RETURN_TYPE_FLOAT);
1057
1058      }
1059   }
1060
1061   /* Emit each instruction in turn:
1062    */
1063   for (i = 0; i < program->arb.NumInstructions; i++)
1064      compile_instruction(ctx, t, &program->arb.Instructions[i]);
1065
1066out:
1067   free(t->constants);
1068   return ret;
1069}
1070