1/**********************************************************
2 * Copyright 1998-2013 VMware, Inc.  All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **********************************************************/
25
26/**
27 * @file svga_tgsi_vgpu10.c
28 *
29 * TGSI -> VGPU10 shader translation.
30 *
31 * \author Mingcheng Chen
32 * \author Brian Paul
33 */
34
35#include "pipe/p_compiler.h"
36#include "pipe/p_shader_tokens.h"
37#include "pipe/p_defines.h"
38#include "tgsi/tgsi_build.h"
39#include "tgsi/tgsi_dump.h"
40#include "tgsi/tgsi_info.h"
41#include "tgsi/tgsi_parse.h"
42#include "tgsi/tgsi_scan.h"
43#include "tgsi/tgsi_strings.h"
44#include "tgsi/tgsi_two_side.h"
45#include "tgsi/tgsi_aa_point.h"
46#include "tgsi/tgsi_util.h"
47#include "util/u_math.h"
48#include "util/u_memory.h"
49#include "util/u_bitmask.h"
50#include "util/u_debug.h"
51#include "util/u_pstipple.h"
52
53#include "svga_context.h"
54#include "svga_debug.h"
55#include "svga_link.h"
56#include "svga_shader.h"
57#include "svga_tgsi.h"
58
59#include "VGPU10ShaderTokens.h"
60
61
/* Sentinel index value.  NOTE(review): presumably marks "no register
 * assigned"; the uses are outside this part of the file -- confirm.
 */
#define INVALID_INDEX 99999

/* NOTE(review): presumably the cap on internally allocated temporaries per
 * instruction; not referenced in this part of the file -- confirm.
 */
#define MAX_INTERNAL_TEMPS 3

/* Size of the emitter's system_value_indexes[] table */
#define MAX_SYSTEM_VALUES 4

/* First dimension of the emitter's immediates[][4] table; also the limit
 * applied to immediate constant buffer indexes in check_register_index().
 */
#define MAX_IMMEDIATE_COUNT \
   (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4)

/* Size of the emitter's temp_arrays[] table.  Enough? */
#define MAX_TEMP_ARRAYS 64
68
69
/**
 * Clipping is complicated.  There are four different cases which we
 * handle during VS/GS shader translation:
 */
enum clipping_mode
{
   /** No clipping enabled */
   CLIP_NONE = 0,

   /**
    * The shader has no clipping declarations or code but one or more
    * user-defined clip planes are enabled.  We generate extra code to
    * emit clip distances.
    */
   CLIP_LEGACY = 1,

   /**
    * The shader already declares clip distance output registers and has
    * code to write to them.
    */
   CLIP_DISTANCE = 2,

   /**
    * The shader declares a clip vertex output register and has code that
    * writes to the register.  We convert the clipvertex position into one
    * or more clip distances.
    */
   CLIP_VERTEX = 3
};
89
90
91/* Shader signature info */
92struct svga_shader_signature
93{
94   SVGA3dDXShaderSignatureHeader header;
95   SVGA3dDXShaderSignatureEntry inputs[PIPE_MAX_SHADER_INPUTS];
96   SVGA3dDXShaderSignatureEntry outputs[PIPE_MAX_SHADER_OUTPUTS];
97   SVGA3dDXShaderSignatureEntry patchConstants[PIPE_MAX_SHADER_OUTPUTS];
98};
99
100static inline void
101set_shader_signature_entry(SVGA3dDXShaderSignatureEntry *e,
102                           unsigned index,
103                           SVGA3dDXSignatureSemanticName sgnName,
104                           unsigned mask,
105                           SVGA3dDXSignatureRegisterComponentType compType,
106                           SVGA3dDXSignatureMinPrecision minPrecision)
107{
108   e->registerIndex = index;
109   e->semanticName = sgnName;
110   e->mask = mask;
111   e->componentType = compType;
112   e->minPrecision = minPrecision;
113};
114
115static const SVGA3dDXSignatureSemanticName
116tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_COUNT] = {
117   SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION,
118   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
119   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
120   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
121   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
122   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
123   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
124   SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE,
125   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
126   SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID,
127   SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID,
128   SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID,
129   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
130   SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE,
131   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
132   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
133   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
134   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
135   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
136   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
137   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
138   SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX,
139   SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX,
140   SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX,
141   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
142   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
143   SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID,
144   SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID,
145   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
146   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
147   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
148   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
149   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
150   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
151   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
152   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
153   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
154   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
155   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
156   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
157   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
158   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
159   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
160   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
161   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
162};
163
164
165/**
166 * Map tgsi semantic name to SVGA signature semantic name
167 */
168static inline SVGA3dDXSignatureSemanticName
169map_tgsi_semantic_to_sgn_name(enum tgsi_semantic name)
170{
171   assert(name < TGSI_SEMANTIC_COUNT);
172
173   /* Do a few asserts here to spot check the mapping */
174   assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_PRIMID] ==
175          SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
176   assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_VIEWPORT_INDEX] ==
177          SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX);
178   assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_INVOCATIONID] ==
179          SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID);
180
181   return tgsi_semantic_to_sgn_name[name];
182}
183
184
185struct svga_shader_emitter_v10
186{
187   /* The token output buffer */
188   unsigned size;
189   char *buf;
190   char *ptr;
191
192   /* Information about the shader and state (does not change) */
193   struct svga_compile_key key;
194   struct tgsi_shader_info info;
195   unsigned unit;
196   unsigned version; /**< Either 40 or 41 at this time */
197
198   unsigned cur_tgsi_token;     /**< current tgsi token position */
199   unsigned inst_start_token;
200   boolean discard_instruction; /**< throw away current instruction? */
201   boolean reemit_instruction;  /**< reemit current instruction */
202   boolean skip_instruction;    /**< skip current instruction */
203
204   union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4];
205   double (*immediates_dbl)[2];
206   unsigned num_immediates;      /**< Number of immediates emitted */
207   unsigned common_immediate_pos[10];  /**< literals for common immediates */
208   unsigned num_common_immediates;
209   boolean immediates_emitted;
210
211   unsigned num_outputs;      /**< include any extra outputs */
212                              /**  The first extra output is reserved for
213                               *   non-adjusted vertex position for
214                               *   stream output purpose
215                               */
216
217   /* Temporary Registers */
218   unsigned num_shader_temps; /**< num of temps used by original shader */
219   unsigned internal_temp_count;  /**< currently allocated internal temps */
220   struct {
221      unsigned start, size;
222   } temp_arrays[MAX_TEMP_ARRAYS];
223   unsigned num_temp_arrays;
224
225   /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */
226   struct {
227      unsigned arrayId, index;
228      boolean initialized;
229   } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */
230
231   unsigned initialize_temp_index;
232
233   /** Number of constants used by original shader for each constant buffer.
234    * The size should probably always match with that of svga_state.constbufs.
235    */
236   unsigned num_shader_consts[SVGA_MAX_CONST_BUFS];
237
238   /* Samplers */
239   unsigned num_samplers;
240   boolean sampler_view[PIPE_MAX_SAMPLERS];  /**< True if sampler view exists*/
241   ubyte sampler_target[PIPE_MAX_SAMPLERS];  /**< TGSI_TEXTURE_x */
242   ubyte sampler_return_type[PIPE_MAX_SAMPLERS];  /**< TGSI_RETURN_TYPE_x */
243
244   /* Index Range declaration */
245   struct {
246      unsigned start_index;
247      unsigned count;
248      boolean required;
249      unsigned operandType;
250      unsigned size;
251      unsigned dim;
252   } index_range;
253
254   /* Address regs (really implemented with temps) */
255   unsigned num_address_regs;
256   unsigned address_reg_index[MAX_VGPU10_ADDR_REGS];
257
258   /* Output register usage masks */
259   ubyte output_usage_mask[PIPE_MAX_SHADER_OUTPUTS];
260
261   /* To map TGSI system value index to VGPU shader input indexes */
262   ubyte system_value_indexes[MAX_SYSTEM_VALUES];
263
264   struct {
265      /* vertex position scale/translation */
266      unsigned out_index;  /**< the real position output reg */
267      unsigned tmp_index;  /**< the fake/temp position output reg */
268      unsigned so_index;   /**< the non-adjusted position output reg */
269      unsigned prescale_cbuf_index;  /* index to the const buf for prescale */
270      unsigned prescale_scale_index, prescale_trans_index;
271      unsigned num_prescale;      /* number of prescale factor in const buf */
272      unsigned viewport_index;
273      unsigned need_prescale:1;
274      unsigned have_prescale:1;
275   } vposition;
276
277   /* For vertex shaders only */
278   struct {
279      /* viewport constant */
280      unsigned viewport_index;
281
282      unsigned vertex_id_bias_index;
283      unsigned vertex_id_sys_index;
284      unsigned vertex_id_tmp_index;
285
286      /* temp index of adjusted vertex attributes */
287      unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS];
288   } vs;
289
290   /* For fragment shaders only */
291   struct {
292      unsigned color_out_index[PIPE_MAX_COLOR_BUFS];  /**< the real color output regs */
293      unsigned num_color_outputs;
294      unsigned color_tmp_index;  /**< fake/temp color output reg */
295      unsigned alpha_ref_index;  /**< immediate constant for alpha ref */
296
297      /* front-face */
298      unsigned face_input_index; /**< real fragment shader face reg (bool) */
299      unsigned face_tmp_index;   /**< temp face reg converted to -1 / +1 */
300
301      unsigned pstipple_sampler_unit;
302
303      unsigned fragcoord_input_index;  /**< real fragment position input reg */
304      unsigned fragcoord_tmp_index;    /**< 1/w modified position temp reg */
305
306      unsigned sample_id_sys_index;  /**< TGSI index of sample id sys value */
307
308      unsigned sample_pos_sys_index; /**< TGSI index of sample pos sys value */
309      unsigned sample_pos_tmp_index; /**< which temp reg has the sample pos */
310
311      /** TGSI index of sample mask input sys value */
312      unsigned sample_mask_in_sys_index;
313
314      /** Which texture units are doing shadow comparison in the FS code */
315      unsigned shadow_compare_units;
316
317      /* layer */
318      unsigned layer_input_index;    /**< TGSI index of layer */
319      unsigned layer_imm_index;      /**< immediate for default layer 0 */
320   } fs;
321
322   /* For geometry shaders only */
323   struct {
324      VGPU10_PRIMITIVE prim_type;/**< VGPU10 primitive type */
325      VGPU10_PRIMITIVE_TOPOLOGY prim_topology; /**< VGPU10 primitive topology */
326      unsigned input_size;       /**< size of input arrays */
327      unsigned prim_id_index;    /**< primitive id register index */
328      unsigned max_out_vertices; /**< maximum number of output vertices */
329      unsigned invocations;
330      unsigned invocation_id_sys_index;
331
332      unsigned viewport_index_out_index;
333      unsigned viewport_index_tmp_index;
334   } gs;
335
336   /* For tessellation control shaders only */
337   struct {
338      unsigned vertices_per_patch_index;     /**< vertices_per_patch system value index */
339      unsigned imm_index;                    /**< immediate for tcs */
340      unsigned invocation_id_sys_index;      /**< invocation id */
341      unsigned invocation_id_tmp_index;
342      unsigned instruction_token_pos;        /* token pos for the first instruction */
343      unsigned control_point_input_index;    /* control point input register index */
344      unsigned control_point_addr_index;     /* control point input address register */
345      unsigned control_point_out_index;      /* control point output register index */
346      unsigned control_point_tmp_index;      /* control point temporary register */
347      unsigned control_point_out_count;      /* control point output count */
348      boolean  control_point_phase;          /* true if in control point phase */
349      boolean  fork_phase_add_signature;     /* true if needs to add signature in fork phase */
350      unsigned patch_generic_out_count;      /* per-patch generic output count */
351      unsigned patch_generic_out_index;      /* per-patch generic output register index*/
352      unsigned patch_generic_tmp_index;      /* per-patch generic temporary register index*/
353      unsigned prim_id_index;                /* primitive id */
354      struct {
355         unsigned out_index;      /* real tessinner output register */
356         unsigned temp_index;     /* tessinner temp register */
357         unsigned tgsi_index;     /* tgsi tessinner output register */
358      } inner;
359      struct {
360         unsigned out_index;      /* real tessouter output register */
361         unsigned temp_index;     /* tessouter temp register */
362         unsigned tgsi_index;     /* tgsi tessouter output register */
363      } outer;
364   } tcs;
365
366   /* For tessellation evaluation shaders only */
367   struct {
368      enum pipe_prim_type prim_mode;
369      enum pipe_tess_spacing spacing;
370      boolean vertices_order_cw;
371      boolean point_mode;
372      unsigned tesscoord_sys_index;
373      unsigned prim_id_index;                /* primitive id */
374      struct {
375         unsigned in_index;       /* real tessinner input register */
376         unsigned temp_index;     /* tessinner temp register */
377         unsigned tgsi_index;     /* tgsi tessinner input register */
378      } inner;
379      struct {
380         unsigned in_index;       /* real tessouter input register */
381         unsigned temp_index;     /* tessouter temp register */
382         unsigned tgsi_index;     /* tgsi tessouter input register */
383      } outer;
384   } tes;
385
386   /* For vertex or geometry shaders */
387   enum clipping_mode clip_mode;
388   unsigned clip_dist_out_index; /**< clip distance output register index */
389   unsigned clip_dist_tmp_index; /**< clip distance temporary register */
390   unsigned clip_dist_so_index;  /**< clip distance shadow copy */
391
392   /** Index of temporary holding the clipvertex coordinate */
393   unsigned clip_vertex_out_index; /**< clip vertex output register index */
394   unsigned clip_vertex_tmp_index; /**< clip vertex temporary index */
395
396   /* user clip plane constant slot indexes */
397   unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES];
398
399   unsigned num_output_writes;
400   boolean constant_color_output;
401
402   boolean uses_flat_interp;
403
404   unsigned reserved_token;        /* index to the reserved token */
405   boolean uses_precise_qualifier;
406
407   /* For all shaders: const reg index for RECT coord scaling */
408   unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS];
409
410   /* For all shaders: const reg index for texture buffer size */
411   unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS];
412
413   /* VS/TCS/TES/GS/FS Linkage info */
414   struct shader_linkage linkage;
415   struct tgsi_shader_info *prevShaderInfo;
416
417   /* Shader signature */
418   struct svga_shader_signature signature;
419
420   bool register_overflow;  /**< Set if we exceed a VGPU10 register limit */
421
422   /* For pipe_debug_message */
423   struct pipe_debug_callback svga_debug_callback;
424
425   /* current loop depth in shader */
426   unsigned current_loop_depth;
427};
428
429
430static void emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit);
431static void emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit);
432static boolean emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit);
433static boolean emit_constant_declaration(struct svga_shader_emitter_v10 *emit);
434static boolean emit_sampler_declarations(struct svga_shader_emitter_v10 *emit);
435static boolean emit_resource_declarations(struct svga_shader_emitter_v10 *emit);
436static boolean emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit);
437static boolean emit_index_range_declaration(struct svga_shader_emitter_v10 *emit);
438static void emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit);
439
440static boolean
441emit_post_helpers(struct svga_shader_emitter_v10 *emit);
442
443static boolean
444emit_vertex(struct svga_shader_emitter_v10 *emit,
445            const struct tgsi_full_instruction *inst);
446
447static boolean
448emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
449                        unsigned inst_number,
450                        const struct tgsi_full_instruction *inst);
451
452static void
453emit_input_declaration(struct svga_shader_emitter_v10 *emit,
454                       unsigned opcodeType, unsigned operandType,
455                       unsigned dim, unsigned index, unsigned size,
456                       unsigned name, unsigned numComp,
457                       unsigned selMode, unsigned usageMask,
458                       unsigned interpMode,
459                       boolean addSignature,
460                       SVGA3dDXSignatureSemanticName sgnName);
461
462static void
463create_temp_array(struct svga_shader_emitter_v10 *emit,
464                  unsigned arrayID, unsigned first, unsigned count,
465                  unsigned startIndex);
466
467static char err_buf[128];
468
469static boolean
470expand(struct svga_shader_emitter_v10 *emit)
471{
472   char *new_buf;
473   unsigned newsize = emit->size * 2;
474
475   if (emit->buf != err_buf)
476      new_buf = REALLOC(emit->buf, emit->size, newsize);
477   else
478      new_buf = NULL;
479
480   if (!new_buf) {
481      emit->ptr = err_buf;
482      emit->buf = err_buf;
483      emit->size = sizeof(err_buf);
484      return FALSE;
485   }
486
487   emit->size = newsize;
488   emit->ptr = new_buf + (emit->ptr - emit->buf);
489   emit->buf = new_buf;
490   return TRUE;
491}
492
493/**
494 * Create and initialize a new svga_shader_emitter_v10 object.
495 */
496static struct svga_shader_emitter_v10 *
497alloc_emitter(void)
498{
499   struct svga_shader_emitter_v10 *emit = CALLOC(1, sizeof(*emit));
500
501   if (!emit)
502      return NULL;
503
504   /* to initialize the output buffer */
505   emit->size = 512;
506   if (!expand(emit)) {
507      FREE(emit);
508      return NULL;
509   }
510   return emit;
511}
512
513/**
514 * Free an svga_shader_emitter_v10 object.
515 */
516static void
517free_emitter(struct svga_shader_emitter_v10 *emit)
518{
519   assert(emit);
520   FREE(emit->buf);    /* will be NULL if translation succeeded */
521   FREE(emit);
522}
523
524static inline boolean
525reserve(struct svga_shader_emitter_v10 *emit,
526        unsigned nr_dwords)
527{
528   while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) {
529      if (!expand(emit))
530         return FALSE;
531   }
532
533   return TRUE;
534}
535
536static boolean
537emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword)
538{
539   if (!reserve(emit, 1))
540      return FALSE;
541
542   *(uint32 *)emit->ptr = dword;
543   emit->ptr += sizeof dword;
544   return TRUE;
545}
546
547static boolean
548emit_dwords(struct svga_shader_emitter_v10 *emit,
549            const uint32 *dwords,
550            unsigned nr)
551{
552   if (!reserve(emit, nr))
553      return FALSE;
554
555   memcpy(emit->ptr, dwords, nr * sizeof *dwords);
556   emit->ptr += nr * sizeof *dwords;
557   return TRUE;
558}
559
560/** Return the number of tokens in the emitter's buffer */
561static unsigned
562emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit)
563{
564   return (emit->ptr - emit->buf) / sizeof(unsigned);
565}
566
567
568/**
569 * Check for register overflow.  If we overflow we'll set an
570 * error flag.  This function can be called for register declarations
571 * or use as src/dst instruction operands.
572 * \param type  register type.  One of VGPU10_OPERAND_TYPE_x
573                or VGPU10_OPCODE_DCL_x
574 * \param index  the register index
575 */
576static void
577check_register_index(struct svga_shader_emitter_v10 *emit,
578                     unsigned operandType, unsigned index)
579{
580   bool overflow_before = emit->register_overflow;
581
582   switch (operandType) {
583   case VGPU10_OPERAND_TYPE_TEMP:
584   case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP:
585   case VGPU10_OPCODE_DCL_TEMPS:
586      if (index >= VGPU10_MAX_TEMPS) {
587         emit->register_overflow = TRUE;
588      }
589      break;
590   case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER:
591   case VGPU10_OPCODE_DCL_CONSTANT_BUFFER:
592      if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
593         emit->register_overflow = TRUE;
594      }
595      break;
596   case VGPU10_OPERAND_TYPE_INPUT:
597   case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID:
598   case VGPU10_OPCODE_DCL_INPUT:
599   case VGPU10_OPCODE_DCL_INPUT_SGV:
600   case VGPU10_OPCODE_DCL_INPUT_SIV:
601   case VGPU10_OPCODE_DCL_INPUT_PS:
602   case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
603   case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
604      if ((emit->unit == PIPE_SHADER_VERTEX &&
605           index >= VGPU10_MAX_VS_INPUTS) ||
606          (emit->unit == PIPE_SHADER_GEOMETRY &&
607           index >= VGPU10_MAX_GS_INPUTS) ||
608          (emit->unit == PIPE_SHADER_FRAGMENT &&
609           index >= VGPU10_MAX_FS_INPUTS) ||
610          (emit->unit == PIPE_SHADER_TESS_CTRL &&
611           index >= VGPU11_MAX_HS_INPUT_CONTROL_POINTS) ||
612          (emit->unit == PIPE_SHADER_TESS_EVAL &&
613           index >= VGPU11_MAX_DS_INPUT_CONTROL_POINTS)) {
614         emit->register_overflow = TRUE;
615      }
616      break;
617   case VGPU10_OPERAND_TYPE_OUTPUT:
618   case VGPU10_OPCODE_DCL_OUTPUT:
619   case VGPU10_OPCODE_DCL_OUTPUT_SGV:
620   case VGPU10_OPCODE_DCL_OUTPUT_SIV:
621      /* Note: we are skipping two output indices in tcs for
622       * tessinner/outer levels. Implementation will not exceed
623       * number of output count but it allows index to go beyond
624       * VGPU11_MAX_HS_OUTPUTS.
625       * Index will never be >= index >= VGPU11_MAX_HS_OUTPUTS + 2
626       */
627      if ((emit->unit == PIPE_SHADER_VERTEX &&
628           index >= VGPU10_MAX_VS_OUTPUTS) ||
629          (emit->unit == PIPE_SHADER_GEOMETRY &&
630           index >= VGPU10_MAX_GS_OUTPUTS) ||
631          (emit->unit == PIPE_SHADER_FRAGMENT &&
632           index >= VGPU10_MAX_FS_OUTPUTS) ||
633          (emit->unit == PIPE_SHADER_TESS_CTRL &&
634           index >= VGPU11_MAX_HS_OUTPUTS + 2) ||
635          (emit->unit == PIPE_SHADER_TESS_EVAL &&
636           index >= VGPU11_MAX_DS_OUTPUTS)) {
637         emit->register_overflow = TRUE;
638      }
639      break;
640   case VGPU10_OPERAND_TYPE_SAMPLER:
641   case VGPU10_OPCODE_DCL_SAMPLER:
642      if (index >= VGPU10_MAX_SAMPLERS) {
643         emit->register_overflow = TRUE;
644      }
645      break;
646   case VGPU10_OPERAND_TYPE_RESOURCE:
647   case VGPU10_OPCODE_DCL_RESOURCE:
648      if (index >= VGPU10_MAX_RESOURCES) {
649         emit->register_overflow = TRUE;
650      }
651      break;
652   case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
653      if (index >= MAX_IMMEDIATE_COUNT) {
654         emit->register_overflow = TRUE;
655      }
656      break;
657   case VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
658      /* nothing */
659      break;
660   default:
661      assert(0);
662      ; /* nothing */
663   }
664
665   if (emit->register_overflow && !overflow_before) {
666      debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n",
667                   operandType, index);
668   }
669}
670
671
672/**
673 * Examine misc state to determine the clipping mode.
674 */
675static void
676determine_clipping_mode(struct svga_shader_emitter_v10 *emit)
677{
678   /* num_written_clipdistance in the shader info for tessellation
679    * control shader is always 0 because the TGSI_PROPERTY_NUM_CLIPDIST_ENABLED
680    * is not defined for this shader. So we go through all the output declarations
681    * to set the num_written_clipdistance. This is just to determine the
682    * clipping mode.
683    */
684   if (emit->unit == PIPE_SHADER_TESS_CTRL) {
685      unsigned i;
686      for (i = 0; i < emit->info.num_outputs; i++) {
687         if (emit->info.output_semantic_name[i] == TGSI_SEMANTIC_CLIPDIST) {
688            emit->info.num_written_clipdistance =
689               4 * (emit->info.output_semantic_index[i] + 1);
690         }
691      }
692   }
693
694   if (emit->info.num_written_clipdistance > 0) {
695      emit->clip_mode = CLIP_DISTANCE;
696   }
697   else if (emit->info.writes_clipvertex) {
698      emit->clip_mode = CLIP_VERTEX;
699   }
700   else if (emit->key.clip_plane_enable && emit->key.last_vertex_stage) {
701      /*
702       * Only the last shader in the vertex processing stage needs to
703       * handle the legacy clip mode.
704       */
705      emit->clip_mode = CLIP_LEGACY;
706   }
707   else {
708      emit->clip_mode = CLIP_NONE;
709   }
710}
711
712
713/**
714 * For clip distance register declarations and clip distance register
715 * writes we need to mask the declaration usage or instruction writemask
716 * (respectively) against the set of the really-enabled clipping planes.
717 *
718 * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables
719 * has a VS that writes to all 8 clip distance registers, but the plane enable
720 * flags are a subset of that.
721 *
722 * This function is used to apply the plane enable flags to the register
723 * declaration or instruction writemask.
724 *
725 * \param writemask  the declaration usage mask or instruction writemask
726 * \param clip_reg_index  which clip plane register is being declared/written.
727 *                        The legal values are 0 and 1 (two clip planes per
728 *                        register, for a total of 8 clip planes)
729 */
730static unsigned
731apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit,
732                      unsigned writemask, unsigned clip_reg_index)
733{
734   unsigned shift;
735
736   assert(clip_reg_index < 2);
737
738   /* four clip planes per clip register: */
739   shift = clip_reg_index * 4;
740   writemask &= ((emit->key.clip_plane_enable >> shift) & 0xf);
741
742   return writemask;
743}
744
745
746/**
747 * Translate gallium shader type into VGPU10 type.
748 */
749static VGPU10_PROGRAM_TYPE
750translate_shader_type(unsigned type)
751{
752   switch (type) {
753   case PIPE_SHADER_VERTEX:
754      return VGPU10_VERTEX_SHADER;
755   case PIPE_SHADER_GEOMETRY:
756      return VGPU10_GEOMETRY_SHADER;
757   case PIPE_SHADER_FRAGMENT:
758      return VGPU10_PIXEL_SHADER;
759   case PIPE_SHADER_TESS_CTRL:
760      return VGPU10_HULL_SHADER;
761   case PIPE_SHADER_TESS_EVAL:
762      return VGPU10_DOMAIN_SHADER;
763   case PIPE_SHADER_COMPUTE:
764      return VGPU10_COMPUTE_SHADER;
765   default:
766      assert(!"Unexpected shader type");
767      return VGPU10_VERTEX_SHADER;
768   }
769}
770
771
772/**
773 * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x
774 * Note: we only need to translate the opcodes for "simple" instructions,
775 * as seen below.  All other opcodes are handled/translated specially.
776 */
777static VGPU10_OPCODE_TYPE
778translate_opcode(enum tgsi_opcode opcode)
779{
780   switch (opcode) {
781   case TGSI_OPCODE_MOV:
782      return VGPU10_OPCODE_MOV;
783   case TGSI_OPCODE_MUL:
784      return VGPU10_OPCODE_MUL;
785   case TGSI_OPCODE_ADD:
786      return VGPU10_OPCODE_ADD;
787   case TGSI_OPCODE_DP3:
788      return VGPU10_OPCODE_DP3;
789   case TGSI_OPCODE_DP4:
790      return VGPU10_OPCODE_DP4;
791   case TGSI_OPCODE_MIN:
792      return VGPU10_OPCODE_MIN;
793   case TGSI_OPCODE_MAX:
794      return VGPU10_OPCODE_MAX;
795   case TGSI_OPCODE_MAD:
796      return VGPU10_OPCODE_MAD;
797   case TGSI_OPCODE_SQRT:
798      return VGPU10_OPCODE_SQRT;
799   case TGSI_OPCODE_FRC:
800      return VGPU10_OPCODE_FRC;
801   case TGSI_OPCODE_FLR:
802      return VGPU10_OPCODE_ROUND_NI;
803   case TGSI_OPCODE_FSEQ:
804      return VGPU10_OPCODE_EQ;
805   case TGSI_OPCODE_FSGE:
806      return VGPU10_OPCODE_GE;
807   case TGSI_OPCODE_FSNE:
808      return VGPU10_OPCODE_NE;
809   case TGSI_OPCODE_DDX:
810      return VGPU10_OPCODE_DERIV_RTX;
811   case TGSI_OPCODE_DDY:
812      return VGPU10_OPCODE_DERIV_RTY;
813   case TGSI_OPCODE_RET:
814      return VGPU10_OPCODE_RET;
815   case TGSI_OPCODE_DIV:
816      return VGPU10_OPCODE_DIV;
817   case TGSI_OPCODE_IDIV:
818      return VGPU10_OPCODE_VMWARE;
819   case TGSI_OPCODE_DP2:
820      return VGPU10_OPCODE_DP2;
821   case TGSI_OPCODE_BRK:
822      return VGPU10_OPCODE_BREAK;
823   case TGSI_OPCODE_IF:
824      return VGPU10_OPCODE_IF;
825   case TGSI_OPCODE_ELSE:
826      return VGPU10_OPCODE_ELSE;
827   case TGSI_OPCODE_ENDIF:
828      return VGPU10_OPCODE_ENDIF;
829   case TGSI_OPCODE_CEIL:
830      return VGPU10_OPCODE_ROUND_PI;
831   case TGSI_OPCODE_I2F:
832      return VGPU10_OPCODE_ITOF;
833   case TGSI_OPCODE_NOT:
834      return VGPU10_OPCODE_NOT;
835   case TGSI_OPCODE_TRUNC:
836      return VGPU10_OPCODE_ROUND_Z;
837   case TGSI_OPCODE_SHL:
838      return VGPU10_OPCODE_ISHL;
839   case TGSI_OPCODE_AND:
840      return VGPU10_OPCODE_AND;
841   case TGSI_OPCODE_OR:
842      return VGPU10_OPCODE_OR;
843   case TGSI_OPCODE_XOR:
844      return VGPU10_OPCODE_XOR;
845   case TGSI_OPCODE_CONT:
846      return VGPU10_OPCODE_CONTINUE;
847   case TGSI_OPCODE_EMIT:
848      return VGPU10_OPCODE_EMIT;
849   case TGSI_OPCODE_ENDPRIM:
850      return VGPU10_OPCODE_CUT;
851   case TGSI_OPCODE_BGNLOOP:
852      return VGPU10_OPCODE_LOOP;
853   case TGSI_OPCODE_ENDLOOP:
854      return VGPU10_OPCODE_ENDLOOP;
855   case TGSI_OPCODE_ENDSUB:
856      return VGPU10_OPCODE_RET;
857   case TGSI_OPCODE_NOP:
858      return VGPU10_OPCODE_NOP;
859   case TGSI_OPCODE_END:
860      return VGPU10_OPCODE_RET;
861   case TGSI_OPCODE_F2I:
862      return VGPU10_OPCODE_FTOI;
863   case TGSI_OPCODE_IMAX:
864      return VGPU10_OPCODE_IMAX;
865   case TGSI_OPCODE_IMIN:
866      return VGPU10_OPCODE_IMIN;
867   case TGSI_OPCODE_UDIV:
868   case TGSI_OPCODE_UMOD:
869   case TGSI_OPCODE_MOD:
870      return VGPU10_OPCODE_UDIV;
871   case TGSI_OPCODE_IMUL_HI:
872      return VGPU10_OPCODE_IMUL;
873   case TGSI_OPCODE_INEG:
874      return VGPU10_OPCODE_INEG;
875   case TGSI_OPCODE_ISHR:
876      return VGPU10_OPCODE_ISHR;
877   case TGSI_OPCODE_ISGE:
878      return VGPU10_OPCODE_IGE;
879   case TGSI_OPCODE_ISLT:
880      return VGPU10_OPCODE_ILT;
881   case TGSI_OPCODE_F2U:
882      return VGPU10_OPCODE_FTOU;
883   case TGSI_OPCODE_UADD:
884      return VGPU10_OPCODE_IADD;
885   case TGSI_OPCODE_U2F:
886      return VGPU10_OPCODE_UTOF;
887   case TGSI_OPCODE_UCMP:
888      return VGPU10_OPCODE_MOVC;
889   case TGSI_OPCODE_UMAD:
890      return VGPU10_OPCODE_UMAD;
891   case TGSI_OPCODE_UMAX:
892      return VGPU10_OPCODE_UMAX;
893   case TGSI_OPCODE_UMIN:
894      return VGPU10_OPCODE_UMIN;
895   case TGSI_OPCODE_UMUL:
896   case TGSI_OPCODE_UMUL_HI:
897      return VGPU10_OPCODE_UMUL;
898   case TGSI_OPCODE_USEQ:
899      return VGPU10_OPCODE_IEQ;
900   case TGSI_OPCODE_USGE:
901      return VGPU10_OPCODE_UGE;
902   case TGSI_OPCODE_USHR:
903      return VGPU10_OPCODE_USHR;
904   case TGSI_OPCODE_USLT:
905      return VGPU10_OPCODE_ULT;
906   case TGSI_OPCODE_USNE:
907      return VGPU10_OPCODE_INE;
908   case TGSI_OPCODE_SWITCH:
909      return VGPU10_OPCODE_SWITCH;
910   case TGSI_OPCODE_CASE:
911      return VGPU10_OPCODE_CASE;
912   case TGSI_OPCODE_DEFAULT:
913      return VGPU10_OPCODE_DEFAULT;
914   case TGSI_OPCODE_ENDSWITCH:
915      return VGPU10_OPCODE_ENDSWITCH;
916   case TGSI_OPCODE_FSLT:
917      return VGPU10_OPCODE_LT;
918   case TGSI_OPCODE_ROUND:
919      return VGPU10_OPCODE_ROUND_NE;
920   /* Begin SM5 opcodes */
921   case TGSI_OPCODE_F2D:
922      return VGPU10_OPCODE_FTOD;
923   case TGSI_OPCODE_D2F:
924      return VGPU10_OPCODE_DTOF;
925   case TGSI_OPCODE_DMUL:
926      return VGPU10_OPCODE_DMUL;
927   case TGSI_OPCODE_DADD:
928      return VGPU10_OPCODE_DADD;
929   case TGSI_OPCODE_DMAX:
930      return VGPU10_OPCODE_DMAX;
931   case TGSI_OPCODE_DMIN:
932      return VGPU10_OPCODE_DMIN;
933   case TGSI_OPCODE_DSEQ:
934      return VGPU10_OPCODE_DEQ;
935   case TGSI_OPCODE_DSGE:
936      return VGPU10_OPCODE_DGE;
937   case TGSI_OPCODE_DSLT:
938      return VGPU10_OPCODE_DLT;
939   case TGSI_OPCODE_DSNE:
940      return VGPU10_OPCODE_DNE;
941   case TGSI_OPCODE_IBFE:
942      return VGPU10_OPCODE_IBFE;
943   case TGSI_OPCODE_UBFE:
944      return VGPU10_OPCODE_UBFE;
945   case TGSI_OPCODE_BFI:
946      return VGPU10_OPCODE_BFI;
947   case TGSI_OPCODE_BREV:
948      return VGPU10_OPCODE_BFREV;
949   case TGSI_OPCODE_POPC:
950      return VGPU10_OPCODE_COUNTBITS;
951   case TGSI_OPCODE_LSB:
952      return VGPU10_OPCODE_FIRSTBIT_LO;
953   case TGSI_OPCODE_IMSB:
954      return VGPU10_OPCODE_FIRSTBIT_SHI;
955   case TGSI_OPCODE_UMSB:
956      return VGPU10_OPCODE_FIRSTBIT_HI;
957   case TGSI_OPCODE_INTERP_CENTROID:
958      return VGPU10_OPCODE_EVAL_CENTROID;
959   case TGSI_OPCODE_INTERP_SAMPLE:
960      return VGPU10_OPCODE_EVAL_SAMPLE_INDEX;
961   case TGSI_OPCODE_BARRIER:
962      return VGPU10_OPCODE_SYNC;
963
964   /* DX11.1 Opcodes */
965   case TGSI_OPCODE_DDIV:
966      return VGPU10_OPCODE_DDIV;
967   case TGSI_OPCODE_DRCP:
968      return VGPU10_OPCODE_DRCP;
969   case TGSI_OPCODE_D2I:
970      return VGPU10_OPCODE_DTOI;
971   case TGSI_OPCODE_D2U:
972      return VGPU10_OPCODE_DTOU;
973   case TGSI_OPCODE_I2D:
974      return VGPU10_OPCODE_ITOD;
975   case TGSI_OPCODE_U2D:
976      return VGPU10_OPCODE_UTOD;
977
978   case TGSI_OPCODE_SAMPLE_POS:
979      /* Note: we never actually get this opcode because there's no GLSL
980       * function to query multisample resource sample positions.  There's
981       * only the TGSI_SEMANTIC_SAMPLEPOS system value which contains the
982       * position of the current sample in the render target.
983       */
984      FALLTHROUGH;
985   case TGSI_OPCODE_SAMPLE_INFO:
986      /* NOTE: we never actually get this opcode because the GLSL compiler
987       * implements the gl_NumSamples variable with a simple constant in the
988       * constant buffer.
989       */
990      FALLTHROUGH;
991   default:
992      assert(!"Unexpected TGSI opcode in translate_opcode()");
993      return VGPU10_OPCODE_NOP;
994   }
995}
996
997
998/**
999 * Translate a TGSI register file type into a VGPU10 operand type.
1000 * \param array  is the TGSI_FILE_TEMPORARY register an array?
1001 */
1002static VGPU10_OPERAND_TYPE
1003translate_register_file(enum tgsi_file_type file, boolean array)
1004{
1005   switch (file) {
1006   case TGSI_FILE_CONSTANT:
1007      return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
1008   case TGSI_FILE_INPUT:
1009      return VGPU10_OPERAND_TYPE_INPUT;
1010   case TGSI_FILE_OUTPUT:
1011      return VGPU10_OPERAND_TYPE_OUTPUT;
1012   case TGSI_FILE_TEMPORARY:
1013      return array ? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP
1014                   : VGPU10_OPERAND_TYPE_TEMP;
1015   case TGSI_FILE_IMMEDIATE:
1016      /* all immediates are 32-bit values at this time so
1017       * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not possible at this time.
1018       */
1019      return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER;
1020   case TGSI_FILE_SAMPLER:
1021      return VGPU10_OPERAND_TYPE_SAMPLER;
1022   case TGSI_FILE_SYSTEM_VALUE:
1023      return VGPU10_OPERAND_TYPE_INPUT;
1024
1025   /* XXX TODO more cases to finish */
1026
1027   default:
1028      assert(!"Bad tgsi register file!");
1029      return VGPU10_OPERAND_TYPE_NULL;
1030   }
1031}
1032
1033
1034/**
1035 * Emit a null dst register
1036 */
1037static void
1038emit_null_dst_register(struct svga_shader_emitter_v10 *emit)
1039{
1040   VGPU10OperandToken0 operand;
1041
1042   operand.value = 0;
1043   operand.operandType = VGPU10_OPERAND_TYPE_NULL;
1044   operand.numComponents = VGPU10_OPERAND_0_COMPONENT;
1045
1046   emit_dword(emit, operand.value);
1047}
1048
1049
1050/**
1051 * If the given register is a temporary, return the array ID.
1052 * Else return zero.
1053 */
1054static unsigned
1055get_temp_array_id(const struct svga_shader_emitter_v10 *emit,
1056                  enum tgsi_file_type file, unsigned index)
1057{
1058   if (file == TGSI_FILE_TEMPORARY) {
1059      return emit->temp_map[index].arrayId;
1060   }
1061   else {
1062      return 0;
1063   }
1064}
1065
1066
1067/**
1068 * If the given register is a temporary, convert the index from a TGSI
1069 * TEMPORARY index to a VGPU10 temp index.
1070 */
1071static unsigned
1072remap_temp_index(const struct svga_shader_emitter_v10 *emit,
1073                 enum tgsi_file_type file, unsigned index)
1074{
1075   if (file == TGSI_FILE_TEMPORARY) {
1076      return emit->temp_map[index].index;
1077   }
1078   else {
1079      return index;
1080   }
1081}
1082
1083
1084/**
1085 * Setup the operand0 fields related to indexing (1D, 2D, relative, etc).
1086 * Note: the operandType field must already be initialized.
1087 * \param file  the register file being accessed
1088 * \param indirect  using indirect addressing of the register file?
1089 * \param index2D  if true, 2-D indexing is being used (const or temp registers)
1090 * \param indirect2D  if true, 2-D indirect indexing being used (for const buf)
1091 */
1092static VGPU10OperandToken0
1093setup_operand0_indexing(struct svga_shader_emitter_v10 *emit,
1094                        VGPU10OperandToken0 operand0,
1095                        enum tgsi_file_type file,
1096                        boolean indirect,
1097                        boolean index2D, bool indirect2D)
1098{
1099   VGPU10_OPERAND_INDEX_REPRESENTATION index0Rep, index1Rep;
1100   VGPU10_OPERAND_INDEX_DIMENSION indexDim;
1101
1102   /*
1103    * Compute index dimensions
1104    */
1105   if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 ||
1106       operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID ||
1107       operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID ||
1108       operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID ||
1109       operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP ||
1110       operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) {
1111      /* there's no swizzle for in-line immediates */
1112      indexDim = VGPU10_OPERAND_INDEX_0D;
1113      assert(operand0.selectionMode == 0);
1114   }
1115   else if (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT) {
1116      indexDim = VGPU10_OPERAND_INDEX_0D;
1117   }
1118   else {
1119      indexDim = index2D ? VGPU10_OPERAND_INDEX_2D : VGPU10_OPERAND_INDEX_1D;
1120   }
1121
1122   /*
1123    * Compute index representation(s) (immediate vs relative).
1124    */
1125   if (indexDim == VGPU10_OPERAND_INDEX_2D) {
1126      index0Rep = indirect2D ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1127         : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1128
1129      index1Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1130         : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1131   }
1132   else if (indexDim == VGPU10_OPERAND_INDEX_1D) {
1133      index0Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1134         : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1135
1136      index1Rep = 0;
1137   }
1138   else {
1139      index0Rep = 0;
1140      index1Rep = 0;
1141   }
1142
1143   operand0.indexDimension = indexDim;
1144   operand0.index0Representation = index0Rep;
1145   operand0.index1Representation = index1Rep;
1146
1147   return operand0;
1148}
1149
1150
1151/**
1152 * Emit the operand for expressing an address register for indirect indexing.
1153 * Note that the address register is really just a temp register.
1154 * \param addr_reg_index  which address register to use
1155 */
1156static void
1157emit_indirect_register(struct svga_shader_emitter_v10 *emit,
1158                       unsigned addr_reg_index)
1159{
1160   unsigned tmp_reg_index;
1161   VGPU10OperandToken0 operand0;
1162
1163   assert(addr_reg_index < MAX_VGPU10_ADDR_REGS);
1164
1165   tmp_reg_index = emit->address_reg_index[addr_reg_index];
1166
1167   /* operand0 is a simple temporary register, selecting one component */
1168   operand0.value = 0;
1169   operand0.operandType = VGPU10_OPERAND_TYPE_TEMP;
1170   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1171   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1172   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
1173   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1174   operand0.swizzleX = 0;
1175   operand0.swizzleY = 1;
1176   operand0.swizzleZ = 2;
1177   operand0.swizzleW = 3;
1178
1179   emit_dword(emit, operand0.value);
1180   emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, tmp_reg_index));
1181}
1182
1183
1184/**
1185 * Translate the dst register of a TGSI instruction and emit VGPU10 tokens.
1186 * \param emit  the emitter context
1187 * \param reg  the TGSI dst register to translate
1188 */
1189static void
1190emit_dst_register(struct svga_shader_emitter_v10 *emit,
1191                  const struct tgsi_full_dst_register *reg)
1192{
1193   enum tgsi_file_type file = reg->Register.File;
1194   unsigned index = reg->Register.Index;
1195   const enum tgsi_semantic sem_name = emit->info.output_semantic_name[index];
1196   const unsigned sem_index = emit->info.output_semantic_index[index];
1197   unsigned writemask = reg->Register.WriteMask;
1198   const boolean indirect = reg->Register.Indirect;
1199   unsigned tempArrayId = get_temp_array_id(emit, file, index);
1200   boolean index2d = reg->Register.Dimension || tempArrayId > 0;
1201   VGPU10OperandToken0 operand0;
1202
1203   if (file == TGSI_FILE_TEMPORARY) {
1204      emit->temp_map[index].initialized = TRUE;
1205   }
1206
1207   if (file == TGSI_FILE_OUTPUT) {
1208      if (emit->unit == PIPE_SHADER_VERTEX ||
1209          emit->unit == PIPE_SHADER_GEOMETRY ||
1210          emit->unit == PIPE_SHADER_TESS_EVAL) {
1211         if (index == emit->vposition.out_index &&
1212             emit->vposition.tmp_index != INVALID_INDEX) {
1213            /* replace OUTPUT[POS] with TEMP[POS].  We need to store the
1214             * vertex position result in a temporary so that we can modify
1215             * it in the post_helper() code.
1216             */
1217            file = TGSI_FILE_TEMPORARY;
1218            index = emit->vposition.tmp_index;
1219         }
1220         else if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
1221                  emit->clip_dist_tmp_index != INVALID_INDEX) {
1222            /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
1223             * We store the clip distance in a temporary first, then
1224             * we'll copy it to the shadow copy and to CLIPDIST with the
1225             * enabled planes mask in emit_clip_distance_instructions().
1226             */
1227            file = TGSI_FILE_TEMPORARY;
1228            index = emit->clip_dist_tmp_index + sem_index;
1229         }
1230         else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
1231                  emit->clip_vertex_tmp_index != INVALID_INDEX) {
1232            /* replace the CLIPVERTEX output register with a temporary */
1233            assert(emit->clip_mode == CLIP_VERTEX);
1234            assert(sem_index == 0);
1235            file = TGSI_FILE_TEMPORARY;
1236            index = emit->clip_vertex_tmp_index;
1237         }
1238         else if (sem_name == TGSI_SEMANTIC_COLOR &&
1239                  emit->key.clamp_vertex_color) {
1240
1241            /* set the saturate modifier of the instruction
1242             * to clamp the vertex color.
1243             */
1244            VGPU10OpcodeToken0 *token =
1245               (VGPU10OpcodeToken0 *)emit->buf + emit->inst_start_token;
1246            token->saturate = TRUE;
1247         }
1248         else if (sem_name == TGSI_SEMANTIC_VIEWPORT_INDEX &&
1249                  emit->gs.viewport_index_out_index != INVALID_INDEX) {
1250            file = TGSI_FILE_TEMPORARY;
1251            index = emit->gs.viewport_index_tmp_index;
1252         }
1253      }
1254      else if (emit->unit == PIPE_SHADER_FRAGMENT) {
1255         if (sem_name == TGSI_SEMANTIC_POSITION) {
1256            /* Fragment depth output register */
1257            operand0.value = 0;
1258            operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
1259            operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1260            operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1261            emit_dword(emit, operand0.value);
1262            return;
1263         }
1264         else if (sem_name == TGSI_SEMANTIC_SAMPLEMASK) {
1265            /* Fragment sample mask output */
1266            operand0.value = 0;
1267            operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK;
1268            operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1269            operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1270            emit_dword(emit, operand0.value);
1271            return;
1272         }
1273         else if (index == emit->fs.color_out_index[0] &&
1274             emit->fs.color_tmp_index != INVALID_INDEX) {
1275            /* replace OUTPUT[COLOR] with TEMP[COLOR].  We need to store the
1276             * fragment color result in a temporary so that we can read it
1277             * it in the post_helper() code.
1278             */
1279            file = TGSI_FILE_TEMPORARY;
1280            index = emit->fs.color_tmp_index;
1281         }
1282         else {
1283            /* Typically, for fragment shaders, the output register index
1284             * matches the color semantic index.  But not when we write to
1285             * the fragment depth register.  In that case, OUT[0] will be
1286             * fragdepth and OUT[1] will be the 0th color output.  We need
1287             * to use the semantic index for color outputs.
1288             */
1289            assert(sem_name == TGSI_SEMANTIC_COLOR);
1290            index = emit->info.output_semantic_index[index];
1291
1292            emit->num_output_writes++;
1293         }
1294      }
1295      else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
1296         if (index == emit->tcs.inner.tgsi_index) {
1297            /* replace OUTPUT[TESSLEVEL] with temp. We are storing it
1298             * in temporary for now so that will be store into appropriate
1299             * registers in post_helper() in patch constant phase.
1300             */
1301            if (emit->tcs.control_point_phase) {
1302               /* Discard writing into tessfactor in control point phase */
1303               emit->discard_instruction =  TRUE;
1304            }
1305            else {
1306               file = TGSI_FILE_TEMPORARY;
1307               index = emit->tcs.inner.temp_index;
1308            }
1309         }
1310         else if (index == emit->tcs.outer.tgsi_index) {
1311            /* replace OUTPUT[TESSLEVEL] with temp. We are storing it
1312             * in temporary for now so that will be store into appropriate
1313             * registers in post_helper().
1314             */
1315            if (emit->tcs.control_point_phase) {
1316               /* Discard writing into tessfactor in control point phase */
1317               emit->discard_instruction =  TRUE;
1318            }
1319            else {
1320               file = TGSI_FILE_TEMPORARY;
1321               index = emit->tcs.outer.temp_index;
1322            }
1323         }
1324         else if (index >= emit->tcs.patch_generic_out_index &&
1325                  index < (emit->tcs.patch_generic_out_index +
1326                          emit->tcs.patch_generic_out_count)) {
1327            if (emit->tcs.control_point_phase) {
1328               /* Discard writing into generic patch constant outputs in
1329                  control point phase */
1330               emit->discard_instruction =  TRUE;
1331            }
1332            else {
1333               if (emit->reemit_instruction) {
1334                  /* Store results of reemitted instruction in temporary register. */
1335                  file = TGSI_FILE_TEMPORARY;
1336                  index = emit->tcs.patch_generic_tmp_index +
1337                          (index - emit->tcs.patch_generic_out_index);
1338                  /**
1339                   * Temporaries for patch constant data can be done
1340                   * as indexable temporaries.
1341                   */
1342                  tempArrayId = get_temp_array_id(emit, file, index);
1343                  index2d = tempArrayId > 0;
1344
1345                  emit->reemit_instruction = FALSE;
1346               }
1347               else {
1348                  /* If per-patch outputs is been read in shader, we
1349                   * reemit instruction and store results in temporaries in
1350                   * patch constant phase. */
1351                  if (emit->info.reads_perpatch_outputs) {
1352                     emit->reemit_instruction = TRUE;
1353                  }
1354               }
1355            }
1356         }
1357         else if (reg->Register.Dimension) {
1358            /* Only control point outputs are declared 2D in tgsi */
1359            if (emit->tcs.control_point_phase) {
1360               if (emit->reemit_instruction) {
1361                  /* Store results of reemitted instruction in temporary register. */
1362                  index2d = FALSE;
1363                  file = TGSI_FILE_TEMPORARY;
1364                  index = emit->tcs.control_point_tmp_index +
1365                          (index - emit->tcs.control_point_out_index);
1366                  emit->reemit_instruction = FALSE;
1367               }
1368               else {
1369                  /* The mapped control point outputs are 1-D */
1370                  index2d = FALSE;
1371                  if (emit->info.reads_pervertex_outputs) {
1372                     /* If per-vertex outputs is been read in shader, we
1373                      * reemit instruction and store results in temporaries
1374                      * control point phase. */
1375                     emit->reemit_instruction = TRUE;
1376                  }
1377               }
1378
1379               if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
1380                   emit->clip_dist_tmp_index != INVALID_INDEX) {
1381                  /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
1382                   * We store the clip distance in a temporary first, then
1383                   * we'll copy it to the shadow copy and to CLIPDIST with the
1384                   * enabled planes mask in emit_clip_distance_instructions().
1385                   */
1386                  file = TGSI_FILE_TEMPORARY;
1387                  index = emit->clip_dist_tmp_index + sem_index;
1388               }
1389               else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
1390                        emit->clip_vertex_tmp_index != INVALID_INDEX) {
1391                  /* replace the CLIPVERTEX output register with a temporary */
1392                  assert(emit->clip_mode == CLIP_VERTEX);
1393                  assert(sem_index == 0);
1394                  file = TGSI_FILE_TEMPORARY;
1395                  index = emit->clip_vertex_tmp_index;
1396               }
1397            }
1398            else {
1399               /* Discard writing into control point outputs in
1400                  patch constant phase */
1401               emit->discard_instruction =  TRUE;
1402            }
1403         }
1404      }
1405   }
1406
1407   /* init operand tokens to all zero */
1408   operand0.value = 0;
1409
1410   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1411
1412   /* the operand has a writemask */
1413   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
1414
1415   /* Which of the four dest components to write to. Note that we can use a
1416    * simple assignment here since TGSI writemasks match VGPU10 writemasks.
1417    */
1418   STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X);
1419   operand0.mask = writemask;
1420
1421   /* translate TGSI register file type to VGPU10 operand type */
1422   operand0.operandType = translate_register_file(file, tempArrayId > 0);
1423
1424   check_register_index(emit, operand0.operandType, index);
1425
1426   operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
1427                                      index2d, FALSE);
1428
1429   /* Emit tokens */
1430   emit_dword(emit, operand0.value);
1431   if (tempArrayId > 0) {
1432      emit_dword(emit, tempArrayId);
1433   }
1434
1435   emit_dword(emit, remap_temp_index(emit, file, index));
1436
1437   if (indirect) {
1438      emit_indirect_register(emit, reg->Indirect.Index);
1439   }
1440}
1441
1442
1443/**
1444 * Check if temporary register needs to be initialize when
1445 * shader is not using indirect addressing for temporary and uninitialized
1446 * temporary is not used in loop. In these two scenarios, we cannot
1447 * determine if temporary is initialized or not.
1448 */
1449static boolean
1450need_temp_reg_initialization(struct svga_shader_emitter_v10 *emit,
1451                             unsigned index)
1452{
1453   if (!(emit->info.indirect_files & (1u << TGSI_FILE_TEMPORARY))
1454       && emit->current_loop_depth == 0) {
1455      if (!emit->temp_map[index].initialized &&
1456          emit->temp_map[index].index < emit->num_shader_temps) {
1457         return TRUE;
1458      }
1459   }
1460
1461   return FALSE;
1462}
1463
1464
1465/**
1466 * Translate a src register of a TGSI instruction and emit VGPU10 tokens.
1467 * In quite a few cases, we do register substitution.  For example, if
1468 * the TGSI register is the front/back-face register, we replace that with
1469 * a temp register containing a value we computed earlier.
1470 */
1471static void
1472emit_src_register(struct svga_shader_emitter_v10 *emit,
1473                  const struct tgsi_full_src_register *reg)
1474{
1475   enum tgsi_file_type file = reg->Register.File;
1476   unsigned index = reg->Register.Index;
1477   const boolean indirect = reg->Register.Indirect;
1478   unsigned tempArrayId = get_temp_array_id(emit, file, index);
1479   boolean index2d = (reg->Register.Dimension ||
1480                            tempArrayId > 0 ||
1481                            file == TGSI_FILE_CONSTANT);
1482   unsigned index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index;
1483   boolean indirect2d = reg->Dimension.Indirect;
1484   unsigned swizzleX = reg->Register.SwizzleX;
1485   unsigned swizzleY = reg->Register.SwizzleY;
1486   unsigned swizzleZ = reg->Register.SwizzleZ;
1487   unsigned swizzleW = reg->Register.SwizzleW;
1488   const boolean absolute = reg->Register.Absolute;
1489   const boolean negate = reg->Register.Negate;
1490   VGPU10OperandToken0 operand0;
1491   VGPU10OperandToken1 operand1;
1492
1493   operand0.value = operand1.value = 0;
1494
1495   if (emit->unit == PIPE_SHADER_FRAGMENT){
1496      if (file == TGSI_FILE_INPUT) {
1497         if (index == emit->fs.face_input_index) {
1498            /* Replace INPUT[FACE] with TEMP[FACE] */
1499            file = TGSI_FILE_TEMPORARY;
1500            index = emit->fs.face_tmp_index;
1501         }
1502         else if (index == emit->fs.fragcoord_input_index) {
1503            /* Replace INPUT[POSITION] with TEMP[POSITION] */
1504            file = TGSI_FILE_TEMPORARY;
1505            index = emit->fs.fragcoord_tmp_index;
1506         }
1507         else if (index == emit->fs.layer_input_index) {
1508            /* Replace INPUT[LAYER] with zero.x */
1509            file = TGSI_FILE_IMMEDIATE;
1510            index = emit->fs.layer_imm_index;
1511            swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1512         }
1513         else {
1514            /* We remap fragment shader inputs to that FS input indexes
1515             * match up with VS/GS output indexes.
1516             */
1517            index = emit->linkage.input_map[index];
1518         }
1519      }
1520      else if (file == TGSI_FILE_SYSTEM_VALUE) {
1521         if (index == emit->fs.sample_pos_sys_index) {
1522            assert(emit->version >= 41);
1523            /* Current sample position is in a temp register */
1524            file = TGSI_FILE_TEMPORARY;
1525            index = emit->fs.sample_pos_tmp_index;
1526         }
1527         else if (index == emit->fs.sample_mask_in_sys_index) {
1528            /* Emitted as vCoverage0.x */
1529            /* According to GLSL spec, the gl_SampleMaskIn array has ceil(s / 32)
1530             * elements where s is the maximum number of color samples supported
1531             * by the implementation. With current implementation, we should not
1532             * have more than one element. So assert if Index != 0
1533             */
1534            assert((!reg->Register.Indirect && reg->Register.Index == 0) ||
1535                   reg->Register.Indirect);
1536            operand0.value = 0;
1537            operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK;
1538            operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1539            operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1540            operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1541            emit_dword(emit, operand0.value);
1542            return;
1543         }
1544         else {
1545            /* Map the TGSI system value to a VGPU10 input register */
1546            assert(index < ARRAY_SIZE(emit->system_value_indexes));
1547            file = TGSI_FILE_INPUT;
1548            index = emit->system_value_indexes[index];
1549         }
1550      }
1551   }
1552   else if (emit->unit == PIPE_SHADER_GEOMETRY) {
1553      if (file == TGSI_FILE_INPUT) {
1554         if (index == emit->gs.prim_id_index) {
1555            operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
1556            operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1557         }
1558         index = emit->linkage.input_map[index];
1559      }
1560      else if (file == TGSI_FILE_SYSTEM_VALUE &&
1561               index == emit->gs.invocation_id_sys_index) {
1562         /* Emitted as vGSInstanceID0.x */
1563         operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1564         operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID;
1565         index = 0;
1566      }
1567   }
1568   else if (emit->unit == PIPE_SHADER_VERTEX) {
1569      if (file == TGSI_FILE_INPUT) {
1570         /* if input is adjusted... */
1571         if ((emit->key.vs.adjust_attrib_w_1 |
1572              emit->key.vs.adjust_attrib_itof |
1573              emit->key.vs.adjust_attrib_utof |
1574              emit->key.vs.attrib_is_bgra |
1575              emit->key.vs.attrib_puint_to_snorm |
1576              emit->key.vs.attrib_puint_to_uscaled |
1577              emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) {
1578            file = TGSI_FILE_TEMPORARY;
1579            index = emit->vs.adjusted_input[index];
1580         }
1581      }
1582      else if (file == TGSI_FILE_SYSTEM_VALUE) {
1583         if (index == emit->vs.vertex_id_sys_index &&
1584             emit->vs.vertex_id_tmp_index != INVALID_INDEX) {
1585            file = TGSI_FILE_TEMPORARY;
1586            index = emit->vs.vertex_id_tmp_index;
1587            swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1588         }
1589         else {
1590            /* Map the TGSI system value to a VGPU10 input register */
1591            assert(index < ARRAY_SIZE(emit->system_value_indexes));
1592            file = TGSI_FILE_INPUT;
1593            index = emit->system_value_indexes[index];
1594         }
1595      }
1596   }
1597   else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
1598
1599      if (file == TGSI_FILE_SYSTEM_VALUE) {
1600         if (index == emit->tcs.vertices_per_patch_index) {
1601            /**
1602             * if source register is the system value for vertices_per_patch,
1603             * replace it with the immediate.
1604             */
1605            file = TGSI_FILE_IMMEDIATE;
1606            index = emit->tcs.imm_index;
1607            swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1608         }
1609         else if (index == emit->tcs.invocation_id_sys_index) {
1610            if (emit->tcs.control_point_phase) {
1611               /**
1612                * Emitted as vOutputControlPointID.x
1613                */
1614               operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1615               operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID;
1616               operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
1617               operand0.mask = 0;
1618               emit_dword(emit, operand0.value);
1619               return;
1620            }
1621            else {
1622               /* There is no control point ID input declaration in
1623                * the patch constant phase in hull shader.
1624                * Since for now we are emitting all instructions in
1625                * the patch constant phase, we are replacing the
1626                * control point ID reference with the immediate 0.
1627                */
1628               file = TGSI_FILE_IMMEDIATE;
1629               index = emit->tcs.imm_index;
1630               swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_W;
1631            }
1632         }
1633         else if (index == emit->tcs.prim_id_index) {
1634            /**
1635             * Emitted as vPrim.x
1636             */
1637            operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1638            operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1639            index = 0;
1640         }
1641      }
1642      else if (file == TGSI_FILE_INPUT) {
1643         index = emit->linkage.input_map[index];
1644         if (!emit->tcs.control_point_phase) {
1645            /* Emitted as vicp */
1646            operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1647            operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
1648            assert(reg->Register.Dimension);
1649         }
1650      }
1651      else if (file == TGSI_FILE_OUTPUT) {
1652         if ((index >= emit->tcs.patch_generic_out_index &&
1653             index < (emit->tcs.patch_generic_out_index +
1654                      emit->tcs.patch_generic_out_count)) ||
1655             index == emit->tcs.inner.tgsi_index ||
1656             index == emit->tcs.outer.tgsi_index) {
1657            if (emit->tcs.control_point_phase) {
1658               emit->discard_instruction = TRUE;
1659            }
1660            else {
1661               /* Device doesn't allow reading from output so
1662                * use corresponding temporary register as source */
1663               file = TGSI_FILE_TEMPORARY;
1664               if (index == emit->tcs.inner.tgsi_index) {
1665                  index = emit->tcs.inner.temp_index;
1666               }
1667               else if (index == emit->tcs.outer.tgsi_index) {
1668                  index = emit->tcs.outer.temp_index;
1669               }
1670               else {
1671                  index = emit->tcs.patch_generic_tmp_index +
1672                          (index - emit->tcs.patch_generic_out_index);
1673               }
1674
1675               /**
1676                * Temporaries for patch constant data can be done
1677                * as indexable temporaries.
1678                */
1679               tempArrayId = get_temp_array_id(emit, file, index);
1680               index2d = tempArrayId > 0;
1681               index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index;
1682            }
1683         }
1684         else if (index2d) {
1685            if (emit->tcs.control_point_phase) {
1686               /* Device doesn't allow reading from output so
1687                * use corresponding temporary register as source */
1688               file = TGSI_FILE_TEMPORARY;
1689               index2d = FALSE;
1690               index = emit->tcs.control_point_tmp_index +
1691                       (index - emit->tcs.control_point_out_index);
1692            }
1693            else {
1694               emit->discard_instruction = TRUE;
1695            }
1696         }
1697      }
1698   }
1699   else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
1700      if (file == TGSI_FILE_SYSTEM_VALUE) {
1701         if (index == emit->tes.tesscoord_sys_index) {
1702            /**
1703             * Emitted as vDomain
1704             */
1705            operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1706            operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT;
1707            index = 0;
1708         }
1709         else if (index == emit->tes.inner.tgsi_index) {
1710            file = TGSI_FILE_TEMPORARY;
1711            index = emit->tes.inner.temp_index;
1712         }
1713         else if (index == emit->tes.outer.tgsi_index) {
1714            file = TGSI_FILE_TEMPORARY;
1715            index = emit->tes.outer.temp_index;
1716         }
1717         else if (index == emit->tes.prim_id_index) {
1718            /**
1719             * Emitted as vPrim.x
1720             */
1721            operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1722            operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1723            index = 0;
1724         }
1725
1726      }
1727      else if (file == TGSI_FILE_INPUT) {
1728         if (index2d) {
1729            /* 2D input is emitted as vcp (input control point). */
1730            operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
1731            operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1732
1733            /* index specifies the element index and is remapped
1734             * to align with the tcs output index.
1735             */
1736            index = emit->linkage.input_map[index];
1737
1738            assert(index2 < emit->key.tes.vertices_per_patch);
1739         }
1740         else {
1741            if (index < emit->key.tes.tessfactor_index)
1742               /* index specifies the generic patch index.
1743                * Remapped to match up with the tcs output index.
1744                */
1745               index = emit->linkage.input_map[index];
1746
1747            operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT;
1748            operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1749         }
1750      }
1751   }
1752
1753   if (file == TGSI_FILE_ADDRESS) {
1754      index = emit->address_reg_index[index];
1755      file = TGSI_FILE_TEMPORARY;
1756   }
1757
1758   if (file == TGSI_FILE_TEMPORARY) {
1759      if (need_temp_reg_initialization(emit, index)) {
1760         emit->initialize_temp_index = index;
1761         emit->discard_instruction = TRUE;
1762      }
1763   }
1764
1765   if (operand0.value == 0) {
1766      /* if operand0 was not set above for a special case, do the general
1767       * case now.
1768       */
1769      operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1770      operand0.operandType = translate_register_file(file, tempArrayId > 0);
1771   }
1772   operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
1773                                      index2d, indirect2d);
1774
1775   if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 &&
1776       operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
1777      /* there's no swizzle for in-line immediates */
1778      if (swizzleX == swizzleY &&
1779          swizzleX == swizzleZ &&
1780          swizzleX == swizzleW) {
1781         operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1782      }
1783      else {
1784         operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1785      }
1786
1787      operand0.swizzleX = swizzleX;
1788      operand0.swizzleY = swizzleY;
1789      operand0.swizzleZ = swizzleZ;
1790      operand0.swizzleW = swizzleW;
1791
1792      if (absolute || negate) {
1793         operand0.extended = 1;
1794         operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER;
1795         if (absolute && !negate)
1796            operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS;
1797         if (!absolute && negate)
1798            operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG;
1799         if (absolute && negate)
1800            operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG;
1801      }
1802   }
1803
1804   /* Emit the operand tokens */
1805   emit_dword(emit, operand0.value);
1806   if (operand0.extended)
1807      emit_dword(emit, operand1.value);
1808
1809   if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) {
1810      /* Emit the four float/int in-line immediate values */
1811      unsigned *c;
1812      assert(index < ARRAY_SIZE(emit->immediates));
1813      assert(file == TGSI_FILE_IMMEDIATE);
1814      assert(swizzleX < 4);
1815      assert(swizzleY < 4);
1816      assert(swizzleZ < 4);
1817      assert(swizzleW < 4);
1818      c = (unsigned *) emit->immediates[index];
1819      emit_dword(emit, c[swizzleX]);
1820      emit_dword(emit, c[swizzleY]);
1821      emit_dword(emit, c[swizzleZ]);
1822      emit_dword(emit, c[swizzleW]);
1823   }
1824   else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) {
1825      /* Emit the register index(es) */
1826      if (index2d) {
1827         emit_dword(emit, index2);
1828
1829         if (indirect2d) {
1830            emit_indirect_register(emit, reg->DimIndirect.Index);
1831         }
1832      }
1833
1834      emit_dword(emit, remap_temp_index(emit, file, index));
1835
1836      if (indirect) {
1837         emit_indirect_register(emit, reg->Indirect.Index);
1838      }
1839   }
1840}
1841
1842
1843/**
1844 * Emit a resource operand (for use with a SAMPLE instruction).
1845 */
1846static void
1847emit_resource_register(struct svga_shader_emitter_v10 *emit,
1848                       unsigned resource_number)
1849{
1850   VGPU10OperandToken0 operand0;
1851
1852   check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number);
1853
1854   /* init */
1855   operand0.value = 0;
1856
1857   operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
1858   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1859   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1860   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1861   operand0.swizzleX = VGPU10_COMPONENT_X;
1862   operand0.swizzleY = VGPU10_COMPONENT_Y;
1863   operand0.swizzleZ = VGPU10_COMPONENT_Z;
1864   operand0.swizzleW = VGPU10_COMPONENT_W;
1865
1866   emit_dword(emit, operand0.value);
1867   emit_dword(emit, resource_number);
1868}
1869
1870
1871/**
1872 * Emit a sampler operand (for use with a SAMPLE instruction).
1873 */
1874static void
1875emit_sampler_register(struct svga_shader_emitter_v10 *emit,
1876                      unsigned sampler_number)
1877{
1878   VGPU10OperandToken0 operand0;
1879
1880   check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number);
1881
1882   /* init */
1883   operand0.value = 0;
1884
1885   operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
1886   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1887
1888   emit_dword(emit, operand0.value);
1889   emit_dword(emit, sampler_number);
1890}
1891
1892
1893/**
1894 * Emit an operand which reads the IS_FRONT_FACING register.
1895 */
1896static void
1897emit_face_register(struct svga_shader_emitter_v10 *emit)
1898{
1899   VGPU10OperandToken0 operand0;
1900   unsigned index = emit->linkage.input_map[emit->fs.face_input_index];
1901
1902   /* init */
1903   operand0.value = 0;
1904
1905   operand0.operandType = VGPU10_OPERAND_TYPE_INPUT;
1906   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1907   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1908   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1909
1910   operand0.swizzleX = VGPU10_COMPONENT_X;
1911   operand0.swizzleY = VGPU10_COMPONENT_X;
1912   operand0.swizzleZ = VGPU10_COMPONENT_X;
1913   operand0.swizzleW = VGPU10_COMPONENT_X;
1914
1915   emit_dword(emit, operand0.value);
1916   emit_dword(emit, index);
1917}
1918
1919
1920/**
1921 * Emit tokens for the "rasterizer" register used by the SAMPLE_POS
1922 * instruction.
1923 */
1924static void
1925emit_rasterizer_register(struct svga_shader_emitter_v10 *emit)
1926{
1927   VGPU10OperandToken0 operand0;
1928
1929   /* init */
1930   operand0.value = 0;
1931
1932   /* No register index for rasterizer index (there's only one) */
1933   operand0.operandType = VGPU10_OPERAND_TYPE_RASTERIZER;
1934   operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1935   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1936   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1937   operand0.swizzleX = VGPU10_COMPONENT_X;
1938   operand0.swizzleY = VGPU10_COMPONENT_Y;
1939   operand0.swizzleZ = VGPU10_COMPONENT_Z;
1940   operand0.swizzleW = VGPU10_COMPONENT_W;
1941
1942   emit_dword(emit, operand0.value);
1943}
1944
1945
1946/**
1947 * Emit tokens for the "stream" register used by the
1948 * DCL_STREAM, CUT_STREAM, EMIT_STREAM instructions.
1949 */
1950static void
1951emit_stream_register(struct svga_shader_emitter_v10 *emit, unsigned index)
1952{
1953   VGPU10OperandToken0 operand0;
1954
1955   /* init */
1956   operand0.value = 0;
1957
1958   /* No register index for rasterizer index (there's only one) */
1959   operand0.operandType = VGPU10_OPERAND_TYPE_STREAM;
1960   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1961   operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
1962
1963   emit_dword(emit, operand0.value);
1964   emit_dword(emit, index);
1965}
1966
1967
1968/**
1969 * Emit the token for a VGPU10 opcode, with precise parameter.
1970 * \param saturate   clamp result to [0,1]?
1971 */
1972static void
1973emit_opcode_precise(struct svga_shader_emitter_v10 *emit,
1974                    unsigned vgpu10_opcode, boolean saturate, boolean precise)
1975{
1976   VGPU10OpcodeToken0 token0;
1977
1978   token0.value = 0;  /* init all fields to zero */
1979   token0.opcodeType = vgpu10_opcode;
1980   token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
1981   token0.saturate = saturate;
1982
1983   /* Mesa's GLSL IR -> TGSI translator will set the TGSI precise flag for
1984    * 'invariant' declarations.  Only set preciseValues=1 if we have SM5.
1985    */
1986   token0.preciseValues = precise && emit->version >= 50;
1987
1988   emit_dword(emit, token0.value);
1989
1990   emit->uses_precise_qualifier |= token0.preciseValues;
1991}
1992
1993
1994/**
1995 * Emit the token for a VGPU10 opcode.
1996 * \param saturate   clamp result to [0,1]?
1997 */
1998static void
1999emit_opcode(struct svga_shader_emitter_v10 *emit,
2000            unsigned vgpu10_opcode, boolean saturate)
2001{
2002   emit_opcode_precise(emit, vgpu10_opcode, saturate, FALSE);
2003}
2004
2005
2006/**
2007 * Emit the token for a VGPU10 resinfo instruction.
2008 * \param modifier   return type modifier, _uint or _rcpFloat.
2009 *                   TODO: We may want to remove this parameter if it will
2010 *                   only ever be used as _uint.
2011 */
2012static void
2013emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit,
2014                    VGPU10_RESINFO_RETURN_TYPE modifier)
2015{
2016   VGPU10OpcodeToken0 token0;
2017
2018   token0.value = 0;  /* init all fields to zero */
2019   token0.opcodeType = VGPU10_OPCODE_RESINFO;
2020   token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
2021   token0.resinfoReturnType = modifier;
2022
2023   emit_dword(emit, token0.value);
2024}
2025
2026
2027/**
2028 * Emit opcode tokens for a texture sample instruction.  Texture instructions
2029 * can be rather complicated (texel offsets, etc) so we have this specialized
2030 * function.
2031 */
2032static void
2033emit_sample_opcode(struct svga_shader_emitter_v10 *emit,
2034                   unsigned vgpu10_opcode, boolean saturate,
2035                   const int offsets[3])
2036{
2037   VGPU10OpcodeToken0 token0;
2038   VGPU10OpcodeToken1 token1;
2039
2040   token0.value = 0;  /* init all fields to zero */
2041   token0.opcodeType = vgpu10_opcode;
2042   token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
2043   token0.saturate = saturate;
2044
2045   if (offsets[0] || offsets[1] || offsets[2]) {
2046      assert(offsets[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2047      assert(offsets[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2048      assert(offsets[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2049      assert(offsets[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2050      assert(offsets[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2051      assert(offsets[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2052
2053      token0.extended = 1;
2054      token1.value = 0;
2055      token1.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS;
2056      token1.offsetU = offsets[0];
2057      token1.offsetV = offsets[1];
2058      token1.offsetW = offsets[2];
2059   }
2060
2061   emit_dword(emit, token0.value);
2062   if (token0.extended) {
2063      emit_dword(emit, token1.value);
2064   }
2065}
2066
2067
2068/**
2069 * Emit a DISCARD opcode token.
2070 * If nonzero is set, we'll discard the fragment if the X component is not 0.
2071 * Otherwise, we'll discard the fragment if the X component is 0.
2072 */
2073static void
2074emit_discard_opcode(struct svga_shader_emitter_v10 *emit, boolean nonzero)
2075{
2076   VGPU10OpcodeToken0 opcode0;
2077
2078   opcode0.value = 0;
2079   opcode0.opcodeType = VGPU10_OPCODE_DISCARD;
2080   if (nonzero)
2081      opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
2082
2083   emit_dword(emit, opcode0.value);
2084}
2085
2086
2087/**
2088 * We need to call this before we begin emitting a VGPU10 instruction.
2089 */
2090static void
2091begin_emit_instruction(struct svga_shader_emitter_v10 *emit)
2092{
2093   assert(emit->inst_start_token == 0);
2094   /* Save location of the instruction's VGPU10OpcodeToken0 token.
2095    * Note, we can't save a pointer because it would become invalid if
2096    * we have to realloc the output buffer.
2097    */
2098   emit->inst_start_token = emit_get_num_tokens(emit);
2099}
2100
2101
2102/**
2103 * We need to call this after we emit the last token of a VGPU10 instruction.
2104 * This function patches in the opcode token's instructionLength field.
2105 */
2106static void
2107end_emit_instruction(struct svga_shader_emitter_v10 *emit)
2108{
2109   VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf;
2110   unsigned inst_length;
2111
2112   assert(emit->inst_start_token > 0);
2113
2114   if (emit->discard_instruction) {
2115      /* Back up the emit->ptr to where this instruction started so
2116       * that we discard the current instruction.
2117       */
2118      emit->ptr = (char *) (tokens + emit->inst_start_token);
2119   }
2120   else {
2121      /* Compute instruction length and patch that into the start of
2122       * the instruction.
2123       */
2124      inst_length = emit_get_num_tokens(emit) - emit->inst_start_token;
2125
2126      assert(inst_length > 0);
2127
2128      tokens[emit->inst_start_token].instructionLength = inst_length;
2129   }
2130
2131   emit->inst_start_token = 0; /* reset to zero for error checking */
2132   emit->discard_instruction = FALSE;
2133}
2134
2135
2136/**
2137 * Return index for a free temporary register.
2138 */
2139static unsigned
2140get_temp_index(struct svga_shader_emitter_v10 *emit)
2141{
2142   assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS);
2143   return emit->num_shader_temps + emit->internal_temp_count++;
2144}
2145
2146
2147/**
2148 * Release the temporaries which were generated by get_temp_index().
2149 */
2150static void
2151free_temp_indexes(struct svga_shader_emitter_v10 *emit)
2152{
2153   emit->internal_temp_count = 0;
2154}
2155
2156
2157/**
2158 * Create a tgsi_full_src_register.
2159 */
2160static struct tgsi_full_src_register
2161make_src_reg(enum tgsi_file_type file, unsigned index)
2162{
2163   struct tgsi_full_src_register reg;
2164
2165   memset(&reg, 0, sizeof(reg));
2166   reg.Register.File = file;
2167   reg.Register.Index = index;
2168   reg.Register.SwizzleX = TGSI_SWIZZLE_X;
2169   reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
2170   reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
2171   reg.Register.SwizzleW = TGSI_SWIZZLE_W;
2172   return reg;
2173}
2174
2175
2176/**
2177 * Create a tgsi_full_src_register with a swizzle such that all four
2178 * vector components have the same scalar value.
2179 */
2180static struct tgsi_full_src_register
2181make_src_scalar_reg(enum tgsi_file_type file, unsigned index, unsigned component)
2182{
2183   struct tgsi_full_src_register reg;
2184
2185   assert(component >= TGSI_SWIZZLE_X);
2186   assert(component <= TGSI_SWIZZLE_W);
2187
2188   memset(&reg, 0, sizeof(reg));
2189   reg.Register.File = file;
2190   reg.Register.Index = index;
2191   reg.Register.SwizzleX =
2192   reg.Register.SwizzleY =
2193   reg.Register.SwizzleZ =
2194   reg.Register.SwizzleW = component;
2195   return reg;
2196}
2197
2198
2199/**
2200 * Create a tgsi_full_src_register for a temporary.
2201 */
2202static struct tgsi_full_src_register
2203make_src_temp_reg(unsigned index)
2204{
2205   return make_src_reg(TGSI_FILE_TEMPORARY, index);
2206}
2207
2208
2209/**
2210 * Create a tgsi_full_src_register for a constant.
2211 */
2212static struct tgsi_full_src_register
2213make_src_const_reg(unsigned index)
2214{
2215   return make_src_reg(TGSI_FILE_CONSTANT, index);
2216}
2217
2218
2219/**
2220 * Create a tgsi_full_src_register for an immediate constant.
2221 */
2222static struct tgsi_full_src_register
2223make_src_immediate_reg(unsigned index)
2224{
2225   return make_src_reg(TGSI_FILE_IMMEDIATE, index);
2226}
2227
2228
2229/**
2230 * Create a tgsi_full_dst_register.
2231 */
2232static struct tgsi_full_dst_register
2233make_dst_reg(enum tgsi_file_type file, unsigned index)
2234{
2235   struct tgsi_full_dst_register reg;
2236
2237   memset(&reg, 0, sizeof(reg));
2238   reg.Register.File = file;
2239   reg.Register.Index = index;
2240   reg.Register.WriteMask = TGSI_WRITEMASK_XYZW;
2241   return reg;
2242}
2243
2244
2245/**
2246 * Create a tgsi_full_dst_register for a temporary.
2247 */
2248static struct tgsi_full_dst_register
2249make_dst_temp_reg(unsigned index)
2250{
2251   return make_dst_reg(TGSI_FILE_TEMPORARY, index);
2252}
2253
2254
2255/**
2256 * Create a tgsi_full_dst_register for an output.
2257 */
2258static struct tgsi_full_dst_register
2259make_dst_output_reg(unsigned index)
2260{
2261   return make_dst_reg(TGSI_FILE_OUTPUT, index);
2262}
2263
2264
2265/**
2266 * Create negated tgsi_full_src_register.
2267 */
2268static struct tgsi_full_src_register
2269negate_src(const struct tgsi_full_src_register *reg)
2270{
2271   struct tgsi_full_src_register neg = *reg;
2272   neg.Register.Negate = !reg->Register.Negate;
2273   return neg;
2274}
2275
2276/**
2277 * Create absolute value of a tgsi_full_src_register.
2278 */
2279static struct tgsi_full_src_register
2280absolute_src(const struct tgsi_full_src_register *reg)
2281{
2282   struct tgsi_full_src_register absolute = *reg;
2283   absolute.Register.Absolute = 1;
2284   return absolute;
2285}
2286
2287
2288/** Return the named swizzle term from the src register */
2289static inline unsigned
2290get_swizzle(const struct tgsi_full_src_register *reg, enum tgsi_swizzle term)
2291{
2292   switch (term) {
2293   case TGSI_SWIZZLE_X:
2294      return reg->Register.SwizzleX;
2295   case TGSI_SWIZZLE_Y:
2296      return reg->Register.SwizzleY;
2297   case TGSI_SWIZZLE_Z:
2298      return reg->Register.SwizzleZ;
2299   case TGSI_SWIZZLE_W:
2300      return reg->Register.SwizzleW;
2301   default:
2302      assert(!"Bad swizzle");
2303      return TGSI_SWIZZLE_X;
2304   }
2305}
2306
2307
2308/**
2309 * Create swizzled tgsi_full_src_register.
2310 */
2311static struct tgsi_full_src_register
2312swizzle_src(const struct tgsi_full_src_register *reg,
2313            enum tgsi_swizzle swizzleX, enum tgsi_swizzle swizzleY,
2314            enum tgsi_swizzle swizzleZ, enum tgsi_swizzle swizzleW)
2315{
2316   struct tgsi_full_src_register swizzled = *reg;
2317   /* Note: we swizzle the current swizzle */
2318   swizzled.Register.SwizzleX = get_swizzle(reg, swizzleX);
2319   swizzled.Register.SwizzleY = get_swizzle(reg, swizzleY);
2320   swizzled.Register.SwizzleZ = get_swizzle(reg, swizzleZ);
2321   swizzled.Register.SwizzleW = get_swizzle(reg, swizzleW);
2322   return swizzled;
2323}
2324
2325
2326/**
2327 * Create swizzled tgsi_full_src_register where all the swizzle
2328 * terms are the same.
2329 */
2330static struct tgsi_full_src_register
2331scalar_src(const struct tgsi_full_src_register *reg, enum tgsi_swizzle swizzle)
2332{
2333   struct tgsi_full_src_register swizzled = *reg;
2334   /* Note: we swizzle the current swizzle */
2335   swizzled.Register.SwizzleX =
2336   swizzled.Register.SwizzleY =
2337   swizzled.Register.SwizzleZ =
2338   swizzled.Register.SwizzleW = get_swizzle(reg, swizzle);
2339   return swizzled;
2340}
2341
2342
2343/**
2344 * Create new tgsi_full_dst_register with writemask.
2345 * \param mask  bitmask of TGSI_WRITEMASK_[XYZW]
2346 */
2347static struct tgsi_full_dst_register
2348writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask)
2349{
2350   struct tgsi_full_dst_register masked = *reg;
2351   masked.Register.WriteMask = mask;
2352   return masked;
2353}
2354
2355
2356/**
2357 * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW.
2358 */
2359static boolean
2360same_swizzle_terms(const struct tgsi_full_src_register *reg)
2361{
2362   return (reg->Register.SwizzleX == reg->Register.SwizzleY &&
2363           reg->Register.SwizzleY == reg->Register.SwizzleZ &&
2364           reg->Register.SwizzleZ == reg->Register.SwizzleW);
2365}
2366
2367
2368/**
2369 * Search the vector for the value 'x' and return its position.
2370 */
2371static int
2372find_imm_in_vec4(const union tgsi_immediate_data vec[4],
2373                 union tgsi_immediate_data x)
2374{
2375   unsigned i;
2376   for (i = 0; i < 4; i++) {
2377      if (vec[i].Int == x.Int)
2378         return i;
2379   }
2380   return -1;
2381}
2382
2383
2384/**
2385 * Helper used by make_immediate_reg(), make_immediate_reg_4().
2386 */
2387static int
2388find_immediate(struct svga_shader_emitter_v10 *emit,
2389               union tgsi_immediate_data x, unsigned startIndex)
2390{
2391   const unsigned endIndex = emit->num_immediates;
2392   unsigned i;
2393
2394   assert(emit->immediates_emitted);
2395
2396   /* Search immediates for x, y, z, w */
2397   for (i = startIndex; i < endIndex; i++) {
2398      if (x.Int == emit->immediates[i][0].Int ||
2399          x.Int == emit->immediates[i][1].Int ||
2400          x.Int == emit->immediates[i][2].Int ||
2401          x.Int == emit->immediates[i][3].Int) {
2402         return i;
2403      }
2404   }
2405   /* Should never try to use an immediate value that wasn't pre-declared */
2406   assert(!"find_immediate() failed!");
2407   return -1;
2408}
2409
2410
2411/**
2412 * As above, but search for a double[2] pair.
2413 */
2414static int
2415find_immediate_dbl(struct svga_shader_emitter_v10 *emit,
2416                   double x, double y)
2417{
2418   const unsigned endIndex = emit->num_immediates;
2419   unsigned i;
2420
2421   assert(emit->immediates_emitted);
2422
2423   /* Search immediates for x, y, z, w */
2424   for (i = 0; i < endIndex; i++) {
2425      if (x == emit->immediates_dbl[i][0] &&
2426          y == emit->immediates_dbl[i][1]) {
2427         return i;
2428      }
2429   }
2430   /* Should never try to use an immediate value that wasn't pre-declared */
2431   assert(!"find_immediate_dbl() failed!");
2432   return -1;
2433}
2434
2435
2436
2437/**
2438 * Return a tgsi_full_src_register for an immediate/literal
2439 * union tgsi_immediate_data[4] value.
2440 * Note: the values must have been previously declared/allocated in
2441 * emit_pre_helpers().  And, all of x,y,z,w must be located in the same
2442 * vec4 immediate.
2443 */
2444static struct tgsi_full_src_register
2445make_immediate_reg_4(struct svga_shader_emitter_v10 *emit,
2446                     const union tgsi_immediate_data imm[4])
2447{
2448   struct tgsi_full_src_register reg;
2449   unsigned i;
2450
2451   for (i = 0; i < emit->num_common_immediates; i++) {
2452      /* search for first component value */
2453      int immpos = find_immediate(emit, imm[0], i);
2454      int x, y, z, w;
2455
2456      assert(immpos >= 0);
2457
2458      /* find remaining components within the immediate vector */
2459      x = find_imm_in_vec4(emit->immediates[immpos], imm[0]);
2460      y = find_imm_in_vec4(emit->immediates[immpos], imm[1]);
2461      z = find_imm_in_vec4(emit->immediates[immpos], imm[2]);
2462      w = find_imm_in_vec4(emit->immediates[immpos], imm[3]);
2463
2464      if (x >=0 &&  y >= 0 && z >= 0 && w >= 0) {
2465         /* found them all */
2466         memset(&reg, 0, sizeof(reg));
2467         reg.Register.File = TGSI_FILE_IMMEDIATE;
2468         reg.Register.Index = immpos;
2469         reg.Register.SwizzleX = x;
2470         reg.Register.SwizzleY = y;
2471         reg.Register.SwizzleZ = z;
2472         reg.Register.SwizzleW = w;
2473         return reg;
2474      }
2475      /* else, keep searching */
2476   }
2477
2478   assert(!"Failed to find immediate register!");
2479
2480   /* Just return IMM[0].xxxx */
2481   memset(&reg, 0, sizeof(reg));
2482   reg.Register.File = TGSI_FILE_IMMEDIATE;
2483   return reg;
2484}
2485
2486
2487/**
2488 * Return a tgsi_full_src_register for an immediate/literal
2489 * union tgsi_immediate_data value of the form {value, value, value, value}.
2490 * \sa make_immediate_reg_4() regarding allowed values.
2491 */
2492static struct tgsi_full_src_register
2493make_immediate_reg(struct svga_shader_emitter_v10 *emit,
2494                   union tgsi_immediate_data value)
2495{
2496   struct tgsi_full_src_register reg;
2497   int immpos = find_immediate(emit, value, 0);
2498
2499   assert(immpos >= 0);
2500
2501   memset(&reg, 0, sizeof(reg));
2502   reg.Register.File = TGSI_FILE_IMMEDIATE;
2503   reg.Register.Index = immpos;
2504   reg.Register.SwizzleX =
2505   reg.Register.SwizzleY =
2506   reg.Register.SwizzleZ =
2507   reg.Register.SwizzleW = find_imm_in_vec4(emit->immediates[immpos], value);
2508
2509   return reg;
2510}
2511
2512
2513/**
2514 * Return a tgsi_full_src_register for an immediate/literal float[4] value.
2515 * \sa make_immediate_reg_4() regarding allowed values.
2516 */
2517static struct tgsi_full_src_register
2518make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit,
2519                          float x, float y, float z, float w)
2520{
2521   union tgsi_immediate_data imm[4];
2522   imm[0].Float = x;
2523   imm[1].Float = y;
2524   imm[2].Float = z;
2525   imm[3].Float = w;
2526   return make_immediate_reg_4(emit, imm);
2527}
2528
2529
2530/**
2531 * Return a tgsi_full_src_register for an immediate/literal float value
2532 * of the form {value, value, value, value}.
2533 * \sa make_immediate_reg_4() regarding allowed values.
2534 */
2535static struct tgsi_full_src_register
2536make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value)
2537{
2538   union tgsi_immediate_data imm;
2539   imm.Float = value;
2540   return make_immediate_reg(emit, imm);
2541}
2542
2543
2544/**
2545 * Return a tgsi_full_src_register for an immediate/literal int[4] vector.
2546 */
2547static struct tgsi_full_src_register
2548make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit,
2549                        int x, int y, int z, int w)
2550{
2551   union tgsi_immediate_data imm[4];
2552   imm[0].Int = x;
2553   imm[1].Int = y;
2554   imm[2].Int = z;
2555   imm[3].Int = w;
2556   return make_immediate_reg_4(emit, imm);
2557}
2558
2559
2560/**
2561 * Return a tgsi_full_src_register for an immediate/literal int value
2562 * of the form {value, value, value, value}.
2563 * \sa make_immediate_reg_4() regarding allowed values.
2564 */
2565static struct tgsi_full_src_register
2566make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value)
2567{
2568   union tgsi_immediate_data imm;
2569   imm.Int = value;
2570   return make_immediate_reg(emit, imm);
2571}
2572
2573
2574static struct tgsi_full_src_register
2575make_immediate_reg_double(struct svga_shader_emitter_v10 *emit, double value)
2576{
2577   struct tgsi_full_src_register reg;
2578   int immpos = find_immediate_dbl(emit, value, value);
2579
2580   assert(immpos >= 0);
2581
2582   memset(&reg, 0, sizeof(reg));
2583   reg.Register.File = TGSI_FILE_IMMEDIATE;
2584   reg.Register.Index = immpos;
2585   reg.Register.SwizzleX = TGSI_SWIZZLE_X;
2586   reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
2587   reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
2588   reg.Register.SwizzleW = TGSI_SWIZZLE_W;
2589
2590   return reg;
2591}
2592
2593
2594/**
2595 * Allocate space for a union tgsi_immediate_data[4] immediate.
2596 * \return  the index/position of the immediate.
2597 */
2598static unsigned
2599alloc_immediate_4(struct svga_shader_emitter_v10 *emit,
2600                  const union tgsi_immediate_data imm[4])
2601{
2602   unsigned n = emit->num_immediates++;
2603   assert(!emit->immediates_emitted);
2604   assert(n < ARRAY_SIZE(emit->immediates));
2605   emit->immediates[n][0] = imm[0];
2606   emit->immediates[n][1] = imm[1];
2607   emit->immediates[n][2] = imm[2];
2608   emit->immediates[n][3] = imm[3];
2609   return n;
2610}
2611
2612
2613/**
2614 * Allocate space for a float[4] immediate.
2615 * \return  the index/position of the immediate.
2616 */
2617static unsigned
2618alloc_immediate_float4(struct svga_shader_emitter_v10 *emit,
2619                       float x, float y, float z, float w)
2620{
2621   union tgsi_immediate_data imm[4];
2622   imm[0].Float = x;
2623   imm[1].Float = y;
2624   imm[2].Float = z;
2625   imm[3].Float = w;
2626   return alloc_immediate_4(emit, imm);
2627}
2628
2629
2630/**
2631 * Allocate space for an int[4] immediate.
2632 * \return  the index/position of the immediate.
2633 */
2634static unsigned
2635alloc_immediate_int4(struct svga_shader_emitter_v10 *emit,
2636                       int x, int y, int z, int w)
2637{
2638   union tgsi_immediate_data imm[4];
2639   imm[0].Int = x;
2640   imm[1].Int = y;
2641   imm[2].Int = z;
2642   imm[3].Int = w;
2643   return alloc_immediate_4(emit, imm);
2644}
2645
2646
2647static unsigned
2648alloc_immediate_double2(struct svga_shader_emitter_v10 *emit,
2649                        double x, double y)
2650{
2651   unsigned n = emit->num_immediates++;
2652   assert(!emit->immediates_emitted);
2653   assert(n < ARRAY_SIZE(emit->immediates));
2654   emit->immediates_dbl[n][0] = x;
2655   emit->immediates_dbl[n][1] = y;
2656   return n;
2657
2658}
2659
2660
2661/**
2662 * Allocate a shader input to store a system value.
2663 */
2664static unsigned
2665alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index)
2666{
2667   const unsigned n = emit->linkage.input_map_max + 1 + index;
2668   assert(index < ARRAY_SIZE(emit->system_value_indexes));
2669   emit->system_value_indexes[index] = n;
2670   return n;
2671}
2672
2673
2674/**
2675 * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10.
2676 */
2677static boolean
2678emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit,
2679                      const struct tgsi_full_immediate *imm)
2680{
2681   /* We don't actually emit any code here.  We just save the
2682    * immediate values and emit them later.
2683    */
2684   alloc_immediate_4(emit, imm->u);
2685   return TRUE;
2686}
2687
2688
2689/**
2690 * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block
2691 * containing all the immediate values previously allocated
2692 * with alloc_immediate_4().
2693 */
2694static boolean
2695emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit)
2696{
2697   VGPU10OpcodeToken0 token;
2698
2699   assert(!emit->immediates_emitted);
2700
2701   token.value = 0;
2702   token.opcodeType = VGPU10_OPCODE_CUSTOMDATA;
2703   token.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER;
2704
2705   /* Note: no begin/end_emit_instruction() calls */
2706   emit_dword(emit, token.value);
2707   emit_dword(emit, 2 + 4 * emit->num_immediates);
2708   emit_dwords(emit, (unsigned *) emit->immediates, 4 * emit->num_immediates);
2709
2710   emit->immediates_emitted = TRUE;
2711
2712   return TRUE;
2713}
2714
2715
2716/**
2717 * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10
2718 * interpolation mode.
2719 * \return a VGPU10_INTERPOLATION_x value
2720 */
2721static unsigned
2722translate_interpolation(const struct svga_shader_emitter_v10 *emit,
2723                        enum tgsi_interpolate_mode interp,
2724                        enum tgsi_interpolate_loc interpolate_loc)
2725{
2726   if (interp == TGSI_INTERPOLATE_COLOR) {
2727      interp = emit->key.fs.flatshade ?
2728         TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE;
2729   }
2730
2731   switch (interp) {
2732   case TGSI_INTERPOLATE_CONSTANT:
2733      return VGPU10_INTERPOLATION_CONSTANT;
2734   case TGSI_INTERPOLATE_LINEAR:
2735      if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) {
2736         return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID;
2737      } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE &&
2738                 emit->version >= 41) {
2739         return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE;
2740      } else {
2741         return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE;
2742      }
2743      break;
2744   case TGSI_INTERPOLATE_PERSPECTIVE:
2745      if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) {
2746         return VGPU10_INTERPOLATION_LINEAR_CENTROID;
2747      } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE &&
2748                 emit->version >= 41) {
2749         return VGPU10_INTERPOLATION_LINEAR_SAMPLE;
2750      } else {
2751         return VGPU10_INTERPOLATION_LINEAR;
2752      }
2753      break;
2754   default:
2755      assert(!"Unexpected interpolation mode");
2756      return VGPU10_INTERPOLATION_CONSTANT;
2757   }
2758}
2759
2760
2761/**
2762 * Translate a TGSI property to VGPU10.
2763 * Don't emit any instructions yet, only need to gather the primitive property
2764 * information.  The output primitive topology might be changed later. The
2765 * final property instructions will be emitted as part of the pre-helper code.
2766 */
2767static boolean
2768emit_vgpu10_property(struct svga_shader_emitter_v10 *emit,
2769                     const struct tgsi_full_property *prop)
2770{
2771   static const VGPU10_PRIMITIVE primType[] = {
2772      VGPU10_PRIMITIVE_POINT,           /* PIPE_PRIM_POINTS */
2773      VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINES */
2774      VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINE_LOOP */
2775      VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINE_STRIP */
2776      VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLES */
2777      VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLE_STRIP */
2778      VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLE_FAN */
2779      VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_QUADS */
2780      VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_QUAD_STRIP */
2781      VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_POLYGON */
2782      VGPU10_PRIMITIVE_LINE_ADJ,        /* PIPE_PRIM_LINES_ADJACENCY */
2783      VGPU10_PRIMITIVE_LINE_ADJ,        /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
2784      VGPU10_PRIMITIVE_TRIANGLE_ADJ,    /* PIPE_PRIM_TRIANGLES_ADJACENCY */
2785      VGPU10_PRIMITIVE_TRIANGLE_ADJ     /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
2786   };
2787
2788   static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = {
2789      VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST,     /* PIPE_PRIM_POINTS */
2790      VGPU10_PRIMITIVE_TOPOLOGY_LINELIST,      /* PIPE_PRIM_LINES */
2791      VGPU10_PRIMITIVE_TOPOLOGY_LINELIST,      /* PIPE_PRIM_LINE_LOOP */
2792      VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP,     /* PIPE_PRIM_LINE_STRIP */
2793      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST,  /* PIPE_PRIM_TRIANGLES */
2794      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_STRIP */
2795      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_FAN */
2796      VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_QUADS */
2797      VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_QUAD_STRIP */
2798      VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_POLYGON */
2799      VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ,  /* PIPE_PRIM_LINES_ADJACENCY */
2800      VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ,  /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
2801      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */
2802      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
2803   };
2804
2805   static const unsigned inputArraySize[] = {
2806      0,       /* VGPU10_PRIMITIVE_UNDEFINED */
2807      1,       /* VGPU10_PRIMITIVE_POINT */
2808      2,       /* VGPU10_PRIMITIVE_LINE */
2809      3,       /* VGPU10_PRIMITIVE_TRIANGLE */
2810      0,
2811      0,
2812      4,       /* VGPU10_PRIMITIVE_LINE_ADJ */
2813      6        /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */
2814   };
2815
2816   switch (prop->Property.PropertyName) {
2817   case TGSI_PROPERTY_GS_INPUT_PRIM:
2818      assert(prop->u[0].Data < ARRAY_SIZE(primType));
2819      emit->gs.prim_type = primType[prop->u[0].Data];
2820      assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED);
2821      emit->gs.input_size = inputArraySize[emit->gs.prim_type];
2822      break;
2823
2824   case TGSI_PROPERTY_GS_OUTPUT_PRIM:
2825      assert(prop->u[0].Data < ARRAY_SIZE(primTopology));
2826      emit->gs.prim_topology = primTopology[prop->u[0].Data];
2827      assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED);
2828      break;
2829
2830   case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
2831      emit->gs.max_out_vertices = prop->u[0].Data;
2832      break;
2833
2834   case TGSI_PROPERTY_GS_INVOCATIONS:
2835      emit->gs.invocations = prop->u[0].Data;
2836      break;
2837
2838   case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
2839   case TGSI_PROPERTY_NEXT_SHADER:
2840   case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
2841      /* no-op */
2842      break;
2843
2844   case TGSI_PROPERTY_TCS_VERTICES_OUT:
2845      /* This info is already captured in the shader key */
2846      break;
2847
2848   case TGSI_PROPERTY_TES_PRIM_MODE:
2849      emit->tes.prim_mode = prop->u[0].Data;
2850      break;
2851
2852   case TGSI_PROPERTY_TES_SPACING:
2853      emit->tes.spacing = prop->u[0].Data;
2854      break;
2855
2856   case TGSI_PROPERTY_TES_VERTEX_ORDER_CW:
2857      emit->tes.vertices_order_cw = prop->u[0].Data;
2858      break;
2859
2860   case TGSI_PROPERTY_TES_POINT_MODE:
2861      emit->tes.point_mode = prop->u[0].Data;
2862      break;
2863
2864   default:
2865      debug_printf("Unexpected TGSI property %s\n",
2866                   tgsi_property_names[prop->Property.PropertyName]);
2867   }
2868
2869   return TRUE;
2870}
2871
2872
2873static void
2874emit_property_instruction(struct svga_shader_emitter_v10 *emit,
2875                          VGPU10OpcodeToken0 opcode0, unsigned nData,
2876                          unsigned data)
2877{
2878   begin_emit_instruction(emit);
2879   emit_dword(emit, opcode0.value);
2880   if (nData)
2881      emit_dword(emit, data);
2882   end_emit_instruction(emit);
2883}
2884
2885
2886/**
2887 * Emit property instructions
2888 */
2889static void
2890emit_property_instructions(struct svga_shader_emitter_v10 *emit)
2891{
2892   VGPU10OpcodeToken0 opcode0;
2893
2894   assert(emit->unit == PIPE_SHADER_GEOMETRY);
2895
2896   /* emit input primitive type declaration */
2897   opcode0.value = 0;
2898   opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE;
2899   opcode0.primitive = emit->gs.prim_type;
2900   emit_property_instruction(emit, opcode0, 0, 0);
2901
2902   /* emit max output vertices */
2903   opcode0.value = 0;
2904   opcode0.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT;
2905   emit_property_instruction(emit, opcode0, 1, emit->gs.max_out_vertices);
2906
2907   if (emit->version >= 50 && emit->gs.invocations > 0) {
2908      opcode0.value = 0;
2909      opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT;
2910      emit_property_instruction(emit, opcode0, 1, emit->gs.invocations);
2911   }
2912}
2913
2914
2915/**
2916 * A helper function to declare tessellator domain in a hull shader or
2917 * in the domain shader.
2918 */
2919static void
2920emit_tessellator_domain(struct svga_shader_emitter_v10 *emit,
2921                        enum pipe_prim_type prim_mode)
2922{
2923   VGPU10OpcodeToken0 opcode0;
2924
2925   opcode0.value = 0;
2926   opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_DOMAIN;
2927   switch (prim_mode) {
2928   case PIPE_PRIM_QUADS:
2929   case PIPE_PRIM_LINES:
2930      opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_QUAD;
2931      break;
2932   case PIPE_PRIM_TRIANGLES:
2933      opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_TRI;
2934      break;
2935   default:
2936      debug_printf("Invalid tessellator prim mode %d\n", prim_mode);
2937      opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_UNDEFINED;
2938   }
2939   begin_emit_instruction(emit);
2940   emit_dword(emit, opcode0.value);
2941   end_emit_instruction(emit);
2942}
2943
2944
2945/**
2946 * Emit domain shader declarations.
2947 */
2948static void
2949emit_domain_shader_declarations(struct svga_shader_emitter_v10 *emit)
2950{
2951   VGPU10OpcodeToken0 opcode0;
2952
2953   assert(emit->unit == PIPE_SHADER_TESS_EVAL);
2954
2955   /* Emit the input control point count */
2956   assert(emit->key.tes.vertices_per_patch >= 0 &&
2957          emit->key.tes.vertices_per_patch <= 32);
2958
2959   opcode0.value = 0;
2960   opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT;
2961   opcode0.controlPointCount = emit->key.tes.vertices_per_patch;
2962   begin_emit_instruction(emit);
2963   emit_dword(emit, opcode0.value);
2964   end_emit_instruction(emit);
2965
2966   emit_tessellator_domain(emit, emit->tes.prim_mode);
2967}
2968
2969
2970/**
2971 * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed
2972 * to implement some instructions.  We pre-allocate those values here
2973 * in the immediate constant buffer.
2974 */
2975static void
2976alloc_common_immediates(struct svga_shader_emitter_v10 *emit)
2977{
2978   unsigned n = 0;
2979
2980   emit->common_immediate_pos[n++] =
2981      alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f);
2982
2983   if (emit->info.opcode_count[TGSI_OPCODE_LIT] > 0) {
2984      emit->common_immediate_pos[n++] =
2985         alloc_immediate_float4(emit, 128.0f, -128.0f, 0.0f, 0.0f);
2986   }
2987
2988   emit->common_immediate_pos[n++] =
2989      alloc_immediate_int4(emit, 0, 1, 0, -1);
2990
2991   if (emit->info.opcode_count[TGSI_OPCODE_IMSB] > 0 ||
2992       emit->info.opcode_count[TGSI_OPCODE_UMSB] > 0) {
2993      emit->common_immediate_pos[n++] =
2994         alloc_immediate_int4(emit, 31, 0, 0, 0);
2995   }
2996
2997   if (emit->info.opcode_count[TGSI_OPCODE_UBFE] > 0 ||
2998       emit->info.opcode_count[TGSI_OPCODE_IBFE] > 0 ||
2999       emit->info.opcode_count[TGSI_OPCODE_BFI] > 0) {
3000      emit->common_immediate_pos[n++] =
3001         alloc_immediate_int4(emit, 32, 0, 0, 0);
3002   }
3003
3004   if (emit->key.vs.attrib_puint_to_snorm) {
3005      emit->common_immediate_pos[n++] =
3006         alloc_immediate_float4(emit, -2.0f, 2.0f, 3.0f, -1.66666f);
3007   }
3008
3009   if (emit->key.vs.attrib_puint_to_uscaled) {
3010      emit->common_immediate_pos[n++] =
3011         alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f);
3012   }
3013
3014   if (emit->key.vs.attrib_puint_to_sscaled) {
3015      emit->common_immediate_pos[n++] =
3016         alloc_immediate_int4(emit, 22, 12, 2, 0);
3017
3018      emit->common_immediate_pos[n++] =
3019         alloc_immediate_int4(emit, 22, 30, 0, 0);
3020   }
3021
3022   if (emit->vposition.num_prescale > 1) {
3023      unsigned i;
3024      for (i = 0; i < emit->vposition.num_prescale; i+=4) {
3025         emit->common_immediate_pos[n++] =
3026            alloc_immediate_int4(emit, i, i+1, i+2, i+3);
3027      }
3028   }
3029
3030   emit->immediates_dbl = (double (*)[2]) emit->immediates;
3031
3032   if (emit->info.opcode_count[TGSI_OPCODE_DNEG] > 0) {
3033      emit->common_immediate_pos[n++] =
3034         alloc_immediate_double2(emit, -1.0, -1.0);
3035   }
3036
3037   if (emit->info.opcode_count[TGSI_OPCODE_DSQRT] > 0) {
3038      emit->common_immediate_pos[n++] =
3039         alloc_immediate_double2(emit, 0.0, 0.0);
3040      emit->common_immediate_pos[n++] =
3041         alloc_immediate_double2(emit, 1.0, 1.0);
3042   }
3043
3044   if (emit->info.opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0) {
3045      emit->common_immediate_pos[n++] =
3046         alloc_immediate_float4(emit, 16.0f, -16.0f, 0.0, 0.0);
3047   }
3048
3049   assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
3050
3051   unsigned i;
3052
3053   for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
3054      if (emit->key.tex[i].texel_bias) {
3055         /* Replace 0.0f if more immediate float value is needed */
3056         emit->common_immediate_pos[n++] =
3057            alloc_immediate_float4(emit, 0.0001f, 0.0f, 0.0f, 0.0f);
3058         break;
3059      }
3060   }
3061
3062   assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
3063   emit->num_common_immediates = n;
3064}
3065
3066
3067/**
3068 * Emit hull shader declarations.
3069*/
3070static void
3071emit_hull_shader_declarations(struct svga_shader_emitter_v10 *emit)
3072{
3073   VGPU10OpcodeToken0 opcode0;
3074
3075   /* Emit the input control point count */
3076   assert(emit->key.tcs.vertices_per_patch > 0 &&
3077          emit->key.tcs.vertices_per_patch <= 32);
3078
3079   opcode0.value = 0;
3080   opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT;
3081   opcode0.controlPointCount = emit->key.tcs.vertices_per_patch;
3082   begin_emit_instruction(emit);
3083   emit_dword(emit, opcode0.value);
3084   end_emit_instruction(emit);
3085
3086   /* Emit the output control point count */
3087   assert(emit->key.tcs.vertices_out >= 0 && emit->key.tcs.vertices_out <= 32);
3088
3089   opcode0.value = 0;
3090   opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT;
3091   opcode0.controlPointCount = emit->key.tcs.vertices_out;
3092   begin_emit_instruction(emit);
3093   emit_dword(emit, opcode0.value);
3094   end_emit_instruction(emit);
3095
3096   /* Emit tessellator domain */
3097   emit_tessellator_domain(emit, emit->key.tcs.prim_mode);
3098
3099   /* Emit tessellator output primitive */
3100   opcode0.value = 0;
3101   opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE;
3102   if (emit->key.tcs.point_mode) {
3103      opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_POINT;
3104   }
3105   else if (emit->key.tcs.prim_mode == PIPE_PRIM_LINES) {
3106      opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_LINE;
3107   }
3108   else {
3109      assert(emit->key.tcs.prim_mode == PIPE_PRIM_QUADS ||
3110             emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES);
3111
3112      if (emit->key.tcs.vertices_order_cw)
3113         opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CCW;
3114      else
3115         opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CW;
3116   }
3117   begin_emit_instruction(emit);
3118   emit_dword(emit, opcode0.value);
3119   end_emit_instruction(emit);
3120
3121   /* Emit tessellator partitioning */
3122   opcode0.value = 0;
3123   opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_PARTITIONING;
3124   switch (emit->key.tcs.spacing) {
3125   case PIPE_TESS_SPACING_FRACTIONAL_ODD:
3126      opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD;
3127      break;
3128   case PIPE_TESS_SPACING_FRACTIONAL_EVEN:
3129      opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN;
3130      break;
3131   case PIPE_TESS_SPACING_EQUAL:
3132      opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_INTEGER;
3133      break;
3134   default:
3135      debug_printf("invalid tessellator spacing %d\n", emit->key.tcs.spacing);
3136      opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_UNDEFINED;
3137   }
3138   begin_emit_instruction(emit);
3139   emit_dword(emit, opcode0.value);
3140   end_emit_instruction(emit);
3141
3142   /* Declare constant registers */
3143   emit_constant_declaration(emit);
3144
3145   /* Declare samplers and resources */
3146   emit_sampler_declarations(emit);
3147   emit_resource_declarations(emit);
3148
3149   alloc_common_immediates(emit);
3150
3151   int nVertices = emit->key.tcs.vertices_per_patch;
3152   emit->tcs.imm_index =
3153      alloc_immediate_int4(emit, nVertices, nVertices, nVertices, 0);
3154
3155   /* Now, emit the constant block containing all the immediates
3156    * declared by shader, as well as the extra ones seen above.
3157    */
3158   emit_vgpu10_immediates_block(emit);
3159
3160}
3161
3162
3163/**
3164 * A helper function to determine if control point phase is needed.
3165 * Returns TRUE if there is control point output.
3166 */
3167static boolean
3168needs_control_point_phase(struct svga_shader_emitter_v10 *emit)
3169{
3170   unsigned i;
3171
3172   assert(emit->unit == PIPE_SHADER_TESS_CTRL);
3173
3174   /* If output control point count does not match the input count,
3175    * we need a control point phase to explicitly set the output control
3176    * points.
3177    */
3178   if ((emit->key.tcs.vertices_per_patch != emit->key.tcs.vertices_out) &&
3179       emit->key.tcs.vertices_out)
3180      return TRUE;
3181
3182   for (i = 0; i < emit->info.num_outputs; i++) {
3183      switch (emit->info.output_semantic_name[i]) {
3184      case TGSI_SEMANTIC_PATCH:
3185      case TGSI_SEMANTIC_TESSOUTER:
3186      case TGSI_SEMANTIC_TESSINNER:
3187         break;
3188      default:
3189         return TRUE;
3190      }
3191   }
3192   return FALSE;
3193}
3194
3195
3196/**
3197 * A helper function to add shader signature for passthrough control point
3198 * phase. This signature is also generated for passthrough control point
3199 * phase from HLSL compiler and is needed by Metal Renderer.
3200 */
3201static void
3202emit_passthrough_control_point_signature(struct svga_shader_emitter_v10 *emit)
3203{
3204   struct svga_shader_signature *sgn = &emit->signature;
3205   SVGA3dDXShaderSignatureEntry *sgnEntry;
3206   unsigned i;
3207
3208   for (i = 0; i < emit->info.num_inputs; i++) {
3209      unsigned index = emit->linkage.input_map[i];
3210      enum tgsi_semantic sem_name = emit->info.input_semantic_name[i];
3211
3212      sgnEntry = &sgn->inputs[sgn->header.numInputSignatures++];
3213
3214      set_shader_signature_entry(sgnEntry, index,
3215                                 tgsi_semantic_to_sgn_name[sem_name],
3216                                 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3217                                 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3218                                 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3219
3220      sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++];
3221
3222      set_shader_signature_entry(sgnEntry, i,
3223                                 tgsi_semantic_to_sgn_name[sem_name],
3224                                 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3225                                 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3226                                 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3227   }
3228}
3229
3230
3231/**
3232 * A helper function to emit an instruction to start the control point phase
3233 * in the hull shader.
3234 */
3235static void
3236emit_control_point_phase_instruction(struct svga_shader_emitter_v10 *emit)
3237{
3238   VGPU10OpcodeToken0 opcode0;
3239
3240   opcode0.value = 0;
3241   opcode0.opcodeType = VGPU10_OPCODE_HS_CONTROL_POINT_PHASE;
3242   begin_emit_instruction(emit);
3243   emit_dword(emit, opcode0.value);
3244   end_emit_instruction(emit);
3245}
3246
3247
3248/**
3249 * Start the hull shader control point phase
3250 */
3251static boolean
3252emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10 *emit)
3253{
3254   /* If there is no control point output, skip the control point phase. */
3255   if (!needs_control_point_phase(emit)) {
3256      if (!emit->key.tcs.vertices_out) {
3257         /**
3258          * If the tcs does not explicitly generate any control point output
3259          * and the tes does not use any input control point, then
3260          * emit an empty control point phase with zero output control
3261          * point count.
3262          */
3263         emit_control_point_phase_instruction(emit);
3264
3265         /**
3266          * Since this is an empty control point phase, we will need to
3267          * add input signatures when we parse the tcs again in the
3268          * patch constant phase.
3269          */
3270         emit->tcs.fork_phase_add_signature = TRUE;
3271      }
3272      else {
3273         /**
3274          * Before skipping the control point phase, add the signature for
3275          * the passthrough control point.
3276          */
3277         emit_passthrough_control_point_signature(emit);
3278      }
3279      return FALSE;
3280   }
3281
3282   /* Start the control point phase in the hull shader */
3283   emit_control_point_phase_instruction(emit);
3284
3285   /* Declare the output control point ID */
3286   if (emit->tcs.invocation_id_sys_index == INVALID_INDEX) {
3287      /* Add invocation id declaration if it does not exist */
3288      emit->tcs.invocation_id_sys_index = emit->info.num_system_values + 1;
3289   }
3290
3291   emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
3292                          VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID,
3293                          VGPU10_OPERAND_INDEX_0D,
3294                          0, 1,
3295                          VGPU10_NAME_UNDEFINED,
3296                          VGPU10_OPERAND_0_COMPONENT, 0,
3297                          0,
3298                          VGPU10_INTERPOLATION_CONSTANT, TRUE,
3299                          SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
3300
3301   if (emit->tcs.prim_id_index != INVALID_INDEX) {
3302      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
3303                             VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
3304                             VGPU10_OPERAND_INDEX_0D,
3305                             0, 1,
3306                             VGPU10_NAME_UNDEFINED,
3307                             VGPU10_OPERAND_0_COMPONENT,
3308                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
3309                             0,
3310                             VGPU10_INTERPOLATION_UNDEFINED, TRUE,
3311                             SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
3312   }
3313
3314   return TRUE;
3315}
3316
3317
3318/**
3319 * Start the hull shader patch constant phase and
3320 * do the second pass of the tcs translation and emit
3321 * the relevant declarations and instructions for this phase.
3322 */
3323static boolean
3324emit_hull_shader_patch_constant_phase(struct svga_shader_emitter_v10 *emit,
3325                                      struct tgsi_parse_context *parse)
3326{
3327   unsigned inst_number = 0;
3328   boolean ret = TRUE;
3329   VGPU10OpcodeToken0 opcode0;
3330
3331   emit->skip_instruction = FALSE;
3332
3333   /* Start the patch constant phase */
3334   opcode0.value = 0;
3335   opcode0.opcodeType = VGPU10_OPCODE_HS_FORK_PHASE;
3336   begin_emit_instruction(emit);
3337   emit_dword(emit, opcode0.value);
3338   end_emit_instruction(emit);
3339
3340   /* Set the current phase to patch constant phase */
3341   emit->tcs.control_point_phase = FALSE;
3342
3343   if (emit->tcs.prim_id_index != INVALID_INDEX) {
3344      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
3345                             VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
3346                             VGPU10_OPERAND_INDEX_0D,
3347                             0, 1,
3348                             VGPU10_NAME_UNDEFINED,
3349                             VGPU10_OPERAND_0_COMPONENT,
3350                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
3351                             0,
3352                             VGPU10_INTERPOLATION_UNDEFINED, TRUE,
3353                             SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
3354   }
3355
3356   /* Emit declarations for this phase */
3357   emit->index_range.required =
3358      emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? TRUE : FALSE;
3359   emit_tcs_input_declarations(emit);
3360
3361   if (emit->index_range.start_index != INVALID_INDEX) {
3362      emit_index_range_declaration(emit);
3363   }
3364
3365   emit->index_range.required =
3366      emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? TRUE : FALSE;
3367   emit_tcs_output_declarations(emit);
3368
3369   if (emit->index_range.start_index != INVALID_INDEX) {
3370      emit_index_range_declaration(emit);
3371   }
3372   emit->index_range.required = FALSE;
3373
3374   emit_temporaries_declaration(emit);
3375
3376   /* Reset the token position to the first instruction token
3377    * in preparation for the second pass of the shader
3378    */
3379   parse->Position = emit->tcs.instruction_token_pos;
3380
3381   while (!tgsi_parse_end_of_tokens(parse)) {
3382      tgsi_parse_token(parse);
3383
3384      assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
3385      ret = emit_vgpu10_instruction(emit, inst_number++,
3386                                    &parse->FullToken.FullInstruction);
3387
3388      /* Usually this applies to TCS only. If shader is reading output of
3389       * patch constant in fork phase, we should reemit all instructions
3390       * which are writting into ouput of patch constant in fork phase
3391       * to store results into temporaries.
3392       */
3393      if (emit->reemit_instruction) {
3394         assert(emit->unit == PIPE_SHADER_TESS_CTRL);
3395         ret = emit_vgpu10_instruction(emit, inst_number,
3396                                       &parse->FullToken.FullInstruction);
3397      }
3398
3399      if (!ret)
3400         return FALSE;
3401   }
3402
3403   return TRUE;
3404}
3405
3406
3407/**
3408 * Emit index range declaration.
3409 */
3410static boolean
3411emit_index_range_declaration(struct svga_shader_emitter_v10 *emit)
3412{
3413   if (emit->version < 50)
3414      return TRUE;
3415
3416   assert(emit->index_range.start_index != INVALID_INDEX);
3417   assert(emit->index_range.count != 0);
3418   assert(emit->index_range.required);
3419   assert(emit->index_range.operandType != VGPU10_NUM_OPERANDS);
3420   assert(emit->index_range.dim != 0);
3421   assert(emit->index_range.size != 0);
3422
3423   VGPU10OpcodeToken0 opcode0;
3424   VGPU10OperandToken0 operand0;
3425
3426   opcode0.value = 0;
3427   opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEX_RANGE;
3428
3429   operand0.value = 0;
3430   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
3431   operand0.indexDimension = emit->index_range.dim;
3432   operand0.operandType = emit->index_range.operandType;
3433   operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
3434   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3435
3436   if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D)
3437      operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3438
3439   begin_emit_instruction(emit);
3440   emit_dword(emit, opcode0.value);
3441   emit_dword(emit, operand0.value);
3442
3443   if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D) {
3444      emit_dword(emit, emit->index_range.size);
3445      emit_dword(emit, emit->index_range.start_index);
3446      emit_dword(emit, emit->index_range.count);
3447   }
3448   else {
3449      emit_dword(emit, emit->index_range.start_index);
3450      emit_dword(emit, emit->index_range.count);
3451   }
3452
3453   end_emit_instruction(emit);
3454
3455   /* Reset fields in emit->index_range struct except
3456    * emit->index_range.required which will be reset afterwards
3457    */
3458   emit->index_range.count = 0;
3459   emit->index_range.operandType = VGPU10_NUM_OPERANDS;
3460   emit->index_range.start_index = INVALID_INDEX;
3461   emit->index_range.size = 0;
3462   emit->index_range.dim = 0;
3463
3464   return TRUE;
3465}
3466
3467
3468/**
3469 * Emit a vgpu10 declaration "instruction".
3470 * \param index  the register index
3471 * \param size   array size of the operand. In most cases, it is 1,
3472 *               but for inputs to geometry shader, the array size varies
3473 *               depending on the primitive type.
3474 */
3475static void
3476emit_decl_instruction(struct svga_shader_emitter_v10 *emit,
3477                      VGPU10OpcodeToken0 opcode0,
3478                      VGPU10OperandToken0 operand0,
3479                      VGPU10NameToken name_token,
3480                      unsigned index, unsigned size)
3481{
3482   assert(opcode0.opcodeType);
3483   assert(operand0.mask ||
3484          (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT) ||
3485          (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_DEPTH) ||
3486          (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK) ||
3487          (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) ||
3488          (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) ||
3489          (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID) ||
3490          (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK) ||
3491          (operand0.operandType == VGPU10_OPERAND_TYPE_STREAM));
3492
3493   begin_emit_instruction(emit);
3494   emit_dword(emit, opcode0.value);
3495
3496   emit_dword(emit, operand0.value);
3497
3498   if (operand0.indexDimension == VGPU10_OPERAND_INDEX_1D) {
3499      /* Next token is the index of the register to declare */
3500      emit_dword(emit, index);
3501   }
3502   else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_2D) {
3503      /* Next token is the size of the register */
3504      emit_dword(emit, size);
3505
3506      /* Followed by the index of the register */
3507      emit_dword(emit, index);
3508   }
3509
3510   if (name_token.value) {
3511      emit_dword(emit, name_token.value);
3512   }
3513
3514   end_emit_instruction(emit);
3515}
3516
3517
3518/**
3519 * Emit the declaration for a shader input.
3520 * \param opcodeType  opcode type, one of VGPU10_OPCODE_DCL_INPUTx
3521 * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x
3522 * \param dim         index dimension
3523 * \param index       the input register index
3524 * \param size        array size of the operand. In most cases, it is 1,
3525 *                    but for inputs to geometry shader, the array size varies
3526 *                    depending on the primitive type. For tessellation control
3527 *                    shader, the array size is the vertex count per patch.
3528 * \param name        one of VGPU10_NAME_x
3529 * \parma numComp     number of components
3530 * \param selMode     component selection mode
3531 * \param usageMask   bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
3532 * \param interpMode  interpolation mode
3533 */
3534static void
3535emit_input_declaration(struct svga_shader_emitter_v10 *emit,
3536                       VGPU10_OPCODE_TYPE opcodeType,
3537                       VGPU10_OPERAND_TYPE operandType,
3538                       VGPU10_OPERAND_INDEX_DIMENSION dim,
3539                       unsigned index, unsigned size,
3540                       VGPU10_SYSTEM_NAME name,
3541                       VGPU10_OPERAND_NUM_COMPONENTS numComp,
3542                       VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode,
3543                       unsigned usageMask,
3544                       VGPU10_INTERPOLATION_MODE interpMode,
3545                       boolean addSignature,
3546                       SVGA3dDXSignatureSemanticName sgnName)
3547{
3548   VGPU10OpcodeToken0 opcode0;
3549   VGPU10OperandToken0 operand0;
3550   VGPU10NameToken name_token;
3551
3552   assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
3553   assert(opcodeType == VGPU10_OPCODE_DCL_INPUT ||
3554          opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV ||
3555          opcodeType == VGPU10_OPCODE_DCL_INPUT_SGV ||
3556          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS ||
3557          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SIV ||
3558          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV);
3559   assert(operandType == VGPU10_OPERAND_TYPE_INPUT ||
3560          operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID ||
3561          operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK ||
3562          operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID ||
3563          operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID ||
3564          operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT ||
3565          operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT ||
3566          operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT ||
3567          operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID ||
3568          operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID ||
3569          operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP);
3570
3571   assert(numComp <= VGPU10_OPERAND_4_COMPONENT);
3572   assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
3573   assert(dim <= VGPU10_OPERAND_INDEX_3D);
3574   assert(name == VGPU10_NAME_UNDEFINED ||
3575          name == VGPU10_NAME_POSITION ||
3576          name == VGPU10_NAME_INSTANCE_ID ||
3577          name == VGPU10_NAME_VERTEX_ID ||
3578          name == VGPU10_NAME_PRIMITIVE_ID ||
3579          name == VGPU10_NAME_IS_FRONT_FACE ||
3580          name == VGPU10_NAME_SAMPLE_INDEX ||
3581          name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
3582          name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX);
3583
3584   assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED ||
3585          interpMode == VGPU10_INTERPOLATION_CONSTANT ||
3586          interpMode == VGPU10_INTERPOLATION_LINEAR ||
3587          interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID ||
3588          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE ||
3589          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID ||
3590          interpMode == VGPU10_INTERPOLATION_LINEAR_SAMPLE ||
3591          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE);
3592
3593   check_register_index(emit, opcodeType, index);
3594
3595   opcode0.value = operand0.value = name_token.value = 0;
3596
3597   opcode0.opcodeType = opcodeType;
3598   opcode0.interpolationMode = interpMode;
3599
3600   operand0.operandType = operandType;
3601   operand0.numComponents = numComp;
3602   operand0.selectionMode = selMode;
3603   operand0.mask = usageMask;
3604   operand0.indexDimension = dim;
3605   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3606   if (dim == VGPU10_OPERAND_INDEX_2D)
3607      operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3608
3609   name_token.name = name;
3610
3611   emit_decl_instruction(emit, opcode0, operand0, name_token, index, size);
3612
3613   if (addSignature) {
3614      struct svga_shader_signature *sgn = &emit->signature;
3615      if (operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT) {
3616         /* Set patch constant signature */
3617         SVGA3dDXShaderSignatureEntry *sgnEntry =
3618            &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
3619         set_shader_signature_entry(sgnEntry, index,
3620                                    sgnName, usageMask,
3621                                    SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3622                                    SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3623
3624      } else if (operandType == VGPU10_OPERAND_TYPE_INPUT ||
3625                 operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT) {
3626         /* Set input signature */
3627         SVGA3dDXShaderSignatureEntry *sgnEntry =
3628            &sgn->inputs[sgn->header.numInputSignatures++];
3629         set_shader_signature_entry(sgnEntry, index,
3630                                    sgnName, usageMask,
3631                                    SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3632                                    SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3633      }
3634   }
3635
3636   if (emit->index_range.required) {
3637      /* Here, index_range declaration is only applicable for opcodeType
3638       * VGPU10_OPCODE_DCL_INPUT and VGPU10_OPCODE_DCL_INPUT_PS and
3639       * for operandType VGPU10_OPERAND_TYPE_INPUT,
3640       * VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT and
3641       * VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT.
3642       */
3643      if ((opcodeType != VGPU10_OPCODE_DCL_INPUT &&
3644           opcodeType != VGPU10_OPCODE_DCL_INPUT_PS) ||
3645          (operandType != VGPU10_OPERAND_TYPE_INPUT &&
3646           operandType != VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT &&
3647           operandType != VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT)) {
3648         if (emit->index_range.start_index != INVALID_INDEX) {
3649            emit_index_range_declaration(emit);
3650         }
3651         return;
3652      }
3653
3654      if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) {
3655         /* Need record new index_range */
3656         emit->index_range.count = 1;
3657         emit->index_range.operandType = operandType;
3658         emit->index_range.start_index = index;
3659         emit->index_range.size = size;
3660         emit->index_range.dim = dim;
3661      }
3662      else if (index !=
3663               (emit->index_range.start_index + emit->index_range.count) ||
3664               emit->index_range.operandType != operandType) {
3665         /* Input index is not contiguous with index range or operandType is
3666          * different from index range's operandType. We need to emit current
3667          * index_range first and then start recording next index range.
3668          */
3669         emit_index_range_declaration(emit);
3670
3671         emit->index_range.count = 1;
3672         emit->index_range.operandType = operandType;
3673         emit->index_range.start_index = index;
3674         emit->index_range.size = size;
3675         emit->index_range.dim = dim;
3676      }
3677      else if (emit->index_range.operandType == operandType) {
3678         /* Since input index is contiguous with index range and operandType
3679          * is same as index range's operandType, increment index range count.
3680          */
3681         emit->index_range.count++;
3682      }
3683   }
3684}
3685
3686
3687/**
3688 * Emit the declaration for a shader output.
3689 * \param type  one of VGPU10_OPCODE_DCL_OUTPUTx
3690 * \param index  the output register index
3691 * \param name  one of VGPU10_NAME_x
3692 * \param usageMask  bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
3693 */
3694static void
3695emit_output_declaration(struct svga_shader_emitter_v10 *emit,
3696                        VGPU10_OPCODE_TYPE type, unsigned index,
3697                        VGPU10_SYSTEM_NAME name,
3698                        unsigned writemask,
3699                        boolean addSignature,
3700                        SVGA3dDXSignatureSemanticName sgnName)
3701{
3702   VGPU10OpcodeToken0 opcode0;
3703   VGPU10OperandToken0 operand0;
3704   VGPU10NameToken name_token;
3705
3706   assert(writemask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
3707   assert(type == VGPU10_OPCODE_DCL_OUTPUT ||
3708          type == VGPU10_OPCODE_DCL_OUTPUT_SGV ||
3709          type == VGPU10_OPCODE_DCL_OUTPUT_SIV);
3710   assert(name == VGPU10_NAME_UNDEFINED ||
3711          name == VGPU10_NAME_POSITION ||
3712          name == VGPU10_NAME_PRIMITIVE_ID ||
3713          name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
3714          name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX ||
3715          name == VGPU10_NAME_CLIP_DISTANCE);
3716
3717   check_register_index(emit, type, index);
3718
3719   opcode0.value = operand0.value = name_token.value = 0;
3720
3721   opcode0.opcodeType = type;
3722   operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
3723   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
3724   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
3725   operand0.mask = writemask;
3726   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
3727   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3728
3729   name_token.name = name;
3730
3731   emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
3732
3733   /* Capture output signature */
3734   if (addSignature) {
3735      struct svga_shader_signature *sgn = &emit->signature;
3736      SVGA3dDXShaderSignatureEntry *sgnEntry =
3737         &sgn->outputs[sgn->header.numOutputSignatures++];
3738      set_shader_signature_entry(sgnEntry, index,
3739                                 sgnName, writemask,
3740                                 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3741                                 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3742   }
3743
3744   if (emit->index_range.required) {
3745      /* Here, index_range declaration is only applicable for opcodeType
3746       * VGPU10_OPCODE_DCL_OUTPUT and for operandType
3747       * VGPU10_OPERAND_TYPE_OUTPUT.
3748       */
3749      if (type != VGPU10_OPCODE_DCL_OUTPUT) {
3750         if (emit->index_range.start_index != INVALID_INDEX) {
3751            emit_index_range_declaration(emit);
3752         }
3753         return;
3754      }
3755
3756      if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) {
3757         /* Need record new index_range */
3758         emit->index_range.count = 1;
3759         emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
3760         emit->index_range.start_index = index;
3761         emit->index_range.size = 1;
3762         emit->index_range.dim = VGPU10_OPERAND_INDEX_1D;
3763      }
3764      else if (index !=
3765               (emit->index_range.start_index + emit->index_range.count)) {
3766         /* Output index is not contiguous with index range. We need to
3767          * emit current index_range first and then start recording next
3768          * index range.
3769          */
3770         emit_index_range_declaration(emit);
3771
3772         emit->index_range.count = 1;
3773         emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
3774         emit->index_range.start_index = index;
3775         emit->index_range.size = 1;
3776         emit->index_range.dim = VGPU10_OPERAND_INDEX_1D;
3777      }
3778      else {
3779         /* Since output index is contiguous with index range, increment
3780          * index range count.
3781          */
3782         emit->index_range.count++;
3783      }
3784   }
3785}
3786
3787
3788/**
3789 * Emit the declaration for the fragment depth output.
3790 */
3791static void
3792emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit)
3793{
3794   VGPU10OpcodeToken0 opcode0;
3795   VGPU10OperandToken0 operand0;
3796   VGPU10NameToken name_token;
3797
3798   assert(emit->unit == PIPE_SHADER_FRAGMENT);
3799
3800   opcode0.value = operand0.value = name_token.value = 0;
3801
3802   opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
3803   operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
3804   operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
3805   operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
3806   operand0.mask = 0;
3807
3808   emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
3809}
3810
3811
3812/**
3813 * Emit the declaration for the fragment sample mask/coverage output.
3814 */
3815static void
3816emit_samplemask_output_declaration(struct svga_shader_emitter_v10 *emit)
3817{
3818   VGPU10OpcodeToken0 opcode0;
3819   VGPU10OperandToken0 operand0;
3820   VGPU10NameToken name_token;
3821
3822   assert(emit->unit == PIPE_SHADER_FRAGMENT);
3823   assert(emit->version >= 41);
3824
3825   opcode0.value = operand0.value = name_token.value = 0;
3826
3827   opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
3828   operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK;
3829   operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
3830   operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
3831   operand0.mask = 0;
3832
3833   emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
3834}
3835
3836
3837/**
3838 * Emit output declarations for fragment shader.
3839 */
3840static void
3841emit_fs_output_declarations(struct svga_shader_emitter_v10 *emit)
3842{
3843   unsigned int i;
3844
3845   for (i = 0; i < emit->info.num_outputs; i++) {
3846      /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/
3847      const enum tgsi_semantic semantic_name =
3848         emit->info.output_semantic_name[i];
3849      const unsigned semantic_index = emit->info.output_semantic_index[i];
3850      unsigned index = i;
3851
3852      if (semantic_name == TGSI_SEMANTIC_COLOR) {
3853         assert(semantic_index < ARRAY_SIZE(emit->fs.color_out_index));
3854
3855         emit->fs.color_out_index[semantic_index] = index;
3856
3857         emit->fs.num_color_outputs = MAX2(emit->fs.num_color_outputs,
3858                                              index + 1);
3859
3860         /* The semantic index is the shader's color output/buffer index */
3861         emit_output_declaration(emit,
3862                                 VGPU10_OPCODE_DCL_OUTPUT, semantic_index,
3863                                 VGPU10_NAME_UNDEFINED,
3864                                 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3865                                 TRUE,
3866                                 map_tgsi_semantic_to_sgn_name(semantic_name));
3867
3868         if (semantic_index == 0) {
3869            if (emit->key.fs.write_color0_to_n_cbufs > 1) {
3870               /* Emit declarations for the additional color outputs
3871                * for broadcasting.
3872                */
3873               unsigned j;
3874               for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) {
3875                  /* Allocate a new output index */
3876                  unsigned idx = emit->info.num_outputs + j - 1;
3877                  emit->fs.color_out_index[j] = idx;
3878                  emit_output_declaration(emit,
3879                                        VGPU10_OPCODE_DCL_OUTPUT, idx,
3880                                        VGPU10_NAME_UNDEFINED,
3881                                        VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3882                                        TRUE,
3883                                        map_tgsi_semantic_to_sgn_name(semantic_name));
3884                  emit->info.output_semantic_index[idx] = j;
3885               }
3886
3887               emit->fs.num_color_outputs =
3888                     emit->key.fs.write_color0_to_n_cbufs;
3889            }
3890         }
3891      }
3892      else if (semantic_name == TGSI_SEMANTIC_POSITION) {
3893         /* Fragment depth output */
3894         emit_fragdepth_output_declaration(emit);
3895      }
3896      else if (semantic_name == TGSI_SEMANTIC_SAMPLEMASK) {
3897         /* Sample mask output */
3898         emit_samplemask_output_declaration(emit);
3899      }
3900      else {
3901         assert(!"Bad output semantic name");
3902      }
3903   }
3904}
3905
3906
3907/**
3908 * Emit common output declaration for vertex processing.
3909 */
3910static void
3911emit_vertex_output_declaration(struct svga_shader_emitter_v10 *emit,
3912                               unsigned index, unsigned writemask,
3913                               boolean addSignature)
3914{
3915   const enum tgsi_semantic semantic_name =
3916         emit->info.output_semantic_name[index];
3917   const unsigned semantic_index = emit->info.output_semantic_index[index];
3918   unsigned name, type;
3919   unsigned final_mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
3920
3921   assert(emit->unit != PIPE_SHADER_FRAGMENT &&
3922          emit->unit != PIPE_SHADER_COMPUTE);
3923
3924   switch (semantic_name) {
3925   case TGSI_SEMANTIC_POSITION:
3926      if (emit->unit == PIPE_SHADER_TESS_CTRL) {
3927         /* position will be declared in control point only */
3928         assert(emit->tcs.control_point_phase);
3929         type = VGPU10_OPCODE_DCL_OUTPUT;
3930         name = VGPU10_NAME_UNDEFINED;
3931         emit_output_declaration(emit, type, index, name, final_mask, TRUE,
3932                                 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
3933         return;
3934      }
3935      else {
3936         type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
3937         name = VGPU10_NAME_POSITION;
3938      }
3939      /* Save the index of the vertex position output register */
3940      emit->vposition.out_index = index;
3941      break;
3942   case TGSI_SEMANTIC_CLIPDIST:
3943      type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
3944      name = VGPU10_NAME_CLIP_DISTANCE;
3945      /* save the starting index of the clip distance output register */
3946      if (semantic_index == 0)
3947         emit->clip_dist_out_index = index;
3948      final_mask = apply_clip_plane_mask(emit, writemask, semantic_index);
3949      if (final_mask == 0x0)
3950         return; /* discard this do-nothing declaration */
3951      break;
3952   case TGSI_SEMANTIC_CLIPVERTEX:
3953      type = VGPU10_OPCODE_DCL_OUTPUT;
3954      name = VGPU10_NAME_UNDEFINED;
3955      emit->clip_vertex_out_index = index;
3956      break;
3957   default:
3958      /* generic output */
3959      type = VGPU10_OPCODE_DCL_OUTPUT;
3960      name = VGPU10_NAME_UNDEFINED;
3961   }
3962
3963   emit_output_declaration(emit, type, index, name, final_mask, addSignature,
3964                           map_tgsi_semantic_to_sgn_name(semantic_name));
3965}
3966
3967
3968/**
3969 * Emit declaration for outputs in vertex shader.
3970 */
3971static void
3972emit_vs_output_declarations(struct svga_shader_emitter_v10 *emit)
3973{
3974   unsigned i;
3975   for (i = 0; i < emit->info.num_outputs; i++) {
3976      emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], TRUE);
3977   }
3978}
3979
3980
3981/**
3982 * A helper function to determine the writemask for an output
3983 * for the specified stream.
3984 */
3985static unsigned
3986output_writemask_for_stream(unsigned stream, ubyte output_streams,
3987                                 ubyte output_usagemask)
3988{
3989   unsigned i;
3990   unsigned writemask = 0;
3991
3992   for (i = 0; i < 4; i++) {
3993      if ((output_streams & 0x3) == stream)
3994         writemask |= (VGPU10_OPERAND_4_COMPONENT_MASK_X << i);
3995      output_streams >>= 2;
3996   }
3997   return writemask & output_usagemask;
3998}
3999
4000
4001/**
4002 * Emit declaration for outputs in geometry shader.
4003 */
4004static void
4005emit_gs_output_declarations(struct svga_shader_emitter_v10 *emit)
4006{
4007   unsigned i;
4008   VGPU10OpcodeToken0 opcode0;
4009   unsigned numStreamsSupported = 1;
4010   int s;
4011
4012   if (emit->version >= 50) {
4013      numStreamsSupported = ARRAY_SIZE(emit->info.num_stream_output_components);
4014   }
4015
4016   /**
4017    * Start emitting from the last stream first, so we end with
4018    * stream 0, so any of the auxiliary output declarations will
4019    * go to stream 0.
4020    */
4021   for (s = numStreamsSupported-1; s >= 0; s--) {
4022
4023      if (emit->info.num_stream_output_components[s] == 0)
4024         continue;
4025
4026      if (emit->version >= 50) {
4027         /* DCL_STREAM stream */
4028         begin_emit_instruction(emit);
4029         emit_opcode(emit, VGPU10_OPCODE_DCL_STREAM, FALSE);
4030         emit_stream_register(emit, s);
4031         end_emit_instruction(emit);
4032      }
4033
4034      /* emit output primitive topology declaration */
4035      opcode0.value = 0;
4036      opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY;
4037      opcode0.primitiveTopology = emit->gs.prim_topology;
4038      emit_property_instruction(emit, opcode0, 0, 0);
4039
4040      for (i = 0; i < emit->info.num_outputs; i++) {
4041         unsigned writemask;
4042
4043         /* find out the writemask for this stream */
4044         writemask = output_writemask_for_stream(s, emit->info.output_streams[i],
4045                                                 emit->output_usage_mask[i]);
4046
4047         if (writemask) {
4048            enum tgsi_semantic semantic_name =
4049               emit->info.output_semantic_name[i];
4050
4051            /* TODO: Still need to take care of a special case where a
4052             *       single varying spans across multiple output registers.
4053             */
4054            switch(semantic_name) {
4055            case TGSI_SEMANTIC_PRIMID:
4056               emit_output_declaration(emit,
4057                                       VGPU10_OPCODE_DCL_OUTPUT_SGV, i,
4058                                       VGPU10_NAME_PRIMITIVE_ID,
4059                                       VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4060                                       FALSE,
4061                                       map_tgsi_semantic_to_sgn_name(semantic_name));
4062               break;
4063            case TGSI_SEMANTIC_LAYER:
4064               emit_output_declaration(emit,
4065                                       VGPU10_OPCODE_DCL_OUTPUT_SIV, i,
4066                                       VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX,
4067                                       VGPU10_OPERAND_4_COMPONENT_MASK_X,
4068                                       FALSE,
4069                                       map_tgsi_semantic_to_sgn_name(semantic_name));
4070               break;
4071            case TGSI_SEMANTIC_VIEWPORT_INDEX:
4072               emit_output_declaration(emit,
4073                                       VGPU10_OPCODE_DCL_OUTPUT_SIV, i,
4074                                       VGPU10_NAME_VIEWPORT_ARRAY_INDEX,
4075                                       VGPU10_OPERAND_4_COMPONENT_MASK_X,
4076                                       FALSE,
4077                                       map_tgsi_semantic_to_sgn_name(semantic_name));
4078               emit->gs.viewport_index_out_index = i;
4079               break;
4080            default:
4081               emit_vertex_output_declaration(emit, i, writemask, FALSE);
4082            }
4083         }
4084      }
4085   }
4086
4087   /* For geometry shader outputs, it is possible the same register is
4088    * declared multiple times for different streams. So to avoid
4089    * redundant signature entries, geometry shader output signature is done
4090    * outside of the declaration.
4091    */
4092   struct svga_shader_signature *sgn = &emit->signature;
4093   SVGA3dDXShaderSignatureEntry *sgnEntry;
4094
4095   for (i = 0; i < emit->info.num_outputs; i++) {
4096      if (emit->output_usage_mask[i]) {
4097         enum tgsi_semantic sem_name = emit->info.output_semantic_name[i];
4098
4099         sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++];
4100         set_shader_signature_entry(sgnEntry, i,
4101                                    map_tgsi_semantic_to_sgn_name(sem_name),
4102                                    emit->output_usage_mask[i],
4103                                    SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4104                                    SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4105      }
4106   }
4107}
4108
4109
4110/**
4111 * Emit the declaration for the tess inner/outer output.
4112 * \param opcodeType either VGPU10_OPCODE_DCL_OUTPUT_SIV or _INPUT_SIV
4113 * \param operandType either VGPU10_OPERAND_TYPE_OUTPUT or _INPUT
4114 * \param name VGPU10_NAME_FINAL_*_TESSFACTOR value
4115 */
4116static void
4117emit_tesslevel_declaration(struct svga_shader_emitter_v10 *emit,
4118                           unsigned index, unsigned opcodeType,
4119                           unsigned operandType, VGPU10_SYSTEM_NAME name,
4120                           SVGA3dDXSignatureSemanticName sgnName)
4121{
4122   VGPU10OpcodeToken0 opcode0;
4123   VGPU10OperandToken0 operand0;
4124   VGPU10NameToken name_token;
4125
4126   assert(emit->version >= 50);
4127   assert(name >= VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR ||
4128          (emit->key.tcs.prim_mode == PIPE_PRIM_LINES &&
4129           name == VGPU10_NAME_UNDEFINED));
4130   assert(name <= VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
4131
4132   assert(operandType == VGPU10_OPERAND_TYPE_OUTPUT ||
4133          operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT);
4134
4135   opcode0.value = operand0.value = name_token.value = 0;
4136
4137   opcode0.opcodeType = opcodeType;
4138   operand0.operandType = operandType;
4139   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
4140   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
4141   operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
4142   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
4143   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
4144
4145   name_token.name = name;
4146   emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
4147
4148   /* Capture patch constant signature */
4149   struct svga_shader_signature *sgn = &emit->signature;
4150   SVGA3dDXShaderSignatureEntry *sgnEntry =
4151      &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
4152   set_shader_signature_entry(sgnEntry, index,
4153                              sgnName, VGPU10_OPERAND_4_COMPONENT_MASK_X,
4154                              SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4155                              SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4156}
4157
4158
4159/**
4160 * Emit output declarations for tessellation control shader.
4161 */
4162static void
4163emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit)
4164{
4165   unsigned int i;
4166   unsigned outputIndex = emit->num_outputs;
4167   struct svga_shader_signature *sgn = &emit->signature;
4168
4169   /**
4170    * Initialize patch_generic_out_count so it won't be counted twice
4171    * since this function is called twice, one for control point phase
4172    * and another time for patch constant phase.
4173    */
4174   emit->tcs.patch_generic_out_count = 0;
4175
4176   for (i = 0; i < emit->info.num_outputs; i++) {
4177      unsigned index = i;
4178      const enum tgsi_semantic semantic_name =
4179         emit->info.output_semantic_name[i];
4180
4181      switch (semantic_name) {
4182      case TGSI_SEMANTIC_TESSINNER:
4183         emit->tcs.inner.tgsi_index = i;
4184
4185         /* skip per-patch output declarations in control point phase */
4186         if (emit->tcs.control_point_phase)
4187            break;
4188
4189         emit->tcs.inner.out_index = outputIndex;
4190         switch (emit->key.tcs.prim_mode) {
4191         case PIPE_PRIM_QUADS:
4192            emit_tesslevel_declaration(emit, outputIndex++,
4193               VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4194               VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
4195               SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
4196
4197            emit_tesslevel_declaration(emit, outputIndex++,
4198               VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4199               VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
4200               SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
4201            break;
4202         case PIPE_PRIM_TRIANGLES:
4203            emit_tesslevel_declaration(emit, outputIndex++,
4204               VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4205               VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
4206               SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
4207            break;
4208         case PIPE_PRIM_LINES:
4209            break;
4210         default:
4211            debug_printf("Unsupported primitive type");
4212         }
4213         break;
4214
4215      case TGSI_SEMANTIC_TESSOUTER:
4216         emit->tcs.outer.tgsi_index = i;
4217
4218         /* skip per-patch output declarations in control point phase */
4219         if (emit->tcs.control_point_phase)
4220            break;
4221
4222         emit->tcs.outer.out_index = outputIndex;
4223         switch (emit->key.tcs.prim_mode) {
4224         case PIPE_PRIM_QUADS:
4225            for (int j = 0; j < 4; j++) {
4226               emit_tesslevel_declaration(emit, outputIndex++,
4227                  VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4228                  VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j,
4229                  SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j);
4230            }
4231            break;
4232         case PIPE_PRIM_TRIANGLES:
4233            for (int j = 0; j < 3; j++) {
4234               emit_tesslevel_declaration(emit, outputIndex++,
4235                  VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4236                  VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j,
4237                  SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j);
4238            }
4239            break;
4240         case PIPE_PRIM_LINES:
4241            for (int j = 0; j < 2; j++) {
4242               emit_tesslevel_declaration(emit, outputIndex++,
4243                  VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4244                  VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j,
4245                  SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j);
4246            }
4247            break;
4248         default:
4249            debug_printf("Unsupported primitive type");
4250         }
4251         break;
4252
4253      case TGSI_SEMANTIC_PATCH:
4254         if (emit->tcs.patch_generic_out_index == INVALID_INDEX)
4255            emit->tcs.patch_generic_out_index= i;
4256         emit->tcs.patch_generic_out_count++;
4257
4258         /* skip per-patch output declarations in control point phase */
4259         if (emit->tcs.control_point_phase)
4260            break;
4261
4262         emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, index,
4263                                 VGPU10_NAME_UNDEFINED,
4264                                 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4265                                 FALSE,
4266                                 map_tgsi_semantic_to_sgn_name(semantic_name));
4267
4268         SVGA3dDXShaderSignatureEntry *sgnEntry =
4269            &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
4270         set_shader_signature_entry(sgnEntry, index,
4271                                    map_tgsi_semantic_to_sgn_name(semantic_name),
4272                                    VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4273                                    SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4274                                    SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4275
4276         break;
4277
4278      default:
4279         /* save the starting index of control point outputs */
4280         if (emit->tcs.control_point_out_index == INVALID_INDEX)
4281            emit->tcs.control_point_out_index = i;
4282         emit->tcs.control_point_out_count++;
4283
4284         /* skip control point output declarations in patch constant phase */
4285         if (!emit->tcs.control_point_phase)
4286            break;
4287
4288         emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i],
4289                                        TRUE);
4290
4291      }
4292   }
4293
4294   if (emit->tcs.control_point_phase) {
4295      /**
4296       * Add missing control point output in control point phase.
4297       */
4298      if (emit->tcs.control_point_out_index == INVALID_INDEX) {
4299         /* use register index after tessellation factors */
4300         switch (emit->key.tcs.prim_mode) {
4301         case PIPE_PRIM_QUADS:
4302            emit->tcs.control_point_out_index = outputIndex + 6;
4303            break;
4304         case PIPE_PRIM_TRIANGLES:
4305            emit->tcs.control_point_out_index = outputIndex + 4;
4306            break;
4307         default:
4308            emit->tcs.control_point_out_index = outputIndex + 2;
4309            break;
4310         }
4311         emit->tcs.control_point_out_count++;
4312         emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV,
4313                                 emit->tcs.control_point_out_index,
4314                                 VGPU10_NAME_POSITION,
4315                                 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4316                                 TRUE,
4317                                 SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
4318
4319         /* If tcs does not output any control point output,
4320          * we can end the hull shader control point phase here
4321          * after emitting the default control point output.
4322          */
4323         emit->skip_instruction = TRUE;
4324      }
4325   }
4326   else {
4327      if (emit->tcs.outer.out_index == INVALID_INDEX) {
         /* since the TCS did not declare an outer tess level output register,
          * we declare it here for patch constant phase only.
          */
4331         emit->tcs.outer.out_index = outputIndex;
4332         if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) {
4333            for (int i = 0; i < 4; i++) {
4334               emit_tesslevel_declaration(emit, outputIndex++,
4335                  VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4336                  VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i,
4337                  SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i);
4338            }
4339         }
4340         else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) {
4341            for (int i = 0; i < 3; i++) {
4342               emit_tesslevel_declaration(emit, outputIndex++,
4343                  VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4344                  VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i,
4345                  SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i);
4346            }
4347         }
4348      }
4349
4350      if (emit->tcs.inner.out_index == INVALID_INDEX) {
         /* since the TCS did not declare an inner tess level output register,
          * we declare it here
          */
4354         emit->tcs.inner.out_index = outputIndex;
4355         if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) {
4356            emit_tesslevel_declaration(emit, outputIndex++,
4357               VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4358               VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
4359               SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
4360            emit_tesslevel_declaration(emit, outputIndex++,
4361               VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4362               VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
4363               SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
4364         }
4365         else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) {
4366            emit_tesslevel_declaration(emit, outputIndex++,
4367               VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4368               VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
4369               SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
4370         }
4371      }
4372   }
4373   emit->num_outputs = outputIndex;
4374}
4375
4376
4377/**
4378 * Emit output declarations for tessellation evaluation shader.
4379 */
4380static void
4381emit_tes_output_declarations(struct svga_shader_emitter_v10 *emit)
4382{
4383   unsigned int i;
4384
4385   for (i = 0; i < emit->info.num_outputs; i++) {
4386      emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], TRUE);
4387   }
4388}
4389
4390
4391/**
4392 * Emit the declaration for a system value input/output.
4393 */
4394static void
4395emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
4396                              enum tgsi_semantic semantic_name, unsigned index)
4397{
4398   switch (semantic_name) {
4399   case TGSI_SEMANTIC_INSTANCEID:
4400      index = alloc_system_value_index(emit, index);
4401      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
4402                             VGPU10_OPERAND_TYPE_INPUT,
4403                             VGPU10_OPERAND_INDEX_1D,
4404                             index, 1,
4405                             VGPU10_NAME_INSTANCE_ID,
4406                             VGPU10_OPERAND_4_COMPONENT,
4407                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4408                             VGPU10_OPERAND_4_COMPONENT_MASK_X,
4409                             VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4410                             map_tgsi_semantic_to_sgn_name(semantic_name));
4411      break;
4412   case TGSI_SEMANTIC_VERTEXID:
4413      emit->vs.vertex_id_sys_index = index;
4414      index = alloc_system_value_index(emit, index);
4415      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
4416                             VGPU10_OPERAND_TYPE_INPUT,
4417                             VGPU10_OPERAND_INDEX_1D,
4418                             index, 1,
4419                             VGPU10_NAME_VERTEX_ID,
4420                             VGPU10_OPERAND_4_COMPONENT,
4421                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4422                             VGPU10_OPERAND_4_COMPONENT_MASK_X,
4423                             VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4424                             map_tgsi_semantic_to_sgn_name(semantic_name));
4425      break;
4426   case TGSI_SEMANTIC_SAMPLEID:
4427      assert(emit->unit == PIPE_SHADER_FRAGMENT);
4428      emit->fs.sample_id_sys_index = index;
4429      index = alloc_system_value_index(emit, index);
4430      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_PS_SIV,
4431                             VGPU10_OPERAND_TYPE_INPUT,
4432                             VGPU10_OPERAND_INDEX_1D,
4433                             index, 1,
4434                             VGPU10_NAME_SAMPLE_INDEX,
4435                             VGPU10_OPERAND_4_COMPONENT,
4436                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4437                             VGPU10_OPERAND_4_COMPONENT_MASK_X,
4438                             VGPU10_INTERPOLATION_CONSTANT, TRUE,
4439                             map_tgsi_semantic_to_sgn_name(semantic_name));
4440      break;
4441   case TGSI_SEMANTIC_SAMPLEPOS:
4442      /* This system value contains the position of the current sample
4443       * when using per-sample shading.  We implement this by calling
4444       * the VGPU10_OPCODE_SAMPLE_POS instruction with the current sample
4445       * index as the argument.  See emit_sample_position_instructions().
4446       */
4447      assert(emit->version >= 41);
4448      emit->fs.sample_pos_sys_index = index;
4449      index = alloc_system_value_index(emit, index);
4450      break;
4451   case TGSI_SEMANTIC_INVOCATIONID:
4452      /* Note: invocation id input is mapped to different register depending
4453       * on the shader type. In GS, it will be mapped to vGSInstanceID#.
4454       * In TCS, it will be mapped to vOutputControlPointID#.
4455       * Since in both cases, the mapped name is unique rather than
4456       * just a generic input name ("v#"), so there is no need to remap
4457       * the index value.
4458       */
4459      assert(emit->unit == PIPE_SHADER_GEOMETRY ||
4460             emit->unit == PIPE_SHADER_TESS_CTRL);
4461      assert(emit->version >= 50);
4462
4463      if (emit->unit == PIPE_SHADER_GEOMETRY) {
4464         emit->gs.invocation_id_sys_index = index;
4465         emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4466                                VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID,
4467                                VGPU10_OPERAND_INDEX_0D,
4468                                index, 1,
4469                                VGPU10_NAME_UNDEFINED,
4470                                VGPU10_OPERAND_0_COMPONENT,
4471                                VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4472                                0,
4473                                VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4474                                SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4475      } else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
4476         /* The emission of the control point id will be done
4477          * in the control point phase in emit_hull_shader_control_point_phase().
4478          */
4479         emit->tcs.invocation_id_sys_index = index;
4480      }
4481      break;
4482   case TGSI_SEMANTIC_SAMPLEMASK:
4483      /* Note: the PS sample mask input has a unique name ("vCoverage#")
4484       * rather than just a generic input name ("v#") so no need to remap the
4485       * index value.
4486       */
4487      assert(emit->unit == PIPE_SHADER_FRAGMENT);
4488      assert(emit->version >= 50);
4489      emit->fs.sample_mask_in_sys_index = index;
4490      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4491                             VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK,
4492                             VGPU10_OPERAND_INDEX_0D,
4493                             index, 1,
4494                             VGPU10_NAME_UNDEFINED,
4495                             VGPU10_OPERAND_1_COMPONENT,
4496                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4497                             0,
4498                             VGPU10_INTERPOLATION_CONSTANT, TRUE,
4499                             SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4500      break;
4501   case TGSI_SEMANTIC_TESSCOORD:
4502      assert(emit->version >= 50);
4503
4504      unsigned usageMask = 0;
4505
4506      if (emit->tes.prim_mode == PIPE_PRIM_TRIANGLES) {
4507         usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XYZ;
4508      }
4509      else if (emit->tes.prim_mode == PIPE_PRIM_LINES ||
4510               emit->tes.prim_mode == PIPE_PRIM_QUADS) {
4511         usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XY;
4512      }
4513
4514      emit->tes.tesscoord_sys_index = index;
4515      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4516                             VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT,
4517                             VGPU10_OPERAND_INDEX_0D,
4518                             index, 1,
4519                             VGPU10_NAME_UNDEFINED,
4520                             VGPU10_OPERAND_4_COMPONENT,
4521                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4522                             usageMask,
4523                             VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4524                             SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4525      break;
4526   case TGSI_SEMANTIC_TESSINNER:
4527      assert(emit->version >= 50);
4528      emit->tes.inner.tgsi_index = index;
4529      break;
4530   case TGSI_SEMANTIC_TESSOUTER:
4531      assert(emit->version >= 50);
4532      emit->tes.outer.tgsi_index = index;
4533      break;
4534   case TGSI_SEMANTIC_VERTICESIN:
4535      assert(emit->unit == PIPE_SHADER_TESS_CTRL);
4536      assert(emit->version >= 50);
4537
4538      /* save the system value index */
4539      emit->tcs.vertices_per_patch_index = index;
4540      break;
4541   case TGSI_SEMANTIC_PRIMID:
4542      assert(emit->version >= 50);
4543      if (emit->unit == PIPE_SHADER_TESS_CTRL) {
4544         emit->tcs.prim_id_index = index;
4545      }
4546      else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
4547         emit->tes.prim_id_index = index;
4548         emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4549                                VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
4550                                VGPU10_OPERAND_INDEX_0D,
4551                                index, 1,
4552                                VGPU10_NAME_UNDEFINED,
4553                                VGPU10_OPERAND_0_COMPONENT,
4554                                VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4555                                0,
4556                                VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4557                                map_tgsi_semantic_to_sgn_name(semantic_name));
4558      }
4559      break;
4560   default:
4561      debug_printf("unexpected system value semantic index %u / %s\n",
4562                   semantic_name, tgsi_semantic_names[semantic_name]);
4563   }
4564}
4565
4566/**
4567 * Translate a TGSI declaration to VGPU10.
4568 */
4569static boolean
4570emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
4571                        const struct tgsi_full_declaration *decl)
4572{
4573   switch (decl->Declaration.File) {
4574   case TGSI_FILE_INPUT:
4575      /* do nothing - see emit_input_declarations() */
4576      return TRUE;
4577
4578   case TGSI_FILE_OUTPUT:
4579      assert(decl->Range.First == decl->Range.Last);
4580      emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask;
4581      return TRUE;
4582
4583   case TGSI_FILE_TEMPORARY:
4584      /* Don't declare the temps here.  Just keep track of how many
4585       * and emit the declaration later.
4586       */
4587      if (decl->Declaration.Array) {
4588         /* Indexed temporary array.  Save the start index of the array
4589          * and the size of the array.
4590          */
4591         const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS);
4592         assert(arrayID < ARRAY_SIZE(emit->temp_arrays));
4593
4594         /* Save this array so we can emit the declaration for it later */
4595         create_temp_array(emit, arrayID, decl->Range.First,
4596                           decl->Range.Last - decl->Range.First + 1,
4597                           decl->Range.First);
4598      }
4599
4600      /* for all temps, indexed or not, keep track of highest index */
4601      emit->num_shader_temps = MAX2(emit->num_shader_temps,
4602                                    decl->Range.Last + 1);
4603      return TRUE;
4604
4605   case TGSI_FILE_CONSTANT:
4606      /* Don't declare constants here.  Just keep track and emit later. */
4607      {
4608         unsigned constbuf = 0, num_consts;
4609         if (decl->Declaration.Dimension) {
4610            constbuf = decl->Dim.Index2D;
4611         }
4612         /* We throw an assertion here when, in fact, the shader should never
4613          * have linked due to constbuf index out of bounds, so we shouldn't
4614          * have reached here.
4615          */
4616         assert(constbuf < ARRAY_SIZE(emit->num_shader_consts));
4617
4618         num_consts = MAX2(emit->num_shader_consts[constbuf],
4619                           decl->Range.Last + 1);
4620
4621         if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
4622            debug_printf("Warning: constant buffer is declared to size [%u]"
4623                         " but [%u] is the limit.\n",
4624                         num_consts,
4625                         VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
4626         }
4627         /* The linker doesn't enforce the max UBO size so we clamp here */
4628         emit->num_shader_consts[constbuf] =
4629            MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
4630      }
4631      return TRUE;
4632
4633   case TGSI_FILE_IMMEDIATE:
4634      assert(!"TGSI_FILE_IMMEDIATE not handled yet!");
4635      return FALSE;
4636
4637   case TGSI_FILE_SYSTEM_VALUE:
4638      emit_system_value_declaration(emit, decl->Semantic.Name,
4639                                    decl->Range.First);
4640      return TRUE;
4641
4642   case TGSI_FILE_SAMPLER:
4643      /* Don't declare samplers here.  Just keep track and emit later. */
4644      emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1);
4645      return TRUE;
4646
4647#if 0
4648   case TGSI_FILE_RESOURCE:
4649      /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/
4650      /* XXX more, VGPU10_RETURN_TYPE_FLOAT */
4651      assert(!"TGSI_FILE_RESOURCE not handled yet");
4652      return FALSE;
4653#endif
4654
4655   case TGSI_FILE_ADDRESS:
4656      emit->num_address_regs = MAX2(emit->num_address_regs,
4657                                    decl->Range.Last + 1);
4658      return TRUE;
4659
4660   case TGSI_FILE_SAMPLER_VIEW:
4661      {
4662         unsigned unit = decl->Range.First;
4663         assert(decl->Range.First == decl->Range.Last);
4664         emit->sampler_target[unit] = decl->SamplerView.Resource;
4665
4666         /* Note: we can ignore YZW return types for now */
4667         emit->sampler_return_type[unit] = decl->SamplerView.ReturnTypeX;
4668         emit->sampler_view[unit] = TRUE;
4669      }
4670      return TRUE;
4671
4672   default:
4673      assert(!"Unexpected type of declaration");
4674      return FALSE;
4675   }
4676}
4677
4678
4679
4680/**
4681 * Emit input declarations for fragment shader.
4682 */
4683static void
4684emit_fs_input_declarations(struct svga_shader_emitter_v10 *emit)
4685{
4686   unsigned i;
4687
4688   for (i = 0; i < emit->linkage.num_inputs; i++) {
4689      enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
4690      unsigned usage_mask = emit->info.input_usage_mask[i];
4691      unsigned index = emit->linkage.input_map[i];
4692      unsigned type, interpolationMode, name;
4693      unsigned mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
4694
4695      if (usage_mask == 0)
4696         continue;  /* register is not actually used */
4697
4698      if (semantic_name == TGSI_SEMANTIC_POSITION) {
4699         /* fragment position input */
4700         type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4701         interpolationMode = VGPU10_INTERPOLATION_LINEAR;
4702         name = VGPU10_NAME_POSITION;
4703         if (usage_mask & TGSI_WRITEMASK_W) {
4704            /* we need to replace use of 'w' with '1/w' */
4705            emit->fs.fragcoord_input_index = i;
4706         }
4707      }
4708      else if (semantic_name == TGSI_SEMANTIC_FACE) {
4709         /* fragment front-facing input */
4710         type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4711         interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
4712         name = VGPU10_NAME_IS_FRONT_FACE;
4713         emit->fs.face_input_index = i;
4714      }
4715      else if (semantic_name == TGSI_SEMANTIC_PRIMID) {
4716         /* primitive ID */
4717         type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4718         interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
4719         name = VGPU10_NAME_PRIMITIVE_ID;
4720      }
4721      else if (semantic_name == TGSI_SEMANTIC_SAMPLEID) {
4722         /* sample index / ID */
4723         type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4724         interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
4725         name = VGPU10_NAME_SAMPLE_INDEX;
4726      }
4727      else if (semantic_name == TGSI_SEMANTIC_LAYER) {
4728         /* render target array index */
4729         if (emit->key.fs.layer_to_zero) {
4730            /**
4731             * The shader from the previous stage does not write to layer,
4732             * so reading the layer index in fragment shader should return 0.
4733             */
4734            emit->fs.layer_input_index = i;
4735            continue;
4736         } else {
4737            type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4738            interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
4739            name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX;
4740            mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
4741         }
4742      }
4743      else if (semantic_name == TGSI_SEMANTIC_VIEWPORT_INDEX) {
4744         /* viewport index */
4745         type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4746         interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
4747         name = VGPU10_NAME_VIEWPORT_ARRAY_INDEX;
4748         mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
4749      }
4750      else {
4751         /* general fragment input */
4752         type = VGPU10_OPCODE_DCL_INPUT_PS;
4753         interpolationMode =
4754               translate_interpolation(emit,
4755                                       emit->info.input_interpolate[i],
4756                                       emit->info.input_interpolate_loc[i]);
4757
4758         /* keeps track if flat interpolation mode is being used */
4759         emit->uses_flat_interp = emit->uses_flat_interp ||
4760               (interpolationMode == VGPU10_INTERPOLATION_CONSTANT);
4761
4762         name = VGPU10_NAME_UNDEFINED;
4763      }
4764
4765      emit_input_declaration(emit, type,
4766                             VGPU10_OPERAND_TYPE_INPUT,
4767                             VGPU10_OPERAND_INDEX_1D, index, 1,
4768                             name,
4769                             VGPU10_OPERAND_4_COMPONENT,
4770                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4771                             mask,
4772                             interpolationMode, TRUE,
4773                             map_tgsi_semantic_to_sgn_name(semantic_name));
4774   }
4775}
4776
4777
4778/**
4779 * Emit input declarations for vertex shader.
4780 */
4781static void
4782emit_vs_input_declarations(struct svga_shader_emitter_v10 *emit)
4783{
4784   unsigned i;
4785
4786   for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) {
4787      unsigned usage_mask = emit->info.input_usage_mask[i];
4788      unsigned index = i;
4789
4790      if (usage_mask == 0)
4791         continue;  /* register is not actually used */
4792
4793      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4794                             VGPU10_OPERAND_TYPE_INPUT,
4795                             VGPU10_OPERAND_INDEX_1D, index, 1,
4796                             VGPU10_NAME_UNDEFINED,
4797                             VGPU10_OPERAND_4_COMPONENT,
4798                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4799                             VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4800                             VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4801                             SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4802   }
4803}
4804
4805
4806/**
4807 * Emit input declarations for geometry shader.
4808 */
4809static void
4810emit_gs_input_declarations(struct svga_shader_emitter_v10 *emit)
4811{
4812   unsigned i;
4813
4814   for (i = 0; i < emit->info.num_inputs; i++) {
4815      enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
4816      unsigned usage_mask = emit->info.input_usage_mask[i];
4817      unsigned index = emit->linkage.input_map[i];
4818      unsigned opcodeType, operandType;
4819      unsigned numComp, selMode;
4820      unsigned name;
4821      unsigned dim;
4822
4823      if (usage_mask == 0)
4824         continue;  /* register is not actually used */
4825
4826      opcodeType = VGPU10_OPCODE_DCL_INPUT;
4827      operandType = VGPU10_OPERAND_TYPE_INPUT;
4828      numComp = VGPU10_OPERAND_4_COMPONENT;
4829      selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
4830      name = VGPU10_NAME_UNDEFINED;
4831
4832      /* all geometry shader inputs are two dimensional except
4833       * gl_PrimitiveID
4834       */
4835      dim = VGPU10_OPERAND_INDEX_2D;
4836
4837      if (semantic_name == TGSI_SEMANTIC_PRIMID) {
4838         /* Primitive ID */
4839         operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
4840         dim = VGPU10_OPERAND_INDEX_0D;
4841         numComp = VGPU10_OPERAND_0_COMPONENT;
4842         selMode = 0;
4843
4844         /* also save the register index so we can check for
4845          * primitive id when emit src register. We need to modify the
4846          * operand type, index dimension when emit primitive id src reg.
4847          */
4848          emit->gs.prim_id_index = i;
4849      }
4850      else if (semantic_name == TGSI_SEMANTIC_POSITION) {
4851         /* vertex position input */
4852         opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV;
4853         name = VGPU10_NAME_POSITION;
4854      }
4855
4856      emit_input_declaration(emit, opcodeType, operandType,
4857                             dim, index,
4858                             emit->gs.input_size,
4859                             name,
4860                             numComp, selMode,
4861                             VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4862                             VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4863                             map_tgsi_semantic_to_sgn_name(semantic_name));
4864   }
4865}
4866
4867
4868/**
4869 * Emit input declarations for tessellation control shader.
4870 */
4871static void
4872emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit)
4873{
4874   unsigned i;
4875   unsigned size = emit->key.tcs.vertices_per_patch;
4876   unsigned indicesMask = 0;
4877   boolean addSignature = TRUE;
4878
4879   if (!emit->tcs.control_point_phase)
4880      addSignature = emit->tcs.fork_phase_add_signature;
4881
4882   for (i = 0; i < emit->info.num_inputs; i++) {
4883      unsigned usage_mask = emit->info.input_usage_mask[i];
4884      unsigned index = emit->linkage.input_map[i];
4885      enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
4886      VGPU10_SYSTEM_NAME name = VGPU10_NAME_UNDEFINED;
4887      VGPU10_OPERAND_TYPE operandType = VGPU10_OPERAND_TYPE_INPUT;
4888      SVGA3dDXSignatureSemanticName sgn_name =
4889         map_tgsi_semantic_to_sgn_name(semantic_name);
4890
4891      /* indices that are declared */
4892      indicesMask |= 1 << index;
4893
4894      if (semantic_name == TGSI_SEMANTIC_POSITION ||
4895          index == emit->linkage.position_index) {
4896         /* save the input control point index for later use */
4897         emit->tcs.control_point_input_index = i;
4898      }
4899      else if (usage_mask == 0) {
4900         continue;  /* register is not actually used */
4901      }
4902      else if (semantic_name == TGSI_SEMANTIC_CLIPDIST) {
4903         /* The shadow copy is being used here. So set the signature name
4904          * to UNDEFINED.
4905          */
4906         sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
4907      }
4908
4909      /* input control points in the patch constant phase are emitted in the
4910       * vicp register rather than the v register.
4911       */
4912      if (!emit->tcs.control_point_phase) {
4913         operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
4914      }
4915
4916      /* Tessellation control shader inputs are two dimensional.
4917       * The array size is determined by the patch vertex count.
4918       */
4919      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4920                             operandType,
4921                             VGPU10_OPERAND_INDEX_2D,
4922                             index, size, name,
4923                             VGPU10_OPERAND_4_COMPONENT,
4924                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4925                             VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4926                             VGPU10_INTERPOLATION_UNDEFINED,
4927                             addSignature, sgn_name);
4928   }
4929
4930   if (emit->tcs.control_point_phase) {
4931      if (emit->tcs.control_point_input_index == INVALID_INDEX) {
4932
4933         /* Add input control point declaration if it does not exist */
4934         if ((indicesMask & (1 << emit->linkage.position_index)) == 0) {
4935            emit->linkage.input_map[emit->linkage.num_inputs] =
4936               emit->linkage.position_index;
4937            emit->tcs.control_point_input_index = emit->linkage.num_inputs++;
4938
4939            emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4940                                   VGPU10_OPERAND_TYPE_INPUT,
4941                                   VGPU10_OPERAND_INDEX_2D,
4942                                   emit->linkage.position_index,
4943                                   emit->key.tcs.vertices_per_patch,
4944                                   VGPU10_NAME_UNDEFINED,
4945                                   VGPU10_OPERAND_4_COMPONENT,
4946                                   VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4947                                   VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4948                                   VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4949                                   SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
4950         }
4951      }
4952
4953      /* Also add an address register for the indirection to the
4954       * input control points
4955       */
4956      emit->tcs.control_point_addr_index = emit->num_address_regs++;
4957   }
4958}
4959
4960
4961static void
4962emit_tessfactor_input_declarations(struct svga_shader_emitter_v10 *emit)
4963{
4964
4965   /* In tcs, tess factors are emitted as extra outputs.
4966    * The starting register index for the tess factors is captured
4967    * in the compile key.
4968    */
4969   unsigned inputIndex = emit->key.tes.tessfactor_index;
4970
4971   if (emit->tes.prim_mode == PIPE_PRIM_QUADS) {
4972      if (emit->key.tes.need_tessouter) {
4973         emit->tes.outer.in_index = inputIndex;
4974         for (int i = 0; i < 4; i++) {
4975            emit_tesslevel_declaration(emit, inputIndex++,
4976               VGPU10_OPCODE_DCL_INPUT_SIV,
4977               VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
4978               VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i,
4979               SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i);
4980         }
4981      }
4982
4983      if (emit->key.tes.need_tessinner) {
4984         emit->tes.inner.in_index = inputIndex;
4985         emit_tesslevel_declaration(emit, inputIndex++,
4986            VGPU10_OPCODE_DCL_INPUT_SIV,
4987            VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
4988            VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
4989            SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
4990
4991         emit_tesslevel_declaration(emit, inputIndex++,
4992            VGPU10_OPCODE_DCL_INPUT_SIV,
4993            VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
4994            VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
4995            SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
4996      }
4997   }
4998   else if (emit->tes.prim_mode == PIPE_PRIM_TRIANGLES) {
4999      if (emit->key.tes.need_tessouter) {
5000         emit->tes.outer.in_index = inputIndex;
5001         for (int i = 0; i < 3; i++) {
5002            emit_tesslevel_declaration(emit, inputIndex++,
5003               VGPU10_OPCODE_DCL_INPUT_SIV,
5004               VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5005               VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i,
5006               SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i);
5007         }
5008      }
5009
5010      if (emit->key.tes.need_tessinner) {
5011         emit->tes.inner.in_index = inputIndex;
5012         emit_tesslevel_declaration(emit, inputIndex++,
5013            VGPU10_OPCODE_DCL_INPUT_SIV,
5014            VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5015            VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
5016            SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
5017      }
5018   }
5019   else if (emit->tes.prim_mode == PIPE_PRIM_LINES) {
5020      if (emit->key.tes.need_tessouter) {
5021         emit->tes.outer.in_index = inputIndex;
5022         emit_tesslevel_declaration(emit, inputIndex++,
5023            VGPU10_OPCODE_DCL_INPUT_SIV,
5024            VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5025            VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR,
5026            SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR);
5027
5028         emit_tesslevel_declaration(emit, inputIndex++,
5029            VGPU10_OPCODE_DCL_INPUT_SIV,
5030            VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5031            VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR,
5032            SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
5033      }
5034   }
5035}
5036
5037
5038/**
5039 * Emit input declarations for tessellation evaluation shader.
5040 */
5041static void
5042emit_tes_input_declarations(struct svga_shader_emitter_v10 *emit)
5043{
5044   unsigned i;
5045
5046   for (i = 0; i < emit->info.num_inputs; i++) {
5047      unsigned usage_mask = emit->info.input_usage_mask[i];
5048      unsigned index = emit->linkage.input_map[i];
5049      unsigned size;
5050      const enum tgsi_semantic semantic_name =
5051         emit->info.input_semantic_name[i];
5052      SVGA3dDXSignatureSemanticName sgn_name;
5053      VGPU10_OPERAND_TYPE operandType;
5054      VGPU10_OPERAND_INDEX_DIMENSION dim;
5055
5056      if (usage_mask == 0)
5057         usage_mask = 1;  /* at least set usage mask to one */
5058
5059      if (semantic_name == TGSI_SEMANTIC_PATCH) {
5060         operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT;
5061         dim = VGPU10_OPERAND_INDEX_1D;
5062         size = 1;
5063         sgn_name = map_tgsi_semantic_to_sgn_name(semantic_name);
5064      }
5065      else {
5066         operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
5067         dim = VGPU10_OPERAND_INDEX_2D;
5068         size = emit->key.tes.vertices_per_patch;
5069         sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
5070      }
5071
5072      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, operandType,
5073                             dim, index, size, VGPU10_NAME_UNDEFINED,
5074                             VGPU10_OPERAND_4_COMPONENT,
5075                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5076                             VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5077                             VGPU10_INTERPOLATION_UNDEFINED,
5078                             TRUE, sgn_name);
5079   }
5080
5081   emit_tessfactor_input_declarations(emit);
5082
5083   /* DX spec requires DS input controlpoint/patch-constant signatures to match
5084    * the HS output controlpoint/patch-constant signatures exactly.
5085    * Add missing input declarations even if they are not used in the shader.
5086    */
5087   if (emit->linkage.num_inputs < emit->linkage.prevShader.num_outputs) {
5088      struct tgsi_shader_info *prevInfo = emit->prevShaderInfo;
5089      for (i = 0; i < emit->linkage.prevShader.num_outputs; i++) {
5090
5091          /* If a tcs output does not have a corresponding input register in
5092           * tes, add one.
5093           */
5094          if (emit->linkage.prevShader.output_map[i] >
5095              emit->linkage.input_map_max) {
5096             const enum tgsi_semantic sem_name = prevInfo->output_semantic_name[i];
5097
5098             if (sem_name == TGSI_SEMANTIC_PATCH) {
5099                emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5100                                       VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5101                                       VGPU10_OPERAND_INDEX_1D,
5102                                       i, 1, VGPU10_NAME_UNDEFINED,
5103                                       VGPU10_OPERAND_4_COMPONENT,
5104                                       VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5105                                       VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5106                                       VGPU10_INTERPOLATION_UNDEFINED,
5107                                       TRUE,
5108                                       map_tgsi_semantic_to_sgn_name(sem_name));
5109
5110             } else if (sem_name != TGSI_SEMANTIC_TESSINNER &&
5111                        sem_name != TGSI_SEMANTIC_TESSOUTER) {
5112                emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5113                                       VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT,
5114                                       VGPU10_OPERAND_INDEX_2D,
5115                                       i, emit->key.tes.vertices_per_patch,
5116                                       VGPU10_NAME_UNDEFINED,
5117                                       VGPU10_OPERAND_4_COMPONENT,
5118                                       VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5119                                       VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5120                                       VGPU10_INTERPOLATION_UNDEFINED,
5121                                       TRUE,
5122                                       map_tgsi_semantic_to_sgn_name(sem_name));
5123             }
5124             /* tessellation factors are taken care of in
5125              * emit_tessfactor_input_declarations().
5126              */
5127         }
5128      }
5129   }
5130}
5131
5132
5133/**
5134 * Emit all input declarations.
5135 */
5136static boolean
5137emit_input_declarations(struct svga_shader_emitter_v10 *emit)
5138{
5139   emit->index_range.required =
5140      emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? TRUE : FALSE;
5141
5142   switch (emit->unit) {
5143   case PIPE_SHADER_FRAGMENT:
5144      emit_fs_input_declarations(emit);
5145      break;
5146   case PIPE_SHADER_GEOMETRY:
5147      emit_gs_input_declarations(emit);
5148      break;
5149   case PIPE_SHADER_VERTEX:
5150      emit_vs_input_declarations(emit);
5151      break;
5152   case PIPE_SHADER_TESS_CTRL:
5153      emit_tcs_input_declarations(emit);
5154      break;
5155   case PIPE_SHADER_TESS_EVAL:
5156      emit_tes_input_declarations(emit);
5157      break;
5158   case PIPE_SHADER_COMPUTE:
5159      //XXX emit_cs_input_declarations(emit);
5160      break;
5161   default:
5162      assert(0);
5163   }
5164
5165   if (emit->index_range.start_index != INVALID_INDEX) {
5166      emit_index_range_declaration(emit);
5167   }
5168   emit->index_range.required = FALSE;
5169   return TRUE;
5170}
5171
5172
5173/**
5174 * Emit all output declarations.
5175 */
5176static boolean
5177emit_output_declarations(struct svga_shader_emitter_v10 *emit)
5178{
5179   emit->index_range.required =
5180      emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? TRUE : FALSE;
5181
5182   switch (emit->unit) {
5183   case PIPE_SHADER_FRAGMENT:
5184      emit_fs_output_declarations(emit);
5185      break;
5186   case PIPE_SHADER_GEOMETRY:
5187      emit_gs_output_declarations(emit);
5188      break;
5189   case PIPE_SHADER_VERTEX:
5190      emit_vs_output_declarations(emit);
5191      break;
5192   case PIPE_SHADER_TESS_CTRL:
5193      emit_tcs_output_declarations(emit);
5194      break;
5195   case PIPE_SHADER_TESS_EVAL:
5196      emit_tes_output_declarations(emit);
5197      break;
5198   case PIPE_SHADER_COMPUTE:
5199      //XXX emit_cs_output_declarations(emit);
5200      break;
5201   default:
5202      assert(0);
5203   }
5204
5205   if (emit->vposition.so_index != INVALID_INDEX &&
5206       emit->vposition.out_index != INVALID_INDEX) {
5207
5208      assert(emit->unit != PIPE_SHADER_FRAGMENT);
5209
5210      /* Emit the declaration for the non-adjusted vertex position
5211       * for stream output purpose
5212       */
5213      emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5214                              emit->vposition.so_index,
5215                              VGPU10_NAME_UNDEFINED,
5216                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5217                              TRUE,
5218                              SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
5219   }
5220
5221   if (emit->clip_dist_so_index != INVALID_INDEX &&
5222       emit->clip_dist_out_index != INVALID_INDEX) {
5223
5224      assert(emit->unit != PIPE_SHADER_FRAGMENT);
5225
5226      /* Emit the declaration for the clip distance shadow copy which
5227       * will be used for stream output purpose and for clip distance
5228       * varying variable
5229       */
5230      emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5231                              emit->clip_dist_so_index,
5232                              VGPU10_NAME_UNDEFINED,
5233                              emit->output_usage_mask[emit->clip_dist_out_index],
5234                              TRUE,
5235                              SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
5236
5237      if (emit->info.num_written_clipdistance > 4) {
5238         /* for the second clip distance register, each handles 4 planes */
5239         emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5240                                 emit->clip_dist_so_index + 1,
5241                                 VGPU10_NAME_UNDEFINED,
5242                                 emit->output_usage_mask[emit->clip_dist_out_index+1],
5243                                 TRUE,
5244                                 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
5245      }
5246   }
5247
5248   if (emit->index_range.start_index != INVALID_INDEX) {
5249      emit_index_range_declaration(emit);
5250   }
5251   emit->index_range.required = FALSE;
5252   return TRUE;
5253}
5254
5255
5256/**
5257 * A helper function to create a temporary indexable array
5258 * and initialize the corresponding entries in the temp_map array.
5259 */
5260static void
5261create_temp_array(struct svga_shader_emitter_v10 *emit,
5262                  unsigned arrayID, unsigned first, unsigned count,
5263                  unsigned startIndex)
5264{
5265   unsigned i, tempIndex = startIndex;
5266
5267   emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1);
5268   assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS);
5269   emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS);
5270
5271   emit->temp_arrays[arrayID].start = first;
5272   emit->temp_arrays[arrayID].size = count;
5273
5274   /* Fill in the temp_map entries for this temp array */
5275   for (i = 0; i < count; i++, tempIndex++) {
5276      emit->temp_map[tempIndex].arrayId = arrayID;
5277      emit->temp_map[tempIndex].index = i;
5278   }
5279}
5280
5281
/**
 * Emit the declaration for the temporary registers.
 *
 * The function works in three steps:
 *  1. Starting from the shader's own temps (emit->num_shader_temps), it
 *     hands out additional temp indexes for driver-internal uses
 *     (internal temps, clip distance/vertex, vertex id, prescale,
 *     per-stage helpers, address registers) and records each index in
 *     the matching emit-> field.  The order of these allocations
 *     determines the register numbers.
 *  2. It renumbers all temps that are not part of an indexable array
 *     (temp_map[].arrayId == 0) into one consecutive series.
 *  3. It emits one DCL_TEMPS for the ordinary temps and one
 *     DCL_INDEXABLE_TEMP per non-empty temp array, then checks the grand
 *     total against the register limit.
 *
 * \return TRUE always
 */
static boolean
emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
{
   unsigned total_temps, reg, i;

   total_temps = emit->num_shader_temps;

   /* If there is indirect access to non-indexable temps in the shader,
    * convert those temps to indexable temps. This works around a bug
    * in the GLSL->TGSI translator exposed in piglit test
    * glsl-1.20/execution/fs-const-array-of-struct-of-array.shader_test.
    * Internal temps added by the driver remain as non-indexable temps.
    */
   if ((emit->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) &&
       emit->num_temp_arrays == 0) {
      create_temp_array(emit, 1, 0, total_temps, 0);
   }

   /* Allocate extra temps for specially-implemented instructions,
    * such as LIT.
    */
   total_temps += MAX_INTERNAL_TEMPS;

   /* Allocate extra temps for clip distance or clip vertex.
    */
   if (emit->clip_mode == CLIP_DISTANCE) {
      /* We need to write the clip distance to a temporary register
       * first. Then it will be copied to the shadow copy for
       * the clip distance varying variable and stream output purpose.
       * It will also be copied to the actual CLIPDIST register
       * according to the enabled clip planes
       */
      emit->clip_dist_tmp_index = total_temps++;
      if (emit->info.num_written_clipdistance > 4)
         total_temps++; /* second clip register */
   }
   else if (emit->clip_mode == CLIP_VERTEX && emit->key.last_vertex_stage) {
      /* If the current shader is in the last vertex processing stage,
       * We need to convert the TGSI CLIPVERTEX output to one or more
       * clip distances.  Allocate a temp reg for the clipvertex here.
       */
      assert(emit->info.writes_clipvertex > 0);
      emit->clip_vertex_tmp_index = total_temps;
      total_temps++;
   }

   if (emit->info.uses_vertexid) {
      assert(emit->unit == PIPE_SHADER_VERTEX);
      emit->vs.vertex_id_tmp_index = total_temps++;
   }

   /* Per-stage temps.  Exactly one of the branches below applies. */
   if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) {
      /* Temp that holds the vertex position */
      if (emit->vposition.need_prescale || emit->key.vs.undo_viewport ||
          emit->key.clip_plane_enable ||
          emit->vposition.so_index != INVALID_INDEX) {
         emit->vposition.tmp_index = total_temps;
         total_temps += 1;
      }

      if (emit->vposition.need_prescale) {
         emit->vposition.prescale_scale_index = total_temps++;
         emit->vposition.prescale_trans_index = total_temps++;
      }

      if (emit->unit == PIPE_SHADER_VERTEX) {
         /* One temp per vertex attribute that has any adjustment bit set */
         unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 |
                                 emit->key.vs.adjust_attrib_itof |
                                 emit->key.vs.adjust_attrib_utof |
                                 emit->key.vs.attrib_is_bgra |
                                 emit->key.vs.attrib_puint_to_snorm |
                                 emit->key.vs.attrib_puint_to_uscaled |
                                 emit->key.vs.attrib_puint_to_sscaled);
         while (attrib_mask) {
            unsigned index = u_bit_scan(&attrib_mask);
            emit->vs.adjusted_input[index] = total_temps++;
         }
      }
      else if (emit->unit == PIPE_SHADER_GEOMETRY) {
         if (emit->key.gs.writes_viewport_index)
            emit->gs.viewport_index_tmp_index = total_temps++;
      }
   }
   else if (emit->unit == PIPE_SHADER_FRAGMENT) {
      if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS ||
          emit->key.fs.write_color0_to_n_cbufs > 1) {
         /* Allocate a temp to hold the output color */
         emit->fs.color_tmp_index = total_temps;
         total_temps += 1;
      }

      if (emit->fs.face_input_index != INVALID_INDEX) {
         /* Allocate a temp for the +/-1 face register */
         emit->fs.face_tmp_index = total_temps;
         total_temps += 1;
      }

      if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
         /* Allocate a temp for modified fragment position register */
         emit->fs.fragcoord_tmp_index = total_temps;
         total_temps += 1;
      }

      if (emit->fs.sample_pos_sys_index != INVALID_INDEX) {
         /* Allocate a temp for the sample position */
         emit->fs.sample_pos_tmp_index = total_temps++;
      }
   }
   else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
      if (emit->vposition.need_prescale) {
         emit->vposition.tmp_index = total_temps++;
         emit->vposition.prescale_scale_index = total_temps++;
         emit->vposition.prescale_trans_index = total_temps++;
      }

      /* NOTE(review): these two tests check tgsi_index for non-zero,
       * whereas the TESS_CTRL branch below compares its tgsi_index
       * fields against INVALID_INDEX -- confirm which sentinel the tes
       * fields are initialized with.
       */
      if (emit->tes.inner.tgsi_index) {
         emit->tes.inner.temp_index = total_temps;
         total_temps += 1;
      }

      if (emit->tes.outer.tgsi_index) {
         emit->tes.outer.temp_index = total_temps;
         total_temps += 1;
      }
   }
   else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
      /* Tess level temps are only allocated outside the control point
       * phase.
       */
      if (emit->tcs.inner.tgsi_index != INVALID_INDEX) {
         if (!emit->tcs.control_point_phase) {
            emit->tcs.inner.temp_index = total_temps;
            total_temps += 1;
         }
      }
      if (emit->tcs.outer.tgsi_index != INVALID_INDEX) {
         if (!emit->tcs.control_point_phase) {
            emit->tcs.outer.temp_index = total_temps;
            total_temps += 1;
         }
      }

      if (emit->tcs.control_point_phase &&
          emit->info.reads_pervertex_outputs) {
         emit->tcs.control_point_tmp_index = total_temps;
         total_temps += emit->tcs.control_point_out_count;
      }
      else if (!emit->tcs.control_point_phase &&
               emit->info.reads_perpatch_outputs) {

         /* If there is indirect access to the patch constant outputs
          * in the control point phase, then an indexable temporary array
          * will be created for these patch constant outputs.
          * Note, indirect access can only be applicable to
          * patch constant outputs in the control point phase.
          */
         if (emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
            /* Use the next free array ID; ID 0 is never used for an array */
            unsigned arrayID =
               emit->num_temp_arrays ? emit->num_temp_arrays : 1;
            create_temp_array(emit, arrayID, 0,
                              emit->tcs.patch_generic_out_count, total_temps);
         }
         emit->tcs.patch_generic_tmp_index = total_temps;
         total_temps += emit->tcs.patch_generic_out_count;
      }

      emit->tcs.invocation_id_tmp_index = total_temps++;
   }

   /* One temp per address register */
   for (i = 0; i < emit->num_address_regs; i++) {
      emit->address_reg_index[i] = total_temps++;
   }

   /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10
    * temp indexes.  Basically, we compact all the non-array temp register
    * indexes into a consecutive series.
    *
    * Before, we may have some TGSI declarations like:
    *   DCL TEMP[0..1], LOCAL
    *   DCL TEMP[2..4], ARRAY(1), LOCAL
    *   DCL TEMP[5..7], ARRAY(2), LOCAL
    *   plus, some extra temps, like TEMP[8], TEMP[9] for misc things
    *
    * After, we'll have a map like this:
    *   temp_map[0] = { array 0, index 0 }
    *   temp_map[1] = { array 0, index 1 }
    *   temp_map[2] = { array 1, index 0 }
    *   temp_map[3] = { array 1, index 1 }
    *   temp_map[4] = { array 1, index 2 }
    *   temp_map[5] = { array 2, index 0 }
    *   temp_map[6] = { array 2, index 1 }
    *   temp_map[7] = { array 2, index 2 }
    *   temp_map[8] = { array 0, index 2 }
    *   temp_map[9] = { array 0, index 3 }
    *
    * We'll declare two arrays of 3 elements, plus a set of four non-indexed
    * temps numbered 0..3
    *
    * Any time we emit a temporary register index, we'll have to use the
    * temp_map[] table to convert the TGSI index to the VGPU10 index.
    *
    * Finally, we recompute the total_temps value here.
    */
   reg = 0;
   for (i = 0; i < total_temps; i++) {
      if (emit->temp_map[i].arrayId == 0) {
         emit->temp_map[i].index = reg++;
      }
   }

   /* Debug dump of the final map; disabled by default */
   if (0) {
      debug_printf("total_temps %u\n", total_temps);
      for (i = 0; i < total_temps; i++) {
         debug_printf("temp %u ->  array %u  index %u\n",
                      i, emit->temp_map[i].arrayId, emit->temp_map[i].index);
      }
   }

   /* From here on total_temps counts only the non-array temps; the array
    * sizes are added back while the arrays are declared below.
    */
   total_temps = reg;

   /* Emit declaration of ordinary temp registers */
   if (total_temps > 0) {
      VGPU10OpcodeToken0 opcode0;

      opcode0.value = 0;
      opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS;

      begin_emit_instruction(emit);
      emit_dword(emit, opcode0.value);
      emit_dword(emit, total_temps);
      end_emit_instruction(emit);
   }

   /* Emit declarations for indexable temp arrays.  Skip 0th entry since
    * it's unused.
    */
   for (i = 1; i < emit->num_temp_arrays; i++) {
      unsigned num_temps = emit->temp_arrays[i].size;

      if (num_temps > 0) {
         VGPU10OpcodeToken0 opcode0;

         opcode0.value = 0;
         opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP;

         begin_emit_instruction(emit);
         emit_dword(emit, opcode0.value);
         emit_dword(emit, i); /* which array */
         emit_dword(emit, num_temps);
         emit_dword(emit, 4); /* num components */
         end_emit_instruction(emit);

         total_temps += num_temps;
      }
   }

   /* Check that the grand total of all regular and indexed temps is
    * under the limit.
    */
   check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1);

   return TRUE;
}
5544
5545
5546static boolean
5547emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
5548{
5549   VGPU10OpcodeToken0 opcode0;
5550   VGPU10OperandToken0 operand0;
5551   unsigned total_consts, i;
5552
5553   opcode0.value = 0;
5554   opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER;
5555   opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED;
5556   /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */
5557
5558   operand0.value = 0;
5559   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
5560   operand0.indexDimension = VGPU10_OPERAND_INDEX_2D;
5561   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5562   operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5563   operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
5564   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
5565   operand0.swizzleX = 0;
5566   operand0.swizzleY = 1;
5567   operand0.swizzleZ = 2;
5568   operand0.swizzleW = 3;
5569
5570   /**
5571    * Emit declaration for constant buffer [0].  We also allocate
5572    * room for the extra constants here.
5573    */
5574   total_consts = emit->num_shader_consts[0];
5575
5576   /* Now, allocate constant slots for the "extra" constants.
5577    * Note: it's critical that these extra constant locations
5578    * exactly match what's emitted by the "extra" constants code
5579    * in svga_state_constants.c
5580    */
5581
5582   /* Vertex position scale/translation */
5583   if (emit->vposition.need_prescale) {
5584      emit->vposition.prescale_cbuf_index = total_consts;
5585      total_consts += (2 * emit->vposition.num_prescale);
5586   }
5587
5588   if (emit->unit == PIPE_SHADER_VERTEX) {
5589      if (emit->key.vs.undo_viewport) {
5590         emit->vs.viewport_index = total_consts++;
5591      }
5592      if (emit->key.vs.need_vertex_id_bias) {
5593         emit->vs.vertex_id_bias_index = total_consts++;
5594      }
5595   }
5596
5597   /* user-defined clip planes */
5598   if (emit->key.clip_plane_enable) {
5599      unsigned n = util_bitcount(emit->key.clip_plane_enable);
5600      assert(emit->unit != PIPE_SHADER_FRAGMENT &&
5601             emit->unit != PIPE_SHADER_COMPUTE);
5602      for (i = 0; i < n; i++) {
5603         emit->clip_plane_const[i] = total_consts++;
5604      }
5605   }
5606
5607   for (i = 0; i < emit->num_samplers; i++) {
5608
5609      if (emit->key.tex[i].sampler_view) {
5610         /* Texcoord scale factors for RECT textures */
5611         if (emit->key.tex[i].unnormalized) {
5612            emit->texcoord_scale_index[i] = total_consts++;
5613         }
5614
5615         /* Texture buffer sizes */
5616         if (emit->key.tex[i].target == PIPE_BUFFER) {
5617            emit->texture_buffer_size_index[i] = total_consts++;
5618         }
5619      }
5620   }
5621
5622   if (total_consts > 0) {
5623      begin_emit_instruction(emit);
5624      emit_dword(emit, opcode0.value);
5625      emit_dword(emit, operand0.value);
5626      emit_dword(emit, 0);  /* which const buffer slot */
5627      emit_dword(emit, total_consts);
5628      end_emit_instruction(emit);
5629   }
5630
5631   /* Declare remaining constant buffers (UBOs) */
5632   for (i = 1; i < ARRAY_SIZE(emit->num_shader_consts); i++) {
5633      if (emit->num_shader_consts[i] > 0) {
5634         begin_emit_instruction(emit);
5635         emit_dword(emit, opcode0.value);
5636         emit_dword(emit, operand0.value);
5637         emit_dword(emit, i);  /* which const buffer slot */
5638         emit_dword(emit, emit->num_shader_consts[i]);
5639         end_emit_instruction(emit);
5640      }
5641   }
5642
5643   return TRUE;
5644}
5645
5646
5647/**
5648 * Emit declarations for samplers.
5649 */
5650static boolean
5651emit_sampler_declarations(struct svga_shader_emitter_v10 *emit)
5652{
5653   unsigned i;
5654
5655   for (i = 0; i < emit->num_samplers; i++) {
5656      VGPU10OpcodeToken0 opcode0;
5657      VGPU10OperandToken0 operand0;
5658
5659      opcode0.value = 0;
5660      opcode0.opcodeType = VGPU10_OPCODE_DCL_SAMPLER;
5661      opcode0.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT;
5662
5663      operand0.value = 0;
5664      operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
5665      operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
5666      operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
5667      operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5668
5669      begin_emit_instruction(emit);
5670      emit_dword(emit, opcode0.value);
5671      emit_dword(emit, operand0.value);
5672      emit_dword(emit, i);
5673      end_emit_instruction(emit);
5674   }
5675
5676   return TRUE;
5677}
5678
5679
5680/**
5681 * Translate PIPE_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x.
5682 */
5683static unsigned
5684pipe_texture_to_resource_dimension(enum tgsi_texture_type target,
5685                                   unsigned num_samples,
5686                                   boolean is_array)
5687{
5688   switch (target) {
5689   case PIPE_BUFFER:
5690      return VGPU10_RESOURCE_DIMENSION_BUFFER;
5691   case PIPE_TEXTURE_1D:
5692      return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
5693   case PIPE_TEXTURE_2D:
5694      return num_samples > 2 ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS :
5695         VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5696   case PIPE_TEXTURE_RECT:
5697      return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5698   case PIPE_TEXTURE_3D:
5699      return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
5700   case PIPE_TEXTURE_CUBE:
5701      return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
5702   case PIPE_TEXTURE_1D_ARRAY:
5703      return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY
5704         : VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
5705   case PIPE_TEXTURE_2D_ARRAY:
5706      if (num_samples > 2 && is_array)
5707         return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY;
5708      else if (is_array)
5709         return VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY;
5710      else
5711         return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5712   case PIPE_TEXTURE_CUBE_ARRAY:
5713         return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY :
5714                         VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
5715   default:
5716      assert(!"Unexpected resource type");
5717      return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5718   }
5719}
5720
5721
5722/**
5723 * Translate TGSI_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x.
5724 */
5725static unsigned
5726tgsi_texture_to_resource_dimension(enum tgsi_texture_type target,
5727                                   unsigned num_samples,
5728                                   boolean is_array)
5729{
5730   if (target == TGSI_TEXTURE_2D_MSAA && num_samples < 2) {
5731      target = TGSI_TEXTURE_2D;
5732   }
5733   else if (target == TGSI_TEXTURE_2D_ARRAY_MSAA && num_samples < 2) {
5734      target = TGSI_TEXTURE_2D_ARRAY;
5735   }
5736
5737   switch (target) {
5738   case TGSI_TEXTURE_BUFFER:
5739      return VGPU10_RESOURCE_DIMENSION_BUFFER;
5740   case TGSI_TEXTURE_1D:
5741      return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
5742   case TGSI_TEXTURE_2D:
5743   case TGSI_TEXTURE_RECT:
5744      return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5745   case TGSI_TEXTURE_3D:
5746      return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
5747   case TGSI_TEXTURE_CUBE:
5748   case TGSI_TEXTURE_SHADOWCUBE:
5749      return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
5750   case TGSI_TEXTURE_SHADOW1D:
5751      return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
5752   case TGSI_TEXTURE_SHADOW2D:
5753   case TGSI_TEXTURE_SHADOWRECT:
5754      return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5755   case TGSI_TEXTURE_1D_ARRAY:
5756   case TGSI_TEXTURE_SHADOW1D_ARRAY:
5757      return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY
5758         : VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
5759   case TGSI_TEXTURE_2D_ARRAY:
5760   case TGSI_TEXTURE_SHADOW2D_ARRAY:
5761      return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY
5762         : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5763   case TGSI_TEXTURE_2D_MSAA:
5764      return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
5765   case TGSI_TEXTURE_2D_ARRAY_MSAA:
5766      return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY
5767         : VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
5768   case TGSI_TEXTURE_CUBE_ARRAY:
5769   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
5770      return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY
5771         : VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
5772   default:
5773      assert(!"Unexpected resource type");
5774      return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5775   }
5776}
5777
5778
5779/**
5780 * Given a tgsi_return_type, return true iff it is an integer type.
5781 */
5782static boolean
5783is_integer_type(enum tgsi_return_type type)
5784{
5785   switch (type) {
5786      case TGSI_RETURN_TYPE_SINT:
5787      case TGSI_RETURN_TYPE_UINT:
5788         return TRUE;
5789      case TGSI_RETURN_TYPE_FLOAT:
5790      case TGSI_RETURN_TYPE_UNORM:
5791      case TGSI_RETURN_TYPE_SNORM:
5792         return FALSE;
5793      case TGSI_RETURN_TYPE_COUNT:
5794      default:
5795         assert(!"is_integer_type: Unknown tgsi_return_type");
5796         return FALSE;
5797   }
5798}
5799
5800
5801/**
5802 * Emit declarations for resources.
5803 * XXX When we're sure that all TGSI shaders will be generated with
5804 * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may
5805 * rework this code.
5806 */
5807static boolean
5808emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
5809{
5810   unsigned i;
5811
5812   /* Emit resource decl for each sampler */
5813   for (i = 0; i < emit->num_samplers; i++) {
5814      VGPU10OpcodeToken0 opcode0;
5815      VGPU10OperandToken0 operand0;
5816      VGPU10ResourceReturnTypeToken return_type;
5817      VGPU10_RESOURCE_RETURN_TYPE rt;
5818
5819      opcode0.value = 0;
5820      opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;
5821      if (emit->sampler_view[i] || !emit->key.tex[i].sampler_view) {
5822         opcode0.resourceDimension =
5823            tgsi_texture_to_resource_dimension(emit->sampler_target[i],
5824                                               emit->key.tex[i].num_samples,
5825                                               emit->key.tex[i].is_array);
5826      }
5827      else {
5828         opcode0.resourceDimension =
5829            pipe_texture_to_resource_dimension(emit->key.tex[i].target,
5830                                               emit->key.tex[i].num_samples,
5831                                               emit->key.tex[i].is_array);
5832      }
5833      opcode0.sampleCount = emit->key.tex[i].num_samples;
5834      operand0.value = 0;
5835      operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
5836      operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
5837      operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
5838      operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5839
5840#if 1
5841      /* convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x */
5842      STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1);
5843      STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1);
5844      STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1);
5845      STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1);
5846      STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1);
5847      assert(emit->sampler_return_type[i] <= TGSI_RETURN_TYPE_FLOAT);
5848      if (emit->sampler_view[i] || !emit->key.tex[i].sampler_view) {
5849         rt = emit->sampler_return_type[i] + 1;
5850      }
5851      else {
5852         rt = emit->key.tex[i].sampler_return_type;
5853      }
5854#else
5855      switch (emit->sampler_return_type[i]) {
5856         case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break;
5857         case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break;
5858         case TGSI_RETURN_TYPE_SINT:  rt = VGPU10_RETURN_TYPE_SINT;  break;
5859         case TGSI_RETURN_TYPE_UINT:  rt = VGPU10_RETURN_TYPE_UINT;  break;
5860         case TGSI_RETURN_TYPE_FLOAT: rt = VGPU10_RETURN_TYPE_FLOAT; break;
5861         case TGSI_RETURN_TYPE_COUNT:
5862         default:
5863            rt = VGPU10_RETURN_TYPE_FLOAT;
5864            assert(!"emit_resource_declarations: Unknown tgsi_return_type");
5865      }
5866#endif
5867
5868      return_type.value = 0;
5869      return_type.component0 = rt;
5870      return_type.component1 = rt;
5871      return_type.component2 = rt;
5872      return_type.component3 = rt;
5873
5874      begin_emit_instruction(emit);
5875      emit_dword(emit, opcode0.value);
5876      emit_dword(emit, operand0.value);
5877      emit_dword(emit, i);
5878      emit_dword(emit, return_type.value);
5879      end_emit_instruction(emit);
5880   }
5881
5882   return TRUE;
5883}
5884
5885/**
5886 * Emit instruction with n=1, 2 or 3 source registers.
5887 */
5888static void
5889emit_instruction_opn(struct svga_shader_emitter_v10 *emit,
5890                     unsigned opcode,
5891                     const struct tgsi_full_dst_register *dst,
5892                     const struct tgsi_full_src_register *src1,
5893                     const struct tgsi_full_src_register *src2,
5894                     const struct tgsi_full_src_register *src3,
5895                     boolean saturate, bool precise)
5896{
5897   begin_emit_instruction(emit);
5898   emit_opcode_precise(emit, opcode, saturate, precise);
5899   emit_dst_register(emit, dst);
5900   emit_src_register(emit, src1);
5901   if (src2) {
5902      emit_src_register(emit, src2);
5903   }
5904   if (src3) {
5905      emit_src_register(emit, src3);
5906   }
5907   end_emit_instruction(emit);
5908}
5909
5910static void
5911emit_instruction_op1(struct svga_shader_emitter_v10 *emit,
5912                     unsigned opcode,
5913                     const struct tgsi_full_dst_register *dst,
5914                     const struct tgsi_full_src_register *src)
5915{
5916   emit_instruction_opn(emit, opcode, dst, src, NULL, NULL, FALSE, FALSE);
5917}
5918
5919static void
5920emit_instruction_op2(struct svga_shader_emitter_v10 *emit,
5921                     VGPU10_OPCODE_TYPE opcode,
5922                     const struct tgsi_full_dst_register *dst,
5923                     const struct tgsi_full_src_register *src1,
5924                     const struct tgsi_full_src_register *src2)
5925{
5926   emit_instruction_opn(emit, opcode, dst, src1, src2, NULL, FALSE, FALSE);
5927}
5928
5929static void
5930emit_instruction_op3(struct svga_shader_emitter_v10 *emit,
5931                     VGPU10_OPCODE_TYPE opcode,
5932                     const struct tgsi_full_dst_register *dst,
5933                     const struct tgsi_full_src_register *src1,
5934                     const struct tgsi_full_src_register *src2,
5935                     const struct tgsi_full_src_register *src3)
5936{
5937   emit_instruction_opn(emit, opcode, dst, src1, src2, src3, FALSE, FALSE);
5938}
5939
5940static void
5941emit_instruction_op0(struct svga_shader_emitter_v10 *emit,
5942                     VGPU10_OPCODE_TYPE opcode)
5943{
5944   begin_emit_instruction(emit);
5945   emit_opcode(emit, opcode, FALSE);
5946   end_emit_instruction(emit);
5947}
5948
5949/**
5950 * Tessellation inner/outer levels needs to be store into its
5951 * appropriate registers depending on prim_mode.
5952 */
5953static void
5954store_tesslevels(struct svga_shader_emitter_v10 *emit)
5955{
5956   int i;
5957
5958   /* tessellation levels are required input/out in hull shader.
5959    * emitting the inner/outer tessellation levels, either from
5960    * values provided in tcs or fallback default values which is 1.0
5961    */
5962   if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) {
5963      struct tgsi_full_src_register temp_src;
5964
5965      if (emit->tcs.inner.tgsi_index != INVALID_INDEX)
5966         temp_src = make_src_temp_reg(emit->tcs.inner.temp_index);
5967      else
5968         temp_src = make_immediate_reg_float(emit, 1.0f);
5969
5970      for (i = 0; i < 2; i++) {
5971         struct tgsi_full_src_register src =
5972            scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
5973         struct tgsi_full_dst_register dst =
5974            make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index + i);
5975         dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
5976         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
5977      }
5978
5979      if (emit->tcs.outer.tgsi_index != INVALID_INDEX)
5980         temp_src = make_src_temp_reg(emit->tcs.outer.temp_index);
5981      else
5982         temp_src = make_immediate_reg_float(emit, 1.0f);
5983
5984      for (i = 0; i < 4; i++) {
5985         struct tgsi_full_src_register src =
5986            scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
5987         struct tgsi_full_dst_register dst =
5988            make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i);
5989         dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
5990         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
5991      }
5992   }
5993   else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) {
5994      struct tgsi_full_src_register temp_src;
5995
5996      if (emit->tcs.inner.tgsi_index != INVALID_INDEX)
5997         temp_src = make_src_temp_reg(emit->tcs.inner.temp_index);
5998      else
5999         temp_src = make_immediate_reg_float(emit, 1.0f);
6000
6001      struct tgsi_full_src_register src =
6002         scalar_src(&temp_src, TGSI_SWIZZLE_X);
6003      struct tgsi_full_dst_register dst =
6004         make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index);
6005      dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6006      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6007
6008      if (emit->tcs.outer.tgsi_index != INVALID_INDEX)
6009         temp_src = make_src_temp_reg(emit->tcs.outer.temp_index);
6010      else
6011         temp_src = make_immediate_reg_float(emit, 1.0f);
6012
6013      for (i = 0; i < 3; i++) {
6014         struct tgsi_full_src_register src =
6015            scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
6016         struct tgsi_full_dst_register dst =
6017            make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i);
6018         dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6019         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6020      }
6021   }
6022   else if (emit->key.tcs.prim_mode ==  PIPE_PRIM_LINES) {
6023      if (emit->tcs.outer.tgsi_index != INVALID_INDEX) {
6024         struct tgsi_full_src_register temp_src =
6025            make_src_temp_reg(emit->tcs.outer.temp_index);
6026         for (i = 0; i < 2; i++) {
6027            struct tgsi_full_src_register src =
6028               scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
6029            struct tgsi_full_dst_register dst =
6030               make_dst_reg(TGSI_FILE_OUTPUT,
6031                            emit->tcs.outer.out_index + i);
6032            dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6033            emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6034         }
6035      }
6036   }
6037   else {
6038      debug_printf("Unsupported primitive type");
6039   }
6040}
6041
6042
6043/**
6044 * Emit the actual clip distance instructions to be used for clipping
6045 * by copying the clip distance from the temporary registers to the
6046 * CLIPDIST registers written with the enabled planes mask.
6047 * Also copy the clip distance from the temporary to the clip distance
6048 * shadow copy register which will be referenced by the input shader
6049 */
6050static void
6051emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit)
6052{
6053   struct tgsi_full_src_register tmp_clip_dist_src;
6054   struct tgsi_full_dst_register clip_dist_dst;
6055
6056   unsigned i;
6057   unsigned clip_plane_enable = emit->key.clip_plane_enable;
6058   unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index;
6059   int num_written_clipdist = emit->info.num_written_clipdistance;
6060
6061   assert(emit->clip_dist_out_index != INVALID_INDEX);
6062   assert(emit->clip_dist_tmp_index != INVALID_INDEX);
6063
6064   /**
6065    * Temporary reset the temporary clip dist register index so
6066    * that the copy to the real clip dist register will not
6067    * attempt to copy to the temporary register again
6068    */
6069   emit->clip_dist_tmp_index = INVALID_INDEX;
6070
6071   for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) {
6072
6073      tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i);
6074
6075      /**
6076       * copy to the shadow copy for use by varying variable and
6077       * stream output. All clip distances
6078       * will be written regardless of the enabled clipping planes.
6079       */
6080      clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
6081                                   emit->clip_dist_so_index + i);
6082
6083      /* MOV clip_dist_so, tmp_clip_dist */
6084      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
6085                           &tmp_clip_dist_src);
6086
6087      /**
6088       * copy those clip distances to enabled clipping planes
6089       * to CLIPDIST registers for clipping
6090       */
6091      if (clip_plane_enable & 0xf) {
6092         clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
6093                                      emit->clip_dist_out_index + i);
6094         clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf);
6095
6096         /* MOV CLIPDIST, tmp_clip_dist */
6097         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
6098                              &tmp_clip_dist_src);
6099      }
6100      /* four clip planes per clip register */
6101      clip_plane_enable >>= 4;
6102   }
6103   /**
6104    * set the temporary clip dist register index back to the
6105    * temporary index for the next vertex
6106    */
6107   emit->clip_dist_tmp_index = clip_dist_tmp_index;
6108}
6109
6110/* Declare clip distance output registers for user-defined clip planes
6111 * or the TGSI_CLIPVERTEX output.
6112 */
6113static void
6114emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit)
6115{
6116   unsigned num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
6117   unsigned index = emit->num_outputs;
6118   unsigned plane_mask;
6119
6120   assert(emit->unit != PIPE_SHADER_FRAGMENT);
6121   assert(num_clip_planes <= 8);
6122
6123   if (emit->clip_mode != CLIP_LEGACY &&
6124       emit->clip_mode != CLIP_VERTEX) {
6125      return;
6126   }
6127
6128   if (num_clip_planes == 0)
6129      return;
6130
6131   /* Convert clip vertex to clip distances only in the last vertex stage */
6132   if (!emit->key.last_vertex_stage)
6133      return;
6134
6135   /* Declare one or two clip output registers.  The number of components
6136    * in the mask reflects the number of clip planes.  For example, if 5
6137    * clip planes are needed, we'll declare outputs similar to:
6138    * dcl_output_siv o2.xyzw, clip_distance
6139    * dcl_output_siv o3.x, clip_distance
6140    */
6141   emit->clip_dist_out_index = index; /* save the starting clip dist reg index */
6142
6143   plane_mask = (1 << num_clip_planes) - 1;
6144   if (plane_mask & 0xf) {
6145      unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
6146      emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index,
6147                              VGPU10_NAME_CLIP_DISTANCE, cmask, TRUE,
6148                              SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE);
6149      emit->num_outputs++;
6150   }
6151   if (plane_mask & 0xf0) {
6152      unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
6153      emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1,
6154                              VGPU10_NAME_CLIP_DISTANCE, cmask, TRUE,
6155                              SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE);
6156      emit->num_outputs++;
6157   }
6158}
6159
6160
6161/**
6162 * Emit the instructions for writing to the clip distance registers
6163 * to handle legacy/automatic clip planes.
6164 * For each clip plane, the distance is the dot product of the vertex
6165 * position (found in TEMP[vpos_tmp_index]) and the clip plane coefficients.
6166 * This is not used when the shader has an explicit CLIPVERTEX or CLIPDISTANCE
6167 * output registers already declared.
6168 */
6169static void
6170emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit,
6171                             unsigned vpos_tmp_index)
6172{
6173   unsigned i, num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
6174
6175   assert(emit->clip_mode == CLIP_LEGACY);
6176   assert(num_clip_planes <= 8);
6177
6178   assert(emit->unit == PIPE_SHADER_VERTEX ||
6179          emit->unit == PIPE_SHADER_GEOMETRY ||
6180          emit->unit == PIPE_SHADER_TESS_EVAL);
6181
6182   for (i = 0; i < num_clip_planes; i++) {
6183      struct tgsi_full_dst_register dst;
6184      struct tgsi_full_src_register plane_src, vpos_src;
6185      unsigned reg_index = emit->clip_dist_out_index + i / 4;
6186      unsigned comp = i % 4;
6187      unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
6188
6189      /* create dst, src regs */
6190      dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
6191      dst = writemask_dst(&dst, writemask);
6192
6193      plane_src = make_src_const_reg(emit->clip_plane_const[i]);
6194      vpos_src = make_src_temp_reg(vpos_tmp_index);
6195
6196      /* DP4 clip_dist, plane, vpos */
6197      emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
6198                           &plane_src, &vpos_src);
6199   }
6200}
6201
6202
6203/**
6204 * Emit the instructions for computing the clip distance results from
6205 * the clip vertex temporary.
6206 * For each clip plane, the distance is the dot product of the clip vertex
6207 * position (found in a temp reg) and the clip plane coefficients.
6208 */
6209static void
6210emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit)
6211{
6212   const unsigned num_clip = util_bitcount(emit->key.clip_plane_enable);
6213   unsigned i;
6214   struct tgsi_full_dst_register dst;
6215   struct tgsi_full_src_register clipvert_src;
6216   const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index;
6217
6218   assert(emit->unit == PIPE_SHADER_VERTEX ||
6219          emit->unit == PIPE_SHADER_GEOMETRY ||
6220          emit->unit == PIPE_SHADER_TESS_EVAL);
6221
6222   assert(emit->clip_mode == CLIP_VERTEX);
6223
6224   clipvert_src = make_src_temp_reg(clip_vertex_tmp);
6225
6226   for (i = 0; i < num_clip; i++) {
6227      struct tgsi_full_src_register plane_src;
6228      unsigned reg_index = emit->clip_dist_out_index + i / 4;
6229      unsigned comp = i % 4;
6230      unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
6231
6232      /* create dst, src regs */
6233      dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
6234      dst = writemask_dst(&dst, writemask);
6235
6236      plane_src = make_src_const_reg(emit->clip_plane_const[i]);
6237
6238      /* DP4 clip_dist, plane, vpos */
6239      emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
6240                           &plane_src, &clipvert_src);
6241   }
6242
6243   /* copy temporary clip vertex register to the clip vertex register */
6244
6245   assert(emit->clip_vertex_out_index != INVALID_INDEX);
6246
6247   /**
6248    * temporary reset the temporary clip vertex register index so
6249    * that copy to the clip vertex register will not attempt
6250    * to copy to the temporary register again
6251    */
6252   emit->clip_vertex_tmp_index = INVALID_INDEX;
6253
6254   /* MOV clip_vertex, clip_vertex_tmp */
6255   dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index);
6256   emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
6257                        &dst, &clipvert_src);
6258
6259   /**
6260    * set the temporary clip vertex register index back to the
6261    * temporary index for the next vertex
6262    */
6263   emit->clip_vertex_tmp_index = clip_vertex_tmp;
6264}
6265
6266/**
6267 * Emit code to convert RGBA to BGRA
6268 */
6269static void
6270emit_swap_r_b(struct svga_shader_emitter_v10 *emit,
6271                     const struct tgsi_full_dst_register *dst,
6272                     const struct tgsi_full_src_register *src)
6273{
6274   struct tgsi_full_src_register bgra_src =
6275      swizzle_src(src, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W);
6276
6277   begin_emit_instruction(emit);
6278   emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
6279   emit_dst_register(emit, dst);
6280   emit_src_register(emit, &bgra_src);
6281   end_emit_instruction(emit);
6282}
6283
6284
6285/** Convert from 10_10_10_2 normalized to 10_10_10_2_snorm */
6286static void
6287emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit,
6288                    const struct tgsi_full_dst_register *dst,
6289                    const struct tgsi_full_src_register *src)
6290{
6291   struct tgsi_full_src_register half = make_immediate_reg_float(emit, 0.5f);
6292   struct tgsi_full_src_register two =
6293      make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f);
6294   struct tgsi_full_src_register neg_two =
6295      make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);
6296
6297   unsigned val_tmp = get_temp_index(emit);
6298   struct tgsi_full_dst_register val_dst = make_dst_temp_reg(val_tmp);
6299   struct tgsi_full_src_register val_src = make_src_temp_reg(val_tmp);
6300
6301   unsigned bias_tmp = get_temp_index(emit);
6302   struct tgsi_full_dst_register bias_dst = make_dst_temp_reg(bias_tmp);
6303   struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp);
6304
6305   /* val = src * 2.0 */
6306   emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst, src, &two);
6307
6308   /* bias = src > 0.5 */
6309   emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst, src, &half);
6310
6311   /* bias = bias & -2.0 */
6312   emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst,
6313                        &bias_src, &neg_two);
6314
6315   /* dst = val + bias */
6316   emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst,
6317                        &val_src, &bias_src);
6318
6319   free_temp_indexes(emit);
6320}
6321
6322
6323/** Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled */
6324static void
6325emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit,
6326                      const struct tgsi_full_dst_register *dst,
6327                      const struct tgsi_full_src_register *src)
6328{
6329   struct tgsi_full_src_register scale =
6330      make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f);
6331
6332   /* dst = src * scale */
6333   emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale);
6334}
6335
6336
6337/** Convert from R32_UINT to 10_10_10_2_sscaled */
6338static void
6339emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit,
6340                      const struct tgsi_full_dst_register *dst,
6341                      const struct tgsi_full_src_register *src)
6342{
6343   struct tgsi_full_src_register lshift =
6344      make_immediate_reg_int4(emit, 22, 12, 2, 0);
6345   struct tgsi_full_src_register rshift =
6346      make_immediate_reg_int4(emit, 22, 22, 22, 30);
6347
6348   struct tgsi_full_src_register src_xxxx = scalar_src(src, TGSI_SWIZZLE_X);
6349
6350   unsigned tmp = get_temp_index(emit);
6351   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6352   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6353
6354   /*
6355    * r = (pixel << 22) >> 22;   # signed int in [511, -512]
6356    * g = (pixel << 12) >> 22;   # signed int in [511, -512]
6357    * b = (pixel <<  2) >> 22;   # signed int in [511, -512]
6358    * a = (pixel <<  0) >> 30;   # signed int in [1, -2]
6359    * dst = i_to_f(r,g,b,a);     # convert to float
6360    */
6361   emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst,
6362                        &src_xxxx, &lshift);
6363   emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst,
6364                        &tmp_src, &rshift);
6365   emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src);
6366
6367   free_temp_indexes(emit);
6368}
6369
6370
6371/**
6372 * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction.
6373 */
6374static boolean
6375emit_arl_uarl(struct svga_shader_emitter_v10 *emit,
6376              const struct tgsi_full_instruction *inst)
6377{
6378   unsigned index = inst->Dst[0].Register.Index;
6379   struct tgsi_full_dst_register dst;
6380   VGPU10_OPCODE_TYPE opcode;
6381
6382   assert(index < MAX_VGPU10_ADDR_REGS);
6383   dst = make_dst_temp_reg(emit->address_reg_index[index]);
6384   dst = writemask_dst(&dst, inst->Dst[0].Register.WriteMask);
6385
6386   /* ARL dst, s0
6387    * Translates into:
6388    * FTOI address_tmp, s0
6389    *
6390    * UARL dst, s0
6391    * Translates into:
6392    * MOV address_tmp, s0
6393    */
6394   if (inst->Instruction.Opcode == TGSI_OPCODE_ARL)
6395      opcode = VGPU10_OPCODE_FTOI;
6396   else
6397      opcode = VGPU10_OPCODE_MOV;
6398
6399   emit_instruction_op1(emit, opcode, &dst, &inst->Src[0]);
6400
6401   return TRUE;
6402}
6403
6404
6405/**
6406 * Emit code for TGSI_OPCODE_CAL instruction.
6407 */
6408static boolean
6409emit_cal(struct svga_shader_emitter_v10 *emit,
6410         const struct tgsi_full_instruction *inst)
6411{
6412   unsigned label = inst->Label.Label;
6413   VGPU10OperandToken0 operand;
6414   operand.value = 0;
6415   operand.operandType = VGPU10_OPERAND_TYPE_LABEL;
6416
6417   begin_emit_instruction(emit);
6418   emit_dword(emit, operand.value);
6419   emit_dword(emit, label);
6420   end_emit_instruction(emit);
6421
6422   return TRUE;
6423}
6424
6425
6426/**
6427 * Emit code for TGSI_OPCODE_IABS instruction.
6428 */
6429static boolean
6430emit_iabs(struct svga_shader_emitter_v10 *emit,
6431          const struct tgsi_full_instruction *inst)
6432{
6433   /* dst.x = (src0.x < 0) ? -src0.x : src0.x
6434    * dst.y = (src0.y < 0) ? -src0.y : src0.y
6435    * dst.z = (src0.z < 0) ? -src0.z : src0.z
6436    * dst.w = (src0.w < 0) ? -src0.w : src0.w
6437    *
6438    * Translates into
6439    *   IMAX dst, src, neg(src)
6440    */
6441   struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]);
6442   emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0],
6443                        &inst->Src[0], &neg_src);
6444
6445   return TRUE;
6446}
6447
6448
6449/**
6450 * Emit code for TGSI_OPCODE_CMP instruction.
6451 */
6452static boolean
6453emit_cmp(struct svga_shader_emitter_v10 *emit,
6454         const struct tgsi_full_instruction *inst)
6455{
6456   /* dst.x = (src0.x < 0) ? src1.x : src2.x
6457    * dst.y = (src0.y < 0) ? src1.y : src2.y
6458    * dst.z = (src0.z < 0) ? src1.z : src2.z
6459    * dst.w = (src0.w < 0) ? src1.w : src2.w
6460    *
6461    * Translates into
6462    *   LT tmp, src0, 0.0
6463    *   MOVC dst, tmp, src1, src2
6464    */
6465   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
6466   unsigned tmp = get_temp_index(emit);
6467   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6468   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6469
6470   emit_instruction_opn(emit, VGPU10_OPCODE_LT, &tmp_dst,
6471                        &inst->Src[0], &zero, NULL, FALSE,
6472                        inst->Instruction.Precise);
6473   emit_instruction_opn(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0],
6474                        &tmp_src, &inst->Src[1], &inst->Src[2],
6475                        inst->Instruction.Saturate, FALSE);
6476
6477   free_temp_indexes(emit);
6478
6479   return TRUE;
6480}
6481
6482
6483/**
6484 * Emit code for TGSI_OPCODE_DST instruction.
6485 */
6486static boolean
6487emit_dst(struct svga_shader_emitter_v10 *emit,
6488         const struct tgsi_full_instruction *inst)
6489{
6490   /*
6491    * dst.x = 1
6492    * dst.y = src0.y * src1.y
6493    * dst.z = src0.z
6494    * dst.w = src1.w
6495    */
6496
6497   struct tgsi_full_src_register s0_yyyy =
6498      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
6499   struct tgsi_full_src_register s0_zzzz =
6500      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
6501   struct tgsi_full_src_register s1_yyyy =
6502      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
6503   struct tgsi_full_src_register s1_wwww =
6504      scalar_src(&inst->Src[1], TGSI_SWIZZLE_W);
6505
6506   /*
6507    * If dst and either src0 and src1 are the same we need
6508    * to create a temporary for it and insert a extra move.
6509    */
6510   unsigned tmp_move = get_temp_index(emit);
6511   struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
6512   struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
6513
6514   /* MOV dst.x, 1.0 */
6515   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
6516      struct tgsi_full_dst_register dst_x =
6517         writemask_dst(&move_dst, TGSI_WRITEMASK_X);
6518      struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
6519
6520      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one);
6521   }
6522
6523   /* MUL dst.y, s0.y, s1.y */
6524   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
6525      struct tgsi_full_dst_register dst_y =
6526         writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
6527
6528      emit_instruction_opn(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy,
6529                           &s1_yyyy, NULL, inst->Instruction.Saturate,
6530                           inst->Instruction.Precise);
6531   }
6532
6533   /* MOV dst.z, s0.z */
6534   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
6535      struct tgsi_full_dst_register dst_z =
6536         writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
6537
6538      emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
6539                           &dst_z, &s0_zzzz, NULL, NULL,
6540                           inst->Instruction.Saturate,
6541                           inst->Instruction.Precise);
6542  }
6543
6544   /* MOV dst.w, s1.w */
6545   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
6546      struct tgsi_full_dst_register dst_w =
6547         writemask_dst(&move_dst, TGSI_WRITEMASK_W);
6548
6549      emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
6550                           &dst_w, &s1_wwww, NULL, NULL,
6551                           inst->Instruction.Saturate,
6552                           inst->Instruction.Precise);
6553   }
6554
6555   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
6556   free_temp_indexes(emit);
6557
6558   return TRUE;
6559}
6560
6561
6562/**
6563 * A helper function to return the stream index as specified in
6564 * the immediate register
6565 */
6566static inline unsigned
6567find_stream_index(struct svga_shader_emitter_v10 *emit,
6568                  const struct tgsi_full_src_register *src)
6569{
6570   return emit->immediates[src->Register.Index][src->Register.SwizzleX].Int;
6571}
6572
6573
6574/**
6575 * Emit code for TGSI_OPCODE_ENDPRIM (GS only)
6576 */
6577static boolean
6578emit_endprim(struct svga_shader_emitter_v10 *emit,
6579             const struct tgsi_full_instruction *inst)
6580{
6581   assert(emit->unit == PIPE_SHADER_GEOMETRY);
6582
6583   begin_emit_instruction(emit);
6584   if (emit->version >= 50) {
6585      unsigned streamIndex = find_stream_index(emit, &inst->Src[0]);
6586
6587      if (emit->info.num_stream_output_components[streamIndex] == 0) {
6588         /**
6589          * If there is no output for this stream, discard this instruction.
6590          */
6591         emit->discard_instruction = TRUE;
6592      }
6593      else {
6594         emit_opcode(emit, VGPU10_OPCODE_CUT_STREAM, FALSE);
6595         assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE);
6596         emit_stream_register(emit, streamIndex);
6597      }
6598   }
6599   else {
6600      emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE);
6601   }
6602   end_emit_instruction(emit);
6603   return TRUE;
6604}
6605
6606
6607/**
6608 * Emit code for TGSI_OPCODE_EX2 (2^x) instruction.
6609 */
6610static boolean
6611emit_ex2(struct svga_shader_emitter_v10 *emit,
6612         const struct tgsi_full_instruction *inst)
6613{
6614   /* Note that TGSI_OPCODE_EX2 computes only one value from src.x
6615    * while VGPU10 computes four values.
6616    *
6617    * dst = EX2(src):
6618    *   dst.xyzw = 2.0 ^ src.x
6619    */
6620
6621   struct tgsi_full_src_register src_xxxx =
6622      swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
6623                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
6624
6625   /* EXP tmp, s0.xxxx */
6626   emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx,
6627                        NULL, NULL,
6628                        inst->Instruction.Saturate,
6629                        inst->Instruction.Precise);
6630
6631   return TRUE;
6632}
6633
6634
6635/**
6636 * Emit code for TGSI_OPCODE_EXP instruction.
6637 */
6638static boolean
6639emit_exp(struct svga_shader_emitter_v10 *emit,
6640         const struct tgsi_full_instruction *inst)
6641{
6642   /*
6643    * dst.x = 2 ^ floor(s0.x)
6644    * dst.y = s0.x - floor(s0.x)
6645    * dst.z = 2 ^ s0.x
6646    * dst.w = 1.0
6647    */
6648
6649   struct tgsi_full_src_register src_xxxx =
6650      scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
6651   unsigned tmp = get_temp_index(emit);
6652   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6653   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6654
6655   /*
6656    * If dst and src are the same we need to create
6657    * a temporary for it and insert a extra move.
6658    */
6659   unsigned tmp_move = get_temp_index(emit);
6660   struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
6661   struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
6662
6663   /* only use X component of temp reg */
6664   tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
6665   tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
6666
6667   /* ROUND_NI tmp.x, s0.x */
6668   emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
6669                        &src_xxxx); /* round to -infinity */
6670
6671   /* EXP dst.x, tmp.x */
6672   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
6673      struct tgsi_full_dst_register dst_x =
6674         writemask_dst(&move_dst, TGSI_WRITEMASK_X);
6675
6676      emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src,
6677                           NULL, NULL,
6678                           inst->Instruction.Saturate,
6679                           inst->Instruction.Precise);
6680   }
6681
6682   /* ADD dst.y, s0.x, -tmp */
6683   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
6684      struct tgsi_full_dst_register dst_y =
6685         writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
6686      struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src);
6687
6688      emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx,
6689                           &neg_tmp_src, NULL,
6690                           inst->Instruction.Saturate,
6691                           inst->Instruction.Precise);
6692   }
6693
6694   /* EXP dst.z, s0.x */
6695   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
6696      struct tgsi_full_dst_register dst_z =
6697         writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
6698
6699      emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx,
6700                           NULL, NULL,
6701                           inst->Instruction.Saturate,
6702                           inst->Instruction.Precise);
6703   }
6704
6705   /* MOV dst.w, 1.0 */
6706   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
6707      struct tgsi_full_dst_register dst_w =
6708         writemask_dst(&move_dst, TGSI_WRITEMASK_W);
6709      struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
6710
6711      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
6712   }
6713
6714   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
6715
6716   free_temp_indexes(emit);
6717
6718   return TRUE;
6719}
6720
6721
6722/**
6723 * Emit code for TGSI_OPCODE_IF instruction.
6724 */
6725static boolean
6726emit_if(struct svga_shader_emitter_v10 *emit,
6727        const struct tgsi_full_src_register *src)
6728{
6729   VGPU10OpcodeToken0 opcode0;
6730
6731   /* The src register should be a scalar */
6732   assert(src->Register.SwizzleX == src->Register.SwizzleY &&
6733          src->Register.SwizzleX == src->Register.SwizzleZ &&
6734          src->Register.SwizzleX == src->Register.SwizzleW);
6735
6736   /* The only special thing here is that we need to set the
6737    * VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if
6738    * src.x is non-zero.
6739    */
6740   opcode0.value = 0;
6741   opcode0.opcodeType = VGPU10_OPCODE_IF;
6742   opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
6743
6744   begin_emit_instruction(emit);
6745   emit_dword(emit, opcode0.value);
6746   emit_src_register(emit, src);
6747   end_emit_instruction(emit);
6748
6749   return TRUE;
6750}
6751
6752
6753/**
6754 * Emit code for TGSI_OPCODE_KILL_IF instruction (kill fragment if any of
6755 * the register components are negative).
6756 */
6757static boolean
6758emit_kill_if(struct svga_shader_emitter_v10 *emit,
6759             const struct tgsi_full_instruction *inst)
6760{
6761   unsigned tmp = get_temp_index(emit);
6762   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6763   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6764
6765   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
6766
6767   struct tgsi_full_dst_register tmp_dst_x =
6768      writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
6769   struct tgsi_full_src_register tmp_src_xxxx =
6770      scalar_src(&tmp_src, TGSI_SWIZZLE_X);
6771
6772   /* tmp = src[0] < 0.0 */
6773   emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], &zero);
6774
6775   if (!same_swizzle_terms(&inst->Src[0])) {
6776      /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to
6777       * logically OR the swizzle terms.  Most uses of KILL_IF only
6778       * test one channel so it's good to avoid these extra steps.
6779       */
6780      struct tgsi_full_src_register tmp_src_yyyy =
6781         scalar_src(&tmp_src, TGSI_SWIZZLE_Y);
6782      struct tgsi_full_src_register tmp_src_zzzz =
6783         scalar_src(&tmp_src, TGSI_SWIZZLE_Z);
6784      struct tgsi_full_src_register tmp_src_wwww =
6785         scalar_src(&tmp_src, TGSI_SWIZZLE_W);
6786
6787      emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
6788                           &tmp_src_yyyy);
6789      emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
6790                           &tmp_src_zzzz);
6791      emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
6792                           &tmp_src_wwww);
6793   }
6794
6795   begin_emit_instruction(emit);
6796   emit_discard_opcode(emit, TRUE); /* discard if src0.x is non-zero */
6797   emit_src_register(emit, &tmp_src_xxxx);
6798   end_emit_instruction(emit);
6799
6800   free_temp_indexes(emit);
6801
6802   return TRUE;
6803}
6804
6805
6806/**
6807 * Emit code for TGSI_OPCODE_KILL instruction (unconditional discard).
6808 */
6809static boolean
6810emit_kill(struct svga_shader_emitter_v10 *emit,
6811          const struct tgsi_full_instruction *inst)
6812{
6813   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
6814
6815   /* DISCARD if 0.0 is zero */
6816   begin_emit_instruction(emit);
6817   emit_discard_opcode(emit, FALSE);
6818   emit_src_register(emit, &zero);
6819   end_emit_instruction(emit);
6820
6821   return TRUE;
6822}
6823
6824
6825/**
6826 * Emit code for TGSI_OPCODE_LG2 instruction.
6827 */
6828static boolean
6829emit_lg2(struct svga_shader_emitter_v10 *emit,
6830         const struct tgsi_full_instruction *inst)
6831{
6832   /* Note that TGSI_OPCODE_LG2 computes only one value from src.x
6833    * while VGPU10 computes four values.
6834    *
6835    * dst = LG2(src):
6836    *   dst.xyzw = log2(src.x)
6837    */
6838
6839   struct tgsi_full_src_register src_xxxx =
6840      swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
6841                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
6842
6843   /* LOG tmp, s0.xxxx */
6844   emit_instruction_opn(emit, VGPU10_OPCODE_LOG,
6845                        &inst->Dst[0], &src_xxxx, NULL, NULL,
6846                        inst->Instruction.Saturate,
6847                        inst->Instruction.Precise);
6848
6849   return TRUE;
6850}
6851
6852
6853/**
6854 * Emit code for TGSI_OPCODE_LIT instruction.
6855 */
6856static boolean
6857emit_lit(struct svga_shader_emitter_v10 *emit,
6858         const struct tgsi_full_instruction *inst)
6859{
6860   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
6861
6862   /*
6863    * If dst and src are the same we need to create
6864    * a temporary for it and insert a extra move.
6865    */
6866   unsigned tmp_move = get_temp_index(emit);
6867   struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
6868   struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
6869
6870   /*
6871    * dst.x = 1
6872    * dst.y = max(src.x, 0)
6873    * dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128))} : 0
6874    * dst.w = 1
6875    */
6876
6877   /* MOV dst.x, 1.0 */
6878   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
6879      struct tgsi_full_dst_register dst_x =
6880         writemask_dst(&move_dst, TGSI_WRITEMASK_X);
6881      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one);
6882   }
6883
6884   /* MOV dst.w, 1.0 */
6885   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
6886      struct tgsi_full_dst_register dst_w =
6887         writemask_dst(&move_dst, TGSI_WRITEMASK_W);
6888      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
6889   }
6890
6891   /* MAX dst.y, src.x, 0.0 */
6892   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
6893      struct tgsi_full_dst_register dst_y =
6894         writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
6895      struct tgsi_full_src_register zero =
6896         make_immediate_reg_float(emit, 0.0f);
6897      struct tgsi_full_src_register src_xxxx =
6898         swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
6899                     TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
6900
6901      emit_instruction_opn(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx,
6902                           &zero, NULL, inst->Instruction.Saturate, FALSE);
6903   }
6904
6905   /*
6906    * tmp1 = clamp(src.w, -128, 128);
6907    *   MAX tmp1, src.w, -128
6908    *   MIN tmp1, tmp1, 128
6909    *
6910    * tmp2 = max(tmp2, 0);
6911    *   MAX tmp2, src.y, 0
6912    *
6913    * tmp1 = pow(tmp2, tmp1);
6914    *   LOG tmp2, tmp2
6915    *   MUL tmp1, tmp2, tmp1
6916    *   EXP tmp1, tmp1
6917    *
6918    * tmp1 = (src.w == 0) ? 1 : tmp1;
6919    *   EQ tmp2, 0, src.w
6920    *   MOVC tmp1, tmp2, 1.0, tmp1
6921    *
6922    * dst.z = (0 < src.x) ? tmp1 : 0;
6923    *   LT tmp2, 0, src.x
6924    *   MOVC dst.z, tmp2, tmp1, 0.0
6925    */
6926   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
6927      struct tgsi_full_dst_register dst_z =
6928         writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
6929
6930      unsigned tmp1 = get_temp_index(emit);
6931      struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
6932      struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
6933      unsigned tmp2 = get_temp_index(emit);
6934      struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
6935      struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
6936
6937      struct tgsi_full_src_register src_xxxx =
6938         scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
6939      struct tgsi_full_src_register src_yyyy =
6940         scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
6941      struct tgsi_full_src_register src_wwww =
6942         scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
6943
6944      struct tgsi_full_src_register zero =
6945         make_immediate_reg_float(emit, 0.0f);
6946      struct tgsi_full_src_register lowerbound =
6947         make_immediate_reg_float(emit, -128.0f);
6948      struct tgsi_full_src_register upperbound =
6949         make_immediate_reg_float(emit, 128.0f);
6950
6951      emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww,
6952                           &lowerbound);
6953      emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src,
6954                           &upperbound);
6955      emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy,
6956                           &zero);
6957
6958      /* POW tmp1, tmp2, tmp1 */
6959      /* LOG tmp2, tmp2 */
6960      emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src);
6961
6962      /* MUL tmp1, tmp2, tmp1 */
6963      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src,
6964                           &tmp1_src);
6965
6966      /* EXP tmp1, tmp1 */
6967      emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src);
6968
6969      /* EQ tmp2, 0, src.w */
6970      emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero, &src_wwww);
6971      /* MOVC tmp1.z, tmp2, tmp1, 1.0 */
6972      emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst,
6973                           &tmp2_src, &one, &tmp1_src);
6974
6975      /* LT tmp2, 0, src.x */
6976      emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero, &src_xxxx);
6977      /* MOVC dst.z, tmp2, tmp1, 0.0 */
6978      emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z,
6979                           &tmp2_src, &tmp1_src, &zero);
6980   }
6981
6982   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
6983   free_temp_indexes(emit);
6984
6985   return TRUE;
6986}
6987
6988
6989/**
6990 * Emit Level Of Detail Query (LODQ) instruction.
6991 */
6992static boolean
6993emit_lodq(struct svga_shader_emitter_v10 *emit,
6994          const struct tgsi_full_instruction *inst)
6995{
6996   const uint unit = inst->Src[1].Register.Index;
6997
6998   assert(emit->version >= 41);
6999
7000   /* LOD dst, coord, resource, sampler */
7001   begin_emit_instruction(emit);
7002   emit_opcode(emit, VGPU10_OPCODE_LOD, FALSE);
7003   emit_dst_register(emit, &inst->Dst[0]);
7004   emit_src_register(emit, &inst->Src[0]); /* coord */
7005   emit_resource_register(emit, unit);
7006   emit_sampler_register(emit, unit);
7007   end_emit_instruction(emit);
7008
7009   return TRUE;
7010}
7011
7012
7013/**
7014 * Emit code for TGSI_OPCODE_LOG instruction.
7015 */
7016static boolean
7017emit_log(struct svga_shader_emitter_v10 *emit,
7018         const struct tgsi_full_instruction *inst)
7019{
7020   /*
7021    * dst.x = floor(lg2(abs(s0.x)))
7022    * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x))))
7023    * dst.z = lg2(abs(s0.x))
7024    * dst.w = 1.0
7025    */
7026
7027   struct tgsi_full_src_register src_xxxx =
7028      scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
7029   unsigned tmp = get_temp_index(emit);
7030   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7031   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7032   struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx);
7033
7034   /* only use X component of temp reg */
7035   tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7036   tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7037
7038   /* LOG tmp.x, abs(s0.x) */
7039   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
7040      emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &abs_src_xxxx);
7041   }
7042
7043   /* MOV dst.z, tmp.x */
7044   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
7045      struct tgsi_full_dst_register dst_z =
7046         writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z);
7047
7048      emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7049                           &dst_z, &tmp_src, NULL, NULL,
7050                           inst->Instruction.Saturate, FALSE);
7051   }
7052
7053   /* FLR tmp.x, tmp.x */
7054   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
7055      emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, &tmp_src);
7056   }
7057
7058   /* MOV dst.x, tmp.x */
7059   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
7060      struct tgsi_full_dst_register dst_x =
7061         writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
7062
7063      emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7064                           &dst_x, &tmp_src, NULL, NULL,
7065                           inst->Instruction.Saturate, FALSE);
7066   }
7067
7068   /* EXP tmp.x, tmp.x */
7069   /* DIV dst.y, abs(s0.x), tmp.x */
7070   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
7071      struct tgsi_full_dst_register dst_y =
7072         writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
7073
7074      emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src);
7075      emit_instruction_opn(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx,
7076                           &tmp_src, NULL, inst->Instruction.Saturate, FALSE);
7077   }
7078
7079   /* MOV dst.w, 1.0 */
7080   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
7081      struct tgsi_full_dst_register dst_w =
7082         writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_W);
7083      struct tgsi_full_src_register one =
7084         make_immediate_reg_float(emit, 1.0f);
7085
7086      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
7087   }
7088
7089   free_temp_indexes(emit);
7090
7091   return TRUE;
7092}
7093
7094
7095/**
7096 * Emit code for TGSI_OPCODE_LRP instruction.
7097 */
7098static boolean
7099emit_lrp(struct svga_shader_emitter_v10 *emit,
7100         const struct tgsi_full_instruction *inst)
7101{
7102   /* dst = LRP(s0, s1, s2):
7103    *   dst = s0 * (s1 - s2) + s2
7104    * Translates into:
7105    *   SUB tmp, s1, s2;        tmp = s1 - s2
7106    *   MAD dst, s0, tmp, s2;   dst = s0 * t1 + s2
7107    */
7108   unsigned tmp = get_temp_index(emit);
7109   struct tgsi_full_src_register src_tmp = make_src_temp_reg(tmp);
7110   struct tgsi_full_dst_register dst_tmp = make_dst_temp_reg(tmp);
7111   struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]);
7112
7113   /* ADD tmp, s1, -s2 */
7114   emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_tmp,
7115                        &inst->Src[1], &neg_src2, NULL, FALSE,
7116                        inst->Instruction.Precise);
7117
7118   /* MAD dst, s1, tmp, s3 */
7119   emit_instruction_opn(emit, VGPU10_OPCODE_MAD, &inst->Dst[0],
7120                        &inst->Src[0], &src_tmp, &inst->Src[2],
7121                        inst->Instruction.Saturate,
7122                        inst->Instruction.Precise);
7123
7124   free_temp_indexes(emit);
7125
7126   return TRUE;
7127}
7128
7129
7130/**
7131 * Emit code for TGSI_OPCODE_POW instruction.
7132 */
7133static boolean
7134emit_pow(struct svga_shader_emitter_v10 *emit,
7135         const struct tgsi_full_instruction *inst)
7136{
7137   /* Note that TGSI_OPCODE_POW computes only one value from src0.x and
7138    * src1.x while VGPU10 computes four values.
7139    *
7140    * dst = POW(src0, src1):
7141    *   dst.xyzw = src0.x ^ src1.x
7142    */
7143   unsigned tmp = get_temp_index(emit);
7144   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7145   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7146   struct tgsi_full_src_register src0_xxxx =
7147      swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7148                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7149   struct tgsi_full_src_register src1_xxxx =
7150      swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7151                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7152
7153   /* LOG tmp, s0.xxxx */
7154   emit_instruction_opn(emit, VGPU10_OPCODE_LOG,
7155                        &tmp_dst, &src0_xxxx, NULL, NULL,
7156                        FALSE, inst->Instruction.Precise);
7157
7158   /* MUL tmp, tmp, s1.xxxx */
7159   emit_instruction_opn(emit, VGPU10_OPCODE_MUL,
7160                        &tmp_dst, &tmp_src, &src1_xxxx, NULL,
7161                        FALSE, inst->Instruction.Precise);
7162
7163   /* EXP tmp, s0.xxxx */
7164   emit_instruction_opn(emit, VGPU10_OPCODE_EXP,
7165                        &inst->Dst[0], &tmp_src, NULL, NULL,
7166                        inst->Instruction.Saturate,
7167                        inst->Instruction.Precise);
7168
7169   /* free tmp */
7170   free_temp_indexes(emit);
7171
7172   return TRUE;
7173}
7174
7175
7176/**
7177 * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction.
7178 */
7179static boolean
7180emit_rcp(struct svga_shader_emitter_v10 *emit,
7181         const struct tgsi_full_instruction *inst)
7182{
7183   if (emit->version >= 50) {
7184      /* use new RCP instruction.  But VGPU10_OPCODE_RCP is component-wise
7185       * while TGSI_OPCODE_RCP computes dst.xyzw = 1.0 / src.xxxx so we need
7186       * to manipulate the src register's swizzle.
7187       */
7188      struct tgsi_full_src_register src = inst->Src[0];
7189      src.Register.SwizzleY =
7190      src.Register.SwizzleZ =
7191      src.Register.SwizzleW = src.Register.SwizzleX;
7192
7193      begin_emit_instruction(emit);
7194      emit_opcode_precise(emit, VGPU10_OPCODE_RCP,
7195                          inst->Instruction.Saturate,
7196                          inst->Instruction.Precise);
7197      emit_dst_register(emit, &inst->Dst[0]);
7198      emit_src_register(emit, &src);
7199      end_emit_instruction(emit);
7200   }
7201   else {
7202      struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7203
7204      unsigned tmp = get_temp_index(emit);
7205      struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7206      struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7207
7208      struct tgsi_full_dst_register tmp_dst_x =
7209         writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7210      struct tgsi_full_src_register tmp_src_xxxx =
7211         scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7212
7213      /* DIV tmp.x, 1.0, s0 */
7214      emit_instruction_opn(emit, VGPU10_OPCODE_DIV,
7215                           &tmp_dst_x, &one, &inst->Src[0], NULL,
7216                           FALSE, inst->Instruction.Precise);
7217
7218      /* MOV dst, tmp.xxxx */
7219      emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7220                           &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
7221                           inst->Instruction.Saturate,
7222                           inst->Instruction.Precise);
7223
7224      free_temp_indexes(emit);
7225   }
7226
7227   return TRUE;
7228}
7229
7230
7231/**
7232 * Emit code for TGSI_OPCODE_RSQ instruction.
7233 */
7234static boolean
7235emit_rsq(struct svga_shader_emitter_v10 *emit,
7236         const struct tgsi_full_instruction *inst)
7237{
7238   /* dst = RSQ(src):
7239    *   dst.xyzw = 1 / sqrt(src.x)
7240    * Translates into:
7241    *   RSQ tmp, src.x
7242    *   MOV dst, tmp.xxxx
7243    */
7244
7245   unsigned tmp = get_temp_index(emit);
7246   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7247   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7248
7249   struct tgsi_full_dst_register tmp_dst_x =
7250      writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7251   struct tgsi_full_src_register tmp_src_xxxx =
7252      scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7253
7254   /* RSQ tmp, src.x */
7255   emit_instruction_opn(emit, VGPU10_OPCODE_RSQ,
7256                        &tmp_dst_x, &inst->Src[0], NULL, NULL,
7257                        FALSE, inst->Instruction.Precise);
7258
7259   /* MOV dst, tmp.xxxx */
7260   emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7261                        &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
7262                        inst->Instruction.Saturate,
7263                        inst->Instruction.Precise);
7264
7265   /* free tmp */
7266   free_temp_indexes(emit);
7267
7268   return TRUE;
7269}
7270
7271
7272/**
7273 * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction.
7274 */
7275static boolean
7276emit_seq(struct svga_shader_emitter_v10 *emit,
7277         const struct tgsi_full_instruction *inst)
7278{
7279   /* dst = SEQ(s0, s1):
7280    *   dst = s0 == s1 ? 1.0 : 0.0  (per component)
7281    * Translates into:
7282    *   EQ tmp, s0, s1;           tmp = s0 == s1 : 0xffffffff : 0 (per comp)
7283    *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
7284    */
7285   unsigned tmp = get_temp_index(emit);
7286   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7287   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7288   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7289   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7290
7291   /* EQ tmp, s0, s1 */
7292   emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0],
7293                        &inst->Src[1]);
7294
7295   /* MOVC dst, tmp, one, zero */
7296   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7297                        &one, &zero);
7298
7299   free_temp_indexes(emit);
7300
7301   return TRUE;
7302}
7303
7304
7305/**
7306 * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction.
7307 */
7308static boolean
7309emit_sge(struct svga_shader_emitter_v10 *emit,
7310         const struct tgsi_full_instruction *inst)
7311{
7312   /* dst = SGE(s0, s1):
7313    *   dst = s0 >= s1 ? 1.0 : 0.0  (per component)
7314    * Translates into:
7315    *   GE tmp, s0, s1;           tmp = s0 >= s1 : 0xffffffff : 0 (per comp)
7316    *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
7317    */
7318   unsigned tmp = get_temp_index(emit);
7319   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7320   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7321   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7322   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7323
7324   /* GE tmp, s0, s1 */
7325   emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0],
7326                        &inst->Src[1]);
7327
7328   /* MOVC dst, tmp, one, zero */
7329   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7330                        &one, &zero);
7331
7332   free_temp_indexes(emit);
7333
7334   return TRUE;
7335}
7336
7337
7338/**
7339 * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction.
7340 */
7341static boolean
7342emit_sgt(struct svga_shader_emitter_v10 *emit,
7343         const struct tgsi_full_instruction *inst)
7344{
7345   /* dst = SGT(s0, s1):
7346    *   dst = s0 > s1 ? 1.0 : 0.0  (per component)
7347    * Translates into:
7348    *   LT tmp, s1, s0;           tmp = s1 < s0 ? 0xffffffff : 0 (per comp)
7349    *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
7350    */
7351   unsigned tmp = get_temp_index(emit);
7352   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7353   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7354   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7355   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7356
7357   /* LT tmp, s1, s0 */
7358   emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1],
7359                        &inst->Src[0]);
7360
7361   /* MOVC dst, tmp, one, zero */
7362   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7363                        &one, &zero);
7364
7365   free_temp_indexes(emit);
7366
7367   return TRUE;
7368}
7369
7370
7371/**
7372 * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions.
7373 */
7374static boolean
7375emit_sincos(struct svga_shader_emitter_v10 *emit,
7376         const struct tgsi_full_instruction *inst)
7377{
7378   unsigned tmp = get_temp_index(emit);
7379   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7380   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7381
7382   struct tgsi_full_src_register tmp_src_xxxx =
7383      scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7384   struct tgsi_full_dst_register tmp_dst_x =
7385      writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7386
7387   begin_emit_instruction(emit);
7388   emit_opcode(emit, VGPU10_OPCODE_SINCOS, FALSE);
7389
7390   if(inst->Instruction.Opcode == TGSI_OPCODE_SIN)
7391   {
7392      emit_dst_register(emit, &tmp_dst_x);  /* first destination register */
7393      emit_null_dst_register(emit);  /* second destination register */
7394   }
7395   else {
7396      emit_null_dst_register(emit);
7397      emit_dst_register(emit, &tmp_dst_x);
7398   }
7399
7400   emit_src_register(emit, &inst->Src[0]);
7401   end_emit_instruction(emit);
7402
7403   emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7404                        &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
7405                        inst->Instruction.Saturate,
7406                        inst->Instruction.Precise);
7407
7408   free_temp_indexes(emit);
7409
7410   return TRUE;
7411}
7412
7413
7414/**
7415 * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction.
7416 */
7417static boolean
7418emit_sle(struct svga_shader_emitter_v10 *emit,
7419         const struct tgsi_full_instruction *inst)
7420{
7421   /* dst = SLE(s0, s1):
7422    *   dst = s0 <= s1 ? 1.0 : 0.0  (per component)
7423    * Translates into:
7424    *   GE tmp, s1, s0;           tmp = s1 >= s0 : 0xffffffff : 0 (per comp)
7425    *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
7426    */
7427   unsigned tmp = get_temp_index(emit);
7428   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7429   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7430   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7431   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7432
7433   /* GE tmp, s1, s0 */
7434   emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1],
7435                        &inst->Src[0]);
7436
7437   /* MOVC dst, tmp, one, zero */
7438   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7439                        &one, &zero);
7440
7441   free_temp_indexes(emit);
7442
7443   return TRUE;
7444}
7445
7446
7447/**
7448 * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction.
7449 */
7450static boolean
7451emit_slt(struct svga_shader_emitter_v10 *emit,
7452         const struct tgsi_full_instruction *inst)
7453{
7454   /* dst = SLT(s0, s1):
7455    *   dst = s0 < s1 ? 1.0 : 0.0  (per component)
7456    * Translates into:
7457    *   LT tmp, s0, s1;           tmp = s0 < s1 ? 0xffffffff : 0 (per comp)
7458    *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
7459    */
7460   unsigned tmp = get_temp_index(emit);
7461   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7462   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7463   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7464   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7465
7466   /* LT tmp, s0, s1 */
7467   emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0],
7468                        &inst->Src[1]);
7469
7470   /* MOVC dst, tmp, one, zero */
7471   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7472                        &one, &zero);
7473
7474   free_temp_indexes(emit);
7475
7476   return TRUE;
7477}
7478
7479
7480/**
7481 * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction.
7482 */
7483static boolean
7484emit_sne(struct svga_shader_emitter_v10 *emit,
7485         const struct tgsi_full_instruction *inst)
7486{
7487   /* dst = SNE(s0, s1):
7488    *   dst = s0 != s1 ? 1.0 : 0.0  (per component)
7489    * Translates into:
7490    *   EQ tmp, s0, s1;           tmp = s0 == s1 : 0xffffffff : 0 (per comp)
7491    *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
7492    */
7493   unsigned tmp = get_temp_index(emit);
7494   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7495   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7496   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7497   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7498
7499   /* NE tmp, s0, s1 */
7500   emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0],
7501                        &inst->Src[1]);
7502
7503   /* MOVC dst, tmp, one, zero */
7504   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7505                        &one, &zero);
7506
7507   free_temp_indexes(emit);
7508
7509   return TRUE;
7510}
7511
7512
7513/**
7514 * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction.
7515 */
7516static boolean
7517emit_ssg(struct svga_shader_emitter_v10 *emit,
7518         const struct tgsi_full_instruction *inst)
7519{
7520   /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
7521    * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
7522    * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
7523    * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
7524    * Translates into:
7525    *   LT tmp1, src, zero;           tmp1 = src < zero ? 0xffffffff : 0 (per comp)
7526    *   MOVC tmp2, tmp1, -1.0, 0.0;   tmp2 = tmp1 ? -1.0 : 0.0 (per component)
7527    *   LT tmp1, zero, src;           tmp1 = zero < src ? 0xffffffff : 0 (per comp)
7528    *   MOVC dst, tmp1, 1.0, tmp2;    dst = tmp1 ? 1.0 : tmp2 (per component)
7529    */
7530   struct tgsi_full_src_register zero =
7531      make_immediate_reg_float(emit, 0.0f);
7532   struct tgsi_full_src_register one =
7533      make_immediate_reg_float(emit, 1.0f);
7534   struct tgsi_full_src_register neg_one =
7535      make_immediate_reg_float(emit, -1.0f);
7536
7537   unsigned tmp1 = get_temp_index(emit);
7538   struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
7539   struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
7540
7541   unsigned tmp2 = get_temp_index(emit);
7542   struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
7543   struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
7544
7545   emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0],
7546                        &zero);
7547   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src,
7548                        &neg_one, &zero);
7549   emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero,
7550                        &inst->Src[0]);
7551   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src,
7552                        &one, &tmp2_src);
7553
7554   free_temp_indexes(emit);
7555
7556   return TRUE;
7557}
7558
7559
7560/**
7561 * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction.
7562 */
7563static boolean
7564emit_issg(struct svga_shader_emitter_v10 *emit,
7565          const struct tgsi_full_instruction *inst)
7566{
7567   /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0
7568    * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0
7569    * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0
7570    * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0
7571    * Translates into:
7572    *   ILT tmp1, src, 0              tmp1 = src < 0 ? -1 : 0 (per component)
7573    *   ILT tmp2, 0, src              tmp2 = 0 < src ? -1 : 0 (per component)
7574    *   IADD dst, tmp1, neg(tmp2)     dst  = tmp1 - tmp2      (per component)
7575    */
7576   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7577
7578   unsigned tmp1 = get_temp_index(emit);
7579   struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
7580   struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
7581
7582   unsigned tmp2 = get_temp_index(emit);
7583   struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
7584   struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
7585
7586   struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src);
7587
7588   emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst,
7589                        &inst->Src[0], &zero);
7590   emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst,
7591                        &zero, &inst->Src[0]);
7592   emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0],
7593                        &tmp1_src, &neg_tmp2);
7594
7595   free_temp_indexes(emit);
7596
7597   return TRUE;
7598}
7599
7600
7601/**
7602 * Emit a comparison instruction.  The dest register will get
7603 * 0 or ~0 values depending on the outcome of comparing src0 to src1.
7604 */
7605static void
7606emit_comparison(struct svga_shader_emitter_v10 *emit,
7607                SVGA3dCmpFunc func,
7608                const struct tgsi_full_dst_register *dst,
7609                const struct tgsi_full_src_register *src0,
7610                const struct tgsi_full_src_register *src1)
7611{
7612   struct tgsi_full_src_register immediate;
7613   VGPU10OpcodeToken0 opcode0;
7614   boolean swapSrc = FALSE;
7615
7616   /* Sanity checks for svga vs. gallium enums */
7617   STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1));
7618   STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1));
7619
7620   opcode0.value = 0;
7621
7622   switch (func) {
7623   case SVGA3D_CMP_NEVER:
7624      immediate = make_immediate_reg_int(emit, 0);
7625      /* MOV dst, {0} */
7626      begin_emit_instruction(emit);
7627      emit_dword(emit, VGPU10_OPCODE_MOV);
7628      emit_dst_register(emit, dst);
7629      emit_src_register(emit, &immediate);
7630      end_emit_instruction(emit);
7631      return;
7632   case SVGA3D_CMP_ALWAYS:
7633      immediate = make_immediate_reg_int(emit, -1);
7634      /* MOV dst, {-1} */
7635      begin_emit_instruction(emit);
7636      emit_dword(emit, VGPU10_OPCODE_MOV);
7637      emit_dst_register(emit, dst);
7638      emit_src_register(emit, &immediate);
7639      end_emit_instruction(emit);
7640      return;
7641   case SVGA3D_CMP_LESS:
7642      opcode0.opcodeType = VGPU10_OPCODE_LT;
7643      break;
7644   case SVGA3D_CMP_EQUAL:
7645      opcode0.opcodeType = VGPU10_OPCODE_EQ;
7646      break;
7647   case SVGA3D_CMP_LESSEQUAL:
7648      opcode0.opcodeType = VGPU10_OPCODE_GE;
7649      swapSrc = TRUE;
7650      break;
7651   case SVGA3D_CMP_GREATER:
7652      opcode0.opcodeType = VGPU10_OPCODE_LT;
7653      swapSrc = TRUE;
7654      break;
7655   case SVGA3D_CMP_NOTEQUAL:
7656      opcode0.opcodeType = VGPU10_OPCODE_NE;
7657      break;
7658   case SVGA3D_CMP_GREATEREQUAL:
7659      opcode0.opcodeType = VGPU10_OPCODE_GE;
7660      break;
7661   default:
7662      assert(!"Unexpected comparison mode");
7663      opcode0.opcodeType = VGPU10_OPCODE_EQ;
7664   }
7665
7666   begin_emit_instruction(emit);
7667   emit_dword(emit, opcode0.value);
7668   emit_dst_register(emit, dst);
7669   if (swapSrc) {
7670      emit_src_register(emit, src1);
7671      emit_src_register(emit, src0);
7672   }
7673   else {
7674      emit_src_register(emit, src0);
7675      emit_src_register(emit, src1);
7676   }
7677   end_emit_instruction(emit);
7678}
7679
7680
7681/**
7682 * Get texel/address offsets for a texture instruction.
7683 */
7684static void
7685get_texel_offsets(const struct svga_shader_emitter_v10 *emit,
7686                  const struct tgsi_full_instruction *inst, int offsets[3])
7687{
7688   if (inst->Texture.NumOffsets == 1) {
7689      /* According to OpenGL Shader Language spec the offsets are only
7690       * fetched from a previously-declared immediate/literal.
7691       */
7692      const struct tgsi_texture_offset *off = inst->TexOffsets;
7693      const unsigned index = off[0].Index;
7694      const unsigned swizzleX = off[0].SwizzleX;
7695      const unsigned swizzleY = off[0].SwizzleY;
7696      const unsigned swizzleZ = off[0].SwizzleZ;
7697      const union tgsi_immediate_data *imm = emit->immediates[index];
7698
7699      assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE);
7700
7701      offsets[0] = imm[swizzleX].Int;
7702      offsets[1] = imm[swizzleY].Int;
7703      offsets[2] = imm[swizzleZ].Int;
7704   }
7705   else {
7706      offsets[0] = offsets[1] = offsets[2] = 0;
7707   }
7708}
7709
7710
7711/**
7712 * Set up the coordinate register for texture sampling.
7713 * When we're sampling from a RECT texture we have to scale the
7714 * unnormalized coordinate to a normalized coordinate.
7715 * We do that by multiplying the coordinate by an "extra" constant.
7716 * An alternative would be to use the RESINFO instruction to query the
7717 * texture's size.
7718 */
7719static struct tgsi_full_src_register
7720setup_texcoord(struct svga_shader_emitter_v10 *emit,
7721               unsigned unit,
7722               const struct tgsi_full_src_register *coord)
7723{
7724   if (emit->key.tex[unit].sampler_view && emit->key.tex[unit].unnormalized) {
7725      unsigned scale_index = emit->texcoord_scale_index[unit];
7726      unsigned tmp = get_temp_index(emit);
7727      struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7728      struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7729      struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index);
7730
7731      if (emit->key.tex[unit].texel_bias) {
7732         /* to fix texture coordinate rounding issue, 0.0001 offset is
7733          * been added. This fixes piglit test fbo-blit-scaled-linear. */
7734         struct tgsi_full_src_register offset =
7735            make_immediate_reg_float(emit, 0.0001f);
7736
7737         /* ADD tmp, coord, offset */
7738         emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_dst,
7739                              coord, &offset);
7740         /* MUL tmp, tmp, scale */
7741         emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
7742                              &tmp_src, &scale_src);
7743      }
7744      else {
7745         /* MUL tmp, coord, const[] */
7746         emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
7747                              coord, &scale_src);
7748      }
7749      return tmp_src;
7750   }
7751   else {
7752      /* use texcoord as-is */
7753      return *coord;
7754   }
7755}
7756
7757
7758/**
7759 * For SAMPLE_C instructions, emit the extra src register which indicates
7760 * the reference/comparision value.
7761 */
7762static void
7763emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit,
7764                          enum tgsi_texture_type target,
7765                          const struct tgsi_full_src_register *coord)
7766{
7767   struct tgsi_full_src_register coord_src_ref;
7768   int component;
7769
7770   assert(tgsi_is_shadow_target(target));
7771
7772   component = tgsi_util_get_shadow_ref_src_index(target) % 4;
7773   assert(component >= 0);
7774
7775   coord_src_ref = scalar_src(coord, component);
7776
7777   emit_src_register(emit, &coord_src_ref);
7778}
7779
7780
/**
 * Info for implementing texture swizzles.
 * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle()
 * functions use this to encapsulate the extra steps needed to perform
 * a texture swizzle, or shadow/depth comparisons.
 * The shadow/depth comparison is only done here for the cases where
 * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare).
 */
struct tex_swizzle_info
{
   boolean swizzled;        /**< unit's swizzle differs from identity XYZW */
   boolean shadow_compare;  /**< compare is emitted by end_tex_swizzle() */
   unsigned unit;           /**< texture unit, index into emit->key.tex[] */
   enum tgsi_texture_type texture_target;  /**< TGSI_TEXTURE_x */
   /** Temp register receiving the SAMPLE result; only meaningful when
    * swizzled or shadow_compare is set.
    */
   struct tgsi_full_src_register tmp_src;
   struct tgsi_full_dst_register tmp_dst;
   const struct tgsi_full_dst_register *inst_dst;  /**< instruction Dst[0] */
   const struct tgsi_full_src_register *coord_src; /**< instruction Src[0] */
};
7800
7801
7802/**
7803 * Do setup for handling texture swizzles or shadow compares.
7804 * \param unit  the texture unit
7805 * \param inst  the TGSI texture instruction
7806 * \param shadow_compare  do shadow/depth comparison?
7807 * \param swz  returns the swizzle info
7808 */
7809static void
7810begin_tex_swizzle(struct svga_shader_emitter_v10 *emit,
7811                  unsigned unit,
7812                  const struct tgsi_full_instruction *inst,
7813                  boolean shadow_compare,
7814                  struct tex_swizzle_info *swz)
7815{
7816   swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X ||
7817                    emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y ||
7818                    emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z ||
7819                    emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W);
7820
7821   swz->shadow_compare = shadow_compare;
7822   swz->texture_target = inst->Texture.Texture;
7823
7824   if (swz->swizzled || shadow_compare) {
7825      /* Allocate temp register for the result of the SAMPLE instruction
7826       * and the source of the MOV/compare/swizzle instructions.
7827       */
7828      unsigned tmp = get_temp_index(emit);
7829      swz->tmp_src = make_src_temp_reg(tmp);
7830      swz->tmp_dst = make_dst_temp_reg(tmp);
7831
7832      swz->unit = unit;
7833   }
7834   swz->inst_dst = &inst->Dst[0];
7835   swz->coord_src = &inst->Src[0];
7836
7837   emit->fs.shadow_compare_units |= shadow_compare << unit;
7838}
7839
7840
7841/**
7842 * Returns the register to put the SAMPLE instruction results into.
7843 * This will either be the original instruction dst reg (if no swizzle
7844 * and no shadow comparison) or a temporary reg if there is a swizzle.
7845 */
7846static const struct tgsi_full_dst_register *
7847get_tex_swizzle_dst(const struct tex_swizzle_info *swz)
7848{
7849   return (swz->swizzled || swz->shadow_compare)
7850      ? &swz->tmp_dst : swz->inst_dst;
7851}
7852
7853
7854/**
7855 * This emits the MOV instruction that actually implements a texture swizzle
7856 * and/or shadow comparison.
7857 */
7858static void
7859end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
7860                const struct tex_swizzle_info *swz)
7861{
7862   if (swz->shadow_compare) {
7863      /* Emit extra instructions to compare the fetched texel value against
7864       * a texture coordinate component.  The result of the comparison
7865       * is 0.0 or 1.0.
7866       */
7867      struct tgsi_full_src_register coord_src;
7868      struct tgsi_full_src_register texel_src =
7869         scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X);
7870      struct tgsi_full_src_register one =
7871         make_immediate_reg_float(emit, 1.0f);
7872      /* convert gallium comparison func to SVGA comparison func */
7873      SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1;
7874
7875      int component =
7876         tgsi_util_get_shadow_ref_src_index(swz->texture_target) % 4;
7877      assert(component >= 0);
7878      coord_src = scalar_src(swz->coord_src, component);
7879
7880      /* COMPARE tmp, coord, texel */
7881      emit_comparison(emit, compare_func,
7882                      &swz->tmp_dst, &coord_src, &texel_src);
7883
7884      /* AND dest, tmp, {1.0} */
7885      begin_emit_instruction(emit);
7886      emit_opcode(emit, VGPU10_OPCODE_AND, FALSE);
7887      if (swz->swizzled) {
7888         emit_dst_register(emit, &swz->tmp_dst);
7889      }
7890      else {
7891         emit_dst_register(emit, swz->inst_dst);
7892      }
7893      emit_src_register(emit, &swz->tmp_src);
7894      emit_src_register(emit, &one);
7895      end_emit_instruction(emit);
7896   }
7897
7898   if (swz->swizzled) {
7899      unsigned swz_r = emit->key.tex[swz->unit].swizzle_r;
7900      unsigned swz_g = emit->key.tex[swz->unit].swizzle_g;
7901      unsigned swz_b = emit->key.tex[swz->unit].swizzle_b;
7902      unsigned swz_a = emit->key.tex[swz->unit].swizzle_a;
7903      unsigned writemask_0 = 0, writemask_1 = 0;
7904      boolean int_tex = is_integer_type(emit->sampler_return_type[swz->unit]);
7905
7906      /* Swizzle w/out zero/one terms */
7907      struct tgsi_full_src_register src_swizzled =
7908         swizzle_src(&swz->tmp_src,
7909                     swz_r < PIPE_SWIZZLE_0 ? swz_r : PIPE_SWIZZLE_X,
7910                     swz_g < PIPE_SWIZZLE_0 ? swz_g : PIPE_SWIZZLE_Y,
7911                     swz_b < PIPE_SWIZZLE_0 ? swz_b : PIPE_SWIZZLE_Z,
7912                     swz_a < PIPE_SWIZZLE_0 ? swz_a : PIPE_SWIZZLE_W);
7913
7914      /* MOV dst, color(tmp).<swizzle> */
7915      emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
7916                           swz->inst_dst, &src_swizzled);
7917
7918      /* handle swizzle zero terms */
7919      writemask_0 = (((swz_r == PIPE_SWIZZLE_0) << 0) |
7920                     ((swz_g == PIPE_SWIZZLE_0) << 1) |
7921                     ((swz_b == PIPE_SWIZZLE_0) << 2) |
7922                     ((swz_a == PIPE_SWIZZLE_0) << 3));
7923      writemask_0 &= swz->inst_dst->Register.WriteMask;
7924
7925      if (writemask_0) {
7926         struct tgsi_full_src_register zero = int_tex ?
7927            make_immediate_reg_int(emit, 0) :
7928            make_immediate_reg_float(emit, 0.0f);
7929         struct tgsi_full_dst_register dst =
7930            writemask_dst(swz->inst_dst, writemask_0);
7931
7932         /* MOV dst.writemask_0, {0,0,0,0} */
7933         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &zero);
7934      }
7935
7936      /* handle swizzle one terms */
7937      writemask_1 = (((swz_r == PIPE_SWIZZLE_1) << 0) |
7938                     ((swz_g == PIPE_SWIZZLE_1) << 1) |
7939                     ((swz_b == PIPE_SWIZZLE_1) << 2) |
7940                     ((swz_a == PIPE_SWIZZLE_1) << 3));
7941      writemask_1 &= swz->inst_dst->Register.WriteMask;
7942
7943      if (writemask_1) {
7944         struct tgsi_full_src_register one = int_tex ?
7945            make_immediate_reg_int(emit, 1) :
7946            make_immediate_reg_float(emit, 1.0f);
7947         struct tgsi_full_dst_register dst =
7948            writemask_dst(swz->inst_dst, writemask_1);
7949
7950         /* MOV dst.writemask_1, {1,1,1,1} */
7951         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one);
7952      }
7953   }
7954}
7955
7956
7957/**
7958 * Emit code for TGSI_OPCODE_SAMPLE instruction.
7959 */
7960static boolean
7961emit_sample(struct svga_shader_emitter_v10 *emit,
7962            const struct tgsi_full_instruction *inst)
7963{
7964   const unsigned resource_unit = inst->Src[1].Register.Index;
7965   const unsigned sampler_unit = inst->Src[2].Register.Index;
7966   struct tgsi_full_src_register coord;
7967   int offsets[3];
7968   struct tex_swizzle_info swz_info;
7969
7970   begin_tex_swizzle(emit, sampler_unit, inst, FALSE, &swz_info);
7971
7972   get_texel_offsets(emit, inst, offsets);
7973
7974   coord = setup_texcoord(emit, resource_unit, &inst->Src[0]);
7975
7976   /* SAMPLE dst, coord(s0), resource, sampler */
7977   begin_emit_instruction(emit);
7978
7979   /* NOTE: for non-fragment shaders, we should use VGPU10_OPCODE_SAMPLE_L
7980    * with LOD=0.  But our virtual GPU accepts this as-is.
7981    */
7982   emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE,
7983                      inst->Instruction.Saturate, offsets);
7984   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
7985   emit_src_register(emit, &coord);
7986   emit_resource_register(emit, resource_unit);
7987   emit_sampler_register(emit, sampler_unit);
7988   end_emit_instruction(emit);
7989
7990   end_tex_swizzle(emit, &swz_info);
7991
7992   free_temp_indexes(emit);
7993
7994   return TRUE;
7995}
7996
7997
7998/**
7999 * Check if a texture instruction is valid.
8000 * An example of an invalid texture instruction is doing shadow comparison
8001 * with an integer-valued texture.
8002 * If we detect an invalid texture instruction, we replace it with:
8003 *   MOV dst, {1,1,1,1};
8004 * \return TRUE if valid, FALSE if invalid.
8005 */
8006static boolean
8007is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit,
8008                         const struct tgsi_full_instruction *inst)
8009{
8010   const unsigned unit = inst->Src[1].Register.Index;
8011   const enum tgsi_texture_type target = inst->Texture.Texture;
8012   boolean valid = TRUE;
8013
8014   if (tgsi_is_shadow_target(target) &&
8015       is_integer_type(emit->sampler_return_type[unit])) {
8016      debug_printf("Invalid SAMPLE_C with an integer texture!\n");
8017      valid = FALSE;
8018   }
8019   /* XXX might check for other conditions in the future here */
8020
8021   if (!valid) {
8022      /* emit a MOV dst, {1,1,1,1} instruction. */
8023      struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
8024      begin_emit_instruction(emit);
8025      emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
8026      emit_dst_register(emit, &inst->Dst[0]);
8027      emit_src_register(emit, &one);
8028      end_emit_instruction(emit);
8029   }
8030
8031   return valid;
8032}
8033
8034
8035/**
8036 * Emit code for TGSI_OPCODE_TEX (simple texture lookup)
8037 */
8038static boolean
8039emit_tex(struct svga_shader_emitter_v10 *emit,
8040         const struct tgsi_full_instruction *inst)
8041{
8042   const uint unit = inst->Src[1].Register.Index;
8043   const enum tgsi_texture_type target = inst->Texture.Texture;
8044   VGPU10_OPCODE_TYPE opcode;
8045   struct tgsi_full_src_register coord;
8046   int offsets[3];
8047   struct tex_swizzle_info swz_info;
8048
8049   /* check that the sampler returns a float */
8050   if (!is_valid_tex_instruction(emit, inst))
8051      return TRUE;
8052
8053   begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
8054
8055   get_texel_offsets(emit, inst, offsets);
8056
8057   coord = setup_texcoord(emit, unit, &inst->Src[0]);
8058
8059   /* SAMPLE dst, coord(s0), resource, sampler */
8060   begin_emit_instruction(emit);
8061
8062   if (tgsi_is_shadow_target(target))
8063      opcode = VGPU10_OPCODE_SAMPLE_C;
8064   else
8065      opcode = VGPU10_OPCODE_SAMPLE;
8066
8067   emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
8068   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8069   emit_src_register(emit, &coord);
8070   emit_resource_register(emit, unit);
8071   emit_sampler_register(emit, unit);
8072   if (opcode == VGPU10_OPCODE_SAMPLE_C) {
8073      emit_tex_compare_refcoord(emit, target, &coord);
8074   }
8075   end_emit_instruction(emit);
8076
8077   end_tex_swizzle(emit, &swz_info);
8078
8079   free_temp_indexes(emit);
8080
8081   return TRUE;
8082}
8083
8084/**
8085 * Emit code for TGSI_OPCODE_TG4 (texture lookup for texture gather)
8086 */
8087static boolean
8088emit_tg4(struct svga_shader_emitter_v10 *emit,
8089         const struct tgsi_full_instruction *inst)
8090{
8091   const uint unit = inst->Src[2].Register.Index;
8092   struct tgsi_full_src_register src;
8093   struct tgsi_full_src_register offset_src, sampler, ref;
8094   int offsets[3];
8095
8096   /* check that the sampler returns a float */
8097   if (!is_valid_tex_instruction(emit, inst))
8098      return TRUE;
8099
8100   if (emit->version >= 50) {
8101      unsigned target = inst->Texture.Texture;
8102      int index = inst->Src[1].Register.Index;
8103      const union tgsi_immediate_data *imm = emit->immediates[index];
8104      int select_comp  = imm[inst->Src[1].Register.SwizzleX].Int;
8105      unsigned select_swizzle = PIPE_SWIZZLE_X;
8106
8107      if (!tgsi_is_shadow_target(target)) {
8108         switch (select_comp) {
8109         case 0:
8110            select_swizzle = emit->key.tex[unit].swizzle_r;
8111            break;
8112         case 1:
8113            select_swizzle = emit->key.tex[unit].swizzle_g;
8114            break;
8115         case 2:
8116            select_swizzle = emit->key.tex[unit].swizzle_b;
8117            break;
8118         case 3:
8119            select_swizzle = emit->key.tex[unit].swizzle_a;
8120            break;
8121         default:
8122            assert(!"Unexpected component in texture gather swizzle");
8123         }
8124      }
8125      else {
8126         select_swizzle = emit->key.tex[unit].swizzle_r;
8127      }
8128
8129      if (select_swizzle == PIPE_SWIZZLE_1) {
8130         src = make_immediate_reg_float(emit, 1.0);
8131         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8132         return TRUE;
8133      }
8134      else if (select_swizzle == PIPE_SWIZZLE_0) {
8135         src = make_immediate_reg_float(emit, 0.0);
8136         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8137         return TRUE;
8138      }
8139
8140      src = setup_texcoord(emit, unit, &inst->Src[0]);
8141
8142      /* GATHER4 dst, coord, resource, sampler */
8143      /* GATHER4_C dst, coord, resource, sampler ref */
8144      /* GATHER4_PO dst, coord, offset resource, sampler */
8145      /* GATHER4_PO_C dst, coord, offset resource, sampler, ref */
8146      begin_emit_instruction(emit);
8147      if (inst->Texture.NumOffsets == 1) {
8148         if (tgsi_is_shadow_target(target)) {
8149            emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO_C,
8150                        inst->Instruction.Saturate);
8151         }
8152         else {
8153            emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO,
8154                        inst->Instruction.Saturate);
8155         }
8156      }
8157      else {
8158         if (tgsi_is_shadow_target(target)) {
8159            emit_opcode(emit, VGPU10_OPCODE_GATHER4_C,
8160                        inst->Instruction.Saturate);
8161         }
8162         else {
8163            emit_opcode(emit, VGPU10_OPCODE_GATHER4,
8164                        inst->Instruction.Saturate);
8165         }
8166      }
8167
8168      emit_dst_register(emit, &inst->Dst[0]);
8169      emit_src_register(emit, &src);
8170      if (inst->Texture.NumOffsets == 1) {
8171         /* offset */
8172         offset_src = make_src_reg(inst->TexOffsets[0].File,
8173                                   inst->TexOffsets[0].Index);
8174         offset_src = swizzle_src(&offset_src, inst->TexOffsets[0].SwizzleX,
8175                                  inst->TexOffsets[0].SwizzleY,
8176                                  inst->TexOffsets[0].SwizzleZ,
8177                                  TGSI_SWIZZLE_W);
8178         emit_src_register(emit, &offset_src);
8179      }
8180
8181      /* resource */
8182      emit_resource_register(emit, unit);
8183
8184      /* sampler */
8185      sampler = make_src_reg(TGSI_FILE_SAMPLER, unit);
8186      sampler.Register.SwizzleX =
8187      sampler.Register.SwizzleY =
8188      sampler.Register.SwizzleZ =
8189      sampler.Register.SwizzleW = select_swizzle;
8190      emit_src_register(emit, &sampler);
8191
8192      if (tgsi_is_shadow_target(target)) {
8193         /* ref */
8194         if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
8195            ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8196            emit_tex_compare_refcoord(emit, target, &ref);
8197         }
8198         else {
8199            emit_tex_compare_refcoord(emit, target, &src);
8200         }
8201      }
8202
8203      end_emit_instruction(emit);
8204      free_temp_indexes(emit);
8205   }
8206   else {
8207      /* Only a single channel is supported in SM4_1 and we report
8208       * PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS = 1.
8209       * Only the 0th component will be gathered.
8210       */
8211      switch (emit->key.tex[unit].swizzle_r) {
8212      case PIPE_SWIZZLE_X:
8213         get_texel_offsets(emit, inst, offsets);
8214         src = setup_texcoord(emit, unit, &inst->Src[0]);
8215
8216         /* Gather dst, coord, resource, sampler */
8217         begin_emit_instruction(emit);
8218         emit_sample_opcode(emit, VGPU10_OPCODE_GATHER4,
8219                            inst->Instruction.Saturate, offsets);
8220         emit_dst_register(emit, &inst->Dst[0]);
8221         emit_src_register(emit, &src);
8222         emit_resource_register(emit, unit);
8223
8224         /* sampler */
8225         sampler = make_src_reg(TGSI_FILE_SAMPLER, unit);
8226         sampler.Register.SwizzleX =
8227         sampler.Register.SwizzleY =
8228         sampler.Register.SwizzleZ =
8229         sampler.Register.SwizzleW = PIPE_SWIZZLE_X;
8230         emit_src_register(emit, &sampler);
8231
8232         end_emit_instruction(emit);
8233         break;
8234      case PIPE_SWIZZLE_W:
8235      case PIPE_SWIZZLE_1:
8236         src = make_immediate_reg_float(emit, 1.0);
8237         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8238         break;
8239      case PIPE_SWIZZLE_Y:
8240      case PIPE_SWIZZLE_Z:
8241      case PIPE_SWIZZLE_0:
8242      default:
8243         src = make_immediate_reg_float(emit, 0.0);
8244         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8245         break;
8246      }
8247   }
8248
8249   return TRUE;
8250}
8251
8252
8253
8254/**
8255 * Emit code for TGSI_OPCODE_TEX2 (texture lookup for shadow cube map arrays)
8256 */
8257static boolean
8258emit_tex2(struct svga_shader_emitter_v10 *emit,
8259         const struct tgsi_full_instruction *inst)
8260{
8261   const uint unit = inst->Src[2].Register.Index;
8262   unsigned target = inst->Texture.Texture;
8263   struct tgsi_full_src_register coord, ref;
8264   int offsets[3];
8265   struct tex_swizzle_info swz_info;
8266
8267   /* check that the sampler returns a float */
8268   if (!is_valid_tex_instruction(emit, inst))
8269      return TRUE;
8270
8271   begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
8272
8273   get_texel_offsets(emit, inst, offsets);
8274
8275   coord = setup_texcoord(emit, unit, &inst->Src[0]);
8276   ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8277
8278   /* SAMPLE_C dst, coord, resource, sampler, ref */
8279   begin_emit_instruction(emit);
8280   emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_C,
8281                      inst->Instruction.Saturate, offsets);
8282   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8283   emit_src_register(emit, &coord);
8284   emit_resource_register(emit, unit);
8285   emit_sampler_register(emit, unit);
8286   emit_tex_compare_refcoord(emit, target, &ref);
8287   end_emit_instruction(emit);
8288
8289   end_tex_swizzle(emit, &swz_info);
8290
8291   free_temp_indexes(emit);
8292
8293   return TRUE;
8294}
8295
8296
8297/**
8298 * Emit code for TGSI_OPCODE_TXP (projective texture)
8299 */
8300static boolean
8301emit_txp(struct svga_shader_emitter_v10 *emit,
8302         const struct tgsi_full_instruction *inst)
8303{
8304   const uint unit = inst->Src[1].Register.Index;
8305   const enum tgsi_texture_type target = inst->Texture.Texture;
8306   VGPU10_OPCODE_TYPE opcode;
8307   int offsets[3];
8308   unsigned tmp = get_temp_index(emit);
8309   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8310   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8311   struct tgsi_full_src_register src0_wwww =
8312      scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
8313   struct tgsi_full_src_register coord;
8314   struct tex_swizzle_info swz_info;
8315
8316   /* check that the sampler returns a float */
8317   if (!is_valid_tex_instruction(emit, inst))
8318      return TRUE;
8319
8320   begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
8321
8322   get_texel_offsets(emit, inst, offsets);
8323
8324   coord = setup_texcoord(emit, unit, &inst->Src[0]);
8325
8326   /* DIV tmp, coord, coord.wwww */
8327   emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst,
8328                        &coord, &src0_wwww);
8329
8330   /* SAMPLE dst, coord(tmp), resource, sampler */
8331   begin_emit_instruction(emit);
8332
8333   if (tgsi_is_shadow_target(target))
8334      /* NOTE: for non-fragment shaders, we should use
8335       * VGPU10_OPCODE_SAMPLE_C_LZ, but our virtual GPU accepts this as-is.
8336       */
8337      opcode = VGPU10_OPCODE_SAMPLE_C;
8338   else
8339      opcode = VGPU10_OPCODE_SAMPLE;
8340
8341   emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
8342   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8343   emit_src_register(emit, &tmp_src);  /* projected coord */
8344   emit_resource_register(emit, unit);
8345   emit_sampler_register(emit, unit);
8346   if (opcode == VGPU10_OPCODE_SAMPLE_C) {
8347      emit_tex_compare_refcoord(emit, target, &tmp_src);
8348   }
8349   end_emit_instruction(emit);
8350
8351   end_tex_swizzle(emit, &swz_info);
8352
8353   free_temp_indexes(emit);
8354
8355   return TRUE;
8356}
8357
8358
8359/**
8360 * Emit code for TGSI_OPCODE_TXD (explicit derivatives)
8361 */
8362static boolean
8363emit_txd(struct svga_shader_emitter_v10 *emit,
8364         const struct tgsi_full_instruction *inst)
8365{
8366   const uint unit = inst->Src[3].Register.Index;
8367   const enum tgsi_texture_type target = inst->Texture.Texture;
8368   int offsets[3];
8369   struct tgsi_full_src_register coord;
8370   struct tex_swizzle_info swz_info;
8371
8372   begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
8373                     &swz_info);
8374
8375   get_texel_offsets(emit, inst, offsets);
8376
8377   coord = setup_texcoord(emit, unit, &inst->Src[0]);
8378
8379   /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */
8380   begin_emit_instruction(emit);
8381   emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D,
8382                      inst->Instruction.Saturate, offsets);
8383   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8384   emit_src_register(emit, &coord);
8385   emit_resource_register(emit, unit);
8386   emit_sampler_register(emit, unit);
8387   emit_src_register(emit, &inst->Src[1]);  /* Xderiv */
8388   emit_src_register(emit, &inst->Src[2]);  /* Yderiv */
8389   end_emit_instruction(emit);
8390
8391   end_tex_swizzle(emit, &swz_info);
8392
8393   free_temp_indexes(emit);
8394
8395   return TRUE;
8396}
8397
8398
8399/**
8400 * Emit code for TGSI_OPCODE_TXF (texel fetch)
8401 */
8402static boolean
8403emit_txf(struct svga_shader_emitter_v10 *emit,
8404         const struct tgsi_full_instruction *inst)
8405{
8406   const uint unit = inst->Src[1].Register.Index;
8407   const boolean msaa = tgsi_is_msaa_target(inst->Texture.Texture)
8408      && emit->key.tex[unit].num_samples > 1;
8409   int offsets[3];
8410   struct tex_swizzle_info swz_info;
8411
8412   begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
8413
8414   get_texel_offsets(emit, inst, offsets);
8415
8416   if (msaa) {
8417      assert(emit->key.tex[unit].num_samples > 1);
8418
8419      /* Fetch one sample from an MSAA texture */
8420      struct tgsi_full_src_register sampleIndex =
8421         scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
8422      /* LD_MS dst, coord(s0), resource, sampleIndex */
8423      begin_emit_instruction(emit);
8424      emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS,
8425                         inst->Instruction.Saturate, offsets);
8426      emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8427      emit_src_register(emit, &inst->Src[0]);
8428      emit_resource_register(emit, unit);
8429      emit_src_register(emit, &sampleIndex);
8430      end_emit_instruction(emit);
8431   }
8432   else {
8433      /* Fetch one texel specified by integer coordinate */
8434      /* LD dst, coord(s0), resource */
8435      begin_emit_instruction(emit);
8436      emit_sample_opcode(emit, VGPU10_OPCODE_LD,
8437                         inst->Instruction.Saturate, offsets);
8438      emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8439      emit_src_register(emit, &inst->Src[0]);
8440      emit_resource_register(emit, unit);
8441      end_emit_instruction(emit);
8442   }
8443
8444   end_tex_swizzle(emit, &swz_info);
8445
8446   free_temp_indexes(emit);
8447
8448   return TRUE;
8449}
8450
8451
8452/**
8453 * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias)
8454 * or TGSI_OPCODE_TXB2 (for cube shadow maps).
8455 */
8456static boolean
8457emit_txl_txb(struct svga_shader_emitter_v10 *emit,
8458             const struct tgsi_full_instruction *inst)
8459{
8460   const enum tgsi_texture_type target = inst->Texture.Texture;
8461   VGPU10_OPCODE_TYPE opcode;
8462   unsigned unit;
8463   int offsets[3];
8464   struct tgsi_full_src_register coord, lod_bias;
8465   struct tex_swizzle_info swz_info;
8466
8467   assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL ||
8468          inst->Instruction.Opcode == TGSI_OPCODE_TXB ||
8469          inst->Instruction.Opcode == TGSI_OPCODE_TXB2);
8470
8471   if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
8472      lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8473      unit = inst->Src[2].Register.Index;
8474   }
8475   else {
8476      lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
8477      unit = inst->Src[1].Register.Index;
8478   }
8479
8480   begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
8481                     &swz_info);
8482
8483   get_texel_offsets(emit, inst, offsets);
8484
8485   coord = setup_texcoord(emit, unit, &inst->Src[0]);
8486
8487   /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod(s3) */
8488   begin_emit_instruction(emit);
8489   if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
8490      opcode = VGPU10_OPCODE_SAMPLE_L;
8491   }
8492   else {
8493      opcode = VGPU10_OPCODE_SAMPLE_B;
8494   }
8495   emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
8496   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8497   emit_src_register(emit, &coord);
8498   emit_resource_register(emit, unit);
8499   emit_sampler_register(emit, unit);
8500   emit_src_register(emit, &lod_bias);
8501   end_emit_instruction(emit);
8502
8503   end_tex_swizzle(emit, &swz_info);
8504
8505   free_temp_indexes(emit);
8506
8507   return TRUE;
8508}
8509
8510
8511/**
8512 * Emit code for TGSI_OPCODE_TXL2 (explicit LOD) for cubemap array.
8513 */
8514static boolean
8515emit_txl2(struct svga_shader_emitter_v10 *emit,
8516          const struct tgsi_full_instruction *inst)
8517{
8518   unsigned target = inst->Texture.Texture;
8519   unsigned opcode, unit;
8520   int offsets[3];
8521   struct tgsi_full_src_register coord, lod;
8522   struct tex_swizzle_info swz_info;
8523
8524   assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL2);
8525
8526   lod = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8527   unit = inst->Src[2].Register.Index;
8528
8529   begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
8530                     &swz_info);
8531
8532   get_texel_offsets(emit, inst, offsets);
8533
8534   coord = setup_texcoord(emit, unit, &inst->Src[0]);
8535
8536   /* SAMPLE_L dst, coord(s0), resource, sampler, lod(s3) */
8537   begin_emit_instruction(emit);
8538   opcode = VGPU10_OPCODE_SAMPLE_L;
8539   emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
8540   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8541   emit_src_register(emit, &coord);
8542   emit_resource_register(emit, unit);
8543   emit_sampler_register(emit, unit);
8544   emit_src_register(emit, &lod);
8545   end_emit_instruction(emit);
8546
8547   end_tex_swizzle(emit, &swz_info);
8548
8549   free_temp_indexes(emit);
8550
8551   return TRUE;
8552}
8553
8554
8555/**
8556 * Emit code for TGSI_OPCODE_TXQ (texture query) instruction.
8557 */
8558static boolean
8559emit_txq(struct svga_shader_emitter_v10 *emit,
8560         const struct tgsi_full_instruction *inst)
8561{
8562   const uint unit = inst->Src[1].Register.Index;
8563
8564   if (emit->key.tex[unit].target == PIPE_BUFFER) {
8565      /* RESINFO does not support querying texture buffers, so we instead
8566       * store texture buffer sizes in shader constants, then copy them to
8567       * implement TXQ instead of emitting RESINFO.
8568       * MOV dst, const[texture_buffer_size_index[unit]]
8569       */
8570      struct tgsi_full_src_register size_src =
8571         make_src_const_reg(emit->texture_buffer_size_index[unit]);
8572      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src);
8573   } else {
8574      /* RESINFO dst, srcMipLevel, resource */
8575      begin_emit_instruction(emit);
8576      emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT);
8577      emit_dst_register(emit, &inst->Dst[0]);
8578      emit_src_register(emit, &inst->Src[0]);
8579      emit_resource_register(emit, unit);
8580      end_emit_instruction(emit);
8581   }
8582
8583   free_temp_indexes(emit);
8584
8585   return TRUE;
8586}
8587
8588
8589/**
8590 * Does this opcode produce a double-precision result?
8591 * XXX perhaps move this to a TGSI utility.
8592 */
8593static bool
8594opcode_has_dbl_dst(unsigned opcode)
8595{
8596   switch (opcode) {
8597   case TGSI_OPCODE_F2D:
8598   case TGSI_OPCODE_DABS:
8599   case TGSI_OPCODE_DADD:
8600   case TGSI_OPCODE_DFRAC:
8601   case TGSI_OPCODE_DMAX:
8602   case TGSI_OPCODE_DMIN:
8603   case TGSI_OPCODE_DMUL:
8604   case TGSI_OPCODE_DNEG:
8605   case TGSI_OPCODE_I2D:
8606   case TGSI_OPCODE_U2D:
8607      // XXX more TBD
8608      return true;
8609   default:
8610      return false;
8611   }
8612}
8613
8614
8615/**
8616 * Does this opcode use double-precision source registers?
8617 */
8618static bool
8619opcode_has_dbl_src(unsigned opcode)
8620{
8621   switch (opcode) {
8622   case TGSI_OPCODE_D2F:
8623   case TGSI_OPCODE_DABS:
8624   case TGSI_OPCODE_DADD:
8625   case TGSI_OPCODE_DFRAC:
8626   case TGSI_OPCODE_DMAX:
8627   case TGSI_OPCODE_DMIN:
8628   case TGSI_OPCODE_DMUL:
8629   case TGSI_OPCODE_DNEG:
8630   case TGSI_OPCODE_D2I:
8631   case TGSI_OPCODE_D2U:
8632      // XXX more TBD
8633      return true;
8634   default:
8635      return false;
8636   }
8637}
8638
8639
8640/**
8641 * Check that the swizzle for reading from a double-precision register
8642 * is valid.
8643 */
8644static void
8645check_double_src_swizzle(const struct tgsi_full_src_register *reg)
8646{
8647   assert((reg->Register.SwizzleX == PIPE_SWIZZLE_X &&
8648           reg->Register.SwizzleY == PIPE_SWIZZLE_Y) ||
8649          (reg->Register.SwizzleX == PIPE_SWIZZLE_Z &&
8650           reg->Register.SwizzleY == PIPE_SWIZZLE_W));
8651
8652   assert((reg->Register.SwizzleZ == PIPE_SWIZZLE_X &&
8653           reg->Register.SwizzleW == PIPE_SWIZZLE_Y) ||
8654          (reg->Register.SwizzleZ == PIPE_SWIZZLE_Z &&
8655           reg->Register.SwizzleW == PIPE_SWIZZLE_W));
8656}
8657
8658
8659/**
8660 * Check that the writemask for a double-precision instruction is valid.
8661 */
8662static void
8663check_double_dst_writemask(const struct tgsi_full_instruction *inst)
8664{
8665   ASSERTED unsigned writemask = inst->Dst[0].Register.WriteMask;
8666
8667   switch (inst->Instruction.Opcode) {
8668   case TGSI_OPCODE_DABS:
8669   case TGSI_OPCODE_DADD:
8670   case TGSI_OPCODE_DFRAC:
8671   case TGSI_OPCODE_DNEG:
8672   case TGSI_OPCODE_DMAD:
8673   case TGSI_OPCODE_DMAX:
8674   case TGSI_OPCODE_DMIN:
8675   case TGSI_OPCODE_DMUL:
8676   case TGSI_OPCODE_DRCP:
8677   case TGSI_OPCODE_DSQRT:
8678   case TGSI_OPCODE_F2D:
8679      assert(writemask == TGSI_WRITEMASK_XYZW ||
8680             writemask == TGSI_WRITEMASK_XY ||
8681             writemask == TGSI_WRITEMASK_ZW);
8682      break;
8683   case TGSI_OPCODE_DSEQ:
8684   case TGSI_OPCODE_DSGE:
8685   case TGSI_OPCODE_DSNE:
8686   case TGSI_OPCODE_DSLT:
8687   case TGSI_OPCODE_D2I:
8688   case TGSI_OPCODE_D2U:
8689      /* Write to 1 or 2 components only */
8690      assert(util_bitcount(writemask) <= 2);
8691      break;
8692   default:
8693      /* XXX this list may be incomplete */
8694      ;
8695   }
8696}
8697
8698
8699/**
8700 * Double-precision absolute value.
8701 */
8702static boolean
8703emit_dabs(struct svga_shader_emitter_v10 *emit,
8704          const struct tgsi_full_instruction *inst)
8705{
8706   assert(emit->version >= 50);
8707   check_double_src_swizzle(&inst->Src[0]);
8708   check_double_dst_writemask(inst);
8709
8710   struct tgsi_full_src_register abs_src = absolute_src(&inst->Src[0]);
8711
8712   /* DMOV dst, |src| */
8713   emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &abs_src);
8714
8715   return TRUE;
8716}
8717
8718
8719/**
8720 * Double-precision negation
8721 */
8722static boolean
8723emit_dneg(struct svga_shader_emitter_v10 *emit,
8724          const struct tgsi_full_instruction *inst)
8725{
8726   assert(emit->version >= 50);
8727   check_double_src_swizzle(&inst->Src[0]);
8728   check_double_dst_writemask(inst);
8729
8730   struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]);
8731
8732   /* DMOV dst, -src */
8733   emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &neg_src);
8734
8735   return TRUE;
8736}
8737
8738
8739/**
8740 * SM5 has no DMAD opcode.  Implement negation with DMUL/DADD.
8741 */
8742static boolean
8743emit_dmad(struct svga_shader_emitter_v10 *emit,
8744          const struct tgsi_full_instruction *inst)
8745{
8746   assert(emit->version >= 50);
8747   check_double_src_swizzle(&inst->Src[0]);
8748   check_double_src_swizzle(&inst->Src[1]);
8749   check_double_src_swizzle(&inst->Src[2]);
8750   check_double_dst_writemask(inst);
8751
8752   unsigned tmp = get_temp_index(emit);
8753   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8754   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8755
8756   /* DMUL tmp, src[0], src[1] */
8757   emit_instruction_opn(emit, VGPU10_OPCODE_DMUL,
8758                        &tmp_dst, &inst->Src[0], &inst->Src[1], NULL,
8759                        FALSE, inst->Instruction.Precise);
8760
8761   /* DADD dst, tmp, src[2] */
8762   emit_instruction_opn(emit, VGPU10_OPCODE_DADD,
8763                        &inst->Dst[0], &tmp_src, &inst->Src[2], NULL,
8764                        inst->Instruction.Saturate, inst->Instruction.Precise);
8765   free_temp_indexes(emit);
8766
8767   return TRUE;
8768}
8769
8770
8771/**
8772 * Double precision reciprocal square root
8773 */
8774static boolean
8775emit_drsq(struct svga_shader_emitter_v10 *emit,
8776          const struct tgsi_full_dst_register *dst,
8777          const struct tgsi_full_src_register *src)
8778{
8779   assert(emit->version >= 50);
8780
8781   VGPU10OpcodeToken0 token0;
8782   begin_emit_instruction(emit);
8783
8784   token0.value = 0;
8785   token0.opcodeType = VGPU10_OPCODE_VMWARE;
8786   token0.vmwareOpcodeType = VGPU10_VMWARE_OPCODE_DRSQ;
8787   emit_dword(emit, token0.value);
8788
8789   emit_dst_register(emit, dst);
8790
8791   check_double_src_swizzle(src);
8792   emit_src_register(emit, src);
8793
8794   end_emit_instruction(emit);
8795
8796   return TRUE;
8797}
8798
8799
8800/**
8801 * There is no SM5 opcode for double precision square root.
8802 * It will be implemented with DRSQ.
8803 * dst = src * DRSQ(src)
8804 */
8805static boolean
8806emit_dsqrt(struct svga_shader_emitter_v10 *emit,
8807          const struct tgsi_full_instruction *inst)
8808{
8809   assert(emit->version >= 50);
8810
8811   check_double_src_swizzle(&inst->Src[0]);
8812
8813   /* temporary register to hold the source */
8814   unsigned tmp = get_temp_index(emit);
8815   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8816   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8817
8818   /* temporary register to hold the DEQ result */
8819   unsigned tmp_cond = get_temp_index(emit);
8820   struct tgsi_full_dst_register tmp_cond_dst = make_dst_temp_reg(tmp_cond);
8821   struct tgsi_full_dst_register tmp_cond_dst_xy =
8822      writemask_dst(&tmp_cond_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);
8823   struct tgsi_full_src_register tmp_cond_src = make_src_temp_reg(tmp_cond);
8824   struct tgsi_full_src_register tmp_cond_src_xy =
8825         swizzle_src(&tmp_cond_src,
8826                     PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
8827                     PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y);
8828
8829   /* The reciprocal square root of zero yields INF.
8830    * So if the source is 0, we replace it with 1 in the tmp register.
8831    * The later multiplication of zero in the original source will yield 0
8832    * in the result.
8833    */
8834
8835   /* tmp1 = (src == 0) ? 1 : src;
8836    *   EQ tmp1, 0, src
8837    *   MOVC tmp, tmp1, 1.0, src
8838    */
8839   struct tgsi_full_src_register zero =
8840               make_immediate_reg_double(emit, 0);
8841
8842   struct tgsi_full_src_register one =
8843               make_immediate_reg_double(emit, 1.0);
8844
8845   emit_instruction_op2(emit, VGPU10_OPCODE_DEQ, &tmp_cond_dst_xy,
8846                        &zero, &inst->Src[0]);
8847   emit_instruction_op3(emit, VGPU10_OPCODE_DMOVC, &tmp_dst,
8848                        &tmp_cond_src_xy, &one, &inst->Src[0]);
8849
8850   struct tgsi_full_dst_register tmp_rsq_dst = make_dst_temp_reg(tmp);
8851   struct tgsi_full_src_register tmp_rsq_src = make_src_temp_reg(tmp);
8852
8853   /* DRSQ tmp_rsq, tmp */
8854   emit_drsq(emit, &tmp_rsq_dst, &tmp_src);
8855
8856   /* DMUL dst, tmp_rsq, src[0] */
8857   emit_instruction_op2(emit, VGPU10_OPCODE_DMUL, &inst->Dst[0],
8858                        &tmp_rsq_src, &inst->Src[0]);
8859
8860   free_temp_indexes(emit);
8861
8862   return TRUE;
8863}
8864
8865
8866static boolean
8867emit_interp_offset(struct svga_shader_emitter_v10 *emit,
8868                   const struct tgsi_full_instruction *inst)
8869{
8870   assert(emit->version >= 50);
8871
8872   /* The src1.xy offset is a float with values in the range [-0.5, 0.5]
8873    * where (0,0) is the center of the pixel.  We need to translate that
8874    * into an integer offset on a 16x16 grid in the range [-8/16, 7/16].
8875    * Also need to flip the Y axis (I think).
8876    */
8877   unsigned tmp = get_temp_index(emit);
8878   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8879   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8880   struct tgsi_full_dst_register tmp_dst_xy =
8881      writemask_dst(&tmp_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);
8882   struct tgsi_full_src_register const16 =
8883      make_immediate_reg_float4(emit, 16.0f, -16.0, 0, 0);
8884
8885   /* MUL tmp.xy, src1, {16, -16, 0, 0} */
8886   emit_instruction_op2(emit, VGPU10_OPCODE_MUL,
8887                        &tmp_dst_xy, &inst->Src[1], &const16);
8888
8889   /* FTOI tmp.xy, tmp */
8890   emit_instruction_op1(emit, VGPU10_OPCODE_FTOI, &tmp_dst_xy, &tmp_src);
8891
8892   /* EVAL_SNAPPED dst, src0, tmp */
8893   emit_instruction_op2(emit, VGPU10_OPCODE_EVAL_SNAPPED,
8894                        &inst->Dst[0], &inst->Src[0], &tmp_src);
8895
8896   free_temp_indexes(emit);
8897
8898   return TRUE;
8899}
8900
8901
8902/**
8903 * Emit a simple instruction (like ADD, MUL, MIN, etc).
8904 */
8905static boolean
8906emit_simple(struct svga_shader_emitter_v10 *emit,
8907            const struct tgsi_full_instruction *inst)
8908{
8909   const enum tgsi_opcode opcode = inst->Instruction.Opcode;
8910   const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
8911   const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode);
8912   const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode);
8913   unsigned i;
8914
8915   if (inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP) {
8916      emit->current_loop_depth++;
8917   }
8918   else if (inst->Instruction.Opcode == TGSI_OPCODE_ENDLOOP) {
8919      emit->current_loop_depth--;
8920   }
8921
8922   begin_emit_instruction(emit);
8923   emit_opcode_precise(emit, translate_opcode(inst->Instruction.Opcode),
8924                       inst->Instruction.Saturate,
8925                       inst->Instruction.Precise);
8926   for (i = 0; i < op->num_dst; i++) {
8927      if (dbl_dst) {
8928         check_double_dst_writemask(inst);
8929      }
8930      emit_dst_register(emit, &inst->Dst[i]);
8931   }
8932   for (i = 0; i < op->num_src; i++) {
8933      if (dbl_src) {
8934         check_double_src_swizzle(&inst->Src[i]);
8935      }
8936      emit_src_register(emit, &inst->Src[i]);
8937   }
8938   end_emit_instruction(emit);
8939
8940   return TRUE;
8941}
8942
8943
8944/**
8945 * Emit MSB instruction (like IMSB, UMSB).
8946 *
8947 * GLSL returns the index starting from the LSB;
8948 * whereas in SM5, firstbit_hi/shi returns the index starting from the MSB.
8949 * To get correct location as per glsl from SM5 device, we should
8950 * return (31 - index) if returned index is not -1.
8951 */
8952static boolean
8953emit_msb(struct svga_shader_emitter_v10 *emit,
8954         const struct tgsi_full_instruction *inst)
8955{
8956   const struct tgsi_full_dst_register *index_dst = &inst->Dst[0];
8957
8958   assert(index_dst->Register.File != TGSI_FILE_OUTPUT);
8959
8960   struct tgsi_full_src_register index_src =
8961      make_src_reg(index_dst->Register.File, index_dst->Register.Index);
8962   struct tgsi_full_src_register imm31 =
8963      make_immediate_reg_int(emit, 31);
8964   imm31 = scalar_src(&imm31, TGSI_SWIZZLE_X);
8965   struct tgsi_full_src_register neg_one =
8966      make_immediate_reg_int(emit, -1);
8967   neg_one = scalar_src(&neg_one, TGSI_SWIZZLE_X);
8968   unsigned tmp = get_temp_index(emit);
8969   const struct tgsi_full_dst_register tmp_dst =
8970      make_dst_temp_reg(tmp);
8971   const struct tgsi_full_dst_register tmp_dst_x =
8972      writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
8973   const struct tgsi_full_src_register tmp_src_x =
8974       make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp, TGSI_SWIZZLE_X);
8975   int writemask = TGSI_WRITEMASK_X;
8976   int src_swizzle = TGSI_SWIZZLE_X;
8977   int dst_writemask = index_dst->Register.WriteMask;
8978
8979   emit_simple(emit, inst);
8980
8981   /* index conversion from SM5 to GLSL */
8982   while (writemask & dst_writemask) {
8983      struct tgsi_full_src_register index_src_comp =
8984         scalar_src(&index_src, src_swizzle);
8985      struct tgsi_full_dst_register index_dst_comp =
8986         writemask_dst(index_dst, writemask);
8987
8988      /* check if index_src_comp != -1 */
8989      emit_instruction_op2(emit, VGPU10_OPCODE_INE,
8990                           &tmp_dst_x, &index_src_comp, &neg_one);
8991
8992      /* if */
8993      emit_if(emit, &tmp_src_x);
8994
8995      index_src_comp = negate_src(&index_src_comp);
8996      /* SUB DST, IMM{31}, DST */
8997      emit_instruction_op2(emit, VGPU10_OPCODE_IADD,
8998                           &index_dst_comp, &imm31, &index_src_comp);
8999
9000      /* endif */
9001      emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
9002
9003      writemask = writemask << 1;
9004      src_swizzle = src_swizzle + 1;
9005   }
9006   free_temp_indexes(emit);
9007   return TRUE;
9008}
9009
9010
9011/**
9012 * Emit a BFE instruction (like UBFE, IBFE).
9013 * tgsi representation:
9014 * U/IBFE dst, value, offset, width
9015 * SM5 representation:
9016 * U/IBFE dst, width, offset, value
9017 * Note: SM5 has width & offset range (0-31);
9018 *      whereas GLSL has width & offset range (0-32)
9019 */
9020static boolean
9021emit_bfe(struct svga_shader_emitter_v10 *emit,
9022         const struct tgsi_full_instruction *inst)
9023{
9024   const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9025   struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32);
9026   imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X);
9027   struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
9028   zero = scalar_src(&zero, TGSI_SWIZZLE_X);
9029
9030   unsigned tmp1 = get_temp_index(emit);
9031   const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1);
9032   const struct tgsi_full_dst_register cond1_dst_x =
9033      writemask_dst(&cond1_dst, TGSI_WRITEMASK_X);
9034   const struct tgsi_full_src_register cond1_src_x =
9035      make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X);
9036
9037   unsigned tmp2 = get_temp_index(emit);
9038   const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2);
9039   const struct tgsi_full_dst_register cond2_dst_x =
9040      writemask_dst(&cond2_dst, TGSI_WRITEMASK_X);
9041   const struct tgsi_full_src_register cond2_src_x =
9042      make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X);
9043
9044   /**
9045    * In SM5, when width = 32  and offset = 0, it returns 0.
9046    * On the other hand GLSL, expects value to be copied as it is, to dst.
9047    */
9048
9049   /* cond1 = width ! = 32 */
9050   emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9051                        &cond1_dst_x, &inst->Src[2], &imm32);
9052
9053   /* cond2 = offset ! = 0 */
9054   emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9055                        &cond2_dst_x, &inst->Src[1], &zero);
9056
9057   /* cond 2 = cond1 & cond 2 */
9058   emit_instruction_op2(emit, VGPU10_OPCODE_AND, &cond2_dst_x,
9059                        &cond2_src_x,
9060                        &cond1_src_x);
9061   /* IF */
9062   emit_if(emit, &cond2_src_x);
9063
9064   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
9065                        &inst->Src[0]);
9066
9067   /* ELSE */
9068   emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
9069
9070   /* U/IBFE dst, width, offset, value */
9071   emit_instruction_op3(emit, translate_opcode(opcode), &inst->Dst[0],
9072                        &inst->Src[2], &inst->Src[1], &inst->Src[0]);
9073
9074   /* ENDIF */
9075   emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
9076
9077   free_temp_indexes(emit);
9078   return TRUE;
9079}
9080
9081
9082/**
9083 * Emit BFI  instruction
9084 * tgsi representation:
9085 * BFI dst, base, insert, offset, width
9086 * SM5 representation:
9087 * BFI dst, width, offset, insert, base
9088 * Note: SM5 has width & offset range (0-31);
9089 *      whereas GLSL has width & offset range (0-32)
9090 */
9091static boolean
9092emit_bfi(struct svga_shader_emitter_v10 *emit,
9093         const struct tgsi_full_instruction *inst)
9094{
9095   const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9096   struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32);
9097   imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X);
9098
9099   struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
9100   zero = scalar_src(&zero, TGSI_SWIZZLE_X);
9101
9102   unsigned tmp1 = get_temp_index(emit);
9103   const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1);
9104   const struct tgsi_full_dst_register cond1_dst_x =
9105      writemask_dst(&cond1_dst, TGSI_WRITEMASK_X);
9106   const struct tgsi_full_src_register cond1_src_x =
9107      make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X);
9108
9109   unsigned tmp2 = get_temp_index(emit);
9110   const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2);
9111   const struct tgsi_full_dst_register cond2_dst_x =
9112      writemask_dst(&cond2_dst, TGSI_WRITEMASK_X);
9113   const struct tgsi_full_src_register cond2_src_x =
9114      make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X);
9115
9116   /**
9117    * In SM5, when width = 32  and offset = 0, it returns 0.
9118    * On the other hand GLSL, expects insert to be copied as it is, to dst.
9119    */
9120
9121   /* cond1 = width == 32 */
9122   emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9123                        &cond1_dst_x, &inst->Src[3], &imm32);
9124
9125   /* cond1 = offset == 0 */
9126   emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9127                        &cond2_dst_x, &inst->Src[2], &zero);
9128
9129   /* cond2 = cond1 & cond2 */
9130   emit_instruction_op2(emit, VGPU10_OPCODE_AND,
9131                        &cond2_dst_x, &cond2_src_x, &cond1_src_x);
9132
9133   /* if */
9134   emit_if(emit, &cond2_src_x);
9135
9136   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
9137                        &inst->Src[1]);
9138
9139   /* else */
9140   emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
9141
9142   /* BFI dst, width, offset, insert, base */
9143   begin_emit_instruction(emit);
9144   emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate);
9145   emit_dst_register(emit, &inst->Dst[0]);
9146   emit_src_register(emit, &inst->Src[3]);
9147   emit_src_register(emit, &inst->Src[2]);
9148   emit_src_register(emit, &inst->Src[1]);
9149   emit_src_register(emit, &inst->Src[0]);
9150   end_emit_instruction(emit);
9151
9152   /* endif */
9153   emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
9154
9155   free_temp_indexes(emit);
9156   return TRUE;
9157}
9158
9159
9160/**
9161 * We only special case the MOV instruction to try to detect constant
9162 * color writes in the fragment shader.
9163 */
9164static boolean
9165emit_mov(struct svga_shader_emitter_v10 *emit,
9166         const struct tgsi_full_instruction *inst)
9167{
9168   const struct tgsi_full_src_register *src = &inst->Src[0];
9169   const struct tgsi_full_dst_register *dst = &inst->Dst[0];
9170
9171   if (emit->unit == PIPE_SHADER_FRAGMENT &&
9172       dst->Register.File == TGSI_FILE_OUTPUT &&
9173       dst->Register.Index == 0 &&
9174       src->Register.File == TGSI_FILE_CONSTANT &&
9175       !src->Register.Indirect) {
9176      emit->constant_color_output = TRUE;
9177   }
9178
9179   return emit_simple(emit, inst);
9180}
9181
9182
9183/**
9184 * Emit a simple VGPU10 instruction which writes to multiple dest registers,
9185 * where TGSI only uses one dest register.
9186 */
9187static boolean
9188emit_simple_1dst(struct svga_shader_emitter_v10 *emit,
9189                 const struct tgsi_full_instruction *inst,
9190                 unsigned dst_count,
9191                 unsigned dst_index)
9192{
9193   const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9194   const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
9195   unsigned i;
9196
9197   begin_emit_instruction(emit);
9198   emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate);
9199
9200   for (i = 0; i < dst_count; i++) {
9201      if (i == dst_index) {
9202         emit_dst_register(emit, &inst->Dst[0]);
9203      } else {
9204         emit_null_dst_register(emit);
9205      }
9206   }
9207
9208   for (i = 0; i < op->num_src; i++) {
9209      emit_src_register(emit, &inst->Src[i]);
9210   }
9211   end_emit_instruction(emit);
9212
9213   return TRUE;
9214}
9215
9216
9217/**
9218 * Emit a vmware specific VGPU10 instruction.
9219 */
9220static boolean
9221emit_vmware(struct svga_shader_emitter_v10 *emit,
9222            const struct tgsi_full_instruction *inst,
9223            VGPU10_VMWARE_OPCODE_TYPE subopcode)
9224{
9225   VGPU10OpcodeToken0 token0;
9226   const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9227   const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
9228   const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode);
9229   const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode);
9230
9231   unsigned i;
9232
9233   begin_emit_instruction(emit);
9234
9235   assert((subopcode > 0 && emit->version >= 50) || subopcode == 0);
9236
9237   token0.value = 0;
9238   token0.opcodeType = VGPU10_OPCODE_VMWARE;
9239   token0.vmwareOpcodeType = subopcode;
9240   emit_dword(emit, token0.value);
9241
9242   if (subopcode == VGPU10_VMWARE_OPCODE_IDIV) {
9243      /* IDIV only uses the first dest register. */
9244      emit_dst_register(emit, &inst->Dst[0]);
9245      emit_null_dst_register(emit);
9246   } else {
9247      for (i = 0; i < op->num_dst; i++) {
9248         if (dbl_dst) {
9249            check_double_dst_writemask(inst);
9250         }
9251         emit_dst_register(emit, &inst->Dst[i]);
9252      }
9253   }
9254
9255   for (i = 0; i < op->num_src; i++) {
9256      if (dbl_src) {
9257         check_double_src_swizzle(&inst->Src[i]);
9258      }
9259      emit_src_register(emit, &inst->Src[i]);
9260   }
9261   end_emit_instruction(emit);
9262
9263   return TRUE;
9264}
9265
9266
9267/**
9268 * Translate a single TGSI instruction to VGPU10.
9269 */
9270static boolean
9271emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
9272                        unsigned inst_number,
9273                        const struct tgsi_full_instruction *inst)
9274{
9275   const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9276
9277   if (emit->skip_instruction)
9278      return TRUE;
9279
9280   switch (opcode) {
9281   case TGSI_OPCODE_ADD:
9282   case TGSI_OPCODE_AND:
9283   case TGSI_OPCODE_BGNLOOP:
9284   case TGSI_OPCODE_BRK:
9285   case TGSI_OPCODE_CEIL:
9286   case TGSI_OPCODE_CONT:
9287   case TGSI_OPCODE_DDX:
9288   case TGSI_OPCODE_DDY:
9289   case TGSI_OPCODE_DIV:
9290   case TGSI_OPCODE_DP2:
9291   case TGSI_OPCODE_DP3:
9292   case TGSI_OPCODE_DP4:
9293   case TGSI_OPCODE_ELSE:
9294   case TGSI_OPCODE_ENDIF:
9295   case TGSI_OPCODE_ENDLOOP:
9296   case TGSI_OPCODE_ENDSUB:
9297   case TGSI_OPCODE_F2I:
9298   case TGSI_OPCODE_F2U:
9299   case TGSI_OPCODE_FLR:
9300   case TGSI_OPCODE_FRC:
9301   case TGSI_OPCODE_FSEQ:
9302   case TGSI_OPCODE_FSGE:
9303   case TGSI_OPCODE_FSLT:
9304   case TGSI_OPCODE_FSNE:
9305   case TGSI_OPCODE_I2F:
9306   case TGSI_OPCODE_IMAX:
9307   case TGSI_OPCODE_IMIN:
9308   case TGSI_OPCODE_INEG:
9309   case TGSI_OPCODE_ISGE:
9310   case TGSI_OPCODE_ISHR:
9311   case TGSI_OPCODE_ISLT:
9312   case TGSI_OPCODE_MAD:
9313   case TGSI_OPCODE_MAX:
9314   case TGSI_OPCODE_MIN:
9315   case TGSI_OPCODE_MUL:
9316   case TGSI_OPCODE_NOP:
9317   case TGSI_OPCODE_NOT:
9318   case TGSI_OPCODE_OR:
9319   case TGSI_OPCODE_UADD:
9320   case TGSI_OPCODE_USEQ:
9321   case TGSI_OPCODE_USGE:
9322   case TGSI_OPCODE_USLT:
9323   case TGSI_OPCODE_UMIN:
9324   case TGSI_OPCODE_UMAD:
9325   case TGSI_OPCODE_UMAX:
9326   case TGSI_OPCODE_ROUND:
9327   case TGSI_OPCODE_SQRT:
9328   case TGSI_OPCODE_SHL:
9329   case TGSI_OPCODE_TRUNC:
9330   case TGSI_OPCODE_U2F:
9331   case TGSI_OPCODE_UCMP:
9332   case TGSI_OPCODE_USHR:
9333   case TGSI_OPCODE_USNE:
9334   case TGSI_OPCODE_XOR:
9335   /* Begin SM5 opcodes */
9336   case TGSI_OPCODE_F2D:
9337   case TGSI_OPCODE_D2F:
9338   case TGSI_OPCODE_DADD:
9339   case TGSI_OPCODE_DMUL:
9340   case TGSI_OPCODE_DMAX:
9341   case TGSI_OPCODE_DMIN:
9342   case TGSI_OPCODE_DSGE:
9343   case TGSI_OPCODE_DSLT:
9344   case TGSI_OPCODE_DSEQ:
9345   case TGSI_OPCODE_DSNE:
9346   case TGSI_OPCODE_BREV:
9347   case TGSI_OPCODE_POPC:
9348   case TGSI_OPCODE_LSB:
9349   case TGSI_OPCODE_INTERP_CENTROID:
9350   case TGSI_OPCODE_INTERP_SAMPLE:
9351      /* simple instructions */
9352      return emit_simple(emit, inst);
9353   case TGSI_OPCODE_RET:
9354      if (emit->unit == PIPE_SHADER_TESS_CTRL &&
9355          !emit->tcs.control_point_phase) {
9356
9357         /* store the tessellation levels in the patch constant phase only */
9358         store_tesslevels(emit);
9359      }
9360      return emit_simple(emit, inst);
9361
9362   case TGSI_OPCODE_IMSB:
9363   case TGSI_OPCODE_UMSB:
9364      return emit_msb(emit, inst);
9365   case TGSI_OPCODE_IBFE:
9366   case TGSI_OPCODE_UBFE:
9367      return emit_bfe(emit, inst);
9368   case TGSI_OPCODE_BFI:
9369      return emit_bfi(emit, inst);
9370   case TGSI_OPCODE_MOV:
9371      return emit_mov(emit, inst);
9372   case TGSI_OPCODE_EMIT:
9373      return emit_vertex(emit, inst);
9374   case TGSI_OPCODE_ENDPRIM:
9375      return emit_endprim(emit, inst);
9376   case TGSI_OPCODE_IABS:
9377      return emit_iabs(emit, inst);
9378   case TGSI_OPCODE_ARL:
9379      FALLTHROUGH;
9380   case TGSI_OPCODE_UARL:
9381      return emit_arl_uarl(emit, inst);
9382   case TGSI_OPCODE_BGNSUB:
9383      /* no-op */
9384      return TRUE;
9385   case TGSI_OPCODE_CAL:
9386      return emit_cal(emit, inst);
9387   case TGSI_OPCODE_CMP:
9388      return emit_cmp(emit, inst);
9389   case TGSI_OPCODE_COS:
9390      return emit_sincos(emit, inst);
9391   case TGSI_OPCODE_DST:
9392      return emit_dst(emit, inst);
9393   case TGSI_OPCODE_EX2:
9394      return emit_ex2(emit, inst);
9395   case TGSI_OPCODE_EXP:
9396      return emit_exp(emit, inst);
9397   case TGSI_OPCODE_IF:
9398      return emit_if(emit, &inst->Src[0]);
9399   case TGSI_OPCODE_KILL:
9400      return emit_kill(emit, inst);
9401   case TGSI_OPCODE_KILL_IF:
9402      return emit_kill_if(emit, inst);
9403   case TGSI_OPCODE_LG2:
9404      return emit_lg2(emit, inst);
9405   case TGSI_OPCODE_LIT:
9406      return emit_lit(emit, inst);
9407   case TGSI_OPCODE_LODQ:
9408      return emit_lodq(emit, inst);
9409   case TGSI_OPCODE_LOG:
9410      return emit_log(emit, inst);
9411   case TGSI_OPCODE_LRP:
9412      return emit_lrp(emit, inst);
9413   case TGSI_OPCODE_POW:
9414      return emit_pow(emit, inst);
9415   case TGSI_OPCODE_RCP:
9416      return emit_rcp(emit, inst);
9417   case TGSI_OPCODE_RSQ:
9418      return emit_rsq(emit, inst);
9419   case TGSI_OPCODE_SAMPLE:
9420      return emit_sample(emit, inst);
9421   case TGSI_OPCODE_SEQ:
9422      return emit_seq(emit, inst);
9423   case TGSI_OPCODE_SGE:
9424      return emit_sge(emit, inst);
9425   case TGSI_OPCODE_SGT:
9426      return emit_sgt(emit, inst);
9427   case TGSI_OPCODE_SIN:
9428      return emit_sincos(emit, inst);
9429   case TGSI_OPCODE_SLE:
9430      return emit_sle(emit, inst);
9431   case TGSI_OPCODE_SLT:
9432      return emit_slt(emit, inst);
9433   case TGSI_OPCODE_SNE:
9434      return emit_sne(emit, inst);
9435   case TGSI_OPCODE_SSG:
9436      return emit_ssg(emit, inst);
9437   case TGSI_OPCODE_ISSG:
9438      return emit_issg(emit, inst);
9439   case TGSI_OPCODE_TEX:
9440      return emit_tex(emit, inst);
9441   case TGSI_OPCODE_TG4:
9442      return emit_tg4(emit, inst);
9443   case TGSI_OPCODE_TEX2:
9444      return emit_tex2(emit, inst);
9445   case TGSI_OPCODE_TXP:
9446      return emit_txp(emit, inst);
9447   case TGSI_OPCODE_TXB:
9448   case TGSI_OPCODE_TXB2:
9449   case TGSI_OPCODE_TXL:
9450      return emit_txl_txb(emit, inst);
9451   case TGSI_OPCODE_TXD:
9452      return emit_txd(emit, inst);
9453   case TGSI_OPCODE_TXF:
9454      return emit_txf(emit, inst);
9455   case TGSI_OPCODE_TXL2:
9456      return emit_txl2(emit, inst);
9457   case TGSI_OPCODE_TXQ:
9458      return emit_txq(emit, inst);
9459   case TGSI_OPCODE_UIF:
9460      return emit_if(emit, &inst->Src[0]);
9461   case TGSI_OPCODE_UMUL_HI:
9462   case TGSI_OPCODE_IMUL_HI:
9463   case TGSI_OPCODE_UDIV:
9464      /* These cases use only the FIRST of two destination registers */
9465      return emit_simple_1dst(emit, inst, 2, 0);
9466   case TGSI_OPCODE_IDIV:
9467      return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_IDIV);
9468   case TGSI_OPCODE_UMUL:
9469   case TGSI_OPCODE_UMOD:
9470   case TGSI_OPCODE_MOD:
9471      /* These cases use only the SECOND of two destination registers */
9472      return emit_simple_1dst(emit, inst, 2, 1);
9473
9474   /* Begin SM5 opcodes */
9475   case TGSI_OPCODE_DABS:
9476      return emit_dabs(emit, inst);
9477   case TGSI_OPCODE_DNEG:
9478      return emit_dneg(emit, inst);
9479   case TGSI_OPCODE_DRCP:
9480      return emit_simple(emit, inst);
9481   case TGSI_OPCODE_DSQRT:
9482      return emit_dsqrt(emit, inst);
9483   case TGSI_OPCODE_DMAD:
9484      return emit_dmad(emit, inst);
9485   case TGSI_OPCODE_DFRAC:
9486      return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_DFRC);
9487   case TGSI_OPCODE_D2I:
9488   case TGSI_OPCODE_D2U:
9489      return emit_simple(emit, inst);
9490   case TGSI_OPCODE_I2D:
9491   case TGSI_OPCODE_U2D:
9492      return emit_simple(emit, inst);
9493   case TGSI_OPCODE_DRSQ:
9494      return emit_drsq(emit, &inst->Dst[0], &inst->Src[0]);
9495   case TGSI_OPCODE_DDIV:
9496      return emit_simple(emit, inst);
9497   case TGSI_OPCODE_INTERP_OFFSET:
9498      return emit_interp_offset(emit, inst);
9499
9500   /* The following opcodes should never be seen here.  We return zero
9501    * for all the PIPE_CAP_TGSI_DROUND_SUPPORTED, DFRACEXP_DLDEXP_SUPPORTED,
9502    * FMA_SUPPORTED, LDEXP_SUPPORTED queries.
9503    */
9504   case TGSI_OPCODE_FMA:
9505   case TGSI_OPCODE_LDEXP:
9506   case TGSI_OPCODE_DSSG:
9507   case TGSI_OPCODE_DFRACEXP:
9508   case TGSI_OPCODE_DLDEXP:
9509   case TGSI_OPCODE_DTRUNC:
9510   case TGSI_OPCODE_DCEIL:
9511   case TGSI_OPCODE_DFLR:
9512      debug_printf("Unexpected TGSI opcode %s.  "
9513                   "Should have been translated away by the GLSL compiler.\n",
9514                   tgsi_get_opcode_name(opcode));
9515      return FALSE;
9516
9517   case TGSI_OPCODE_LOAD:
9518   case TGSI_OPCODE_STORE:
9519   case TGSI_OPCODE_ATOMAND:
9520   case TGSI_OPCODE_ATOMCAS:
9521   case TGSI_OPCODE_ATOMIMAX:
9522   case TGSI_OPCODE_ATOMIMIN:
9523   case TGSI_OPCODE_ATOMOR:
9524   case TGSI_OPCODE_ATOMUADD:
9525   case TGSI_OPCODE_ATOMUMAX:
9526   case TGSI_OPCODE_ATOMUMIN:
9527   case TGSI_OPCODE_ATOMXCHG:
9528   case TGSI_OPCODE_ATOMXOR:
9529      return FALSE;
9530   case TGSI_OPCODE_BARRIER:
9531      if (emit->unit == PIPE_SHADER_TESS_CTRL) {
9532         /* SM5 device doesn't support BARRIER in tcs . If barrier is used
9533          * in shader, don't do anything for this opcode and continue rest
9534          * of shader translation
9535          */
9536         pipe_debug_message(&emit->svga_debug_callback, INFO,
9537                            "barrier instruction is not supported in tessellation control shader\n");
9538         return TRUE;
9539      }
9540      else {
9541         return emit_simple(emit, inst);
9542      }
9543
9544   case TGSI_OPCODE_END:
9545      if (!emit_post_helpers(emit))
9546         return FALSE;
9547      return emit_simple(emit, inst);
9548
9549   default:
9550      debug_printf("Unimplemented tgsi instruction %s\n",
9551                   tgsi_get_opcode_name(opcode));
9552      return FALSE;
9553   }
9554
9555   return TRUE;
9556}
9557
9558
9559/**
9560 * Emit the extra instructions to adjust the vertex position.
9561 * There are two possible adjustments:
9562 * 1. Converting from Gallium to VGPU10 coordinate space by applying the
9563 *    "prescale" and "pretranslate" values.
9564 * 2. Undoing the viewport transformation when we use the swtnl/draw path.
9565 * \param vs_pos_tmp_index  which temporary register contains the vertex pos.
9566 */
9567static void
9568emit_vpos_instructions(struct svga_shader_emitter_v10 *emit)
9569{
9570   struct tgsi_full_src_register tmp_pos_src;
9571   struct tgsi_full_dst_register pos_dst;
9572   const unsigned vs_pos_tmp_index = emit->vposition.tmp_index;
9573
9574   /* Don't bother to emit any extra vertex instructions if vertex position is
9575    * not written out
9576    */
9577   if (emit->vposition.out_index == INVALID_INDEX)
9578      return;
9579
9580   /**
9581    * Reset the temporary vertex position register index
9582    * so that emit_dst_register() will use the real vertex position output
9583    */
9584   emit->vposition.tmp_index = INVALID_INDEX;
9585
9586   tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index);
9587   pos_dst = make_dst_output_reg(emit->vposition.out_index);
9588
9589   /* If non-adjusted vertex position register index
9590    * is valid, copy the vertex position from the temporary
9591    * vertex position register before it is modified by the
9592    * prescale computation.
9593    */
9594   if (emit->vposition.so_index != INVALID_INDEX) {
9595      struct tgsi_full_dst_register pos_so_dst =
9596         make_dst_output_reg(emit->vposition.so_index);
9597
9598      /* MOV pos_so, tmp_pos */
9599      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst, &tmp_pos_src);
9600   }
9601
9602   if (emit->vposition.need_prescale) {
9603      /* This code adjusts the vertex position to match the VGPU10 convention.
9604       * If p is the position computed by the shader (usually by applying the
9605       * modelview and projection matrices), the new position q is computed by:
9606       *
9607       * q.x = p.w * trans.x + p.x * scale.x
9608       * q.y = p.w * trans.y + p.y * scale.y
9609       * q.z = p.w * trans.z + p.z * scale.z;
9610       * q.w = p.w * trans.w + p.w;
9611       */
9612      struct tgsi_full_src_register tmp_pos_src_w =
9613         scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
9614      struct tgsi_full_dst_register tmp_pos_dst =
9615         make_dst_temp_reg(vs_pos_tmp_index);
9616      struct tgsi_full_dst_register tmp_pos_dst_xyz =
9617         writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ);
9618
9619      struct tgsi_full_src_register prescale_scale =
9620         make_src_temp_reg(emit->vposition.prescale_scale_index);
9621      struct tgsi_full_src_register prescale_trans =
9622         make_src_temp_reg(emit->vposition.prescale_trans_index);
9623
9624      /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */
9625      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz,
9626                           &tmp_pos_src, &prescale_scale);
9627
9628      /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */
9629      emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w,
9630                           &prescale_trans, &tmp_pos_src);
9631   }
9632   else if (emit->key.vs.undo_viewport) {
9633      /* This code computes the final vertex position from the temporary
9634       * vertex position by undoing the viewport transformation and the
9635       * divide-by-W operation (we convert window coords back to clip coords).
9636       * This is needed when we use the 'draw' module for fallbacks.
9637       * If p is the temp pos in window coords, then the NDC coord q is:
9638       *   q.x = (p.x - vp.x_trans) / vp.x_scale * p.w
9639       *   q.y = (p.y - vp.y_trans) / vp.y_scale * p.w
9640       *   q.z = p.z * p.w
9641       *   q.w = p.w
9642       * CONST[vs_viewport_index] contains:
9643       *   { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans }
9644       */
9645      struct tgsi_full_dst_register tmp_pos_dst =
9646         make_dst_temp_reg(vs_pos_tmp_index);
9647      struct tgsi_full_dst_register tmp_pos_dst_xy =
9648         writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY);
9649      struct tgsi_full_src_register tmp_pos_src_wwww =
9650         scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
9651
9652      struct tgsi_full_dst_register pos_dst_xyz =
9653         writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ);
9654      struct tgsi_full_dst_register pos_dst_w =
9655         writemask_dst(&pos_dst, TGSI_WRITEMASK_W);
9656
9657      struct tgsi_full_src_register vp_xyzw =
9658         make_src_const_reg(emit->vs.viewport_index);
9659      struct tgsi_full_src_register vp_zwww =
9660         swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
9661                     TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
9662
9663      /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */
9664      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy,
9665                           &tmp_pos_src, &vp_zwww);
9666
9667      /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzy */
9668      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy,
9669                           &tmp_pos_src, &vp_xyzw);
9670
9671      /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */
9672      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz,
9673                           &tmp_pos_src, &tmp_pos_src_wwww);
9674
9675      /* MOV pos.w, tmp_pos.w */
9676      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w, &tmp_pos_src);
9677   }
9678   else if (vs_pos_tmp_index != INVALID_INDEX) {
9679      /* This code is to handle the case where the temporary vertex
9680       * position register is created when the vertex shader has stream
9681       * output and prescale is disabled because rasterization is to be
9682       * discarded.
9683       */
9684      struct tgsi_full_dst_register pos_dst =
9685         make_dst_output_reg(emit->vposition.out_index);
9686
9687      /* MOV pos, tmp_pos */
9688      begin_emit_instruction(emit);
9689      emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
9690      emit_dst_register(emit, &pos_dst);
9691      emit_src_register(emit, &tmp_pos_src);
9692      end_emit_instruction(emit);
9693   }
9694
9695   /* Restore original vposition.tmp_index value for the next GS vertex.
9696    * It doesn't matter for VS.
9697    */
9698   emit->vposition.tmp_index = vs_pos_tmp_index;
9699}
9700
9701static void
9702emit_clipping_instructions(struct svga_shader_emitter_v10 *emit)
9703{
9704   if (emit->clip_mode == CLIP_DISTANCE) {
9705      /* Copy from copy distance temporary to CLIPDIST & the shadow copy */
9706      emit_clip_distance_instructions(emit);
9707
9708   } else if (emit->clip_mode == CLIP_VERTEX &&
9709              emit->key.last_vertex_stage) {
9710      /* Convert TGSI CLIPVERTEX to CLIPDIST */
9711      emit_clip_vertex_instructions(emit);
9712   }
9713
9714   /**
9715    * Emit vertex position and take care of legacy user planes only if
9716    * there is a valid vertex position register index.
9717    * This is to take care of the case
9718    * where the shader doesn't output vertex position. Then in
9719    * this case, don't bother to emit more vertex instructions.
9720    */
9721   if (emit->vposition.out_index == INVALID_INDEX)
9722      return;
9723
9724   /**
9725    * Emit per-vertex clipping instructions for legacy user defined clip planes.
9726    * NOTE: we must emit the clip distance instructions before the
9727    * emit_vpos_instructions() call since the later function will change
9728    * the TEMP[vs_pos_tmp_index] value.
9729    */
9730   if (emit->clip_mode == CLIP_LEGACY && emit->key.last_vertex_stage) {
9731      /* Emit CLIPDIST for legacy user defined clip planes */
9732      emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index);
9733   }
9734}
9735
9736
/**
 * Emit the extra per-vertex instructions: the clip distance computation
 * followed by the clip-coordinate space conversion of the position.
 * This is called for each GS emit-vertex instruction and at the end of
 * VS translation.
 */
static void
emit_vertex_instructions(struct svga_shader_emitter_v10 *emit)
{
   /* First the clipping code, selected by the clipping mode ... */
   emit_clipping_instructions(emit);

   /* ... then the vertex position adjustment. */
   emit_vpos_instructions(emit);
}
9751
9752
9753/**
9754 * Translate the TGSI_OPCODE_EMIT GS instruction.
9755 */
9756static boolean
9757emit_vertex(struct svga_shader_emitter_v10 *emit,
9758            const struct tgsi_full_instruction *inst)
9759{
9760   unsigned ret = TRUE;
9761
9762   assert(emit->unit == PIPE_SHADER_GEOMETRY);
9763
9764   /**
9765    * Emit the viewport array index for the first vertex.
9766    */
9767   if (emit->gs.viewport_index_out_index != INVALID_INDEX) {
9768      struct tgsi_full_dst_register viewport_index_out =
9769         make_dst_output_reg(emit->gs.viewport_index_out_index);
9770      struct tgsi_full_dst_register viewport_index_out_x =
9771         writemask_dst(&viewport_index_out, TGSI_WRITEMASK_X);
9772      struct tgsi_full_src_register viewport_index_tmp =
9773         make_src_temp_reg(emit->gs.viewport_index_tmp_index);
9774
9775      /* Set the out index to INVALID_INDEX, so it will not
9776       * be assigned to a temp again in emit_dst_register, and
9777       * the viewport index will not be assigned again in the
9778       * subsequent vertices.
9779       */
9780      emit->gs.viewport_index_out_index = INVALID_INDEX;
9781      emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
9782                           &viewport_index_out_x, &viewport_index_tmp);
9783   }
9784
9785   /**
9786    * Find the stream index associated with this emit vertex instruction.
9787    */
9788   assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE);
9789   unsigned streamIndex = find_stream_index(emit, &inst->Src[0]);
9790
9791   /**
9792    * According to the ARB_gpu_shader5 spec, the built-in geometry shader
9793    * outputs are always associated with vertex stream zero.
9794    * So emit the extra vertex instructions for position or clip distance
9795    * for stream zero only.
9796    */
9797   if (streamIndex == 0) {
9798      /**
9799       * Before emitting vertex instructions, emit the temporaries for
9800       * the prescale constants based on the viewport index if needed.
9801       */
9802      if (emit->vposition.need_prescale && !emit->vposition.have_prescale)
9803         emit_temp_prescale_instructions(emit);
9804
9805      emit_vertex_instructions(emit);
9806   }
9807
9808   begin_emit_instruction(emit);
9809   if (emit->version >= 50) {
9810      if (emit->info.num_stream_output_components[streamIndex] == 0) {
9811         /**
9812          * If there is no output for this stream, discard this instruction.
9813          */
9814         emit->discard_instruction = TRUE;
9815      }
9816      else {
9817         emit_opcode(emit, VGPU10_OPCODE_EMIT_STREAM, FALSE);
9818         emit_stream_register(emit, streamIndex);
9819      }
9820   }
9821   else {
9822      emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE);
9823   }
9824   end_emit_instruction(emit);
9825
9826   return ret;
9827}
9828
9829
9830/**
9831 * Emit the extra code to convert from VGPU10's boolean front-face
9832 * register to TGSI's signed front-face register.
9833 *
9834 * TODO: Make temporary front-face register a scalar.
9835 */
9836static void
9837emit_frontface_instructions(struct svga_shader_emitter_v10 *emit)
9838{
9839   assert(emit->unit == PIPE_SHADER_FRAGMENT);
9840
9841   if (emit->fs.face_input_index != INVALID_INDEX) {
9842      /* convert vgpu10 boolean face register to gallium +/-1 value */
9843      struct tgsi_full_dst_register tmp_dst =
9844         make_dst_temp_reg(emit->fs.face_tmp_index);
9845      struct tgsi_full_src_register one =
9846         make_immediate_reg_float(emit, 1.0f);
9847      struct tgsi_full_src_register neg_one =
9848         make_immediate_reg_float(emit, -1.0f);
9849
9850      /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */
9851      begin_emit_instruction(emit);
9852      emit_opcode(emit, VGPU10_OPCODE_MOVC, FALSE);
9853      emit_dst_register(emit, &tmp_dst);
9854      emit_face_register(emit);
9855      emit_src_register(emit, &one);
9856      emit_src_register(emit, &neg_one);
9857      end_emit_instruction(emit);
9858   }
9859}
9860
9861
9862/**
9863 * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w.
9864 */
9865static void
9866emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit)
9867{
9868   assert(emit->unit == PIPE_SHADER_FRAGMENT);
9869
9870   if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
9871      struct tgsi_full_dst_register tmp_dst =
9872         make_dst_temp_reg(emit->fs.fragcoord_tmp_index);
9873      struct tgsi_full_dst_register tmp_dst_xyz =
9874         writemask_dst(&tmp_dst, TGSI_WRITEMASK_XYZ);
9875      struct tgsi_full_dst_register tmp_dst_w =
9876         writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
9877      struct tgsi_full_src_register one =
9878         make_immediate_reg_float(emit, 1.0f);
9879      struct tgsi_full_src_register fragcoord =
9880         make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index);
9881
9882      /* save the input index */
9883      unsigned fragcoord_input_index = emit->fs.fragcoord_input_index;
9884      /* set to invalid to prevent substitution in emit_src_register() */
9885      emit->fs.fragcoord_input_index = INVALID_INDEX;
9886
9887      /* MOV fragcoord_tmp.xyz, fragcoord.xyz */
9888      begin_emit_instruction(emit);
9889      emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
9890      emit_dst_register(emit, &tmp_dst_xyz);
9891      emit_src_register(emit, &fragcoord);
9892      end_emit_instruction(emit);
9893
9894      /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */
9895      begin_emit_instruction(emit);
9896      emit_opcode(emit, VGPU10_OPCODE_DIV, FALSE);
9897      emit_dst_register(emit, &tmp_dst_w);
9898      emit_src_register(emit, &one);
9899      emit_src_register(emit, &fragcoord);
9900      end_emit_instruction(emit);
9901
9902      /* restore saved value */
9903      emit->fs.fragcoord_input_index = fragcoord_input_index;
9904   }
9905}
9906
9907
9908/**
9909 * Emit the extra code to get the current sample position value and
9910 * put it into a temp register.
9911 */
9912static void
9913emit_sample_position_instructions(struct svga_shader_emitter_v10 *emit)
9914{
9915   assert(emit->unit == PIPE_SHADER_FRAGMENT);
9916
9917   if (emit->fs.sample_pos_sys_index != INVALID_INDEX) {
9918      assert(emit->version >= 41);
9919
9920      struct tgsi_full_dst_register tmp_dst =
9921         make_dst_temp_reg(emit->fs.sample_pos_tmp_index);
9922      struct tgsi_full_src_register half =
9923         make_immediate_reg_float4(emit, 0.5, 0.5, 0.0, 0.0);
9924
9925      struct tgsi_full_src_register tmp_src =
9926         make_src_temp_reg(emit->fs.sample_pos_tmp_index);
9927      struct tgsi_full_src_register sample_index_reg =
9928         make_src_scalar_reg(TGSI_FILE_SYSTEM_VALUE,
9929                             emit->fs.sample_id_sys_index, TGSI_SWIZZLE_X);
9930
9931      /* The first src register is a shader resource (if we want a
9932       * multisampled resource sample position) or the rasterizer register
9933       * (if we want the current sample position in the color buffer).  We
9934       * want the later.
9935       */
9936
9937      /* SAMPLE_POS dst, RASTERIZER, sampleIndex */
9938      begin_emit_instruction(emit);
9939      emit_opcode(emit, VGPU10_OPCODE_SAMPLE_POS, FALSE);
9940      emit_dst_register(emit, &tmp_dst);
9941      emit_rasterizer_register(emit);
9942      emit_src_register(emit, &sample_index_reg);
9943      end_emit_instruction(emit);
9944
9945      /* Convert from D3D coords to GL coords by adding 0.5 bias */
9946      /* ADD dst, dst, half */
9947      begin_emit_instruction(emit);
9948      emit_opcode(emit, VGPU10_OPCODE_ADD, FALSE);
9949      emit_dst_register(emit, &tmp_dst);
9950      emit_src_register(emit, &tmp_src);
9951      emit_src_register(emit, &half);
9952      end_emit_instruction(emit);
9953   }
9954}
9955
9956
9957/**
9958 * Emit extra instructions to adjust VS inputs/attributes.  This can
9959 * mean casting a vertex attribute from int to float or setting the
9960 * W component to 1, or both.
9961 */
9962static void
9963emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit)
9964{
9965   const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1;
9966   const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof;
9967   const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof;
9968   const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra;
9969   const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm;
9970   const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled;
9971   const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled;
9972
9973   unsigned adjust_mask = (save_w_1_mask |
9974                           save_itof_mask |
9975                           save_utof_mask |
9976                           save_is_bgra_mask |
9977                           save_puint_to_snorm_mask |
9978                           save_puint_to_uscaled_mask |
9979                           save_puint_to_sscaled_mask);
9980
9981   assert(emit->unit == PIPE_SHADER_VERTEX);
9982
9983   if (adjust_mask) {
9984      struct tgsi_full_src_register one =
9985         make_immediate_reg_float(emit, 1.0f);
9986
9987      struct tgsi_full_src_register one_int =
9988         make_immediate_reg_int(emit, 1);
9989
9990      /* We need to turn off these bitmasks while emitting the
9991       * instructions below, then restore them afterward.
9992       */
9993      emit->key.vs.adjust_attrib_w_1 = 0;
9994      emit->key.vs.adjust_attrib_itof = 0;
9995      emit->key.vs.adjust_attrib_utof = 0;
9996      emit->key.vs.attrib_is_bgra = 0;
9997      emit->key.vs.attrib_puint_to_snorm = 0;
9998      emit->key.vs.attrib_puint_to_uscaled = 0;
9999      emit->key.vs.attrib_puint_to_sscaled = 0;
10000
10001      while (adjust_mask) {
10002         unsigned index = u_bit_scan(&adjust_mask);
10003
10004         /* skip the instruction if this vertex attribute is not being used */
10005         if (emit->info.input_usage_mask[index] == 0)
10006            continue;
10007
10008         unsigned tmp = emit->vs.adjusted_input[index];
10009         struct tgsi_full_src_register input_src =
10010            make_src_reg(TGSI_FILE_INPUT, index);
10011
10012         struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
10013         struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
10014         struct tgsi_full_dst_register tmp_dst_w =
10015            writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
10016
10017         /* ITOF/UTOF/MOV tmp, input[index] */
10018         if (save_itof_mask & (1 << index)) {
10019            emit_instruction_op1(emit, VGPU10_OPCODE_ITOF,
10020                                 &tmp_dst, &input_src);
10021         }
10022         else if (save_utof_mask & (1 << index)) {
10023            emit_instruction_op1(emit, VGPU10_OPCODE_UTOF,
10024                                 &tmp_dst, &input_src);
10025         }
10026         else if (save_puint_to_snorm_mask & (1 << index)) {
10027            emit_puint_to_snorm(emit, &tmp_dst, &input_src);
10028         }
10029         else if (save_puint_to_uscaled_mask & (1 << index)) {
10030            emit_puint_to_uscaled(emit, &tmp_dst, &input_src);
10031         }
10032         else if (save_puint_to_sscaled_mask & (1 << index)) {
10033            emit_puint_to_sscaled(emit, &tmp_dst, &input_src);
10034         }
10035         else {
10036            assert((save_w_1_mask | save_is_bgra_mask) & (1 << index));
10037            emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
10038                                 &tmp_dst, &input_src);
10039         }
10040
10041         if (save_is_bgra_mask & (1 << index)) {
10042            emit_swap_r_b(emit, &tmp_dst, &tmp_src);
10043         }
10044
10045         if (save_w_1_mask & (1 << index)) {
10046            /* MOV tmp.w, 1.0 */
10047            if (emit->key.vs.attrib_is_pure_int & (1 << index)) {
10048               emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
10049                                    &tmp_dst_w, &one_int);
10050            }
10051            else {
10052               emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
10053                                    &tmp_dst_w, &one);
10054            }
10055         }
10056      }
10057
10058      emit->key.vs.adjust_attrib_w_1 = save_w_1_mask;
10059      emit->key.vs.adjust_attrib_itof = save_itof_mask;
10060      emit->key.vs.adjust_attrib_utof = save_utof_mask;
10061      emit->key.vs.attrib_is_bgra = save_is_bgra_mask;
10062      emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask;
10063      emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask;
10064      emit->key.vs.attrib_puint_to_sscaled = save_puint_to_sscaled_mask;
10065   }
10066}
10067
10068
10069/* Find zero-value immedate for default layer index */
10070static void
10071emit_default_layer_instructions(struct svga_shader_emitter_v10 *emit)
10072{
10073   assert(emit->unit == PIPE_SHADER_FRAGMENT);
10074
10075   /* immediate for default layer index 0 */
10076   if (emit->fs.layer_input_index != INVALID_INDEX) {
10077      union tgsi_immediate_data imm;
10078      imm.Int = 0;
10079      emit->fs.layer_imm_index = find_immediate(emit, imm, 0);
10080   }
10081}
10082
10083
10084static void
10085emit_temp_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit,
10086                             unsigned cbuf_index,
10087                             struct tgsi_full_dst_register *scale,
10088                             struct tgsi_full_dst_register *translate)
10089{
10090   struct tgsi_full_src_register scale_cbuf = make_src_const_reg(cbuf_index);
10091   struct tgsi_full_src_register trans_cbuf = make_src_const_reg(cbuf_index+1);
10092
10093   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, scale, &scale_cbuf);
10094   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, translate, &trans_cbuf);
10095}
10096
10097
10098/**
10099 * A recursive helper function to find the prescale from the constant buffer
10100 */
10101static void
10102find_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit,
10103                        unsigned index, unsigned num_prescale,
10104                        struct tgsi_full_src_register *vp_index,
10105                        struct tgsi_full_dst_register *scale,
10106                        struct tgsi_full_dst_register *translate,
10107                        struct tgsi_full_src_register *tmp_src,
10108                        struct tgsi_full_dst_register *tmp_dst)
10109{
10110   if (num_prescale == 0)
10111      return;
10112
10113   if (index > 0) {
10114      /* ELSE */
10115      emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
10116   }
10117
10118   struct tgsi_full_src_register index_src =
10119	                            make_immediate_reg_int(emit, index);
10120
10121   if (index == 0) {
10122      /* GE tmp, vp_index, index */
10123      emit_instruction_op2(emit, VGPU10_OPCODE_GE, tmp_dst,
10124                           vp_index, &index_src);
10125   } else {
10126      /* EQ tmp, vp_index, index */
10127      emit_instruction_op2(emit, VGPU10_OPCODE_EQ, tmp_dst,
10128                           vp_index, &index_src);
10129   }
10130
10131   /* IF tmp */
10132   emit_if(emit, tmp_src);
10133   emit_temp_prescale_from_cbuf(emit,
10134                                emit->vposition.prescale_cbuf_index + 2 * index,
10135                                scale, translate);
10136
10137   find_prescale_from_cbuf(emit, index+1, num_prescale-1,
10138                           vp_index, scale, translate,
10139                           tmp_src, tmp_dst);
10140
10141   /* ENDIF */
10142   emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
10143}
10144
10145
10146/**
10147 * This helper function emits instructions to set the prescale
10148 * and translate temporaries to the correct constants from the
10149 * constant buffer according to the designated viewport.
10150 */
10151static void
10152emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit)
10153{
10154   struct tgsi_full_dst_register prescale_scale =
10155         make_dst_temp_reg(emit->vposition.prescale_scale_index);
10156   struct tgsi_full_dst_register prescale_translate =
10157         make_dst_temp_reg(emit->vposition.prescale_trans_index);
10158
10159   unsigned prescale_cbuf_index = emit->vposition.prescale_cbuf_index;
10160
10161   if (emit->vposition.num_prescale == 1) {
10162      emit_temp_prescale_from_cbuf(emit,
10163                                   prescale_cbuf_index,
10164                                   &prescale_scale, &prescale_translate);
10165   } else {
10166      /**
10167       * Since SM5 device does not support dynamic indexing, we need
10168       * to do the if-else to find the prescale constants for the
10169       * specified viewport.
10170       */
10171      struct tgsi_full_src_register vp_index_src =
10172         make_src_temp_reg(emit->gs.viewport_index_tmp_index);
10173
10174      struct tgsi_full_src_register vp_index_src_x =
10175         scalar_src(&vp_index_src, TGSI_SWIZZLE_X);
10176
10177      unsigned tmp = get_temp_index(emit);
10178      struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
10179      struct tgsi_full_src_register tmp_src_x =
10180                scalar_src(&tmp_src, TGSI_SWIZZLE_X);
10181      struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
10182
10183      find_prescale_from_cbuf(emit, 0, emit->vposition.num_prescale,
10184                              &vp_index_src_x,
10185		              &prescale_scale, &prescale_translate,
10186                              &tmp_src_x, &tmp_dst);
10187   }
10188
10189   /* Mark prescale temporaries are emitted */
10190   emit->vposition.have_prescale = 1;
10191}
10192
10193
10194/**
10195 * A helper function to emit an instruction in a vertex shader to add a bias
10196 * to the VertexID system value. This patches the VertexID in the SVGA vertex
10197 * shader to include the base vertex of an indexed primitive or the start index
10198 * of a non-indexed primitive.
10199 */
10200static void
10201emit_vertex_id_nobase_instruction(struct svga_shader_emitter_v10 *emit)
10202{
10203   struct tgsi_full_src_register vertex_id_bias_index =
10204      make_src_const_reg(emit->vs.vertex_id_bias_index);
10205   struct tgsi_full_src_register vertex_id_sys_src =
10206      make_src_reg(TGSI_FILE_SYSTEM_VALUE, emit->vs.vertex_id_sys_index);
10207   struct tgsi_full_src_register vertex_id_sys_src_x =
10208      scalar_src(&vertex_id_sys_src, TGSI_SWIZZLE_X);
10209   struct tgsi_full_dst_register vertex_id_tmp_dst =
10210      make_dst_temp_reg(emit->vs.vertex_id_tmp_index);
10211
10212   /* IADD vertex_id_tmp, vertex_id_sys, vertex_id_bias */
10213   unsigned vertex_id_tmp_index = emit->vs.vertex_id_tmp_index;
10214   emit->vs.vertex_id_tmp_index = INVALID_INDEX;
10215   emit_instruction_opn(emit, VGPU10_OPCODE_IADD, &vertex_id_tmp_dst,
10216                        &vertex_id_sys_src_x, &vertex_id_bias_index, NULL, FALSE,
10217                        FALSE);
10218   emit->vs.vertex_id_tmp_index = vertex_id_tmp_index;
10219}
10220
10221/**
10222 * Hull Shader must have control point outputs. But tessellation
10223 * control shader can return without writing to control point output.
10224 * In this case, the control point output is assumed to be passthrough
10225 * from the control point input.
10226 * This helper function is to write out a control point output first in case
10227 * the tessellation control shader returns before writing a
10228 * control point output.
10229 */
10230static void
10231emit_tcs_default_control_point_output(struct svga_shader_emitter_v10 *emit)
10232{
10233   assert(emit->unit == PIPE_SHADER_TESS_CTRL);
10234   assert(emit->tcs.control_point_phase);
10235   assert(emit->tcs.control_point_input_index != INVALID_INDEX);
10236   assert(emit->tcs.control_point_out_index != INVALID_INDEX);
10237   assert(emit->tcs.invocation_id_sys_index != INVALID_INDEX);
10238
10239   /* UARL ADDR[INDEX].x INVOCATION.xxxx */
10240
10241   struct tgsi_full_src_register invocation_src;
10242   struct tgsi_full_dst_register addr_dst;
10243   struct tgsi_full_dst_register addr_dst_x;
10244   unsigned addr_tmp;
10245
10246   addr_tmp = emit->address_reg_index[emit->tcs.control_point_addr_index];
10247   addr_dst = make_dst_temp_reg(addr_tmp);
10248   addr_dst_x = writemask_dst(&addr_dst, TGSI_WRITEMASK_X);
10249
10250   invocation_src = make_src_reg(TGSI_FILE_SYSTEM_VALUE,
10251                                 emit->tcs.invocation_id_sys_index);
10252
10253   begin_emit_instruction(emit);
10254   emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE);
10255   emit_dst_register(emit, &addr_dst_x);
10256   emit_src_register(emit, &invocation_src);
10257   end_emit_instruction(emit);
10258
10259
10260   /* MOV OUTPUT INPUT[ADDR[INDEX].x][POSITION] */
10261
10262   struct tgsi_full_src_register input_control_point;
10263   struct tgsi_full_dst_register output_control_point;
10264
10265   input_control_point = make_src_reg(TGSI_FILE_INPUT,
10266                                      emit->tcs.control_point_input_index);
10267   input_control_point.Register.Dimension = 1;
10268   input_control_point.Dimension.Indirect = 1;
10269   input_control_point.DimIndirect.File = TGSI_FILE_ADDRESS;
10270   input_control_point.DimIndirect.Index = emit->tcs.control_point_addr_index;
10271   output_control_point =
10272      make_dst_output_reg(emit->tcs.control_point_out_index);
10273
10274   begin_emit_instruction(emit);
10275   emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE);
10276   emit_dst_register(emit, &output_control_point);
10277   emit_src_register(emit, &input_control_point);
10278   end_emit_instruction(emit);
10279}
10280
10281/**
10282 * This functions constructs temporary tessfactor from VGPU10*_TESSFACTOR
10283 * values in domain shader. SM5 has tessfactors as floating point values where
10284 * as tgsi emit them as vector. This function allows to construct temp
10285 * tessfactor vector similar to TGSI_SEMANTIC_TESSINNER/OUTER filled with
10286 * values from VGPU10*_TESSFACTOR. Use this constructed vector whenever
10287 * TGSI_SEMANTIC_TESSINNER/OUTER is used in shader.
10288 */
10289static void
10290emit_temp_tessfactor_instructions(struct svga_shader_emitter_v10 *emit)
10291{
10292   struct tgsi_full_src_register src;
10293   struct tgsi_full_dst_register dst;
10294
10295   if (emit->tes.inner.tgsi_index != INVALID_INDEX) {
10296      dst = make_dst_temp_reg(emit->tes.inner.temp_index);
10297
10298      switch (emit->tes.prim_mode) {
10299      case PIPE_PRIM_QUADS:
10300         src = make_src_scalar_reg(TGSI_FILE_INPUT,
10301                  emit->tes.inner.in_index + 1, TGSI_SWIZZLE_X);
10302         dst = writemask_dst(&dst, TGSI_WRITEMASK_Y);
10303         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10304         FALLTHROUGH;
10305      case PIPE_PRIM_TRIANGLES:
10306         src = make_src_scalar_reg(TGSI_FILE_INPUT,
10307                  emit->tes.inner.in_index, TGSI_SWIZZLE_X);
10308         dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
10309         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10310         break;
10311      case PIPE_PRIM_LINES:
10312         /**
10313          * As per SM5 spec, InsideTessFactor for isolines are unused.
10314          * In fact glsl tessInnerLevel for isolines doesn't mean anything but if
10315          * any application try to read tessInnerLevel in TES when primitive type
10316          * is isolines, then instead of driver throwing segfault for accesing it,
10317          * return atleast vec(1.0f)
10318          */
10319         src = make_immediate_reg_float(emit, 1.0f);
10320         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10321         break;
10322      default:
10323         break;
10324      }
10325   }
10326
10327   if (emit->tes.outer.tgsi_index != INVALID_INDEX) {
10328      dst = make_dst_temp_reg(emit->tes.outer.temp_index);
10329
10330      switch (emit->tes.prim_mode) {
10331      case PIPE_PRIM_QUADS:
10332         src = make_src_scalar_reg(TGSI_FILE_INPUT,
10333                  emit->tes.outer.in_index + 3, TGSI_SWIZZLE_X);
10334         dst = writemask_dst(&dst, TGSI_WRITEMASK_W);
10335         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10336         FALLTHROUGH;
10337      case PIPE_PRIM_TRIANGLES:
10338         src = make_src_scalar_reg(TGSI_FILE_INPUT,
10339                  emit->tes.outer.in_index + 2, TGSI_SWIZZLE_X);
10340         dst = writemask_dst(&dst, TGSI_WRITEMASK_Z);
10341         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10342         FALLTHROUGH;
10343      case PIPE_PRIM_LINES:
10344         src = make_src_scalar_reg(TGSI_FILE_INPUT,
10345                  emit->tes.outer.in_index + 1, TGSI_SWIZZLE_X);
10346         dst = writemask_dst(&dst, TGSI_WRITEMASK_Y);
10347         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10348
10349         src = make_src_scalar_reg(TGSI_FILE_INPUT,
10350                  emit->tes.outer.in_index , TGSI_SWIZZLE_X);
10351         dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
10352         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10353
10354         break;
10355      default:
10356         break;
10357      }
10358   }
10359}
10360
10361
10362static void
10363emit_initialize_temp_instruction(struct svga_shader_emitter_v10 *emit)
10364{
10365   struct tgsi_full_src_register src;
10366   struct tgsi_full_dst_register dst;
10367   unsigned vgpu10_temp_index = remap_temp_index(emit, TGSI_FILE_TEMPORARY,
10368                                                 emit->initialize_temp_index);
10369   src = make_immediate_reg_float(emit, 0.0f);
10370   dst = make_dst_temp_reg(vgpu10_temp_index);
10371   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10372   emit->temp_map[emit->initialize_temp_index].initialized = TRUE;
10373   emit->initialize_temp_index = INVALID_INDEX;
10374}
10375
10376
10377/**
10378 * Emit any extra/helper declarations/code that we might need between
10379 * the declaration section and code section.
10380 */
10381static boolean
10382emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
10383{
10384   /* Properties */
10385   if (emit->unit == PIPE_SHADER_GEOMETRY)
10386      emit_property_instructions(emit);
10387   else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
10388      emit_hull_shader_declarations(emit);
10389
10390      /* Save the position of the first instruction token so that we can
10391       * do a second pass of the instructions for the patch constant phase.
10392       */
10393      emit->tcs.instruction_token_pos = emit->cur_tgsi_token;
10394      emit->tcs.fork_phase_add_signature = FALSE;
10395
10396      if (!emit_hull_shader_control_point_phase(emit)) {
10397         emit->skip_instruction = TRUE;
10398         return TRUE;
10399      }
10400
10401      /* Set the current tcs phase to control point phase */
10402      emit->tcs.control_point_phase = TRUE;
10403   }
10404   else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
10405      emit_domain_shader_declarations(emit);
10406   }
10407
10408   /* Declare inputs */
10409   if (!emit_input_declarations(emit))
10410      return FALSE;
10411
10412   /* Declare outputs */
10413   if (!emit_output_declarations(emit))
10414      return FALSE;
10415
10416   /* Declare temporary registers */
10417   emit_temporaries_declaration(emit);
10418
10419   /* For PIPE_SHADER_TESS_CTRL, constants, samplers, resources and immediates
10420    * will already be declared in hs_decls (emit_hull_shader_declarations)
10421    */
10422   if (emit->unit != PIPE_SHADER_TESS_CTRL) {
10423      /* Declare constant registers */
10424      emit_constant_declaration(emit);
10425
10426      /* Declare samplers and resources */
10427      emit_sampler_declarations(emit);
10428      emit_resource_declarations(emit);
10429
10430      alloc_common_immediates(emit);
10431      /* Now, emit the constant block containing all the immediates
10432       * declared by shader, as well as the extra ones seen above.
10433       */
10434   }
10435
10436   if (emit->unit != PIPE_SHADER_FRAGMENT) {
10437      /*
10438       * Declare clip distance output registers for ClipVertex or
10439       * user defined planes
10440       */
10441      emit_clip_distance_declarations(emit);
10442   }
10443
10444   if (emit->unit == PIPE_SHADER_FRAGMENT &&
10445       emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
10446      float alpha = emit->key.fs.alpha_ref;
10447      emit->fs.alpha_ref_index =
10448         alloc_immediate_float4(emit, alpha, alpha, alpha, alpha);
10449   }
10450
10451   if (emit->unit != PIPE_SHADER_TESS_CTRL) {
10452      /**
10453       * For PIPE_SHADER_TESS_CTRL, immediates are already declared in
10454       * hs_decls
10455       */
10456      emit_vgpu10_immediates_block(emit);
10457   }
10458   else {
10459      emit_tcs_default_control_point_output(emit);
10460   }
10461
10462   if (emit->unit == PIPE_SHADER_FRAGMENT) {
10463      emit_frontface_instructions(emit);
10464      emit_fragcoord_instructions(emit);
10465      emit_sample_position_instructions(emit);
10466      emit_default_layer_instructions(emit);
10467   }
10468   else if (emit->unit == PIPE_SHADER_VERTEX) {
10469      emit_vertex_attrib_instructions(emit);
10470
10471      if (emit->info.uses_vertexid)
10472         emit_vertex_id_nobase_instruction(emit);
10473   }
10474   else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
10475      emit_temp_tessfactor_instructions(emit);
10476   }
10477
10478   /**
10479    * For geometry shader that writes to viewport index, the prescale
10480    * temporaries will be done at the first vertex emission.
10481    */
10482   if (emit->vposition.need_prescale && emit->vposition.num_prescale == 1)
10483      emit_temp_prescale_instructions(emit);
10484
10485   return TRUE;
10486}
10487
10488
10489/**
10490 * The device has no direct support for the pipe_blend_state::alpha_to_one
10491 * option so we implement it here with shader code.
10492 *
10493 * Note that this is kind of pointless, actually.  Here we're clobbering
10494 * the alpha value with 1.0.  So if alpha-to-coverage is enabled, we'll wind
10495 * up with 100% coverage.  That's almost certainly not what the user wants.
10496 * The work-around is to add extra shader code to compute coverage from alpha
10497 * and write it to the coverage output register (if the user's shader doesn't
10498 * do so already).  We'll probably do that in the future.
10499 */
10500static void
10501emit_alpha_to_one_instructions(struct svga_shader_emitter_v10 *emit,
10502                               unsigned fs_color_tmp_index)
10503{
10504   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
10505   unsigned i;
10506
10507   /* Note: it's not 100% clear from the spec if we're supposed to clobber
10508    * the alpha for all render targets.  But that's what NVIDIA does and
10509    * that's what Piglit tests.
10510    */
10511   for (i = 0; i < emit->fs.num_color_outputs; i++) {
10512      struct tgsi_full_dst_register color_dst;
10513
10514      if (fs_color_tmp_index != INVALID_INDEX && i == 0) {
10515         /* write to the temp color register */
10516         color_dst = make_dst_temp_reg(fs_color_tmp_index);
10517      }
10518      else {
10519         /* write directly to the color[i] output */
10520         color_dst = make_dst_output_reg(emit->fs.color_out_index[i]);
10521      }
10522
10523      color_dst = writemask_dst(&color_dst, TGSI_WRITEMASK_W);
10524
10525      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &one);
10526   }
10527}
10528
10529
10530/**
10531 * Emit alpha test code.  This compares TEMP[fs_color_tmp_index].w
10532 * against the alpha reference value and discards the fragment if the
10533 * comparison fails.
10534 */
10535static void
10536emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
10537                             unsigned fs_color_tmp_index)
10538{
10539   /* compare output color's alpha to alpha ref and kill */
10540   unsigned tmp = get_temp_index(emit);
10541   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
10542   struct tgsi_full_src_register tmp_src_x =
10543      scalar_src(&tmp_src, TGSI_SWIZZLE_X);
10544   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
10545   struct tgsi_full_src_register color_src =
10546      make_src_temp_reg(fs_color_tmp_index);
10547   struct tgsi_full_src_register color_src_w =
10548      scalar_src(&color_src, TGSI_SWIZZLE_W);
10549   struct tgsi_full_src_register ref_src =
10550      make_src_immediate_reg(emit->fs.alpha_ref_index);
10551   struct tgsi_full_dst_register color_dst =
10552      make_dst_output_reg(emit->fs.color_out_index[0]);
10553
10554   assert(emit->unit == PIPE_SHADER_FRAGMENT);
10555
10556   /* dst = src0 'alpha_func' src1 */
10557   emit_comparison(emit, emit->key.fs.alpha_func, &tmp_dst,
10558                   &color_src_w, &ref_src);
10559
10560   /* DISCARD if dst.x == 0 */
10561   begin_emit_instruction(emit);
10562   emit_discard_opcode(emit, FALSE);  /* discard if src0.x is zero */
10563   emit_src_register(emit, &tmp_src_x);
10564   end_emit_instruction(emit);
10565
10566   /* If we don't need to broadcast the color below, emit the final color here.
10567    */
10568   if (emit->key.fs.write_color0_to_n_cbufs <= 1) {
10569      /* MOV output.color, tempcolor */
10570      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src);
10571   }
10572
10573   free_temp_indexes(emit);
10574}
10575
10576
10577/**
10578 * Emit instructions for writing a single color output to multiple
10579 * color buffers.
10580 * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS (or
10581 * when key.fs.white_fragments is true).
10582 * property is set and the number of render targets is greater than one.
10583 * \param fs_color_tmp_index  index of the temp register that holds the
10584 *                            color to broadcast.
10585 */
10586static void
10587emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit,
10588                                 unsigned fs_color_tmp_index)
10589{
10590   const unsigned n = emit->key.fs.write_color0_to_n_cbufs;
10591   unsigned i;
10592   struct tgsi_full_src_register color_src;
10593
10594   if (emit->key.fs.white_fragments) {
10595      /* set all color outputs to white */
10596      color_src = make_immediate_reg_float(emit, 1.0f);
10597   }
10598   else {
10599      /* set all color outputs to TEMP[fs_color_tmp_index] */
10600      assert(fs_color_tmp_index != INVALID_INDEX);
10601      color_src = make_src_temp_reg(fs_color_tmp_index);
10602   }
10603
10604   assert(emit->unit == PIPE_SHADER_FRAGMENT);
10605
10606   for (i = 0; i < n; i++) {
10607      unsigned output_reg = emit->fs.color_out_index[i];
10608      struct tgsi_full_dst_register color_dst =
10609         make_dst_output_reg(output_reg);
10610
10611      /* Fill in this semantic here since we'll use it later in
10612       * emit_dst_register().
10613       */
10614      emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR;
10615
10616      /* MOV output.color[i], tempcolor */
10617      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src);
10618   }
10619}
10620
10621
10622/**
10623 * Emit extra helper code after the original shader code, but before the
10624 * last END/RET instruction.
10625 * For vertex shaders this means emitting the extra code to apply the
10626 * prescale scale/translation.
10627 */
10628static boolean
10629emit_post_helpers(struct svga_shader_emitter_v10 *emit)
10630{
10631   if (emit->unit == PIPE_SHADER_VERTEX) {
10632      emit_vertex_instructions(emit);
10633   }
10634   else if (emit->unit == PIPE_SHADER_FRAGMENT) {
10635      const unsigned fs_color_tmp_index = emit->fs.color_tmp_index;
10636
10637      assert(!(emit->key.fs.white_fragments &&
10638               emit->key.fs.write_color0_to_n_cbufs == 0));
10639
10640      /* We no longer want emit_dst_register() to substitute the
10641       * temporary fragment color register for the real color output.
10642       */
10643      emit->fs.color_tmp_index = INVALID_INDEX;
10644
10645      if (emit->key.fs.alpha_to_one) {
10646         emit_alpha_to_one_instructions(emit, fs_color_tmp_index);
10647      }
10648      if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
10649         emit_alpha_test_instructions(emit, fs_color_tmp_index);
10650      }
10651      if (emit->key.fs.write_color0_to_n_cbufs > 1 ||
10652          emit->key.fs.white_fragments) {
10653         emit_broadcast_color_instructions(emit, fs_color_tmp_index);
10654      }
10655   }
10656   else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
10657      if (!emit->tcs.control_point_phase) {
10658         /* store the tessellation levels in the patch constant phase only */
10659         store_tesslevels(emit);
10660      }
10661      else {
10662         emit_clipping_instructions(emit);
10663      }
10664   }
10665   else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
10666      emit_vertex_instructions(emit);
10667   }
10668
10669   return TRUE;
10670}
10671
10672
10673/**
10674 * Translate the TGSI tokens into VGPU10 tokens.
10675 */
10676static boolean
10677emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit,
10678                         const struct tgsi_token *tokens)
10679{
10680   struct tgsi_parse_context parse;
10681   boolean ret = TRUE;
10682   boolean pre_helpers_emitted = FALSE;
10683   unsigned inst_number = 0;
10684
10685   tgsi_parse_init(&parse, tokens);
10686
10687   while (!tgsi_parse_end_of_tokens(&parse)) {
10688
10689      /* Save the current tgsi token starting position */
10690      emit->cur_tgsi_token = parse.Position;
10691
10692      tgsi_parse_token(&parse);
10693
10694      switch (parse.FullToken.Token.Type) {
10695      case TGSI_TOKEN_TYPE_IMMEDIATE:
10696         ret = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate);
10697         if (!ret)
10698            goto done;
10699         break;
10700
10701      case TGSI_TOKEN_TYPE_DECLARATION:
10702         ret = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration);
10703         if (!ret)
10704            goto done;
10705         break;
10706
10707      case TGSI_TOKEN_TYPE_INSTRUCTION:
10708         if (!pre_helpers_emitted) {
10709            ret = emit_pre_helpers(emit);
10710            if (!ret)
10711               goto done;
10712            pre_helpers_emitted = TRUE;
10713         }
10714         ret = emit_vgpu10_instruction(emit, inst_number++,
10715                                       &parse.FullToken.FullInstruction);
10716
10717         /* Usually this applies to TCS only. If shader is reading control
10718          * point outputs in control point phase, we should reemit all
10719          * instructions which are writting into control point output in
10720          * control phase to store results into temporaries.
10721          */
10722         if (emit->reemit_instruction) {
10723            assert(emit->unit == PIPE_SHADER_TESS_CTRL);
10724            ret = emit_vgpu10_instruction(emit, inst_number,
10725                                          &parse.FullToken.FullInstruction);
10726         }
10727         else if (emit->initialize_temp_index != INVALID_INDEX) {
10728            emit_initialize_temp_instruction(emit);
10729            emit->initialize_temp_index = INVALID_INDEX;
10730            ret = emit_vgpu10_instruction(emit, inst_number - 1,
10731                                          &parse.FullToken.FullInstruction);
10732         }
10733
10734         if (!ret)
10735            goto done;
10736         break;
10737
10738      case TGSI_TOKEN_TYPE_PROPERTY:
10739         ret = emit_vgpu10_property(emit, &parse.FullToken.FullProperty);
10740         if (!ret)
10741            goto done;
10742         break;
10743
10744      default:
10745         break;
10746      }
10747   }
10748
10749   if (emit->unit == PIPE_SHADER_TESS_CTRL) {
10750      ret = emit_hull_shader_patch_constant_phase(emit, &parse);
10751   }
10752
10753done:
10754   tgsi_parse_free(&parse);
10755   return ret;
10756}
10757
10758
10759/**
10760 * Emit the first VGPU10 shader tokens.
10761 */
10762static boolean
10763emit_vgpu10_header(struct svga_shader_emitter_v10 *emit)
10764{
10765   VGPU10ProgramToken ptoken;
10766
10767   /* First token: VGPU10ProgramToken  (version info, program type (VS,GS,PS)) */
10768   ptoken.value = 0; /* init whole token to zero */
10769   ptoken.majorVersion = emit->version / 10;
10770   ptoken.minorVersion = emit->version % 10;
10771   ptoken.programType = translate_shader_type(emit->unit);
10772   if (!emit_dword(emit, ptoken.value))
10773      return FALSE;
10774
10775   /* Second token: total length of shader, in tokens.  We can't fill this
10776    * in until we're all done.  Emit zero for now.
10777    */
10778   if (!emit_dword(emit, 0))
10779      return FALSE;
10780
10781   if (emit->version >= 50) {
10782      VGPU10OpcodeToken0 token;
10783
10784      if (emit->unit == PIPE_SHADER_TESS_CTRL) {
10785         /* For hull shader, we need to start the declarations phase first before
10786          * emitting any declarations including the global flags.
10787          */
10788         token.value = 0;
10789         token.opcodeType = VGPU10_OPCODE_HS_DECLS;
10790         begin_emit_instruction(emit);
10791         emit_dword(emit, token.value);
10792         end_emit_instruction(emit);
10793      }
10794
10795      /* Emit global flags */
10796      token.value = 0;    /* init whole token to zero */
10797      token.opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
10798      token.enableDoublePrecisionFloatOps = 1;  /* set bit */
10799      token.instructionLength = 1;
10800      if (!emit_dword(emit, token.value))
10801         return FALSE;
10802   }
10803
10804   if (emit->version >= 40) {
10805      VGPU10OpcodeToken0 token;
10806
10807      /* Reserved for global flag such as refactoringAllowed.
10808       * If the shader does not use the precise qualifier, we will set the
10809       * refactoringAllowed global flag; otherwise, we will leave the reserved
10810       * token to NOP.
10811       */
10812      emit->reserved_token = (emit->ptr - emit->buf) / sizeof(VGPU10OpcodeToken0);
10813      token.value = 0;
10814      token.opcodeType = VGPU10_OPCODE_NOP;
10815      token.instructionLength = 1;
10816      if (!emit_dword(emit, token.value))
10817         return FALSE;
10818   }
10819
10820   return TRUE;
10821}
10822
10823
10824static boolean
10825emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit)
10826{
10827   VGPU10ProgramToken *tokens;
10828
10829   /* Replace the second token with total shader length */
10830   tokens = (VGPU10ProgramToken *) emit->buf;
10831   tokens[1].value = emit_get_num_tokens(emit);
10832
10833   if (emit->version >= 40 && !emit->uses_precise_qualifier) {
10834      /* Replace the reserved token with the RefactoringAllowed global flag */
10835      VGPU10OpcodeToken0 *ptoken;
10836
10837      ptoken = (VGPU10OpcodeToken0 *)&tokens[emit->reserved_token];
10838      assert(ptoken->opcodeType == VGPU10_OPCODE_NOP);
10839      ptoken->opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
10840      ptoken->refactoringAllowed = 1;
10841   }
10842
10843   return TRUE;
10844}
10845
10846
/**
 * Run the fragment shader tokens through tgsi_add_two_side() so that
 * the FS reads the BCOLORs and uses the FACE register to choose between
 * the front and back colors.
 *
 * \param tokens  the original fragment shader tokens
 * \return the tokens returned by tgsi_add_two_side()
 */
static const struct tgsi_token *
transform_fs_twoside(const struct tgsi_token *tokens)
{
   const struct tgsi_token *two_sided;

   /* flip to 1 to dump the shader before the transformation */
   if (0) {
      debug_printf("Before tgsi_add_two_side ------------------\n");
      tgsi_dump(tokens,0);
   }

   two_sided = tgsi_add_two_side(tokens);

   /* flip to 1 to dump the shader after the transformation */
   if (0) {
      debug_printf("After tgsi_add_two_side ------------------\n");
      tgsi_dump(two_sided, 0);
   }

   return two_sided;
}
10865
10866
10867/**
10868 * Modify the FS to do polygon stipple.
10869 */
10870static const struct tgsi_token *
10871transform_fs_pstipple(struct svga_shader_emitter_v10 *emit,
10872                      const struct tgsi_token *tokens)
10873{
10874   const struct tgsi_token *new_tokens;
10875   unsigned unit;
10876
10877   if (0) {
10878      debug_printf("Before pstipple ------------------\n");
10879      tgsi_dump(tokens,0);
10880   }
10881
10882   new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
10883                                                     TGSI_FILE_INPUT);
10884
10885   emit->fs.pstipple_sampler_unit = unit;
10886
10887   /* Setup texture state for stipple */
10888   emit->sampler_target[unit] = TGSI_TEXTURE_2D;
10889   emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
10890   emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
10891   emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
10892   emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
10893   emit->key.tex[unit].target = PIPE_TEXTURE_2D;
10894
10895   if (0) {
10896      debug_printf("After pstipple ------------------\n");
10897      tgsi_dump(new_tokens, 0);
10898   }
10899
10900   return new_tokens;
10901}
10902
/**
 * Run the fragment shader tokens through tgsi_add_aa_point() so that
 * the FS supports anti-aliased points.
 *
 * \param tokens          the original fragment shader tokens
 * \param aa_coord_index  passed through to tgsi_add_aa_point()
 * \return the tokens returned by tgsi_add_aa_point()
 */
static const struct tgsi_token *
transform_fs_aapoint(const struct tgsi_token *tokens,
                     int aa_coord_index)
{
   const struct tgsi_token *aa_tokens;

   /* flip to 1 to dump the shader before the transformation */
   if (0) {
      debug_printf("Before tgsi_add_aa_point ------------------\n");
      tgsi_dump(tokens,0);
   }

   aa_tokens = tgsi_add_aa_point(tokens, aa_coord_index);

   /* flip to 1 to dump the shader after the transformation */
   if (0) {
      debug_printf("After tgsi_add_aa_point ------------------\n");
      tgsi_dump(aa_tokens, 0);
   }

   return aa_tokens;
}
10921
10922
10923/**
10924 * A helper function to determine the shader in the previous stage and
10925 * then call the linker function to determine the input mapping for this
10926 * shader to match the output indices from the shader in the previous stage.
10927 */
10928static void
10929compute_input_mapping(struct svga_context *svga,
10930                      struct svga_shader_emitter_v10 *emit,
10931                      enum pipe_shader_type unit)
10932{
10933   struct svga_shader *prevShader = NULL;   /* shader in the previous stage */
10934
10935   if (unit == PIPE_SHADER_FRAGMENT) {
10936      prevShader = svga->curr.gs ?
10937         &svga->curr.gs->base : (svga->curr.tes ?
10938         &svga->curr.tes->base : &svga->curr.vs->base);
10939   } else if (unit == PIPE_SHADER_GEOMETRY) {
10940      prevShader = svga->curr.tes ? &svga->curr.tes->base : &svga->curr.vs->base;
10941   } else if (unit == PIPE_SHADER_TESS_EVAL) {
10942      assert(svga->curr.tcs);
10943      prevShader = &svga->curr.tcs->base;
10944   } else if (unit == PIPE_SHADER_TESS_CTRL) {
10945      assert(svga->curr.vs);
10946      prevShader = &svga->curr.vs->base;
10947   }
10948
10949   if (prevShader != NULL) {
10950      svga_link_shaders(&prevShader->info, &emit->info, &emit->linkage);
10951      emit->prevShaderInfo = &prevShader->info;
10952   }
10953   else {
10954      /**
10955       * Since vertex shader does not need to go through the linker to
10956       * establish the input map, we need to make sure the highest index
10957       * of input registers is set properly here.
10958       */
10959      emit->linkage.input_map_max = MAX2((int)emit->linkage.input_map_max,
10960                                         emit->info.file_max[TGSI_FILE_INPUT]);
10961   }
10962}
10963
10964
10965/**
10966 * Copies the shader signature info to the shader variant
10967 */
10968static void
10969copy_shader_signature(struct svga_shader_signature *sgn,
10970                      struct svga_shader_variant *variant)
10971{
10972   SVGA3dDXShaderSignatureHeader *header = &sgn->header;
10973
10974   /* Calculate the signature length */
10975   variant->signatureLen = sizeof(SVGA3dDXShaderSignatureHeader) +
10976                           (header->numInputSignatures +
10977                            header->numOutputSignatures +
10978                            header->numPatchConstantSignatures) *
10979                           sizeof(SVGA3dDXShaderSignatureEntry);
10980
10981   /* Allocate buffer for the signature info */
10982   variant->signature =
10983      (SVGA3dDXShaderSignatureHeader *)CALLOC(1, variant->signatureLen);
10984
10985   char *sgnBuf = (char *)variant->signature;
10986   unsigned sgnLen;
10987
10988   /* Copy the signature info to the shader variant structure */
10989   memcpy(sgnBuf, &sgn->header, sizeof(SVGA3dDXShaderSignatureHeader));
10990   sgnBuf += sizeof(SVGA3dDXShaderSignatureHeader);
10991
10992   if (header->numInputSignatures) {
10993      sgnLen =
10994         header->numInputSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
10995      memcpy(sgnBuf, &sgn->inputs[0], sgnLen);
10996      sgnBuf += sgnLen;
10997   }
10998
10999   if (header->numOutputSignatures) {
11000      sgnLen =
11001         header->numOutputSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
11002      memcpy(sgnBuf, &sgn->outputs[0], sgnLen);
11003      sgnBuf += sgnLen;
11004   }
11005
11006   if (header->numPatchConstantSignatures) {
11007      sgnLen =
11008         header->numPatchConstantSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
11009      memcpy(sgnBuf, &sgn->patchConstants[0], sgnLen);
11010   }
11011}
11012
11013
11014/**
11015 * This is the main entrypoint for the TGSI -> VPGU10 translator.
11016 */
11017struct svga_shader_variant *
11018svga_tgsi_vgpu10_translate(struct svga_context *svga,
11019                           const struct svga_shader *shader,
11020                           const struct svga_compile_key *key,
11021                           enum pipe_shader_type unit)
11022{
11023   struct svga_shader_variant *variant = NULL;
11024   struct svga_shader_emitter_v10 *emit;
11025   const struct tgsi_token *tokens = shader->tokens;
11026
11027   (void) make_immediate_reg_double;   /* unused at this time */
11028
11029   assert(unit == PIPE_SHADER_VERTEX ||
11030          unit == PIPE_SHADER_GEOMETRY ||
11031          unit == PIPE_SHADER_FRAGMENT ||
11032          unit == PIPE_SHADER_TESS_CTRL ||
11033          unit == PIPE_SHADER_TESS_EVAL ||
11034          unit == PIPE_SHADER_COMPUTE);
11035
11036   /* These two flags cannot be used together */
11037   assert(key->vs.need_prescale + key->vs.undo_viewport <= 1);
11038
11039   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_TGSIVGPU10TRANSLATE);
11040   /*
11041    * Setup the code emitter
11042    */
11043   emit = alloc_emitter();
11044   if (!emit)
11045      goto done;
11046
11047   emit->unit = unit;
11048   if (svga_have_sm5(svga)) {
11049      emit->version = 50;
11050   } else if (svga_have_sm4_1(svga)) {
11051      emit->version = 41;
11052   } else {
11053      emit->version = 40;
11054   }
11055
11056   emit->signature.header.headerVersion = SVGADX_SIGNATURE_HEADER_VERSION_0;
11057
11058   emit->key = *key;
11059
11060   emit->vposition.need_prescale = (emit->key.vs.need_prescale ||
11061                                    emit->key.gs.need_prescale ||
11062                                    emit->key.tes.need_prescale);
11063
11064   /* Determine how many prescale factors in the constant buffer */
11065   emit->vposition.num_prescale = 1;
11066   if (emit->vposition.need_prescale && emit->key.gs.writes_viewport_index) {
11067      assert(emit->unit == PIPE_SHADER_GEOMETRY);
11068      emit->vposition.num_prescale = emit->key.gs.num_prescale;
11069   }
11070
11071   emit->vposition.tmp_index = INVALID_INDEX;
11072   emit->vposition.so_index = INVALID_INDEX;
11073   emit->vposition.out_index = INVALID_INDEX;
11074
11075   emit->vs.vertex_id_sys_index = INVALID_INDEX;
11076   emit->vs.vertex_id_tmp_index = INVALID_INDEX;
11077   emit->vs.vertex_id_bias_index = INVALID_INDEX;
11078
11079   emit->fs.color_tmp_index = INVALID_INDEX;
11080   emit->fs.face_input_index = INVALID_INDEX;
11081   emit->fs.fragcoord_input_index = INVALID_INDEX;
11082   emit->fs.sample_id_sys_index = INVALID_INDEX;
11083   emit->fs.sample_pos_sys_index = INVALID_INDEX;
11084   emit->fs.sample_mask_in_sys_index = INVALID_INDEX;
11085   emit->fs.layer_input_index = INVALID_INDEX;
11086   emit->fs.layer_imm_index = INVALID_INDEX;
11087
11088   emit->gs.prim_id_index = INVALID_INDEX;
11089   emit->gs.invocation_id_sys_index = INVALID_INDEX;
11090   emit->gs.viewport_index_out_index = INVALID_INDEX;
11091   emit->gs.viewport_index_tmp_index = INVALID_INDEX;
11092
11093   emit->tcs.vertices_per_patch_index = INVALID_INDEX;
11094   emit->tcs.invocation_id_sys_index = INVALID_INDEX;
11095   emit->tcs.control_point_input_index = INVALID_INDEX;
11096   emit->tcs.control_point_addr_index = INVALID_INDEX;
11097   emit->tcs.control_point_out_index = INVALID_INDEX;
11098   emit->tcs.control_point_tmp_index = INVALID_INDEX;
11099   emit->tcs.control_point_out_count = 0;
11100   emit->tcs.inner.out_index = INVALID_INDEX;
11101   emit->tcs.inner.out_index = INVALID_INDEX;
11102   emit->tcs.inner.temp_index = INVALID_INDEX;
11103   emit->tcs.inner.tgsi_index = INVALID_INDEX;
11104   emit->tcs.outer.out_index = INVALID_INDEX;
11105   emit->tcs.outer.temp_index = INVALID_INDEX;
11106   emit->tcs.outer.tgsi_index = INVALID_INDEX;
11107   emit->tcs.patch_generic_out_count = 0;
11108   emit->tcs.patch_generic_out_index = INVALID_INDEX;
11109   emit->tcs.patch_generic_tmp_index = INVALID_INDEX;
11110   emit->tcs.prim_id_index = INVALID_INDEX;
11111
11112   emit->tes.tesscoord_sys_index = INVALID_INDEX;
11113   emit->tes.inner.in_index = INVALID_INDEX;
11114   emit->tes.inner.temp_index = INVALID_INDEX;
11115   emit->tes.inner.tgsi_index = INVALID_INDEX;
11116   emit->tes.outer.in_index = INVALID_INDEX;
11117   emit->tes.outer.temp_index = INVALID_INDEX;
11118   emit->tes.outer.tgsi_index = INVALID_INDEX;
11119   emit->tes.prim_id_index = INVALID_INDEX;
11120
11121   emit->clip_dist_out_index = INVALID_INDEX;
11122   emit->clip_dist_tmp_index = INVALID_INDEX;
11123   emit->clip_dist_so_index = INVALID_INDEX;
11124   emit->clip_vertex_out_index = INVALID_INDEX;
11125   emit->clip_vertex_tmp_index = INVALID_INDEX;
11126   emit->svga_debug_callback = svga->debug.callback;
11127
11128   emit->index_range.start_index = INVALID_INDEX;
11129   emit->index_range.count = 0;
11130   emit->index_range.required = FALSE;
11131   emit->index_range.operandType = VGPU10_NUM_OPERANDS;
11132   emit->index_range.dim = 0;
11133   emit->index_range.size = 0;
11134
11135   emit->current_loop_depth = 0;
11136
11137   emit->initialize_temp_index = INVALID_INDEX;
11138
11139   if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) {
11140      emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS;
11141   }
11142
11143   if (unit == PIPE_SHADER_FRAGMENT) {
11144      if (key->fs.light_twoside) {
11145         tokens = transform_fs_twoside(tokens);
11146      }
11147      if (key->fs.pstipple) {
11148         const struct tgsi_token *new_tokens =
11149            transform_fs_pstipple(emit, tokens);
11150         if (tokens != shader->tokens) {
11151            /* free the two-sided shader tokens */
11152            tgsi_free_tokens(tokens);
11153         }
11154         tokens = new_tokens;
11155      }
11156      if (key->fs.aa_point) {
11157         tokens = transform_fs_aapoint(tokens, key->fs.aa_point_coord_index);
11158      }
11159   }
11160
11161   if (SVGA_DEBUG & DEBUG_TGSI) {
11162      debug_printf("#####################################\n");
11163      debug_printf("### TGSI Shader %u\n", shader->id);
11164      tgsi_dump(tokens, 0);
11165   }
11166
11167   /**
11168    * Rescan the header if the token string is different from the one
11169    * included in the shader; otherwise, the header info is already up-to-date
11170    */
11171   if (tokens != shader->tokens) {
11172      tgsi_scan_shader(tokens, &emit->info);
11173   } else {
11174      emit->info = shader->info;
11175   }
11176
11177   emit->num_outputs = emit->info.num_outputs;
11178
11179   /**
11180    * Compute input mapping to match the outputs from shader
11181    * in the previous stage
11182    */
11183   compute_input_mapping(svga, emit, unit);
11184
11185   determine_clipping_mode(emit);
11186
11187   if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX ||
11188       unit == PIPE_SHADER_TESS_CTRL || unit == PIPE_SHADER_TESS_EVAL) {
11189      if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) {
11190         /* if there is stream output declarations associated
11191          * with this shader or the shader writes to ClipDistance
11192          * then reserve extra registers for the non-adjusted vertex position
11193          * and the ClipDistance shadow copy.
11194          */
11195         emit->vposition.so_index = emit->num_outputs++;
11196
11197         if (emit->clip_mode == CLIP_DISTANCE) {
11198            emit->clip_dist_so_index = emit->num_outputs++;
11199            if (emit->info.num_written_clipdistance > 4)
11200               emit->num_outputs++;
11201         }
11202      }
11203   }
11204
11205   /*
11206    * Do actual shader translation.
11207    */
11208   if (!emit_vgpu10_header(emit)) {
11209      debug_printf("svga: emit VGPU10 header failed\n");
11210      goto cleanup;
11211   }
11212
11213   if (!emit_vgpu10_instructions(emit, tokens)) {
11214      debug_printf("svga: emit VGPU10 instructions failed\n");
11215      goto cleanup;
11216   }
11217
11218   if (!emit_vgpu10_tail(emit)) {
11219      debug_printf("svga: emit VGPU10 tail failed\n");
11220      goto cleanup;
11221   }
11222
11223   if (emit->register_overflow) {
11224      goto cleanup;
11225   }
11226
11227   /*
11228    * Create, initialize the 'variant' object.
11229    */
11230   variant = svga_new_shader_variant(svga, unit);
11231   if (!variant)
11232      goto cleanup;
11233
11234   variant->shader = shader;
11235   variant->nr_tokens = emit_get_num_tokens(emit);
11236   variant->tokens = (const unsigned *)emit->buf;
11237
11238   /* Copy shader signature info to the shader variant */
11239   if (svga_have_sm5(svga)) {
11240      copy_shader_signature(&emit->signature, variant);
11241   }
11242
11243   emit->buf = NULL;  /* buffer is no longer owed by emitter context */
11244   memcpy(&variant->key, key, sizeof(*key));
11245   variant->id = UTIL_BITMASK_INVALID_INDEX;
11246
11247   /* The extra constant starting offset starts with the number of
11248    * shader constants declared in the shader.
11249    */
11250   variant->extra_const_start = emit->num_shader_consts[0];
11251   if (key->gs.wide_point) {
11252      /**
11253       * The extra constant added in the transformed shader
11254       * for inverse viewport scale is to be supplied by the driver.
11255       * So the extra constant starting offset needs to be reduced by 1.
11256       */
11257      assert(variant->extra_const_start > 0);
11258      variant->extra_const_start--;
11259   }
11260
11261   if (unit == PIPE_SHADER_FRAGMENT) {
11262      struct svga_fs_variant *fs_variant = svga_fs_variant(variant);
11263
11264      fs_variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit;
11265
11266      /* If there was exactly one write to a fragment shader output register
11267       * and it came from a constant buffer, we know all fragments will have
11268       * the same color (except for blending).
11269       */
11270      fs_variant->constant_color_output =
11271         emit->constant_color_output && emit->num_output_writes == 1;
11272
11273      /** keep track in the variant if flat interpolation is used
11274       *  for any of the varyings.
11275       */
11276      fs_variant->uses_flat_interp = emit->uses_flat_interp;
11277
11278      fs_variant->fs_shadow_compare_units = emit->fs.shadow_compare_units;
11279   }
11280   else if (unit == PIPE_SHADER_TESS_EVAL) {
11281      struct svga_tes_variant *tes_variant = svga_tes_variant(variant);
11282
11283      /* Keep track in the tes variant some of the layout parameters.
11284       * These parameters will be referenced by the tcs to emit
11285       * the necessary declarations for the hull shader.
11286       */
11287      tes_variant->prim_mode = emit->tes.prim_mode;
11288      tes_variant->spacing = emit->tes.spacing;
11289      tes_variant->vertices_order_cw = emit->tes.vertices_order_cw;
11290      tes_variant->point_mode = emit->tes.point_mode;
11291   }
11292
11293
11294   if (tokens != shader->tokens) {
11295      tgsi_free_tokens(tokens);
11296   }
11297
11298cleanup:
11299   free_emitter(emit);
11300
11301done:
11302   SVGA_STATS_TIME_POP(svga_sws(svga));
11303   return variant;
11304}
11305